diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,224085 @@ +{ + "best_global_step": 80000, + "best_metric": 0.09788688, + "best_model_checkpoint": "/home/fit02/dien_workspace/output/v6-20260117-103936/checkpoint-80000", + "epoch": 1.3834726893844411, + "eval_steps": 40000, + "global_step": 160000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 8.646704308652757e-06, + "grad_norm": 30.69170415356784, + "learning_rate": 5.9999999997232855e-06, + "loss": 0.505859375, + "step": 1 + }, + { + "epoch": 4.323352154326379e-05, + "grad_norm": 22.980350177112115, + "learning_rate": 5.999999993082132e-06, + "loss": 0.31048583984375, + "step": 5 + }, + { + "epoch": 8.646704308652757e-05, + "grad_norm": 29.166429649875333, + "learning_rate": 5.999999972328531e-06, + "loss": 0.32708740234375, + "step": 10 + }, + { + "epoch": 0.00012970056462979136, + "grad_norm": 25.540202001742195, + "learning_rate": 5.999999937739193e-06, + "loss": 0.257080078125, + "step": 15 + }, + { + "epoch": 0.00017293408617305515, + "grad_norm": 19.16529881734912, + "learning_rate": 5.999999889314121e-06, + "loss": 0.2935546875, + "step": 20 + }, + { + "epoch": 0.00021616760771631894, + "grad_norm": 17.634208453632606, + "learning_rate": 5.999999827053315e-06, + "loss": 0.13309326171875, + "step": 25 + }, + { + "epoch": 0.0002594011292595827, + "grad_norm": 42.13036139216939, + "learning_rate": 5.999999750956776e-06, + "loss": 0.20992431640625, + "step": 30 + }, + { + "epoch": 0.0003026346508028465, + "grad_norm": 23.019797090648016, + "learning_rate": 5.9999996610245e-06, + "loss": 0.19864501953125, + "step": 35 + }, + { + "epoch": 0.0003458681723461103, + "grad_norm": 15.708704613434696, + "learning_rate": 5.9999995572564936e-06, + "loss": 0.2221435546875, + "step": 40 + }, + { + "epoch": 0.00038910169388937406, + "grad_norm": 7.986770170379777, + "learning_rate": 5.999999439652753e-06, + "loss": 0.06697998046875, + "step": 45 + }, + { + "epoch": 0.0004323352154326379, + "grad_norm": 59.808383045400916, + "learning_rate": 5.99999930821328e-06, + "loss": 0.191656494140625, + "step": 50 + }, + { + "epoch": 0.00047556873697590164, + "grad_norm": 50.73213652704851, + "learning_rate": 5.9999991629380766e-06, + "loss": 0.386651611328125, + "step": 55 + }, + { + "epoch": 0.0005188022585191654, + "grad_norm": 5.384840576299853, + "learning_rate": 5.999999003827141e-06, + "loss": 0.2929443359375, + "step": 60 + }, + { + "epoch": 0.0005620357800624292, + "grad_norm": 43.321363821301375, + "learning_rate": 5.999998830880475e-06, + "loss": 0.216986083984375, + "step": 65 + }, + { + "epoch": 0.000605269301605693, + "grad_norm": 58.35276226004044, + "learning_rate": 5.99999864409808e-06, + "loss": 0.4148681640625, + "step": 70 + }, + { + "epoch": 0.0006485028231489568, + "grad_norm": 2.863501468177139, + "learning_rate": 5.999998443479956e-06, + "loss": 0.278558349609375, + "step": 75 + }, + { + "epoch": 0.0006917363446922206, + "grad_norm": 17.264981633685537, + "learning_rate": 5.999998229026104e-06, + "loss": 0.14821014404296876, + "step": 80 + }, + { + "epoch": 0.0007349698662354843, + "grad_norm": 2.093651587846957, + "learning_rate": 5.999998000736526e-06, + "loss": 0.185540771484375, + "step": 85 + }, + { + "epoch": 0.0007782033877787481, + "grad_norm": 79.22963800616513, + "learning_rate": 5.999997758611221e-06, + "loss": 0.47708587646484374, + "step": 90 + }, + { + "epoch": 0.0008214369093220119, + "grad_norm": 6.427005753081516, + "learning_rate": 5.999997502650193e-06, + "loss": 0.219439697265625, + "step": 95 + }, + { + "epoch": 0.0008646704308652757, + "grad_norm": 64.43567867823378, + "learning_rate": 5.999997232853442e-06, + "loss": 0.28521728515625, + "step": 100 + }, + { + "epoch": 0.0009079039524085395, + "grad_norm": 11.067257610006422, + "learning_rate": 5.999996949220967e-06, + "loss": 0.306146240234375, + "step": 105 + }, + { + "epoch": 0.0009511374739518033, + "grad_norm": 34.743812499121766, + "learning_rate": 5.999996651752772e-06, + "loss": 0.14287109375, + "step": 110 + }, + { + "epoch": 0.000994370995495067, + "grad_norm": 83.73033951086502, + "learning_rate": 5.999996340448857e-06, + "loss": 0.140740966796875, + "step": 115 + }, + { + "epoch": 0.001037604517038331, + "grad_norm": 44.038674453882734, + "learning_rate": 5.999996015309224e-06, + "loss": 0.220672607421875, + "step": 120 + }, + { + "epoch": 0.0010808380385815947, + "grad_norm": 43.2638646220819, + "learning_rate": 5.999995676333876e-06, + "loss": 0.191485595703125, + "step": 125 + }, + { + "epoch": 0.0011240715601248583, + "grad_norm": 49.620883431180424, + "learning_rate": 5.999995323522812e-06, + "loss": 0.4044708251953125, + "step": 130 + }, + { + "epoch": 0.0011673050816681221, + "grad_norm": 4.829138713687337, + "learning_rate": 5.9999949568760346e-06, + "loss": 0.109930419921875, + "step": 135 + }, + { + "epoch": 0.001210538603211386, + "grad_norm": 41.413917854690894, + "learning_rate": 5.999994576393546e-06, + "loss": 0.19189300537109374, + "step": 140 + }, + { + "epoch": 0.0012537721247546498, + "grad_norm": 20.57569994155041, + "learning_rate": 5.999994182075346e-06, + "loss": 0.08933563232421875, + "step": 145 + }, + { + "epoch": 0.0012970056462979136, + "grad_norm": 30.895044938584874, + "learning_rate": 5.999993773921439e-06, + "loss": 0.24969482421875, + "step": 150 + }, + { + "epoch": 0.0013402391678411774, + "grad_norm": 71.1664966365371, + "learning_rate": 5.9999933519318255e-06, + "loss": 0.14020614624023436, + "step": 155 + }, + { + "epoch": 0.0013834726893844412, + "grad_norm": 54.46453245776618, + "learning_rate": 5.999992916106509e-06, + "loss": 0.40771484375, + "step": 160 + }, + { + "epoch": 0.0014267062109277048, + "grad_norm": 5.672531312473166, + "learning_rate": 5.999992466445488e-06, + "loss": 0.0408233642578125, + "step": 165 + }, + { + "epoch": 0.0014699397324709686, + "grad_norm": 11.395618852789942, + "learning_rate": 5.999992002948768e-06, + "loss": 0.4129852294921875, + "step": 170 + }, + { + "epoch": 0.0015131732540142324, + "grad_norm": 11.485529243034025, + "learning_rate": 5.999991525616351e-06, + "loss": 0.22171173095703126, + "step": 175 + }, + { + "epoch": 0.0015564067755574962, + "grad_norm": 14.686943348690539, + "learning_rate": 5.999991034448237e-06, + "loss": 0.4372314453125, + "step": 180 + }, + { + "epoch": 0.00159964029710076, + "grad_norm": 57.39095628738474, + "learning_rate": 5.99999052944443e-06, + "loss": 0.517047119140625, + "step": 185 + }, + { + "epoch": 0.0016428738186440239, + "grad_norm": 14.13548483872567, + "learning_rate": 5.999990010604931e-06, + "loss": 0.36998291015625, + "step": 190 + }, + { + "epoch": 0.0016861073401872877, + "grad_norm": 22.16445332165216, + "learning_rate": 5.999989477929744e-06, + "loss": 0.1513214111328125, + "step": 195 + }, + { + "epoch": 0.0017293408617305515, + "grad_norm": 8.781857891244496, + "learning_rate": 5.9999889314188696e-06, + "loss": 0.16379547119140625, + "step": 200 + }, + { + "epoch": 0.001772574383273815, + "grad_norm": 23.81075639926766, + "learning_rate": 5.999988371072312e-06, + "loss": 0.7389820098876954, + "step": 205 + }, + { + "epoch": 0.001815807904817079, + "grad_norm": 7.547940479565619, + "learning_rate": 5.999987796890073e-06, + "loss": 0.2939453125, + "step": 210 + }, + { + "epoch": 0.0018590414263603427, + "grad_norm": 124.21452023636606, + "learning_rate": 5.9999872088721554e-06, + "loss": 0.3075439453125, + "step": 215 + }, + { + "epoch": 0.0019022749479036065, + "grad_norm": 16.918210419501165, + "learning_rate": 5.999986607018561e-06, + "loss": 0.096478271484375, + "step": 220 + }, + { + "epoch": 0.0019455084694468704, + "grad_norm": 46.28089864798654, + "learning_rate": 5.999985991329295e-06, + "loss": 0.43441162109375, + "step": 225 + }, + { + "epoch": 0.001988741990990134, + "grad_norm": 2.4646719744439065, + "learning_rate": 5.999985361804358e-06, + "loss": 0.17896728515625, + "step": 230 + }, + { + "epoch": 0.0020319755125333978, + "grad_norm": 33.56566476498144, + "learning_rate": 5.999984718443753e-06, + "loss": 0.142181396484375, + "step": 235 + }, + { + "epoch": 0.002075209034076662, + "grad_norm": 72.94922586819679, + "learning_rate": 5.999984061247485e-06, + "loss": 0.373876953125, + "step": 240 + }, + { + "epoch": 0.0021184425556199254, + "grad_norm": 9.187099491791454, + "learning_rate": 5.999983390215554e-06, + "loss": 0.103594970703125, + "step": 245 + }, + { + "epoch": 0.0021616760771631894, + "grad_norm": 12.75558723984392, + "learning_rate": 5.999982705347966e-06, + "loss": 0.1081939697265625, + "step": 250 + }, + { + "epoch": 0.002204909598706453, + "grad_norm": 3.7277787985900415, + "learning_rate": 5.999982006644723e-06, + "loss": 0.2233367919921875, + "step": 255 + }, + { + "epoch": 0.0022481431202497166, + "grad_norm": 52.35047885562998, + "learning_rate": 5.999981294105827e-06, + "loss": 0.31395263671875, + "step": 260 + }, + { + "epoch": 0.0022913766417929807, + "grad_norm": 51.31902652752893, + "learning_rate": 5.999980567731282e-06, + "loss": 0.5368408203125, + "step": 265 + }, + { + "epoch": 0.0023346101633362443, + "grad_norm": 3.6058958839020856, + "learning_rate": 5.999979827521092e-06, + "loss": 0.27401123046875, + "step": 270 + }, + { + "epoch": 0.0023778436848795083, + "grad_norm": 4.942259379887434, + "learning_rate": 5.999979073475261e-06, + "loss": 0.4035919189453125, + "step": 275 + }, + { + "epoch": 0.002421077206422772, + "grad_norm": 21.78749595605452, + "learning_rate": 5.999978305593791e-06, + "loss": 0.35301513671875, + "step": 280 + }, + { + "epoch": 0.002464310727966036, + "grad_norm": 32.31384584652121, + "learning_rate": 5.999977523876686e-06, + "loss": 0.221697998046875, + "step": 285 + }, + { + "epoch": 0.0025075442495092995, + "grad_norm": 12.4228521160173, + "learning_rate": 5.99997672832395e-06, + "loss": 0.3292724609375, + "step": 290 + }, + { + "epoch": 0.002550777771052563, + "grad_norm": 15.745172736244191, + "learning_rate": 5.999975918935587e-06, + "loss": 0.09034347534179688, + "step": 295 + }, + { + "epoch": 0.002594011292595827, + "grad_norm": 2.6214674889334706, + "learning_rate": 5.999975095711599e-06, + "loss": 0.07076263427734375, + "step": 300 + }, + { + "epoch": 0.0026372448141390907, + "grad_norm": 12.793173250259834, + "learning_rate": 5.999974258651991e-06, + "loss": 0.06512451171875, + "step": 305 + }, + { + "epoch": 0.0026804783356823548, + "grad_norm": 4.831093065909661, + "learning_rate": 5.999973407756767e-06, + "loss": 0.5762359619140625, + "step": 310 + }, + { + "epoch": 0.0027237118572256184, + "grad_norm": 31.90614887106872, + "learning_rate": 5.999972543025932e-06, + "loss": 0.32931060791015626, + "step": 315 + }, + { + "epoch": 0.0027669453787688824, + "grad_norm": 27.423380205763962, + "learning_rate": 5.999971664459487e-06, + "loss": 0.22844696044921875, + "step": 320 + }, + { + "epoch": 0.002810178900312146, + "grad_norm": 19.73334312165792, + "learning_rate": 5.9999707720574394e-06, + "loss": 0.175164794921875, + "step": 325 + }, + { + "epoch": 0.0028534124218554096, + "grad_norm": 172.6882312223815, + "learning_rate": 5.99996986581979e-06, + "loss": 0.4881591796875, + "step": 330 + }, + { + "epoch": 0.0028966459433986736, + "grad_norm": 15.8953437719488, + "learning_rate": 5.999968945746546e-06, + "loss": 0.25614013671875, + "step": 335 + }, + { + "epoch": 0.0029398794649419372, + "grad_norm": 5.985020754629737, + "learning_rate": 5.999968011837709e-06, + "loss": 0.3195068359375, + "step": 340 + }, + { + "epoch": 0.0029831129864852013, + "grad_norm": 24.417583069972004, + "learning_rate": 5.999967064093286e-06, + "loss": 0.13448715209960938, + "step": 345 + }, + { + "epoch": 0.003026346508028465, + "grad_norm": 8.508531029632286, + "learning_rate": 5.9999661025132785e-06, + "loss": 0.36274261474609376, + "step": 350 + }, + { + "epoch": 0.003069580029571729, + "grad_norm": 89.15089036281591, + "learning_rate": 5.999965127097694e-06, + "loss": 0.458349609375, + "step": 355 + }, + { + "epoch": 0.0031128135511149925, + "grad_norm": 23.51598237167435, + "learning_rate": 5.999964137846535e-06, + "loss": 0.16612548828125, + "step": 360 + }, + { + "epoch": 0.0031560470726582565, + "grad_norm": 38.905739566655186, + "learning_rate": 5.999963134759806e-06, + "loss": 0.2647216796875, + "step": 365 + }, + { + "epoch": 0.00319928059420152, + "grad_norm": 19.04730139017457, + "learning_rate": 5.999962117837512e-06, + "loss": 0.1453857421875, + "step": 370 + }, + { + "epoch": 0.0032425141157447837, + "grad_norm": 11.111614214191018, + "learning_rate": 5.9999610870796585e-06, + "loss": 0.28504638671875, + "step": 375 + }, + { + "epoch": 0.0032857476372880477, + "grad_norm": 22.8135530514771, + "learning_rate": 5.999960042486248e-06, + "loss": 0.2598423004150391, + "step": 380 + }, + { + "epoch": 0.0033289811588313113, + "grad_norm": 41.89682021345395, + "learning_rate": 5.999958984057289e-06, + "loss": 0.485400390625, + "step": 385 + }, + { + "epoch": 0.0033722146803745754, + "grad_norm": 45.914598586876295, + "learning_rate": 5.999957911792782e-06, + "loss": 0.29040069580078126, + "step": 390 + }, + { + "epoch": 0.003415448201917839, + "grad_norm": 39.63056754751017, + "learning_rate": 5.999956825692736e-06, + "loss": 0.39171600341796875, + "step": 395 + }, + { + "epoch": 0.003458681723461103, + "grad_norm": 4.707189182427223, + "learning_rate": 5.999955725757154e-06, + "loss": 0.1468475341796875, + "step": 400 + }, + { + "epoch": 0.0035019152450043666, + "grad_norm": 26.445785277572213, + "learning_rate": 5.99995461198604e-06, + "loss": 0.2367889404296875, + "step": 405 + }, + { + "epoch": 0.00354514876654763, + "grad_norm": 12.109686624679865, + "learning_rate": 5.999953484379402e-06, + "loss": 0.4776611328125, + "step": 410 + }, + { + "epoch": 0.0035883822880908942, + "grad_norm": 12.170216333017896, + "learning_rate": 5.999952342937243e-06, + "loss": 0.138092041015625, + "step": 415 + }, + { + "epoch": 0.003631615809634158, + "grad_norm": 38.601119410712485, + "learning_rate": 5.999951187659568e-06, + "loss": 0.22474365234375, + "step": 420 + }, + { + "epoch": 0.003674849331177422, + "grad_norm": 25.054794974028642, + "learning_rate": 5.9999500185463844e-06, + "loss": 0.4101806640625, + "step": 425 + }, + { + "epoch": 0.0037180828527206855, + "grad_norm": 59.21161038143297, + "learning_rate": 5.999948835597696e-06, + "loss": 0.3397705078125, + "step": 430 + }, + { + "epoch": 0.0037613163742639495, + "grad_norm": 35.23166003766818, + "learning_rate": 5.999947638813508e-06, + "loss": 0.3279052734375, + "step": 435 + }, + { + "epoch": 0.003804549895807213, + "grad_norm": 62.17118732230262, + "learning_rate": 5.999946428193828e-06, + "loss": 0.514801025390625, + "step": 440 + }, + { + "epoch": 0.0038477834173504767, + "grad_norm": 5.970692363049215, + "learning_rate": 5.999945203738658e-06, + "loss": 0.1433807373046875, + "step": 445 + }, + { + "epoch": 0.0038910169388937407, + "grad_norm": 54.50492844096543, + "learning_rate": 5.999943965448008e-06, + "loss": 0.2858154296875, + "step": 450 + }, + { + "epoch": 0.003934250460437005, + "grad_norm": 100.32593503048905, + "learning_rate": 5.99994271332188e-06, + "loss": 0.1806488037109375, + "step": 455 + }, + { + "epoch": 0.003977483981980268, + "grad_norm": 1.3462781791096388, + "learning_rate": 5.999941447360282e-06, + "loss": 0.2369781494140625, + "step": 460 + }, + { + "epoch": 0.004020717503523532, + "grad_norm": 7.007631765906755, + "learning_rate": 5.999940167563219e-06, + "loss": 0.208935546875, + "step": 465 + }, + { + "epoch": 0.0040639510250667955, + "grad_norm": 16.28267382630778, + "learning_rate": 5.999938873930698e-06, + "loss": 0.404833984375, + "step": 470 + }, + { + "epoch": 0.00410718454661006, + "grad_norm": 14.976406890743995, + "learning_rate": 5.999937566462722e-06, + "loss": 0.291845703125, + "step": 475 + }, + { + "epoch": 0.004150418068153324, + "grad_norm": 26.025086042114182, + "learning_rate": 5.9999362451593e-06, + "loss": 0.35698928833007815, + "step": 480 + }, + { + "epoch": 0.004193651589696587, + "grad_norm": 27.66939981816912, + "learning_rate": 5.999934910020438e-06, + "loss": 0.167236328125, + "step": 485 + }, + { + "epoch": 0.004236885111239851, + "grad_norm": 7.592517524850562, + "learning_rate": 5.9999335610461404e-06, + "loss": 0.20546875, + "step": 490 + }, + { + "epoch": 0.004280118632783114, + "grad_norm": 18.41087947579556, + "learning_rate": 5.999932198236414e-06, + "loss": 0.43670654296875, + "step": 495 + }, + { + "epoch": 0.004323352154326379, + "grad_norm": 24.5694551896633, + "learning_rate": 5.999930821591265e-06, + "loss": 0.30594482421875, + "step": 500 + }, + { + "epoch": 0.0043665856758696425, + "grad_norm": 8.095371086322404, + "learning_rate": 5.999929431110702e-06, + "loss": 0.28787841796875, + "step": 505 + }, + { + "epoch": 0.004409819197412906, + "grad_norm": 20.065422620793658, + "learning_rate": 5.9999280267947276e-06, + "loss": 0.1177276611328125, + "step": 510 + }, + { + "epoch": 0.00445305271895617, + "grad_norm": 55.29587224543418, + "learning_rate": 5.999926608643351e-06, + "loss": 0.28187026977539065, + "step": 515 + }, + { + "epoch": 0.004496286240499433, + "grad_norm": 7.05125009919743, + "learning_rate": 5.999925176656577e-06, + "loss": 0.1441436767578125, + "step": 520 + }, + { + "epoch": 0.004539519762042698, + "grad_norm": 2.640319149265586, + "learning_rate": 5.999923730834416e-06, + "loss": 0.184912109375, + "step": 525 + }, + { + "epoch": 0.004582753283585961, + "grad_norm": 104.68263246045126, + "learning_rate": 5.999922271176869e-06, + "loss": 0.38912353515625, + "step": 530 + }, + { + "epoch": 0.004625986805129225, + "grad_norm": 94.72144136028274, + "learning_rate": 5.999920797683947e-06, + "loss": 0.39127197265625, + "step": 535 + }, + { + "epoch": 0.0046692203266724885, + "grad_norm": 75.7092843557183, + "learning_rate": 5.999919310355655e-06, + "loss": 0.4608001708984375, + "step": 540 + }, + { + "epoch": 0.004712453848215753, + "grad_norm": 22.266064727334527, + "learning_rate": 5.999917809191999e-06, + "loss": 0.15302734375, + "step": 545 + }, + { + "epoch": 0.004755687369759017, + "grad_norm": 45.11106371094184, + "learning_rate": 5.999916294192989e-06, + "loss": 0.259820556640625, + "step": 550 + }, + { + "epoch": 0.00479892089130228, + "grad_norm": 4.909265146104176, + "learning_rate": 5.9999147653586295e-06, + "loss": 0.3109893798828125, + "step": 555 + }, + { + "epoch": 0.004842154412845544, + "grad_norm": 1.0672665376974728, + "learning_rate": 5.999913222688928e-06, + "loss": 0.051959228515625, + "step": 560 + }, + { + "epoch": 0.004885387934388807, + "grad_norm": 45.683059746935946, + "learning_rate": 5.999911666183891e-06, + "loss": 0.18974609375, + "step": 565 + }, + { + "epoch": 0.004928621455932072, + "grad_norm": 24.025690453444195, + "learning_rate": 5.999910095843528e-06, + "loss": 0.2554901123046875, + "step": 570 + }, + { + "epoch": 0.004971854977475335, + "grad_norm": 43.25082123641289, + "learning_rate": 5.999908511667844e-06, + "loss": 0.2219635009765625, + "step": 575 + }, + { + "epoch": 0.005015088499018599, + "grad_norm": 146.18675890665943, + "learning_rate": 5.999906913656847e-06, + "loss": 0.3737396240234375, + "step": 580 + }, + { + "epoch": 0.005058322020561863, + "grad_norm": 2.5878316041822926, + "learning_rate": 5.999905301810545e-06, + "loss": 0.11121063232421875, + "step": 585 + }, + { + "epoch": 0.005101555542105126, + "grad_norm": 2.4215333324042754, + "learning_rate": 5.999903676128943e-06, + "loss": 0.58470458984375, + "step": 590 + }, + { + "epoch": 0.005144789063648391, + "grad_norm": 5.32583396343559, + "learning_rate": 5.9999020366120526e-06, + "loss": 0.477655029296875, + "step": 595 + }, + { + "epoch": 0.005188022585191654, + "grad_norm": 67.80778601240252, + "learning_rate": 5.999900383259878e-06, + "loss": 0.19234619140625, + "step": 600 + }, + { + "epoch": 0.005231256106734918, + "grad_norm": 0.7418958391673961, + "learning_rate": 5.999898716072428e-06, + "loss": 0.3938690185546875, + "step": 605 + }, + { + "epoch": 0.0052744896282781815, + "grad_norm": 98.0219089287339, + "learning_rate": 5.99989703504971e-06, + "loss": 0.41383056640625, + "step": 610 + }, + { + "epoch": 0.005317723149821446, + "grad_norm": 17.363445059207415, + "learning_rate": 5.999895340191732e-06, + "loss": 0.369244384765625, + "step": 615 + }, + { + "epoch": 0.0053609566713647095, + "grad_norm": 4.342789793823146, + "learning_rate": 5.999893631498502e-06, + "loss": 0.28660888671875, + "step": 620 + }, + { + "epoch": 0.005404190192907973, + "grad_norm": 26.04147071053806, + "learning_rate": 5.999891908970028e-06, + "loss": 0.14121551513671876, + "step": 625 + }, + { + "epoch": 0.005447423714451237, + "grad_norm": 5.65839544643318, + "learning_rate": 5.999890172606317e-06, + "loss": 0.5223907470703125, + "step": 630 + }, + { + "epoch": 0.0054906572359945, + "grad_norm": 7.828919738853862, + "learning_rate": 5.999888422407378e-06, + "loss": 0.333868408203125, + "step": 635 + }, + { + "epoch": 0.005533890757537765, + "grad_norm": 72.75611269735138, + "learning_rate": 5.9998866583732176e-06, + "loss": 0.337396240234375, + "step": 640 + }, + { + "epoch": 0.005577124279081028, + "grad_norm": 35.186674621256515, + "learning_rate": 5.999884880503846e-06, + "loss": 0.368280029296875, + "step": 645 + }, + { + "epoch": 0.005620357800624292, + "grad_norm": 5.20784279506819, + "learning_rate": 5.99988308879927e-06, + "loss": 0.360693359375, + "step": 650 + }, + { + "epoch": 0.005663591322167556, + "grad_norm": 15.782033584653481, + "learning_rate": 5.999881283259499e-06, + "loss": 0.39125213623046873, + "step": 655 + }, + { + "epoch": 0.005706824843710819, + "grad_norm": 53.369411713990644, + "learning_rate": 5.999879463884539e-06, + "loss": 0.1732421875, + "step": 660 + }, + { + "epoch": 0.005750058365254084, + "grad_norm": 12.451639573425085, + "learning_rate": 5.999877630674402e-06, + "loss": 0.44468841552734373, + "step": 665 + }, + { + "epoch": 0.005793291886797347, + "grad_norm": 35.45567170416376, + "learning_rate": 5.999875783629094e-06, + "loss": 0.243359375, + "step": 670 + }, + { + "epoch": 0.005836525408340611, + "grad_norm": 3.551875681913648, + "learning_rate": 5.999873922748623e-06, + "loss": 0.28390960693359374, + "step": 675 + }, + { + "epoch": 0.0058797589298838744, + "grad_norm": 71.72780152014747, + "learning_rate": 5.999872048032999e-06, + "loss": 0.4650390625, + "step": 680 + }, + { + "epoch": 0.005922992451427139, + "grad_norm": 28.439718243153028, + "learning_rate": 5.99987015948223e-06, + "loss": 0.525933837890625, + "step": 685 + }, + { + "epoch": 0.0059662259729704025, + "grad_norm": 9.308143929796302, + "learning_rate": 5.999868257096325e-06, + "loss": 0.2978759765625, + "step": 690 + }, + { + "epoch": 0.006009459494513666, + "grad_norm": 66.1218945792235, + "learning_rate": 5.999866340875293e-06, + "loss": 0.2255157470703125, + "step": 695 + }, + { + "epoch": 0.00605269301605693, + "grad_norm": 0.6193870996660518, + "learning_rate": 5.999864410819143e-06, + "loss": 0.253424072265625, + "step": 700 + }, + { + "epoch": 0.006095926537600193, + "grad_norm": 3.1761570674527007, + "learning_rate": 5.999862466927882e-06, + "loss": 0.07955322265625, + "step": 705 + }, + { + "epoch": 0.006139160059143458, + "grad_norm": 5.042210254693577, + "learning_rate": 5.999860509201521e-06, + "loss": 0.0719482421875, + "step": 710 + }, + { + "epoch": 0.006182393580686721, + "grad_norm": 6.131410208880404, + "learning_rate": 5.999858537640068e-06, + "loss": 0.2712646484375, + "step": 715 + }, + { + "epoch": 0.006225627102229985, + "grad_norm": 1.8233141375774047, + "learning_rate": 5.999856552243534e-06, + "loss": 0.2665771484375, + "step": 720 + }, + { + "epoch": 0.0062688606237732486, + "grad_norm": 76.74344934360947, + "learning_rate": 5.999854553011924e-06, + "loss": 0.2696441650390625, + "step": 725 + }, + { + "epoch": 0.006312094145316513, + "grad_norm": 13.025206750127396, + "learning_rate": 5.999852539945252e-06, + "loss": 0.28521080017089845, + "step": 730 + }, + { + "epoch": 0.006355327666859777, + "grad_norm": 16.161631126502122, + "learning_rate": 5.999850513043525e-06, + "loss": 0.27386474609375, + "step": 735 + }, + { + "epoch": 0.00639856118840304, + "grad_norm": 31.503007878965455, + "learning_rate": 5.999848472306752e-06, + "loss": 0.36317138671875, + "step": 740 + }, + { + "epoch": 0.006441794709946304, + "grad_norm": 4.948330129607184, + "learning_rate": 5.999846417734942e-06, + "loss": 0.095733642578125, + "step": 745 + }, + { + "epoch": 0.006485028231489567, + "grad_norm": 2.114668662787215, + "learning_rate": 5.999844349328107e-06, + "loss": 0.6122505187988281, + "step": 750 + }, + { + "epoch": 0.006528261753032832, + "grad_norm": 20.166554245920192, + "learning_rate": 5.999842267086255e-06, + "loss": 0.241546630859375, + "step": 755 + }, + { + "epoch": 0.0065714952745760955, + "grad_norm": 40.43616428222124, + "learning_rate": 5.999840171009395e-06, + "loss": 0.2141693115234375, + "step": 760 + }, + { + "epoch": 0.006614728796119359, + "grad_norm": 24.586387374951368, + "learning_rate": 5.999838061097536e-06, + "loss": 0.5022216796875, + "step": 765 + }, + { + "epoch": 0.006657962317662623, + "grad_norm": 49.84216913762198, + "learning_rate": 5.999835937350691e-06, + "loss": 0.15997314453125, + "step": 770 + }, + { + "epoch": 0.006701195839205886, + "grad_norm": 11.00765055184327, + "learning_rate": 5.999833799768866e-06, + "loss": 0.248175048828125, + "step": 775 + }, + { + "epoch": 0.006744429360749151, + "grad_norm": 4.735034113145377, + "learning_rate": 5.999831648352074e-06, + "loss": 0.2687744140625, + "step": 780 + }, + { + "epoch": 0.006787662882292414, + "grad_norm": 31.405379607172698, + "learning_rate": 5.999829483100323e-06, + "loss": 0.2655517578125, + "step": 785 + }, + { + "epoch": 0.006830896403835678, + "grad_norm": 74.39410325947077, + "learning_rate": 5.999827304013623e-06, + "loss": 0.34338226318359377, + "step": 790 + }, + { + "epoch": 0.0068741299253789415, + "grad_norm": 2.4621660609169136, + "learning_rate": 5.999825111091984e-06, + "loss": 0.2324462890625, + "step": 795 + }, + { + "epoch": 0.006917363446922206, + "grad_norm": 107.31925187423163, + "learning_rate": 5.999822904335418e-06, + "loss": 0.1255157470703125, + "step": 800 + }, + { + "epoch": 0.00696059696846547, + "grad_norm": 32.164245582533155, + "learning_rate": 5.999820683743934e-06, + "loss": 0.23878173828125, + "step": 805 + }, + { + "epoch": 0.007003830490008733, + "grad_norm": 22.078191205559047, + "learning_rate": 5.999818449317542e-06, + "loss": 0.13090972900390624, + "step": 810 + }, + { + "epoch": 0.007047064011551997, + "grad_norm": 72.31241491889341, + "learning_rate": 5.999816201056252e-06, + "loss": 0.14386978149414062, + "step": 815 + }, + { + "epoch": 0.00709029753309526, + "grad_norm": 4.651251594342594, + "learning_rate": 5.999813938960075e-06, + "loss": 0.2696044921875, + "step": 820 + }, + { + "epoch": 0.007133531054638525, + "grad_norm": 6.883180393041758, + "learning_rate": 5.99981166302902e-06, + "loss": 0.272430419921875, + "step": 825 + }, + { + "epoch": 0.0071767645761817885, + "grad_norm": 14.08762748212726, + "learning_rate": 5.999809373263099e-06, + "loss": 0.2283477783203125, + "step": 830 + }, + { + "epoch": 0.007219998097725052, + "grad_norm": 41.51844055302396, + "learning_rate": 5.999807069662322e-06, + "loss": 0.38051605224609375, + "step": 835 + }, + { + "epoch": 0.007263231619268316, + "grad_norm": 12.905600081627105, + "learning_rate": 5.999804752226702e-06, + "loss": 0.32807350158691406, + "step": 840 + }, + { + "epoch": 0.007306465140811579, + "grad_norm": 0.4320694357815857, + "learning_rate": 5.999802420956245e-06, + "loss": 0.302911376953125, + "step": 845 + }, + { + "epoch": 0.007349698662354844, + "grad_norm": 18.930970394887773, + "learning_rate": 5.999800075850966e-06, + "loss": 0.305194091796875, + "step": 850 + }, + { + "epoch": 0.007392932183898107, + "grad_norm": 34.91647231204405, + "learning_rate": 5.999797716910873e-06, + "loss": 0.3130706787109375, + "step": 855 + }, + { + "epoch": 0.007436165705441371, + "grad_norm": 22.406638809889063, + "learning_rate": 5.999795344135979e-06, + "loss": 0.1224456787109375, + "step": 860 + }, + { + "epoch": 0.0074793992269846345, + "grad_norm": 33.15621391923392, + "learning_rate": 5.999792957526293e-06, + "loss": 0.2252716064453125, + "step": 865 + }, + { + "epoch": 0.007522632748527899, + "grad_norm": 4.424353936747384, + "learning_rate": 5.999790557081828e-06, + "loss": 0.26458740234375, + "step": 870 + }, + { + "epoch": 0.007565866270071163, + "grad_norm": 5.889522150933046, + "learning_rate": 5.999788142802593e-06, + "loss": 0.1656982421875, + "step": 875 + }, + { + "epoch": 0.007609099791614426, + "grad_norm": 30.56322408039023, + "learning_rate": 5.999785714688601e-06, + "loss": 0.093170166015625, + "step": 880 + }, + { + "epoch": 0.00765233331315769, + "grad_norm": 20.59309019090293, + "learning_rate": 5.999783272739862e-06, + "loss": 0.09834136962890624, + "step": 885 + }, + { + "epoch": 0.007695566834700953, + "grad_norm": 18.239529853311605, + "learning_rate": 5.999780816956386e-06, + "loss": 0.5489242553710938, + "step": 890 + }, + { + "epoch": 0.007738800356244218, + "grad_norm": 16.08066098653268, + "learning_rate": 5.999778347338189e-06, + "loss": 0.15268402099609374, + "step": 895 + }, + { + "epoch": 0.007782033877787481, + "grad_norm": 14.425071495842806, + "learning_rate": 5.999775863885277e-06, + "loss": 0.112255859375, + "step": 900 + }, + { + "epoch": 0.007825267399330746, + "grad_norm": 23.24070537034975, + "learning_rate": 5.999773366597665e-06, + "loss": 0.51697998046875, + "step": 905 + }, + { + "epoch": 0.00786850092087401, + "grad_norm": 59.54881762752419, + "learning_rate": 5.999770855475363e-06, + "loss": 0.49131011962890625, + "step": 910 + }, + { + "epoch": 0.007911734442417273, + "grad_norm": 15.961179791448544, + "learning_rate": 5.999768330518382e-06, + "loss": 0.4754913330078125, + "step": 915 + }, + { + "epoch": 0.007954967963960537, + "grad_norm": 72.57133358847081, + "learning_rate": 5.9997657917267355e-06, + "loss": 0.2086944580078125, + "step": 920 + }, + { + "epoch": 0.0079982014855038, + "grad_norm": 28.634895707558478, + "learning_rate": 5.999763239100434e-06, + "loss": 0.16082763671875, + "step": 925 + }, + { + "epoch": 0.008041435007047064, + "grad_norm": 0.584014889120324, + "learning_rate": 5.99976067263949e-06, + "loss": 0.08046035766601563, + "step": 930 + }, + { + "epoch": 0.008084668528590327, + "grad_norm": 97.30786171180542, + "learning_rate": 5.999758092343914e-06, + "loss": 0.3278076171875, + "step": 935 + }, + { + "epoch": 0.008127902050133591, + "grad_norm": 59.904623132945, + "learning_rate": 5.999755498213719e-06, + "loss": 0.38942947387695315, + "step": 940 + }, + { + "epoch": 0.008171135571676855, + "grad_norm": 30.578488057608535, + "learning_rate": 5.9997528902489164e-06, + "loss": 0.196563720703125, + "step": 945 + }, + { + "epoch": 0.00821436909322012, + "grad_norm": 14.266176267286063, + "learning_rate": 5.99975026844952e-06, + "loss": 0.1796142578125, + "step": 950 + }, + { + "epoch": 0.008257602614763384, + "grad_norm": 22.992400191787212, + "learning_rate": 5.999747632815539e-06, + "loss": 0.27050094604492186, + "step": 955 + }, + { + "epoch": 0.008300836136306647, + "grad_norm": 2.955753172032549, + "learning_rate": 5.999744983346987e-06, + "loss": 0.23094482421875, + "step": 960 + }, + { + "epoch": 0.00834406965784991, + "grad_norm": 7.600958240839907, + "learning_rate": 5.999742320043877e-06, + "loss": 0.2449951171875, + "step": 965 + }, + { + "epoch": 0.008387303179393174, + "grad_norm": 58.64480918015712, + "learning_rate": 5.99973964290622e-06, + "loss": 0.402081298828125, + "step": 970 + }, + { + "epoch": 0.008430536700936438, + "grad_norm": 44.01683141941139, + "learning_rate": 5.9997369519340285e-06, + "loss": 0.281561279296875, + "step": 975 + }, + { + "epoch": 0.008473770222479702, + "grad_norm": 46.166152068100416, + "learning_rate": 5.9997342471273156e-06, + "loss": 0.25382080078125, + "step": 980 + }, + { + "epoch": 0.008517003744022965, + "grad_norm": 42.0060586563442, + "learning_rate": 5.999731528486093e-06, + "loss": 0.57188720703125, + "step": 985 + }, + { + "epoch": 0.008560237265566229, + "grad_norm": 3.095408707900013, + "learning_rate": 5.999728796010375e-06, + "loss": 0.133538818359375, + "step": 990 + }, + { + "epoch": 0.008603470787109492, + "grad_norm": 3.0507165717705815, + "learning_rate": 5.999726049700171e-06, + "loss": 0.098370361328125, + "step": 995 + }, + { + "epoch": 0.008646704308652758, + "grad_norm": 17.947139622426086, + "learning_rate": 5.999723289555497e-06, + "loss": 0.160003662109375, + "step": 1000 + }, + { + "epoch": 0.008689937830196021, + "grad_norm": 84.67710548030593, + "learning_rate": 5.999720515576363e-06, + "loss": 0.2561004638671875, + "step": 1005 + }, + { + "epoch": 0.008733171351739285, + "grad_norm": 23.074629184408998, + "learning_rate": 5.999717727762784e-06, + "loss": 0.136962890625, + "step": 1010 + }, + { + "epoch": 0.008776404873282549, + "grad_norm": 26.382286122496822, + "learning_rate": 5.999714926114772e-06, + "loss": 0.32681884765625, + "step": 1015 + }, + { + "epoch": 0.008819638394825812, + "grad_norm": 3.0141095094232293, + "learning_rate": 5.999712110632339e-06, + "loss": 0.2332916259765625, + "step": 1020 + }, + { + "epoch": 0.008862871916369076, + "grad_norm": 20.689290495989372, + "learning_rate": 5.999709281315499e-06, + "loss": 0.20615386962890625, + "step": 1025 + }, + { + "epoch": 0.00890610543791234, + "grad_norm": 11.20497205622592, + "learning_rate": 5.999706438164265e-06, + "loss": 0.4220489501953125, + "step": 1030 + }, + { + "epoch": 0.008949338959455603, + "grad_norm": 3.347052669388772, + "learning_rate": 5.999703581178651e-06, + "loss": 0.20501708984375, + "step": 1035 + }, + { + "epoch": 0.008992572480998866, + "grad_norm": 70.06106735098851, + "learning_rate": 5.999700710358667e-06, + "loss": 0.55771484375, + "step": 1040 + }, + { + "epoch": 0.009035806002542132, + "grad_norm": 55.89595688096393, + "learning_rate": 5.99969782570433e-06, + "loss": 0.46756591796875, + "step": 1045 + }, + { + "epoch": 0.009079039524085395, + "grad_norm": 24.83565225010966, + "learning_rate": 5.999694927215651e-06, + "loss": 0.15169525146484375, + "step": 1050 + }, + { + "epoch": 0.009122273045628659, + "grad_norm": 33.24935292598076, + "learning_rate": 5.999692014892644e-06, + "loss": 0.205999755859375, + "step": 1055 + }, + { + "epoch": 0.009165506567171923, + "grad_norm": 7.421645167608678, + "learning_rate": 5.999689088735323e-06, + "loss": 0.2932830810546875, + "step": 1060 + }, + { + "epoch": 0.009208740088715186, + "grad_norm": 15.726335377261643, + "learning_rate": 5.9996861487437005e-06, + "loss": 0.137652587890625, + "step": 1065 + }, + { + "epoch": 0.00925197361025845, + "grad_norm": 24.614470965819542, + "learning_rate": 5.999683194917791e-06, + "loss": 0.195965576171875, + "step": 1070 + }, + { + "epoch": 0.009295207131801713, + "grad_norm": 64.48784353053716, + "learning_rate": 5.999680227257608e-06, + "loss": 0.4311767578125, + "step": 1075 + }, + { + "epoch": 0.009338440653344977, + "grad_norm": 8.751097579774013, + "learning_rate": 5.999677245763164e-06, + "loss": 0.250701904296875, + "step": 1080 + }, + { + "epoch": 0.00938167417488824, + "grad_norm": 55.35934817811069, + "learning_rate": 5.9996742504344744e-06, + "loss": 0.3984375, + "step": 1085 + }, + { + "epoch": 0.009424907696431506, + "grad_norm": 123.79157951459815, + "learning_rate": 5.999671241271552e-06, + "loss": 0.1496034622192383, + "step": 1090 + }, + { + "epoch": 0.00946814121797477, + "grad_norm": 3.53469356066154, + "learning_rate": 5.99966821827441e-06, + "loss": 0.2462371826171875, + "step": 1095 + }, + { + "epoch": 0.009511374739518033, + "grad_norm": 21.993612565870535, + "learning_rate": 5.999665181443064e-06, + "loss": 0.2023406982421875, + "step": 1100 + }, + { + "epoch": 0.009554608261061297, + "grad_norm": 26.38205228527881, + "learning_rate": 5.999662130777527e-06, + "loss": 0.4201263427734375, + "step": 1105 + }, + { + "epoch": 0.00959784178260456, + "grad_norm": 14.474197160523794, + "learning_rate": 5.999659066277813e-06, + "loss": 0.22752685546875, + "step": 1110 + }, + { + "epoch": 0.009641075304147824, + "grad_norm": 2.8823871419252396, + "learning_rate": 5.999655987943937e-06, + "loss": 0.398052978515625, + "step": 1115 + }, + { + "epoch": 0.009684308825691088, + "grad_norm": 14.747047255785882, + "learning_rate": 5.999652895775913e-06, + "loss": 0.115374755859375, + "step": 1120 + }, + { + "epoch": 0.009727542347234351, + "grad_norm": 31.789122397574193, + "learning_rate": 5.999649789773754e-06, + "loss": 0.3745361328125, + "step": 1125 + }, + { + "epoch": 0.009770775868777615, + "grad_norm": 7.0892270854232144, + "learning_rate": 5.999646669937476e-06, + "loss": 0.2119873046875, + "step": 1130 + }, + { + "epoch": 0.00981400939032088, + "grad_norm": 4.638194919715848, + "learning_rate": 5.999643536267092e-06, + "loss": 0.200213623046875, + "step": 1135 + }, + { + "epoch": 0.009857242911864144, + "grad_norm": 26.63062906754843, + "learning_rate": 5.999640388762617e-06, + "loss": 0.15408935546875, + "step": 1140 + }, + { + "epoch": 0.009900476433407407, + "grad_norm": 1.4540781065775639, + "learning_rate": 5.999637227424066e-06, + "loss": 0.06665191650390626, + "step": 1145 + }, + { + "epoch": 0.00994370995495067, + "grad_norm": 10.42894276512875, + "learning_rate": 5.999634052251453e-06, + "loss": 0.2409271240234375, + "step": 1150 + }, + { + "epoch": 0.009986943476493934, + "grad_norm": 58.643651340084205, + "learning_rate": 5.999630863244792e-06, + "loss": 0.232476806640625, + "step": 1155 + }, + { + "epoch": 0.010030176998037198, + "grad_norm": 15.568473533143043, + "learning_rate": 5.999627660404099e-06, + "loss": 0.3025848388671875, + "step": 1160 + }, + { + "epoch": 0.010073410519580462, + "grad_norm": 11.25329402852256, + "learning_rate": 5.999624443729389e-06, + "loss": 0.539898681640625, + "step": 1165 + }, + { + "epoch": 0.010116644041123725, + "grad_norm": 11.455207655728318, + "learning_rate": 5.9996212132206765e-06, + "loss": 0.0753448486328125, + "step": 1170 + }, + { + "epoch": 0.010159877562666989, + "grad_norm": 5.202348763866147, + "learning_rate": 5.999617968877975e-06, + "loss": 0.2951515197753906, + "step": 1175 + }, + { + "epoch": 0.010203111084210252, + "grad_norm": 23.992006705525387, + "learning_rate": 5.9996147107013e-06, + "loss": 0.10251846313476562, + "step": 1180 + }, + { + "epoch": 0.010246344605753518, + "grad_norm": 24.188142249027837, + "learning_rate": 5.9996114386906684e-06, + "loss": 0.3426727294921875, + "step": 1185 + }, + { + "epoch": 0.010289578127296781, + "grad_norm": 1.3848949914325466, + "learning_rate": 5.999608152846093e-06, + "loss": 0.084564208984375, + "step": 1190 + }, + { + "epoch": 0.010332811648840045, + "grad_norm": 21.52213518324404, + "learning_rate": 5.99960485316759e-06, + "loss": 0.3713134765625, + "step": 1195 + }, + { + "epoch": 0.010376045170383309, + "grad_norm": 30.147500882163293, + "learning_rate": 5.999601539655174e-06, + "loss": 0.33199462890625, + "step": 1200 + }, + { + "epoch": 0.010419278691926572, + "grad_norm": 5.689513818134866, + "learning_rate": 5.999598212308862e-06, + "loss": 0.6667667388916015, + "step": 1205 + }, + { + "epoch": 0.010462512213469836, + "grad_norm": 77.15368182019488, + "learning_rate": 5.9995948711286675e-06, + "loss": 0.760540771484375, + "step": 1210 + }, + { + "epoch": 0.0105057457350131, + "grad_norm": 26.857963176765427, + "learning_rate": 5.999591516114606e-06, + "loss": 0.22756423950195312, + "step": 1215 + }, + { + "epoch": 0.010548979256556363, + "grad_norm": 5.326991064879421, + "learning_rate": 5.999588147266693e-06, + "loss": 0.159930419921875, + "step": 1220 + }, + { + "epoch": 0.010592212778099627, + "grad_norm": 25.940460627457906, + "learning_rate": 5.999584764584947e-06, + "loss": 0.13883132934570314, + "step": 1225 + }, + { + "epoch": 0.010635446299642892, + "grad_norm": 21.2218639266948, + "learning_rate": 5.999581368069378e-06, + "loss": 0.23306884765625, + "step": 1230 + }, + { + "epoch": 0.010678679821186155, + "grad_norm": 3.3248386950993662, + "learning_rate": 5.999577957720007e-06, + "loss": 0.2746124267578125, + "step": 1235 + }, + { + "epoch": 0.010721913342729419, + "grad_norm": 57.441754710386185, + "learning_rate": 5.999574533536847e-06, + "loss": 0.364013671875, + "step": 1240 + }, + { + "epoch": 0.010765146864272683, + "grad_norm": 2.317924901922467, + "learning_rate": 5.999571095519913e-06, + "loss": 0.4121917724609375, + "step": 1245 + }, + { + "epoch": 0.010808380385815946, + "grad_norm": 105.0256909676317, + "learning_rate": 5.999567643669224e-06, + "loss": 0.24161376953125, + "step": 1250 + }, + { + "epoch": 0.01085161390735921, + "grad_norm": 4.933614262057416, + "learning_rate": 5.999564177984793e-06, + "loss": 0.4084381103515625, + "step": 1255 + }, + { + "epoch": 0.010894847428902473, + "grad_norm": 42.47078758186598, + "learning_rate": 5.999560698466638e-06, + "loss": 0.3037139892578125, + "step": 1260 + }, + { + "epoch": 0.010938080950445737, + "grad_norm": 22.676071434019466, + "learning_rate": 5.999557205114773e-06, + "loss": 0.155126953125, + "step": 1265 + }, + { + "epoch": 0.010981314471989, + "grad_norm": 12.93025308932436, + "learning_rate": 5.999553697929216e-06, + "loss": 0.136474609375, + "step": 1270 + }, + { + "epoch": 0.011024547993532266, + "grad_norm": 88.38540384253825, + "learning_rate": 5.999550176909981e-06, + "loss": 0.306298828125, + "step": 1275 + }, + { + "epoch": 0.01106778151507553, + "grad_norm": 35.247314053869005, + "learning_rate": 5.999546642057087e-06, + "loss": 0.19305419921875, + "step": 1280 + }, + { + "epoch": 0.011111015036618793, + "grad_norm": 2.825338997381401, + "learning_rate": 5.999543093370549e-06, + "loss": 0.381365966796875, + "step": 1285 + }, + { + "epoch": 0.011154248558162057, + "grad_norm": 36.909515291655325, + "learning_rate": 5.999539530850383e-06, + "loss": 0.811151123046875, + "step": 1290 + }, + { + "epoch": 0.01119748207970532, + "grad_norm": 12.032874838542854, + "learning_rate": 5.999535954496605e-06, + "loss": 0.1037689208984375, + "step": 1295 + }, + { + "epoch": 0.011240715601248584, + "grad_norm": 51.657222412012196, + "learning_rate": 5.999532364309233e-06, + "loss": 0.20723876953125, + "step": 1300 + }, + { + "epoch": 0.011283949122791848, + "grad_norm": 12.078982188905123, + "learning_rate": 5.999528760288283e-06, + "loss": 0.4170379638671875, + "step": 1305 + }, + { + "epoch": 0.011327182644335111, + "grad_norm": 25.519914793630104, + "learning_rate": 5.9995251424337705e-06, + "loss": 0.351129150390625, + "step": 1310 + }, + { + "epoch": 0.011370416165878375, + "grad_norm": 7.840290709011876, + "learning_rate": 5.999521510745714e-06, + "loss": 0.384619140625, + "step": 1315 + }, + { + "epoch": 0.011413649687421638, + "grad_norm": 20.05810237777458, + "learning_rate": 5.999517865224129e-06, + "loss": 0.705126953125, + "step": 1320 + }, + { + "epoch": 0.011456883208964904, + "grad_norm": 48.40040019562467, + "learning_rate": 5.999514205869033e-06, + "loss": 0.44560546875, + "step": 1325 + }, + { + "epoch": 0.011500116730508167, + "grad_norm": 28.309230688075115, + "learning_rate": 5.9995105326804415e-06, + "loss": 0.206243896484375, + "step": 1330 + }, + { + "epoch": 0.011543350252051431, + "grad_norm": 13.659472569059936, + "learning_rate": 5.999506845658373e-06, + "loss": 0.11526336669921874, + "step": 1335 + }, + { + "epoch": 0.011586583773594695, + "grad_norm": 56.89671115002555, + "learning_rate": 5.999503144802844e-06, + "loss": 0.5080780029296875, + "step": 1340 + }, + { + "epoch": 0.011629817295137958, + "grad_norm": 35.22261332115719, + "learning_rate": 5.999499430113872e-06, + "loss": 0.34613189697265623, + "step": 1345 + }, + { + "epoch": 0.011673050816681222, + "grad_norm": 24.32819250226551, + "learning_rate": 5.999495701591473e-06, + "loss": 0.21846389770507812, + "step": 1350 + }, + { + "epoch": 0.011716284338224485, + "grad_norm": 31.2321954749799, + "learning_rate": 5.999491959235665e-06, + "loss": 0.2014404296875, + "step": 1355 + }, + { + "epoch": 0.011759517859767749, + "grad_norm": 68.0263778951928, + "learning_rate": 5.999488203046465e-06, + "loss": 0.3137451171875, + "step": 1360 + }, + { + "epoch": 0.011802751381311012, + "grad_norm": 0.6227645172110449, + "learning_rate": 5.999484433023891e-06, + "loss": 0.25821533203125, + "step": 1365 + }, + { + "epoch": 0.011845984902854278, + "grad_norm": 71.20676324942261, + "learning_rate": 5.999480649167959e-06, + "loss": 0.4734375, + "step": 1370 + }, + { + "epoch": 0.011889218424397541, + "grad_norm": 46.183961318065926, + "learning_rate": 5.999476851478689e-06, + "loss": 0.382086181640625, + "step": 1375 + }, + { + "epoch": 0.011932451945940805, + "grad_norm": 5.211399232143838, + "learning_rate": 5.999473039956095e-06, + "loss": 0.08114089965820312, + "step": 1380 + }, + { + "epoch": 0.011975685467484069, + "grad_norm": 10.172037890255705, + "learning_rate": 5.999469214600197e-06, + "loss": 0.15668487548828125, + "step": 1385 + }, + { + "epoch": 0.012018918989027332, + "grad_norm": 45.40136800270573, + "learning_rate": 5.999465375411012e-06, + "loss": 0.429058837890625, + "step": 1390 + }, + { + "epoch": 0.012062152510570596, + "grad_norm": 20.200383536425857, + "learning_rate": 5.999461522388557e-06, + "loss": 0.5821792602539062, + "step": 1395 + }, + { + "epoch": 0.01210538603211386, + "grad_norm": 43.148643126756404, + "learning_rate": 5.9994576555328525e-06, + "loss": 0.156640625, + "step": 1400 + }, + { + "epoch": 0.012148619553657123, + "grad_norm": 59.79055074526543, + "learning_rate": 5.999453774843913e-06, + "loss": 0.547137451171875, + "step": 1405 + }, + { + "epoch": 0.012191853075200387, + "grad_norm": 25.872721083744807, + "learning_rate": 5.9994498803217575e-06, + "loss": 0.2706451416015625, + "step": 1410 + }, + { + "epoch": 0.012235086596743652, + "grad_norm": 29.021814496554757, + "learning_rate": 5.999445971966404e-06, + "loss": 0.095794677734375, + "step": 1415 + }, + { + "epoch": 0.012278320118286916, + "grad_norm": 23.323128297290822, + "learning_rate": 5.999442049777871e-06, + "loss": 0.0719268798828125, + "step": 1420 + }, + { + "epoch": 0.01232155363983018, + "grad_norm": 10.011526137223253, + "learning_rate": 5.999438113756177e-06, + "loss": 0.13036041259765624, + "step": 1425 + }, + { + "epoch": 0.012364787161373443, + "grad_norm": 13.973036116382996, + "learning_rate": 5.999434163901339e-06, + "loss": 0.15806884765625, + "step": 1430 + }, + { + "epoch": 0.012408020682916706, + "grad_norm": 46.56454081961037, + "learning_rate": 5.999430200213376e-06, + "loss": 0.3862548828125, + "step": 1435 + }, + { + "epoch": 0.01245125420445997, + "grad_norm": 30.434816648433998, + "learning_rate": 5.999426222692305e-06, + "loss": 0.16959075927734374, + "step": 1440 + }, + { + "epoch": 0.012494487726003234, + "grad_norm": 198.73846439017575, + "learning_rate": 5.999422231338145e-06, + "loss": 0.65189208984375, + "step": 1445 + }, + { + "epoch": 0.012537721247546497, + "grad_norm": 7.494860301019546, + "learning_rate": 5.999418226150916e-06, + "loss": 0.271337890625, + "step": 1450 + }, + { + "epoch": 0.01258095476908976, + "grad_norm": 26.112408709431822, + "learning_rate": 5.999414207130635e-06, + "loss": 0.12819671630859375, + "step": 1455 + }, + { + "epoch": 0.012624188290633026, + "grad_norm": 60.82843291134381, + "learning_rate": 5.99941017427732e-06, + "loss": 0.5763145446777344, + "step": 1460 + }, + { + "epoch": 0.01266742181217629, + "grad_norm": 31.719738946027874, + "learning_rate": 5.9994061275909905e-06, + "loss": 0.470343017578125, + "step": 1465 + }, + { + "epoch": 0.012710655333719553, + "grad_norm": 15.191707972758195, + "learning_rate": 5.999402067071666e-06, + "loss": 0.2991943359375, + "step": 1470 + }, + { + "epoch": 0.012753888855262817, + "grad_norm": 37.946862951208225, + "learning_rate": 5.999397992719363e-06, + "loss": 0.4806793212890625, + "step": 1475 + }, + { + "epoch": 0.01279712237680608, + "grad_norm": 11.877547208534995, + "learning_rate": 5.999393904534101e-06, + "loss": 0.4177001953125, + "step": 1480 + }, + { + "epoch": 0.012840355898349344, + "grad_norm": 49.39182082277611, + "learning_rate": 5.999389802515901e-06, + "loss": 0.238262939453125, + "step": 1485 + }, + { + "epoch": 0.012883589419892608, + "grad_norm": 1.584622361009868, + "learning_rate": 5.99938568666478e-06, + "loss": 0.093353271484375, + "step": 1490 + }, + { + "epoch": 0.012926822941435871, + "grad_norm": 71.47032228988076, + "learning_rate": 5.999381556980757e-06, + "loss": 0.241705322265625, + "step": 1495 + }, + { + "epoch": 0.012970056462979135, + "grad_norm": 64.08257036496202, + "learning_rate": 5.999377413463851e-06, + "loss": 0.8442138671875, + "step": 1500 + }, + { + "epoch": 0.013013289984522398, + "grad_norm": 12.201088378734607, + "learning_rate": 5.999373256114081e-06, + "loss": 0.23967056274414061, + "step": 1505 + }, + { + "epoch": 0.013056523506065664, + "grad_norm": 18.07564651611917, + "learning_rate": 5.999369084931467e-06, + "loss": 0.21085205078125, + "step": 1510 + }, + { + "epoch": 0.013099757027608927, + "grad_norm": 26.57657326282244, + "learning_rate": 5.9993648999160275e-06, + "loss": 0.604888916015625, + "step": 1515 + }, + { + "epoch": 0.013142990549152191, + "grad_norm": 2.4276366194038332, + "learning_rate": 5.999360701067782e-06, + "loss": 0.45150146484375, + "step": 1520 + }, + { + "epoch": 0.013186224070695455, + "grad_norm": 28.240685122343738, + "learning_rate": 5.999356488386751e-06, + "loss": 0.3083740234375, + "step": 1525 + }, + { + "epoch": 0.013229457592238718, + "grad_norm": 0.2649914211248649, + "learning_rate": 5.999352261872952e-06, + "loss": 0.27402801513671876, + "step": 1530 + }, + { + "epoch": 0.013272691113781982, + "grad_norm": 20.437262458537976, + "learning_rate": 5.9993480215264055e-06, + "loss": 0.50859375, + "step": 1535 + }, + { + "epoch": 0.013315924635325245, + "grad_norm": 6.443658357164249, + "learning_rate": 5.999343767347131e-06, + "loss": 0.238726806640625, + "step": 1540 + }, + { + "epoch": 0.013359158156868509, + "grad_norm": 97.1397286587378, + "learning_rate": 5.999339499335149e-06, + "loss": 0.3492767333984375, + "step": 1545 + }, + { + "epoch": 0.013402391678411773, + "grad_norm": 57.908205657085865, + "learning_rate": 5.999335217490477e-06, + "loss": 0.302685546875, + "step": 1550 + }, + { + "epoch": 0.013445625199955038, + "grad_norm": 1.7745527132130343, + "learning_rate": 5.9993309218131365e-06, + "loss": 0.42156982421875, + "step": 1555 + }, + { + "epoch": 0.013488858721498301, + "grad_norm": 4.899025695628489, + "learning_rate": 5.999326612303147e-06, + "loss": 0.15135498046875, + "step": 1560 + }, + { + "epoch": 0.013532092243041565, + "grad_norm": 19.408069257137747, + "learning_rate": 5.999322288960528e-06, + "loss": 0.19940185546875, + "step": 1565 + }, + { + "epoch": 0.013575325764584829, + "grad_norm": 37.308392478212916, + "learning_rate": 5.999317951785299e-06, + "loss": 0.53533935546875, + "step": 1570 + }, + { + "epoch": 0.013618559286128092, + "grad_norm": 136.34557198871343, + "learning_rate": 5.999313600777482e-06, + "loss": 0.4788787841796875, + "step": 1575 + }, + { + "epoch": 0.013661792807671356, + "grad_norm": 12.991813517459219, + "learning_rate": 5.999309235937095e-06, + "loss": 0.21937255859375, + "step": 1580 + }, + { + "epoch": 0.01370502632921462, + "grad_norm": 54.271894137363226, + "learning_rate": 5.999304857264159e-06, + "loss": 0.3357666015625, + "step": 1585 + }, + { + "epoch": 0.013748259850757883, + "grad_norm": 7.948112658291266, + "learning_rate": 5.999300464758693e-06, + "loss": 0.4671630859375, + "step": 1590 + }, + { + "epoch": 0.013791493372301147, + "grad_norm": 4.819279479348753, + "learning_rate": 5.999296058420719e-06, + "loss": 0.45455780029296877, + "step": 1595 + }, + { + "epoch": 0.013834726893844412, + "grad_norm": 25.06533908615039, + "learning_rate": 5.9992916382502566e-06, + "loss": 0.258251953125, + "step": 1600 + }, + { + "epoch": 0.013877960415387676, + "grad_norm": 14.441142254239793, + "learning_rate": 5.999287204247326e-06, + "loss": 0.13835906982421875, + "step": 1605 + }, + { + "epoch": 0.01392119393693094, + "grad_norm": 12.983255109550702, + "learning_rate": 5.9992827564119486e-06, + "loss": 0.33604888916015624, + "step": 1610 + }, + { + "epoch": 0.013964427458474203, + "grad_norm": 21.901279682598197, + "learning_rate": 5.9992782947441436e-06, + "loss": 0.27433929443359373, + "step": 1615 + }, + { + "epoch": 0.014007660980017466, + "grad_norm": 8.316977554305007, + "learning_rate": 5.999273819243932e-06, + "loss": 0.17952423095703124, + "step": 1620 + }, + { + "epoch": 0.01405089450156073, + "grad_norm": 3.419964267052973, + "learning_rate": 5.999269329911335e-06, + "loss": 0.1079742431640625, + "step": 1625 + }, + { + "epoch": 0.014094128023103994, + "grad_norm": 12.30457801192187, + "learning_rate": 5.999264826746373e-06, + "loss": 0.2862525939941406, + "step": 1630 + }, + { + "epoch": 0.014137361544647257, + "grad_norm": 8.927715849148413, + "learning_rate": 5.999260309749065e-06, + "loss": 0.2558837890625, + "step": 1635 + }, + { + "epoch": 0.01418059506619052, + "grad_norm": 21.58665168509805, + "learning_rate": 5.999255778919435e-06, + "loss": 0.3130218505859375, + "step": 1640 + }, + { + "epoch": 0.014223828587733786, + "grad_norm": 5.422588667932342, + "learning_rate": 5.9992512342575025e-06, + "loss": 0.19470367431640626, + "step": 1645 + }, + { + "epoch": 0.01426706210927705, + "grad_norm": 5.8677780481254995, + "learning_rate": 5.999246675763288e-06, + "loss": 0.065972900390625, + "step": 1650 + }, + { + "epoch": 0.014310295630820313, + "grad_norm": 23.34715500259292, + "learning_rate": 5.999242103436812e-06, + "loss": 0.259100341796875, + "step": 1655 + }, + { + "epoch": 0.014353529152363577, + "grad_norm": 74.6023519486189, + "learning_rate": 5.9992375172780976e-06, + "loss": 0.363385009765625, + "step": 1660 + }, + { + "epoch": 0.01439676267390684, + "grad_norm": 27.39536300968279, + "learning_rate": 5.9992329172871645e-06, + "loss": 0.26944580078125, + "step": 1665 + }, + { + "epoch": 0.014439996195450104, + "grad_norm": 3.048722344783873, + "learning_rate": 5.999228303464034e-06, + "loss": 0.2505584716796875, + "step": 1670 + }, + { + "epoch": 0.014483229716993368, + "grad_norm": 67.93374376130014, + "learning_rate": 5.999223675808728e-06, + "loss": 0.353765869140625, + "step": 1675 + }, + { + "epoch": 0.014526463238536631, + "grad_norm": 5.13608907135736, + "learning_rate": 5.999219034321266e-06, + "loss": 0.401043701171875, + "step": 1680 + }, + { + "epoch": 0.014569696760079895, + "grad_norm": 71.60251279429755, + "learning_rate": 5.999214379001672e-06, + "loss": 0.1449981689453125, + "step": 1685 + }, + { + "epoch": 0.014612930281623158, + "grad_norm": 6.854786211441044, + "learning_rate": 5.999209709849967e-06, + "loss": 0.228125, + "step": 1690 + }, + { + "epoch": 0.014656163803166424, + "grad_norm": 46.195665203243195, + "learning_rate": 5.999205026866171e-06, + "loss": 0.1592529296875, + "step": 1695 + }, + { + "epoch": 0.014699397324709687, + "grad_norm": 45.89086859179107, + "learning_rate": 5.999200330050306e-06, + "loss": 0.47275390625, + "step": 1700 + }, + { + "epoch": 0.014742630846252951, + "grad_norm": 4.9791201730107595, + "learning_rate": 5.999195619402394e-06, + "loss": 0.61473388671875, + "step": 1705 + }, + { + "epoch": 0.014785864367796215, + "grad_norm": 56.86143857116347, + "learning_rate": 5.999190894922457e-06, + "loss": 0.1508258819580078, + "step": 1710 + }, + { + "epoch": 0.014829097889339478, + "grad_norm": 131.82449215459474, + "learning_rate": 5.999186156610518e-06, + "loss": 0.24370880126953126, + "step": 1715 + }, + { + "epoch": 0.014872331410882742, + "grad_norm": 38.3555778820072, + "learning_rate": 5.9991814044665965e-06, + "loss": 0.376373291015625, + "step": 1720 + }, + { + "epoch": 0.014915564932426005, + "grad_norm": 15.005277584559494, + "learning_rate": 5.999176638490715e-06, + "loss": 0.210748291015625, + "step": 1725 + }, + { + "epoch": 0.014958798453969269, + "grad_norm": 22.772169485578512, + "learning_rate": 5.999171858682896e-06, + "loss": 0.6682510375976562, + "step": 1730 + }, + { + "epoch": 0.015002031975512533, + "grad_norm": 12.679661452313745, + "learning_rate": 5.999167065043162e-06, + "loss": 0.20877685546875, + "step": 1735 + }, + { + "epoch": 0.015045265497055798, + "grad_norm": 12.790795151180843, + "learning_rate": 5.999162257571534e-06, + "loss": 0.1003265380859375, + "step": 1740 + }, + { + "epoch": 0.015088499018599062, + "grad_norm": 41.331833518563556, + "learning_rate": 5.9991574362680345e-06, + "loss": 0.43896484375, + "step": 1745 + }, + { + "epoch": 0.015131732540142325, + "grad_norm": 31.22779044770662, + "learning_rate": 5.9991526011326864e-06, + "loss": 0.133795166015625, + "step": 1750 + }, + { + "epoch": 0.015174966061685589, + "grad_norm": 31.12329299001148, + "learning_rate": 5.999147752165511e-06, + "loss": 0.20987892150878906, + "step": 1755 + }, + { + "epoch": 0.015218199583228852, + "grad_norm": 2.7964922742139273, + "learning_rate": 5.999142889366531e-06, + "loss": 0.14477920532226562, + "step": 1760 + }, + { + "epoch": 0.015261433104772116, + "grad_norm": 0.9912607070141665, + "learning_rate": 5.99913801273577e-06, + "loss": 0.28494415283203123, + "step": 1765 + }, + { + "epoch": 0.01530466662631538, + "grad_norm": 4.947096169118334, + "learning_rate": 5.999133122273249e-06, + "loss": 0.08752751350402832, + "step": 1770 + }, + { + "epoch": 0.015347900147858643, + "grad_norm": 24.962497025994352, + "learning_rate": 5.999128217978991e-06, + "loss": 0.140960693359375, + "step": 1775 + }, + { + "epoch": 0.015391133669401907, + "grad_norm": 2.9721810438442318, + "learning_rate": 5.999123299853019e-06, + "loss": 0.1611419677734375, + "step": 1780 + }, + { + "epoch": 0.015434367190945172, + "grad_norm": 10.960363275105408, + "learning_rate": 5.999118367895355e-06, + "loss": 0.13330078125, + "step": 1785 + }, + { + "epoch": 0.015477600712488436, + "grad_norm": 26.191300792739614, + "learning_rate": 5.999113422106022e-06, + "loss": 0.1732421875, + "step": 1790 + }, + { + "epoch": 0.0155208342340317, + "grad_norm": 33.49451249748265, + "learning_rate": 5.999108462485043e-06, + "loss": 0.14404296875, + "step": 1795 + }, + { + "epoch": 0.015564067755574963, + "grad_norm": 21.14786018269144, + "learning_rate": 5.999103489032441e-06, + "loss": 0.0987823486328125, + "step": 1800 + }, + { + "epoch": 0.015607301277118226, + "grad_norm": 5.277079863300792, + "learning_rate": 5.999098501748239e-06, + "loss": 0.3642822265625, + "step": 1805 + }, + { + "epoch": 0.015650534798661492, + "grad_norm": 69.38959531705747, + "learning_rate": 5.999093500632458e-06, + "loss": 0.28946762084960936, + "step": 1810 + }, + { + "epoch": 0.015693768320204755, + "grad_norm": 18.777953997084126, + "learning_rate": 5.999088485685124e-06, + "loss": 0.2371368408203125, + "step": 1815 + }, + { + "epoch": 0.01573700184174802, + "grad_norm": 6.987756300972136, + "learning_rate": 5.999083456906259e-06, + "loss": 0.3241855621337891, + "step": 1820 + }, + { + "epoch": 0.015780235363291283, + "grad_norm": 24.33090315676269, + "learning_rate": 5.999078414295886e-06, + "loss": 0.06328125, + "step": 1825 + }, + { + "epoch": 0.015823468884834546, + "grad_norm": 35.05030236932646, + "learning_rate": 5.999073357854028e-06, + "loss": 0.6345703125, + "step": 1830 + }, + { + "epoch": 0.01586670240637781, + "grad_norm": 2.0153388207900016, + "learning_rate": 5.999068287580709e-06, + "loss": 0.24466552734375, + "step": 1835 + }, + { + "epoch": 0.015909935927921073, + "grad_norm": 5.794174098225368, + "learning_rate": 5.999063203475952e-06, + "loss": 0.30667724609375, + "step": 1840 + }, + { + "epoch": 0.015953169449464337, + "grad_norm": 5.011081796635994, + "learning_rate": 5.999058105539781e-06, + "loss": 0.09263916015625, + "step": 1845 + }, + { + "epoch": 0.0159964029710076, + "grad_norm": 23.73950251881533, + "learning_rate": 5.999052993772219e-06, + "loss": 0.22084197998046876, + "step": 1850 + }, + { + "epoch": 0.016039636492550864, + "grad_norm": 6.22754510878233, + "learning_rate": 5.999047868173289e-06, + "loss": 0.4479377746582031, + "step": 1855 + }, + { + "epoch": 0.016082870014094128, + "grad_norm": 35.5942393607147, + "learning_rate": 5.999042728743014e-06, + "loss": 0.10564041137695312, + "step": 1860 + }, + { + "epoch": 0.01612610353563739, + "grad_norm": 57.03514143169735, + "learning_rate": 5.9990375754814215e-06, + "loss": 0.4078765869140625, + "step": 1865 + }, + { + "epoch": 0.016169337057180655, + "grad_norm": 20.62340697839167, + "learning_rate": 5.999032408388532e-06, + "loss": 0.21805801391601562, + "step": 1870 + }, + { + "epoch": 0.01621257057872392, + "grad_norm": 3.134763167284692, + "learning_rate": 5.999027227464368e-06, + "loss": 0.28462982177734375, + "step": 1875 + }, + { + "epoch": 0.016255804100267182, + "grad_norm": 25.403968989308364, + "learning_rate": 5.999022032708958e-06, + "loss": 0.106634521484375, + "step": 1880 + }, + { + "epoch": 0.016299037621810446, + "grad_norm": 5.0596507441982626, + "learning_rate": 5.999016824122322e-06, + "loss": 0.194195556640625, + "step": 1885 + }, + { + "epoch": 0.01634227114335371, + "grad_norm": 5.213536493818506, + "learning_rate": 5.999011601704486e-06, + "loss": 0.21570281982421874, + "step": 1890 + }, + { + "epoch": 0.016385504664896973, + "grad_norm": 2.4620337544331226, + "learning_rate": 5.999006365455474e-06, + "loss": 0.2059539794921875, + "step": 1895 + }, + { + "epoch": 0.01642873818644024, + "grad_norm": 68.62748031602169, + "learning_rate": 5.999001115375308e-06, + "loss": 0.182666015625, + "step": 1900 + }, + { + "epoch": 0.016471971707983504, + "grad_norm": 1.4632640182149481, + "learning_rate": 5.998995851464015e-06, + "loss": 0.26763916015625, + "step": 1905 + }, + { + "epoch": 0.016515205229526767, + "grad_norm": 2.966705729851531, + "learning_rate": 5.998990573721619e-06, + "loss": 0.2338470458984375, + "step": 1910 + }, + { + "epoch": 0.01655843875107003, + "grad_norm": 25.91074711740251, + "learning_rate": 5.998985282148142e-06, + "loss": 0.1747802734375, + "step": 1915 + }, + { + "epoch": 0.016601672272613294, + "grad_norm": 7.901563820151338, + "learning_rate": 5.99897997674361e-06, + "loss": 0.12189788818359375, + "step": 1920 + }, + { + "epoch": 0.016644905794156558, + "grad_norm": 42.299459200449164, + "learning_rate": 5.998974657508047e-06, + "loss": 0.459716796875, + "step": 1925 + }, + { + "epoch": 0.01668813931569982, + "grad_norm": 3.8889493990839146, + "learning_rate": 5.998969324441479e-06, + "loss": 0.12099761962890625, + "step": 1930 + }, + { + "epoch": 0.016731372837243085, + "grad_norm": 4.801346358127452, + "learning_rate": 5.998963977543929e-06, + "loss": 0.15751953125, + "step": 1935 + }, + { + "epoch": 0.01677460635878635, + "grad_norm": 74.55486743913696, + "learning_rate": 5.998958616815422e-06, + "loss": 0.3522125244140625, + "step": 1940 + }, + { + "epoch": 0.016817839880329612, + "grad_norm": 13.071645626598812, + "learning_rate": 5.998953242255983e-06, + "loss": 0.4583984375, + "step": 1945 + }, + { + "epoch": 0.016861073401872876, + "grad_norm": 61.35117552428944, + "learning_rate": 5.9989478538656365e-06, + "loss": 0.333538818359375, + "step": 1950 + }, + { + "epoch": 0.01690430692341614, + "grad_norm": 9.377863896463753, + "learning_rate": 5.998942451644408e-06, + "loss": 0.142535400390625, + "step": 1955 + }, + { + "epoch": 0.016947540444959403, + "grad_norm": 12.287238901039498, + "learning_rate": 5.998937035592321e-06, + "loss": 0.2520538330078125, + "step": 1960 + }, + { + "epoch": 0.016990773966502667, + "grad_norm": 29.911927356302918, + "learning_rate": 5.998931605709402e-06, + "loss": 0.18187713623046875, + "step": 1965 + }, + { + "epoch": 0.01703400748804593, + "grad_norm": 8.360146555998506, + "learning_rate": 5.998926161995675e-06, + "loss": 0.2646484375, + "step": 1970 + }, + { + "epoch": 0.017077241009589194, + "grad_norm": 15.978095760188113, + "learning_rate": 5.998920704451166e-06, + "loss": 0.108587646484375, + "step": 1975 + }, + { + "epoch": 0.017120474531132458, + "grad_norm": 11.449448187921563, + "learning_rate": 5.998915233075899e-06, + "loss": 0.158758544921875, + "step": 1980 + }, + { + "epoch": 0.01716370805267572, + "grad_norm": 27.188977517583094, + "learning_rate": 5.9989097478699005e-06, + "loss": 0.27326507568359376, + "step": 1985 + }, + { + "epoch": 0.017206941574218985, + "grad_norm": 45.58464269312311, + "learning_rate": 5.998904248833195e-06, + "loss": 0.214520263671875, + "step": 1990 + }, + { + "epoch": 0.017250175095762252, + "grad_norm": 36.97020403999267, + "learning_rate": 5.998898735965809e-06, + "loss": 0.25567626953125, + "step": 1995 + }, + { + "epoch": 0.017293408617305515, + "grad_norm": 14.416812631810012, + "learning_rate": 5.9988932092677654e-06, + "loss": 0.32529296875, + "step": 2000 + }, + { + "epoch": 0.01733664213884878, + "grad_norm": 13.613209390229633, + "learning_rate": 5.998887668739092e-06, + "loss": 0.3499542236328125, + "step": 2005 + }, + { + "epoch": 0.017379875660392043, + "grad_norm": 31.092349374325142, + "learning_rate": 5.998882114379815e-06, + "loss": 0.17557373046875, + "step": 2010 + }, + { + "epoch": 0.017423109181935306, + "grad_norm": 6.816661072143682, + "learning_rate": 5.998876546189957e-06, + "loss": 0.10596923828125, + "step": 2015 + }, + { + "epoch": 0.01746634270347857, + "grad_norm": 66.1128088676067, + "learning_rate": 5.998870964169546e-06, + "loss": 0.2763053894042969, + "step": 2020 + }, + { + "epoch": 0.017509576225021833, + "grad_norm": 38.74481904161518, + "learning_rate": 5.998865368318606e-06, + "loss": 0.207183837890625, + "step": 2025 + }, + { + "epoch": 0.017552809746565097, + "grad_norm": 3.3470689958668625, + "learning_rate": 5.9988597586371655e-06, + "loss": 0.358245849609375, + "step": 2030 + }, + { + "epoch": 0.01759604326810836, + "grad_norm": 14.187902664170913, + "learning_rate": 5.998854135125248e-06, + "loss": 0.106488037109375, + "step": 2035 + }, + { + "epoch": 0.017639276789651624, + "grad_norm": 1.9339702246256614, + "learning_rate": 5.9988484977828805e-06, + "loss": 0.40565185546875, + "step": 2040 + }, + { + "epoch": 0.017682510311194888, + "grad_norm": 44.15329148955497, + "learning_rate": 5.998842846610088e-06, + "loss": 0.4584442138671875, + "step": 2045 + }, + { + "epoch": 0.01772574383273815, + "grad_norm": 1.1141645801187603, + "learning_rate": 5.998837181606898e-06, + "loss": 0.2024932861328125, + "step": 2050 + }, + { + "epoch": 0.017768977354281415, + "grad_norm": 1.013672845539717, + "learning_rate": 5.998831502773335e-06, + "loss": 0.15552215576171874, + "step": 2055 + }, + { + "epoch": 0.01781221087582468, + "grad_norm": 11.596395553109538, + "learning_rate": 5.998825810109428e-06, + "loss": 0.28316497802734375, + "step": 2060 + }, + { + "epoch": 0.017855444397367942, + "grad_norm": 78.64884578459387, + "learning_rate": 5.9988201036152e-06, + "loss": 0.355133056640625, + "step": 2065 + }, + { + "epoch": 0.017898677918911206, + "grad_norm": 83.00490521104443, + "learning_rate": 5.998814383290679e-06, + "loss": 0.269061279296875, + "step": 2070 + }, + { + "epoch": 0.01794191144045447, + "grad_norm": 1.8775004472519756, + "learning_rate": 5.998808649135891e-06, + "loss": 0.204351806640625, + "step": 2075 + }, + { + "epoch": 0.017985144961997733, + "grad_norm": 22.157916594789686, + "learning_rate": 5.998802901150862e-06, + "loss": 0.207025146484375, + "step": 2080 + }, + { + "epoch": 0.018028378483541, + "grad_norm": 19.81725714552153, + "learning_rate": 5.998797139335619e-06, + "loss": 0.557080078125, + "step": 2085 + }, + { + "epoch": 0.018071612005084264, + "grad_norm": 2.165304538284595, + "learning_rate": 5.998791363690189e-06, + "loss": 0.5327606201171875, + "step": 2090 + }, + { + "epoch": 0.018114845526627527, + "grad_norm": 5.085143482676793, + "learning_rate": 5.998785574214598e-06, + "loss": 0.156707763671875, + "step": 2095 + }, + { + "epoch": 0.01815807904817079, + "grad_norm": 4.431099485474278, + "learning_rate": 5.998779770908873e-06, + "loss": 0.18433685302734376, + "step": 2100 + }, + { + "epoch": 0.018201312569714054, + "grad_norm": 0.9797658032212556, + "learning_rate": 5.998773953773041e-06, + "loss": 0.17810516357421874, + "step": 2105 + }, + { + "epoch": 0.018244546091257318, + "grad_norm": 6.707750967117144, + "learning_rate": 5.998768122807129e-06, + "loss": 0.16854782104492189, + "step": 2110 + }, + { + "epoch": 0.01828777961280058, + "grad_norm": 2.3072983604404413, + "learning_rate": 5.998762278011162e-06, + "loss": 0.194140625, + "step": 2115 + }, + { + "epoch": 0.018331013134343845, + "grad_norm": 10.935574523561115, + "learning_rate": 5.998756419385168e-06, + "loss": 0.31851806640625, + "step": 2120 + }, + { + "epoch": 0.01837424665588711, + "grad_norm": 37.450091969769645, + "learning_rate": 5.9987505469291765e-06, + "loss": 0.2838623046875, + "step": 2125 + }, + { + "epoch": 0.018417480177430372, + "grad_norm": 57.12321270838071, + "learning_rate": 5.998744660643211e-06, + "loss": 0.1112060546875, + "step": 2130 + }, + { + "epoch": 0.018460713698973636, + "grad_norm": 18.157965830717092, + "learning_rate": 5.9987387605273e-06, + "loss": 0.5282470703125, + "step": 2135 + }, + { + "epoch": 0.0185039472205169, + "grad_norm": 5.482891381327876, + "learning_rate": 5.998732846581472e-06, + "loss": 0.209771728515625, + "step": 2140 + }, + { + "epoch": 0.018547180742060163, + "grad_norm": 24.88083272384327, + "learning_rate": 5.998726918805752e-06, + "loss": 0.2626007080078125, + "step": 2145 + }, + { + "epoch": 0.018590414263603427, + "grad_norm": 29.187835960703385, + "learning_rate": 5.998720977200168e-06, + "loss": 0.264825439453125, + "step": 2150 + }, + { + "epoch": 0.01863364778514669, + "grad_norm": 22.78371093213365, + "learning_rate": 5.998715021764749e-06, + "loss": 0.094744873046875, + "step": 2155 + }, + { + "epoch": 0.018676881306689954, + "grad_norm": 38.32772017286862, + "learning_rate": 5.99870905249952e-06, + "loss": 0.327978515625, + "step": 2160 + }, + { + "epoch": 0.018720114828233218, + "grad_norm": 18.92436375685499, + "learning_rate": 5.9987030694045115e-06, + "loss": 0.464910888671875, + "step": 2165 + }, + { + "epoch": 0.01876334834977648, + "grad_norm": 41.32615827645853, + "learning_rate": 5.998697072479748e-06, + "loss": 0.1587921142578125, + "step": 2170 + }, + { + "epoch": 0.018806581871319745, + "grad_norm": 17.229929223089087, + "learning_rate": 5.99869106172526e-06, + "loss": 0.5612357139587403, + "step": 2175 + }, + { + "epoch": 0.018849815392863012, + "grad_norm": 119.1158538501811, + "learning_rate": 5.998685037141072e-06, + "loss": 0.54295654296875, + "step": 2180 + }, + { + "epoch": 0.018893048914406275, + "grad_norm": 1.3782306047149762, + "learning_rate": 5.998678998727215e-06, + "loss": 0.495587158203125, + "step": 2185 + }, + { + "epoch": 0.01893628243594954, + "grad_norm": 54.99120676194636, + "learning_rate": 5.998672946483715e-06, + "loss": 0.341229248046875, + "step": 2190 + }, + { + "epoch": 0.018979515957492803, + "grad_norm": 95.32308723645251, + "learning_rate": 5.9986668804106e-06, + "loss": 0.3261932373046875, + "step": 2195 + }, + { + "epoch": 0.019022749479036066, + "grad_norm": 9.59054502676677, + "learning_rate": 5.998660800507898e-06, + "loss": 0.12318763732910157, + "step": 2200 + }, + { + "epoch": 0.01906598300057933, + "grad_norm": 1.5570070588773688, + "learning_rate": 5.998654706775638e-06, + "loss": 0.11727981567382813, + "step": 2205 + }, + { + "epoch": 0.019109216522122593, + "grad_norm": 26.009106044427963, + "learning_rate": 5.998648599213847e-06, + "loss": 0.4603118896484375, + "step": 2210 + }, + { + "epoch": 0.019152450043665857, + "grad_norm": 46.26931564002567, + "learning_rate": 5.998642477822554e-06, + "loss": 0.3288421630859375, + "step": 2215 + }, + { + "epoch": 0.01919568356520912, + "grad_norm": 7.949564783907723, + "learning_rate": 5.998636342601788e-06, + "loss": 0.16673431396484376, + "step": 2220 + }, + { + "epoch": 0.019238917086752384, + "grad_norm": 34.590496976408716, + "learning_rate": 5.9986301935515745e-06, + "loss": 0.3246002197265625, + "step": 2225 + }, + { + "epoch": 0.019282150608295648, + "grad_norm": 42.98653328923751, + "learning_rate": 5.998624030671944e-06, + "loss": 0.303302001953125, + "step": 2230 + }, + { + "epoch": 0.01932538412983891, + "grad_norm": 4.352570786135551, + "learning_rate": 5.998617853962925e-06, + "loss": 0.14111328125, + "step": 2235 + }, + { + "epoch": 0.019368617651382175, + "grad_norm": 13.08065264324717, + "learning_rate": 5.998611663424544e-06, + "loss": 0.19739990234375, + "step": 2240 + }, + { + "epoch": 0.01941185117292544, + "grad_norm": 16.230850242630975, + "learning_rate": 5.9986054590568326e-06, + "loss": 0.509991455078125, + "step": 2245 + }, + { + "epoch": 0.019455084694468702, + "grad_norm": 28.22010100122366, + "learning_rate": 5.998599240859817e-06, + "loss": 0.3948699951171875, + "step": 2250 + }, + { + "epoch": 0.019498318216011966, + "grad_norm": 8.079849232166206, + "learning_rate": 5.998593008833527e-06, + "loss": 0.183380126953125, + "step": 2255 + }, + { + "epoch": 0.01954155173755523, + "grad_norm": 59.619322725171365, + "learning_rate": 5.99858676297799e-06, + "loss": 0.27463531494140625, + "step": 2260 + }, + { + "epoch": 0.019584785259098493, + "grad_norm": 5.653709346733877, + "learning_rate": 5.998580503293237e-06, + "loss": 0.102459716796875, + "step": 2265 + }, + { + "epoch": 0.01962801878064176, + "grad_norm": 23.56926356423226, + "learning_rate": 5.998574229779295e-06, + "loss": 0.20005416870117188, + "step": 2270 + }, + { + "epoch": 0.019671252302185024, + "grad_norm": 15.304890169713657, + "learning_rate": 5.998567942436193e-06, + "loss": 0.190576171875, + "step": 2275 + }, + { + "epoch": 0.019714485823728287, + "grad_norm": 51.57058828999472, + "learning_rate": 5.9985616412639614e-06, + "loss": 0.2981903076171875, + "step": 2280 + }, + { + "epoch": 0.01975771934527155, + "grad_norm": 9.499988006311836, + "learning_rate": 5.998555326262628e-06, + "loss": 0.16576080322265624, + "step": 2285 + }, + { + "epoch": 0.019800952866814815, + "grad_norm": 11.056508598349795, + "learning_rate": 5.998548997432223e-06, + "loss": 0.13953857421875, + "step": 2290 + }, + { + "epoch": 0.019844186388358078, + "grad_norm": 5.173917743925908, + "learning_rate": 5.998542654772775e-06, + "loss": 0.1368133544921875, + "step": 2295 + }, + { + "epoch": 0.01988741990990134, + "grad_norm": 17.41658515144147, + "learning_rate": 5.998536298284313e-06, + "loss": 0.422552490234375, + "step": 2300 + }, + { + "epoch": 0.019930653431444605, + "grad_norm": 11.29413003563021, + "learning_rate": 5.998529927966867e-06, + "loss": 0.16658935546875, + "step": 2305 + }, + { + "epoch": 0.01997388695298787, + "grad_norm": 11.302952703870117, + "learning_rate": 5.9985235438204655e-06, + "loss": 0.20975341796875, + "step": 2310 + }, + { + "epoch": 0.020017120474531132, + "grad_norm": 11.386947322399136, + "learning_rate": 5.998517145845138e-06, + "loss": 0.2214569091796875, + "step": 2315 + }, + { + "epoch": 0.020060353996074396, + "grad_norm": 3.0958309985100043, + "learning_rate": 5.9985107340409155e-06, + "loss": 0.222125244140625, + "step": 2320 + }, + { + "epoch": 0.02010358751761766, + "grad_norm": 4.180842198511598, + "learning_rate": 5.9985043084078245e-06, + "loss": 0.1969970703125, + "step": 2325 + }, + { + "epoch": 0.020146821039160923, + "grad_norm": 24.787498325511958, + "learning_rate": 5.998497868945898e-06, + "loss": 0.22794189453125, + "step": 2330 + }, + { + "epoch": 0.020190054560704187, + "grad_norm": 55.22182645196355, + "learning_rate": 5.998491415655165e-06, + "loss": 0.51513671875, + "step": 2335 + }, + { + "epoch": 0.02023328808224745, + "grad_norm": 20.309555916905794, + "learning_rate": 5.998484948535653e-06, + "loss": 0.4919921875, + "step": 2340 + }, + { + "epoch": 0.020276521603790714, + "grad_norm": 6.292020531537329, + "learning_rate": 5.998478467587395e-06, + "loss": 0.11231689453125, + "step": 2345 + }, + { + "epoch": 0.020319755125333978, + "grad_norm": 2.653688147569982, + "learning_rate": 5.998471972810417e-06, + "loss": 0.118292236328125, + "step": 2350 + }, + { + "epoch": 0.02036298864687724, + "grad_norm": 0.690335954926542, + "learning_rate": 5.998465464204754e-06, + "loss": 0.1749725341796875, + "step": 2355 + }, + { + "epoch": 0.020406222168420505, + "grad_norm": 1.9466003943414056, + "learning_rate": 5.998458941770431e-06, + "loss": 0.28659820556640625, + "step": 2360 + }, + { + "epoch": 0.020449455689963772, + "grad_norm": 55.42128788046401, + "learning_rate": 5.998452405507481e-06, + "loss": 0.3239013671875, + "step": 2365 + }, + { + "epoch": 0.020492689211507036, + "grad_norm": 111.56419692393244, + "learning_rate": 5.998445855415933e-06, + "loss": 0.4449127197265625, + "step": 2370 + }, + { + "epoch": 0.0205359227330503, + "grad_norm": 4.85511463344986, + "learning_rate": 5.998439291495819e-06, + "loss": 0.17552490234375, + "step": 2375 + }, + { + "epoch": 0.020579156254593563, + "grad_norm": 49.36745007002281, + "learning_rate": 5.9984327137471665e-06, + "loss": 0.41702880859375, + "step": 2380 + }, + { + "epoch": 0.020622389776136826, + "grad_norm": 29.644581159629166, + "learning_rate": 5.998426122170008e-06, + "loss": 0.2177490234375, + "step": 2385 + }, + { + "epoch": 0.02066562329768009, + "grad_norm": 23.028032245699073, + "learning_rate": 5.998419516764372e-06, + "loss": 0.17884597778320313, + "step": 2390 + }, + { + "epoch": 0.020708856819223354, + "grad_norm": 1.8256143357771561, + "learning_rate": 5.9984128975302915e-06, + "loss": 0.095098876953125, + "step": 2395 + }, + { + "epoch": 0.020752090340766617, + "grad_norm": 23.133187752985876, + "learning_rate": 5.998406264467796e-06, + "loss": 0.1510009765625, + "step": 2400 + }, + { + "epoch": 0.02079532386230988, + "grad_norm": 94.9316849884995, + "learning_rate": 5.998399617576915e-06, + "loss": 0.308380126953125, + "step": 2405 + }, + { + "epoch": 0.020838557383853144, + "grad_norm": 63.96154914339019, + "learning_rate": 5.998392956857679e-06, + "loss": 0.26883544921875, + "step": 2410 + }, + { + "epoch": 0.020881790905396408, + "grad_norm": 1.7988698231709315, + "learning_rate": 5.99838628231012e-06, + "loss": 0.542218017578125, + "step": 2415 + }, + { + "epoch": 0.02092502442693967, + "grad_norm": 3.9613718905120447, + "learning_rate": 5.99837959393427e-06, + "loss": 0.3076681137084961, + "step": 2420 + }, + { + "epoch": 0.020968257948482935, + "grad_norm": 4.108601249769285, + "learning_rate": 5.998372891730157e-06, + "loss": 0.12229080200195312, + "step": 2425 + }, + { + "epoch": 0.0210114914700262, + "grad_norm": 35.65529491429013, + "learning_rate": 5.9983661756978135e-06, + "loss": 0.2680816650390625, + "step": 2430 + }, + { + "epoch": 0.021054724991569462, + "grad_norm": 1.6465718930585, + "learning_rate": 5.998359445837269e-06, + "loss": 0.20438232421875, + "step": 2435 + }, + { + "epoch": 0.021097958513112726, + "grad_norm": 5.782053212215411, + "learning_rate": 5.9983527021485575e-06, + "loss": 0.351275634765625, + "step": 2440 + }, + { + "epoch": 0.02114119203465599, + "grad_norm": 4.41386495480498, + "learning_rate": 5.9983459446317075e-06, + "loss": 0.1288818359375, + "step": 2445 + }, + { + "epoch": 0.021184425556199253, + "grad_norm": 7.3173403468577884, + "learning_rate": 5.9983391732867515e-06, + "loss": 0.18839263916015625, + "step": 2450 + }, + { + "epoch": 0.02122765907774252, + "grad_norm": 71.72176200452695, + "learning_rate": 5.998332388113718e-06, + "loss": 0.38502349853515627, + "step": 2455 + }, + { + "epoch": 0.021270892599285784, + "grad_norm": 29.86732055001166, + "learning_rate": 5.998325589112643e-06, + "loss": 0.37933349609375, + "step": 2460 + }, + { + "epoch": 0.021314126120829047, + "grad_norm": 20.16134563992899, + "learning_rate": 5.998318776283555e-06, + "loss": 0.110577392578125, + "step": 2465 + }, + { + "epoch": 0.02135735964237231, + "grad_norm": 3.3257065744569094, + "learning_rate": 5.998311949626485e-06, + "loss": 0.2826171875, + "step": 2470 + }, + { + "epoch": 0.021400593163915575, + "grad_norm": 37.318251201578335, + "learning_rate": 5.998305109141465e-06, + "loss": 0.360357666015625, + "step": 2475 + }, + { + "epoch": 0.021443826685458838, + "grad_norm": 22.76767455060262, + "learning_rate": 5.998298254828527e-06, + "loss": 0.14459686279296874, + "step": 2480 + }, + { + "epoch": 0.021487060207002102, + "grad_norm": 43.07264156643978, + "learning_rate": 5.998291386687702e-06, + "loss": 0.4365043640136719, + "step": 2485 + }, + { + "epoch": 0.021530293728545365, + "grad_norm": 33.565454981076456, + "learning_rate": 5.998284504719023e-06, + "loss": 0.2197601318359375, + "step": 2490 + }, + { + "epoch": 0.02157352725008863, + "grad_norm": 7.288629989188285, + "learning_rate": 5.9982776089225204e-06, + "loss": 0.17109375, + "step": 2495 + }, + { + "epoch": 0.021616760771631893, + "grad_norm": 83.43744312517919, + "learning_rate": 5.998270699298226e-06, + "loss": 0.34671630859375, + "step": 2500 + }, + { + "epoch": 0.021659994293175156, + "grad_norm": 40.371935041657, + "learning_rate": 5.998263775846173e-06, + "loss": 0.4296875, + "step": 2505 + }, + { + "epoch": 0.02170322781471842, + "grad_norm": 0.7329746105251513, + "learning_rate": 5.998256838566391e-06, + "loss": 0.28565521240234376, + "step": 2510 + }, + { + "epoch": 0.021746461336261683, + "grad_norm": 27.40049988333968, + "learning_rate": 5.998249887458915e-06, + "loss": 0.14293975830078126, + "step": 2515 + }, + { + "epoch": 0.021789694857804947, + "grad_norm": 20.452605420241444, + "learning_rate": 5.998242922523774e-06, + "loss": 0.238323974609375, + "step": 2520 + }, + { + "epoch": 0.02183292837934821, + "grad_norm": 13.40607460192467, + "learning_rate": 5.998235943761002e-06, + "loss": 0.254730224609375, + "step": 2525 + }, + { + "epoch": 0.021876161900891474, + "grad_norm": 7.302626491447924, + "learning_rate": 5.9982289511706315e-06, + "loss": 0.225860595703125, + "step": 2530 + }, + { + "epoch": 0.021919395422434738, + "grad_norm": 2.6671832304691043, + "learning_rate": 5.998221944752693e-06, + "loss": 0.2890380859375, + "step": 2535 + }, + { + "epoch": 0.021962628943978, + "grad_norm": 66.00842542578367, + "learning_rate": 5.99821492450722e-06, + "loss": 0.4829082489013672, + "step": 2540 + }, + { + "epoch": 0.022005862465521265, + "grad_norm": 20.998413508134192, + "learning_rate": 5.998207890434246e-06, + "loss": 0.328094482421875, + "step": 2545 + }, + { + "epoch": 0.022049095987064532, + "grad_norm": 11.869252368581579, + "learning_rate": 5.9982008425338e-06, + "loss": 0.2457489013671875, + "step": 2550 + }, + { + "epoch": 0.022092329508607796, + "grad_norm": 20.25775902853539, + "learning_rate": 5.998193780805918e-06, + "loss": 0.254541015625, + "step": 2555 + }, + { + "epoch": 0.02213556303015106, + "grad_norm": 18.527270730513038, + "learning_rate": 5.998186705250631e-06, + "loss": 0.166741943359375, + "step": 2560 + }, + { + "epoch": 0.022178796551694323, + "grad_norm": 3.368536828647671, + "learning_rate": 5.998179615867972e-06, + "loss": 0.2065643310546875, + "step": 2565 + }, + { + "epoch": 0.022222030073237586, + "grad_norm": 115.27740784986246, + "learning_rate": 5.998172512657974e-06, + "loss": 0.28062744140625, + "step": 2570 + }, + { + "epoch": 0.02226526359478085, + "grad_norm": 32.21134710032568, + "learning_rate": 5.998165395620668e-06, + "loss": 0.246148681640625, + "step": 2575 + }, + { + "epoch": 0.022308497116324114, + "grad_norm": 68.86585933784845, + "learning_rate": 5.9981582647560885e-06, + "loss": 0.47183837890625, + "step": 2580 + }, + { + "epoch": 0.022351730637867377, + "grad_norm": 0.13340114438943912, + "learning_rate": 5.998151120064269e-06, + "loss": 0.2786256790161133, + "step": 2585 + }, + { + "epoch": 0.02239496415941064, + "grad_norm": 20.97640923241511, + "learning_rate": 5.99814396154524e-06, + "loss": 0.0532928466796875, + "step": 2590 + }, + { + "epoch": 0.022438197680953904, + "grad_norm": 77.7189391874747, + "learning_rate": 5.998136789199037e-06, + "loss": 0.3734710693359375, + "step": 2595 + }, + { + "epoch": 0.022481431202497168, + "grad_norm": 6.963601539833865, + "learning_rate": 5.9981296030256915e-06, + "loss": 0.24317626953125, + "step": 2600 + }, + { + "epoch": 0.02252466472404043, + "grad_norm": 41.79506390298864, + "learning_rate": 5.998122403025238e-06, + "loss": 0.2158905029296875, + "step": 2605 + }, + { + "epoch": 0.022567898245583695, + "grad_norm": 31.675788557836345, + "learning_rate": 5.998115189197708e-06, + "loss": 0.11409378051757812, + "step": 2610 + }, + { + "epoch": 0.02261113176712696, + "grad_norm": 13.734270340623922, + "learning_rate": 5.998107961543136e-06, + "loss": 0.190777587890625, + "step": 2615 + }, + { + "epoch": 0.022654365288670222, + "grad_norm": 19.426553897626636, + "learning_rate": 5.998100720061555e-06, + "loss": 0.184033203125, + "step": 2620 + }, + { + "epoch": 0.022697598810213486, + "grad_norm": 3.2430384016807627, + "learning_rate": 5.998093464752998e-06, + "loss": 0.2681640625, + "step": 2625 + }, + { + "epoch": 0.02274083233175675, + "grad_norm": 11.243494042734048, + "learning_rate": 5.998086195617499e-06, + "loss": 0.05604248046875, + "step": 2630 + }, + { + "epoch": 0.022784065853300013, + "grad_norm": 7.322345905908056, + "learning_rate": 5.998078912655092e-06, + "loss": 0.323992919921875, + "step": 2635 + }, + { + "epoch": 0.022827299374843277, + "grad_norm": 18.086631363823265, + "learning_rate": 5.99807161586581e-06, + "loss": 0.5193084716796875, + "step": 2640 + }, + { + "epoch": 0.022870532896386544, + "grad_norm": 80.02714309033323, + "learning_rate": 5.998064305249686e-06, + "loss": 0.39014892578125, + "step": 2645 + }, + { + "epoch": 0.022913766417929807, + "grad_norm": 25.748287508469634, + "learning_rate": 5.9980569808067544e-06, + "loss": 0.13689155578613282, + "step": 2650 + }, + { + "epoch": 0.02295699993947307, + "grad_norm": 49.620252069404756, + "learning_rate": 5.99804964253705e-06, + "loss": 0.311785888671875, + "step": 2655 + }, + { + "epoch": 0.023000233461016335, + "grad_norm": 40.51664277551738, + "learning_rate": 5.9980422904406036e-06, + "loss": 0.15326766967773436, + "step": 2660 + }, + { + "epoch": 0.023043466982559598, + "grad_norm": 28.5948118038837, + "learning_rate": 5.998034924517452e-06, + "loss": 0.4631507873535156, + "step": 2665 + }, + { + "epoch": 0.023086700504102862, + "grad_norm": 71.06368900411948, + "learning_rate": 5.998027544767629e-06, + "loss": 0.375274658203125, + "step": 2670 + }, + { + "epoch": 0.023129934025646125, + "grad_norm": 12.787261641200962, + "learning_rate": 5.998020151191167e-06, + "loss": 0.3917755126953125, + "step": 2675 + }, + { + "epoch": 0.02317316754718939, + "grad_norm": 77.81926845264105, + "learning_rate": 5.998012743788101e-06, + "loss": 0.18221817016601563, + "step": 2680 + }, + { + "epoch": 0.023216401068732653, + "grad_norm": 103.85821107680692, + "learning_rate": 5.998005322558465e-06, + "loss": 0.580792236328125, + "step": 2685 + }, + { + "epoch": 0.023259634590275916, + "grad_norm": 70.08195714345425, + "learning_rate": 5.9979978875022936e-06, + "loss": 0.3008697509765625, + "step": 2690 + }, + { + "epoch": 0.02330286811181918, + "grad_norm": 9.065278455069272, + "learning_rate": 5.997990438619621e-06, + "loss": 0.3764312744140625, + "step": 2695 + }, + { + "epoch": 0.023346101633362443, + "grad_norm": 7.591791228990985, + "learning_rate": 5.9979829759104806e-06, + "loss": 0.22577133178710937, + "step": 2700 + }, + { + "epoch": 0.023389335154905707, + "grad_norm": 23.7311828332352, + "learning_rate": 5.997975499374909e-06, + "loss": 0.13056259155273436, + "step": 2705 + }, + { + "epoch": 0.02343256867644897, + "grad_norm": 93.6657062361169, + "learning_rate": 5.997968009012938e-06, + "loss": 0.2710693359375, + "step": 2710 + }, + { + "epoch": 0.023475802197992234, + "grad_norm": 16.822659519851573, + "learning_rate": 5.997960504824605e-06, + "loss": 0.1887481689453125, + "step": 2715 + }, + { + "epoch": 0.023519035719535498, + "grad_norm": 48.33223031919988, + "learning_rate": 5.997952986809942e-06, + "loss": 0.36198577880859373, + "step": 2720 + }, + { + "epoch": 0.02356226924107876, + "grad_norm": 2.0683093586425563, + "learning_rate": 5.997945454968984e-06, + "loss": 0.234759521484375, + "step": 2725 + }, + { + "epoch": 0.023605502762622025, + "grad_norm": 27.708616598112496, + "learning_rate": 5.997937909301768e-06, + "loss": 0.22835693359375, + "step": 2730 + }, + { + "epoch": 0.023648736284165292, + "grad_norm": 8.343092977025664, + "learning_rate": 5.997930349808327e-06, + "loss": 0.30819091796875, + "step": 2735 + }, + { + "epoch": 0.023691969805708556, + "grad_norm": 6.112031386400662, + "learning_rate": 5.997922776488695e-06, + "loss": 0.26456146240234374, + "step": 2740 + }, + { + "epoch": 0.02373520332725182, + "grad_norm": 31.83807882333566, + "learning_rate": 5.997915189342909e-06, + "loss": 0.10802001953125, + "step": 2745 + }, + { + "epoch": 0.023778436848795083, + "grad_norm": 48.35058901749898, + "learning_rate": 5.997907588371004e-06, + "loss": 0.1279327392578125, + "step": 2750 + }, + { + "epoch": 0.023821670370338346, + "grad_norm": 16.51528321332334, + "learning_rate": 5.9978999735730114e-06, + "loss": 0.1399627685546875, + "step": 2755 + }, + { + "epoch": 0.02386490389188161, + "grad_norm": 7.86666466801857, + "learning_rate": 5.997892344948971e-06, + "loss": 0.2607452392578125, + "step": 2760 + }, + { + "epoch": 0.023908137413424874, + "grad_norm": 101.21523433024784, + "learning_rate": 5.9978847024989155e-06, + "loss": 0.51126708984375, + "step": 2765 + }, + { + "epoch": 0.023951370934968137, + "grad_norm": 1.9608258541554962, + "learning_rate": 5.99787704622288e-06, + "loss": 0.13143310546875, + "step": 2770 + }, + { + "epoch": 0.0239946044565114, + "grad_norm": 25.436386772922678, + "learning_rate": 5.997869376120902e-06, + "loss": 0.2486328125, + "step": 2775 + }, + { + "epoch": 0.024037837978054664, + "grad_norm": 31.645287262215966, + "learning_rate": 5.997861692193014e-06, + "loss": 0.22600326538085938, + "step": 2780 + }, + { + "epoch": 0.024081071499597928, + "grad_norm": 60.38633435295071, + "learning_rate": 5.997853994439253e-06, + "loss": 0.44593505859375, + "step": 2785 + }, + { + "epoch": 0.02412430502114119, + "grad_norm": 42.03104834241949, + "learning_rate": 5.997846282859655e-06, + "loss": 0.3621734619140625, + "step": 2790 + }, + { + "epoch": 0.024167538542684455, + "grad_norm": 2.9814278773270613, + "learning_rate": 5.997838557454255e-06, + "loss": 0.43660888671875, + "step": 2795 + }, + { + "epoch": 0.02421077206422772, + "grad_norm": 1.330602674332869, + "learning_rate": 5.9978308182230875e-06, + "loss": 0.3610420227050781, + "step": 2800 + }, + { + "epoch": 0.024254005585770982, + "grad_norm": 7.214576244906337, + "learning_rate": 5.997823065166189e-06, + "loss": 0.1289398193359375, + "step": 2805 + }, + { + "epoch": 0.024297239107314246, + "grad_norm": 35.58307775519597, + "learning_rate": 5.997815298283597e-06, + "loss": 0.2060516357421875, + "step": 2810 + }, + { + "epoch": 0.02434047262885751, + "grad_norm": 18.875792368550904, + "learning_rate": 5.997807517575344e-06, + "loss": 0.16671142578125, + "step": 2815 + }, + { + "epoch": 0.024383706150400773, + "grad_norm": 23.15700776234825, + "learning_rate": 5.9977997230414686e-06, + "loss": 0.31947021484375, + "step": 2820 + }, + { + "epoch": 0.024426939671944037, + "grad_norm": 52.11033155770522, + "learning_rate": 5.997791914682005e-06, + "loss": 0.4217418670654297, + "step": 2825 + }, + { + "epoch": 0.024470173193487304, + "grad_norm": 35.41116564387331, + "learning_rate": 5.9977840924969905e-06, + "loss": 0.504559326171875, + "step": 2830 + }, + { + "epoch": 0.024513406715030567, + "grad_norm": 13.971566293826367, + "learning_rate": 5.99777625648646e-06, + "loss": 0.08721923828125, + "step": 2835 + }, + { + "epoch": 0.02455664023657383, + "grad_norm": 21.093255685040994, + "learning_rate": 5.997768406650451e-06, + "loss": 0.184588623046875, + "step": 2840 + }, + { + "epoch": 0.024599873758117095, + "grad_norm": 9.508181561661893, + "learning_rate": 5.997760542988999e-06, + "loss": 0.157720947265625, + "step": 2845 + }, + { + "epoch": 0.02464310727966036, + "grad_norm": 23.56605046250613, + "learning_rate": 5.997752665502141e-06, + "loss": 0.10666961669921875, + "step": 2850 + }, + { + "epoch": 0.024686340801203622, + "grad_norm": 44.47265041082804, + "learning_rate": 5.997744774189911e-06, + "loss": 0.2896587371826172, + "step": 2855 + }, + { + "epoch": 0.024729574322746885, + "grad_norm": 14.579798957716823, + "learning_rate": 5.9977368690523475e-06, + "loss": 0.1756927490234375, + "step": 2860 + }, + { + "epoch": 0.02477280784429015, + "grad_norm": 7.375173975149876, + "learning_rate": 5.997728950089487e-06, + "loss": 0.551678466796875, + "step": 2865 + }, + { + "epoch": 0.024816041365833413, + "grad_norm": 15.16599513244944, + "learning_rate": 5.997721017301365e-06, + "loss": 0.2669677734375, + "step": 2870 + }, + { + "epoch": 0.024859274887376676, + "grad_norm": 24.772346301116986, + "learning_rate": 5.997713070688018e-06, + "loss": 0.1708221435546875, + "step": 2875 + }, + { + "epoch": 0.02490250840891994, + "grad_norm": 10.562579660700841, + "learning_rate": 5.9977051102494845e-06, + "loss": 0.1812255859375, + "step": 2880 + }, + { + "epoch": 0.024945741930463203, + "grad_norm": 1.4809417510653369, + "learning_rate": 5.9976971359857995e-06, + "loss": 0.13055343627929689, + "step": 2885 + }, + { + "epoch": 0.024988975452006467, + "grad_norm": 7.0886088357632175, + "learning_rate": 5.997689147896999e-06, + "loss": 0.1756103515625, + "step": 2890 + }, + { + "epoch": 0.02503220897354973, + "grad_norm": 21.2770983832096, + "learning_rate": 5.997681145983123e-06, + "loss": 0.3319305419921875, + "step": 2895 + }, + { + "epoch": 0.025075442495092994, + "grad_norm": 12.616195587504663, + "learning_rate": 5.997673130244205e-06, + "loss": 0.2115966796875, + "step": 2900 + }, + { + "epoch": 0.025118676016636258, + "grad_norm": 1.3410583156073774, + "learning_rate": 5.997665100680284e-06, + "loss": 0.3109619140625, + "step": 2905 + }, + { + "epoch": 0.02516190953817952, + "grad_norm": 1.7093169913582922, + "learning_rate": 5.997657057291396e-06, + "loss": 0.222412109375, + "step": 2910 + }, + { + "epoch": 0.025205143059722785, + "grad_norm": 2.4118129854304966, + "learning_rate": 5.997649000077579e-06, + "loss": 0.06026763916015625, + "step": 2915 + }, + { + "epoch": 0.025248376581266052, + "grad_norm": 8.745206622483181, + "learning_rate": 5.997640929038869e-06, + "loss": 0.40472412109375, + "step": 2920 + }, + { + "epoch": 0.025291610102809316, + "grad_norm": 10.08870411408643, + "learning_rate": 5.997632844175305e-06, + "loss": 0.552099609375, + "step": 2925 + }, + { + "epoch": 0.02533484362435258, + "grad_norm": 24.456176968186227, + "learning_rate": 5.997624745486922e-06, + "loss": 0.183111572265625, + "step": 2930 + }, + { + "epoch": 0.025378077145895843, + "grad_norm": 63.5244369473623, + "learning_rate": 5.997616632973759e-06, + "loss": 0.6268058776855469, + "step": 2935 + }, + { + "epoch": 0.025421310667439107, + "grad_norm": 54.54848474726506, + "learning_rate": 5.997608506635853e-06, + "loss": 0.3764904022216797, + "step": 2940 + }, + { + "epoch": 0.02546454418898237, + "grad_norm": 21.263576265654308, + "learning_rate": 5.997600366473241e-06, + "loss": 0.3692626953125, + "step": 2945 + }, + { + "epoch": 0.025507777710525634, + "grad_norm": 1.6074080571941034, + "learning_rate": 5.997592212485962e-06, + "loss": 0.1380584716796875, + "step": 2950 + }, + { + "epoch": 0.025551011232068897, + "grad_norm": 42.928844680240374, + "learning_rate": 5.997584044674053e-06, + "loss": 0.2227203369140625, + "step": 2955 + }, + { + "epoch": 0.02559424475361216, + "grad_norm": 63.31496698541111, + "learning_rate": 5.99757586303755e-06, + "loss": 0.3593620300292969, + "step": 2960 + }, + { + "epoch": 0.025637478275155424, + "grad_norm": 14.786586660773526, + "learning_rate": 5.997567667576493e-06, + "loss": 0.2767486572265625, + "step": 2965 + }, + { + "epoch": 0.025680711796698688, + "grad_norm": 17.927523633035477, + "learning_rate": 5.997559458290917e-06, + "loss": 0.2374237060546875, + "step": 2970 + }, + { + "epoch": 0.02572394531824195, + "grad_norm": 18.67336974505587, + "learning_rate": 5.997551235180863e-06, + "loss": 0.24334716796875, + "step": 2975 + }, + { + "epoch": 0.025767178839785215, + "grad_norm": 3.2652934081938314, + "learning_rate": 5.997542998246367e-06, + "loss": 0.2560394287109375, + "step": 2980 + }, + { + "epoch": 0.02581041236132848, + "grad_norm": 13.682846752872754, + "learning_rate": 5.997534747487469e-06, + "loss": 0.2900848388671875, + "step": 2985 + }, + { + "epoch": 0.025853645882871742, + "grad_norm": 20.436379807891015, + "learning_rate": 5.997526482904204e-06, + "loss": 0.2799102783203125, + "step": 2990 + }, + { + "epoch": 0.025896879404415006, + "grad_norm": 14.417464438132459, + "learning_rate": 5.997518204496612e-06, + "loss": 0.15360374450683595, + "step": 2995 + }, + { + "epoch": 0.02594011292595827, + "grad_norm": 1.8716700068534777, + "learning_rate": 5.997509912264732e-06, + "loss": 0.14351119995117187, + "step": 3000 + }, + { + "epoch": 0.025983346447501533, + "grad_norm": 52.886530707313966, + "learning_rate": 5.9975016062085995e-06, + "loss": 0.68323974609375, + "step": 3005 + }, + { + "epoch": 0.026026579969044797, + "grad_norm": 38.792462889736086, + "learning_rate": 5.997493286328255e-06, + "loss": 0.2332061767578125, + "step": 3010 + }, + { + "epoch": 0.026069813490588064, + "grad_norm": 17.41141840960718, + "learning_rate": 5.9974849526237375e-06, + "loss": 0.17144775390625, + "step": 3015 + }, + { + "epoch": 0.026113047012131328, + "grad_norm": 13.814936329141107, + "learning_rate": 5.997476605095082e-06, + "loss": 0.3119243621826172, + "step": 3020 + }, + { + "epoch": 0.02615628053367459, + "grad_norm": 7.1008120757706585, + "learning_rate": 5.997468243742331e-06, + "loss": 0.4421905517578125, + "step": 3025 + }, + { + "epoch": 0.026199514055217855, + "grad_norm": 2.1577065374598874, + "learning_rate": 5.997459868565522e-06, + "loss": 0.20199623107910156, + "step": 3030 + }, + { + "epoch": 0.02624274757676112, + "grad_norm": 35.13734759711331, + "learning_rate": 5.997451479564692e-06, + "loss": 0.173529052734375, + "step": 3035 + }, + { + "epoch": 0.026285981098304382, + "grad_norm": 69.61751155136011, + "learning_rate": 5.997443076739879e-06, + "loss": 0.23892822265625, + "step": 3040 + }, + { + "epoch": 0.026329214619847646, + "grad_norm": 2.8137946023980307, + "learning_rate": 5.997434660091126e-06, + "loss": 0.436309814453125, + "step": 3045 + }, + { + "epoch": 0.02637244814139091, + "grad_norm": 18.361725388586635, + "learning_rate": 5.997426229618468e-06, + "loss": 0.2005615234375, + "step": 3050 + }, + { + "epoch": 0.026415681662934173, + "grad_norm": 26.848565354923903, + "learning_rate": 5.997417785321945e-06, + "loss": 0.17088050842285157, + "step": 3055 + }, + { + "epoch": 0.026458915184477436, + "grad_norm": 34.74080169835595, + "learning_rate": 5.9974093272015956e-06, + "loss": 0.48145751953125, + "step": 3060 + }, + { + "epoch": 0.0265021487060207, + "grad_norm": 54.53856665971964, + "learning_rate": 5.99740085525746e-06, + "loss": 0.38733673095703125, + "step": 3065 + }, + { + "epoch": 0.026545382227563964, + "grad_norm": 3.679741926551467, + "learning_rate": 5.997392369489577e-06, + "loss": 0.09459075927734376, + "step": 3070 + }, + { + "epoch": 0.026588615749107227, + "grad_norm": 49.38743752246049, + "learning_rate": 5.997383869897985e-06, + "loss": 0.24397735595703124, + "step": 3075 + }, + { + "epoch": 0.02663184927065049, + "grad_norm": 1.0446837103229294, + "learning_rate": 5.997375356482722e-06, + "loss": 0.13250808715820311, + "step": 3080 + }, + { + "epoch": 0.026675082792193754, + "grad_norm": 9.291373967783189, + "learning_rate": 5.99736682924383e-06, + "loss": 0.1861663818359375, + "step": 3085 + }, + { + "epoch": 0.026718316313737018, + "grad_norm": 3.8725614452115096, + "learning_rate": 5.997358288181347e-06, + "loss": 0.262213134765625, + "step": 3090 + }, + { + "epoch": 0.02676154983528028, + "grad_norm": 145.9333207164126, + "learning_rate": 5.997349733295312e-06, + "loss": 0.3151214599609375, + "step": 3095 + }, + { + "epoch": 0.026804783356823545, + "grad_norm": 47.851908968631, + "learning_rate": 5.997341164585766e-06, + "loss": 0.207794189453125, + "step": 3100 + }, + { + "epoch": 0.026848016878366812, + "grad_norm": 4.728578866750632, + "learning_rate": 5.9973325820527456e-06, + "loss": 0.09859466552734375, + "step": 3105 + }, + { + "epoch": 0.026891250399910076, + "grad_norm": 17.619698584830438, + "learning_rate": 5.997323985696293e-06, + "loss": 0.12610015869140626, + "step": 3110 + }, + { + "epoch": 0.02693448392145334, + "grad_norm": 80.72666923179347, + "learning_rate": 5.9973153755164476e-06, + "loss": 0.4305755615234375, + "step": 3115 + }, + { + "epoch": 0.026977717442996603, + "grad_norm": 9.441292945282546, + "learning_rate": 5.9973067515132475e-06, + "loss": 0.111090087890625, + "step": 3120 + }, + { + "epoch": 0.027020950964539867, + "grad_norm": 1.055409911348263, + "learning_rate": 5.997298113686734e-06, + "loss": 0.39486236572265626, + "step": 3125 + }, + { + "epoch": 0.02706418448608313, + "grad_norm": 29.64659101438804, + "learning_rate": 5.997289462036947e-06, + "loss": 0.103045654296875, + "step": 3130 + }, + { + "epoch": 0.027107418007626394, + "grad_norm": 29.797538746468156, + "learning_rate": 5.997280796563925e-06, + "loss": 0.315155029296875, + "step": 3135 + }, + { + "epoch": 0.027150651529169657, + "grad_norm": 12.987577558699488, + "learning_rate": 5.997272117267709e-06, + "loss": 0.21756591796875, + "step": 3140 + }, + { + "epoch": 0.02719388505071292, + "grad_norm": 12.298782493339774, + "learning_rate": 5.997263424148339e-06, + "loss": 0.09227447509765625, + "step": 3145 + }, + { + "epoch": 0.027237118572256185, + "grad_norm": 12.66489272787251, + "learning_rate": 5.9972547172058545e-06, + "loss": 0.235931396484375, + "step": 3150 + }, + { + "epoch": 0.027280352093799448, + "grad_norm": 12.189312285091548, + "learning_rate": 5.997245996440297e-06, + "loss": 0.44429931640625, + "step": 3155 + }, + { + "epoch": 0.027323585615342712, + "grad_norm": 17.417800001850626, + "learning_rate": 5.9972372618517055e-06, + "loss": 0.2357086181640625, + "step": 3160 + }, + { + "epoch": 0.027366819136885975, + "grad_norm": 13.999948142858274, + "learning_rate": 5.99722851344012e-06, + "loss": 0.349993896484375, + "step": 3165 + }, + { + "epoch": 0.02741005265842924, + "grad_norm": 7.704162197938255, + "learning_rate": 5.997219751205582e-06, + "loss": 0.282666015625, + "step": 3170 + }, + { + "epoch": 0.027453286179972503, + "grad_norm": 46.82042180101868, + "learning_rate": 5.9972109751481304e-06, + "loss": 0.21712150573730468, + "step": 3175 + }, + { + "epoch": 0.027496519701515766, + "grad_norm": 2.084323908134423, + "learning_rate": 5.997202185267809e-06, + "loss": 0.20875244140625, + "step": 3180 + }, + { + "epoch": 0.02753975322305903, + "grad_norm": 3.0957050866367397, + "learning_rate": 5.997193381564654e-06, + "loss": 0.403033447265625, + "step": 3185 + }, + { + "epoch": 0.027582986744602293, + "grad_norm": 0.6862442144992524, + "learning_rate": 5.997184564038708e-06, + "loss": 0.30354995727539064, + "step": 3190 + }, + { + "epoch": 0.027626220266145557, + "grad_norm": 5.799839042449476, + "learning_rate": 5.997175732690012e-06, + "loss": 0.26839599609375, + "step": 3195 + }, + { + "epoch": 0.027669453787688824, + "grad_norm": 18.749087667409288, + "learning_rate": 5.997166887518607e-06, + "loss": 0.21404266357421875, + "step": 3200 + }, + { + "epoch": 0.027712687309232088, + "grad_norm": 1.0524284502852963, + "learning_rate": 5.997158028524532e-06, + "loss": 0.16273651123046876, + "step": 3205 + }, + { + "epoch": 0.02775592083077535, + "grad_norm": 21.91139529087551, + "learning_rate": 5.997149155707829e-06, + "loss": 0.410211181640625, + "step": 3210 + }, + { + "epoch": 0.027799154352318615, + "grad_norm": 31.25793975778974, + "learning_rate": 5.997140269068539e-06, + "loss": 0.1681488037109375, + "step": 3215 + }, + { + "epoch": 0.02784238787386188, + "grad_norm": 29.688396160433925, + "learning_rate": 5.997131368606704e-06, + "loss": 0.40801849365234377, + "step": 3220 + }, + { + "epoch": 0.027885621395405142, + "grad_norm": 2.213107605654, + "learning_rate": 5.997122454322363e-06, + "loss": 0.07279052734375, + "step": 3225 + }, + { + "epoch": 0.027928854916948406, + "grad_norm": 27.031007505101464, + "learning_rate": 5.997113526215558e-06, + "loss": 0.17974853515625, + "step": 3230 + }, + { + "epoch": 0.02797208843849167, + "grad_norm": 14.77375193246778, + "learning_rate": 5.9971045842863304e-06, + "loss": 0.2685455322265625, + "step": 3235 + }, + { + "epoch": 0.028015321960034933, + "grad_norm": 74.07538677629535, + "learning_rate": 5.997095628534722e-06, + "loss": 0.42432708740234376, + "step": 3240 + }, + { + "epoch": 0.028058555481578196, + "grad_norm": 11.543939536105901, + "learning_rate": 5.997086658960773e-06, + "loss": 0.18756103515625, + "step": 3245 + }, + { + "epoch": 0.02810178900312146, + "grad_norm": 28.97870416608045, + "learning_rate": 5.997077675564525e-06, + "loss": 0.070654296875, + "step": 3250 + }, + { + "epoch": 0.028145022524664724, + "grad_norm": 48.07595864722879, + "learning_rate": 5.9970686783460196e-06, + "loss": 0.40447235107421875, + "step": 3255 + }, + { + "epoch": 0.028188256046207987, + "grad_norm": 17.55588083369903, + "learning_rate": 5.997059667305298e-06, + "loss": 0.4455841064453125, + "step": 3260 + }, + { + "epoch": 0.02823148956775125, + "grad_norm": 72.57056989971534, + "learning_rate": 5.997050642442402e-06, + "loss": 0.35833740234375, + "step": 3265 + }, + { + "epoch": 0.028274723089294514, + "grad_norm": 6.370243112931571, + "learning_rate": 5.997041603757373e-06, + "loss": 0.49825439453125, + "step": 3270 + }, + { + "epoch": 0.028317956610837778, + "grad_norm": 28.91141149716969, + "learning_rate": 5.9970325512502546e-06, + "loss": 0.27255096435546877, + "step": 3275 + }, + { + "epoch": 0.02836119013238104, + "grad_norm": 9.256921744516841, + "learning_rate": 5.997023484921086e-06, + "loss": 0.2096343994140625, + "step": 3280 + }, + { + "epoch": 0.028404423653924305, + "grad_norm": 8.024245340559172, + "learning_rate": 5.997014404769909e-06, + "loss": 0.128118896484375, + "step": 3285 + }, + { + "epoch": 0.028447657175467572, + "grad_norm": 42.033060329360744, + "learning_rate": 5.997005310796767e-06, + "loss": 0.29150390625, + "step": 3290 + }, + { + "epoch": 0.028490890697010836, + "grad_norm": 2.259246273208778, + "learning_rate": 5.996996203001702e-06, + "loss": 0.208941650390625, + "step": 3295 + }, + { + "epoch": 0.0285341242185541, + "grad_norm": 0.16510899505717194, + "learning_rate": 5.996987081384754e-06, + "loss": 0.31137847900390625, + "step": 3300 + }, + { + "epoch": 0.028577357740097363, + "grad_norm": 80.57817193995824, + "learning_rate": 5.9969779459459675e-06, + "loss": 0.2820220947265625, + "step": 3305 + }, + { + "epoch": 0.028620591261640627, + "grad_norm": 9.804699474584329, + "learning_rate": 5.996968796685383e-06, + "loss": 0.1303497314453125, + "step": 3310 + }, + { + "epoch": 0.02866382478318389, + "grad_norm": 18.14665383658453, + "learning_rate": 5.996959633603043e-06, + "loss": 0.2966552734375, + "step": 3315 + }, + { + "epoch": 0.028707058304727154, + "grad_norm": 19.07897166970668, + "learning_rate": 5.99695045669899e-06, + "loss": 0.10964736938476563, + "step": 3320 + }, + { + "epoch": 0.028750291826270417, + "grad_norm": 12.904850455645622, + "learning_rate": 5.996941265973267e-06, + "loss": 0.153790283203125, + "step": 3325 + }, + { + "epoch": 0.02879352534781368, + "grad_norm": 32.52600638125149, + "learning_rate": 5.996932061425916e-06, + "loss": 0.1558837890625, + "step": 3330 + }, + { + "epoch": 0.028836758869356945, + "grad_norm": 3.7503977760453564, + "learning_rate": 5.996922843056979e-06, + "loss": 0.2754638671875, + "step": 3335 + }, + { + "epoch": 0.028879992390900208, + "grad_norm": 8.645883202142748, + "learning_rate": 5.996913610866498e-06, + "loss": 0.19954071044921876, + "step": 3340 + }, + { + "epoch": 0.028923225912443472, + "grad_norm": 14.005410740765539, + "learning_rate": 5.996904364854516e-06, + "loss": 0.149029541015625, + "step": 3345 + }, + { + "epoch": 0.028966459433986735, + "grad_norm": 24.92721347019756, + "learning_rate": 5.996895105021076e-06, + "loss": 0.2620941162109375, + "step": 3350 + }, + { + "epoch": 0.02900969295553, + "grad_norm": 3.04910791088459, + "learning_rate": 5.9968858313662216e-06, + "loss": 0.4684539794921875, + "step": 3355 + }, + { + "epoch": 0.029052926477073263, + "grad_norm": 14.062360181944278, + "learning_rate": 5.996876543889994e-06, + "loss": 0.105029296875, + "step": 3360 + }, + { + "epoch": 0.029096159998616526, + "grad_norm": 4.077204256452031, + "learning_rate": 5.996867242592436e-06, + "loss": 0.164752197265625, + "step": 3365 + }, + { + "epoch": 0.02913939352015979, + "grad_norm": 11.33704687715369, + "learning_rate": 5.996857927473592e-06, + "loss": 0.0915679931640625, + "step": 3370 + }, + { + "epoch": 0.029182627041703053, + "grad_norm": 3.901707283810482, + "learning_rate": 5.996848598533503e-06, + "loss": 0.19094619750976563, + "step": 3375 + }, + { + "epoch": 0.029225860563246317, + "grad_norm": 1.4870554236468227, + "learning_rate": 5.996839255772214e-06, + "loss": 0.163604736328125, + "step": 3380 + }, + { + "epoch": 0.029269094084789584, + "grad_norm": 24.140011349262434, + "learning_rate": 5.996829899189768e-06, + "loss": 0.17459869384765625, + "step": 3385 + }, + { + "epoch": 0.029312327606332848, + "grad_norm": 13.158265429576902, + "learning_rate": 5.996820528786205e-06, + "loss": 0.360260009765625, + "step": 3390 + }, + { + "epoch": 0.02935556112787611, + "grad_norm": 54.368160743125216, + "learning_rate": 5.9968111445615715e-06, + "loss": 0.6174301147460938, + "step": 3395 + }, + { + "epoch": 0.029398794649419375, + "grad_norm": 0.867643045191869, + "learning_rate": 5.99680174651591e-06, + "loss": 0.0692169189453125, + "step": 3400 + }, + { + "epoch": 0.02944202817096264, + "grad_norm": 11.512034184212174, + "learning_rate": 5.996792334649264e-06, + "loss": 0.42943115234375, + "step": 3405 + }, + { + "epoch": 0.029485261692505902, + "grad_norm": 17.25017567215352, + "learning_rate": 5.996782908961676e-06, + "loss": 0.3407958984375, + "step": 3410 + }, + { + "epoch": 0.029528495214049166, + "grad_norm": 11.353753552272824, + "learning_rate": 5.99677346945319e-06, + "loss": 0.28388137817382814, + "step": 3415 + }, + { + "epoch": 0.02957172873559243, + "grad_norm": 25.202261136893235, + "learning_rate": 5.99676401612385e-06, + "loss": 0.2816905975341797, + "step": 3420 + }, + { + "epoch": 0.029614962257135693, + "grad_norm": 18.372482778650564, + "learning_rate": 5.996754548973699e-06, + "loss": 0.265313720703125, + "step": 3425 + }, + { + "epoch": 0.029658195778678956, + "grad_norm": 20.53342201697129, + "learning_rate": 5.99674506800278e-06, + "loss": 0.2863311767578125, + "step": 3430 + }, + { + "epoch": 0.02970142930022222, + "grad_norm": 15.4114921302518, + "learning_rate": 5.996735573211138e-06, + "loss": 0.170391845703125, + "step": 3435 + }, + { + "epoch": 0.029744662821765484, + "grad_norm": 3.143870309681989, + "learning_rate": 5.996726064598817e-06, + "loss": 0.16829681396484375, + "step": 3440 + }, + { + "epoch": 0.029787896343308747, + "grad_norm": 25.413276741038164, + "learning_rate": 5.99671654216586e-06, + "loss": 0.3943115234375, + "step": 3445 + }, + { + "epoch": 0.02983112986485201, + "grad_norm": 55.58218087073351, + "learning_rate": 5.99670700591231e-06, + "loss": 0.34083251953125, + "step": 3450 + }, + { + "epoch": 0.029874363386395274, + "grad_norm": 1.2156580268696426, + "learning_rate": 5.996697455838213e-06, + "loss": 0.149127197265625, + "step": 3455 + }, + { + "epoch": 0.029917596907938538, + "grad_norm": 29.947438587076025, + "learning_rate": 5.996687891943613e-06, + "loss": 0.4609619140625, + "step": 3460 + }, + { + "epoch": 0.0299608304294818, + "grad_norm": 34.100335091408915, + "learning_rate": 5.996678314228552e-06, + "loss": 0.427960205078125, + "step": 3465 + }, + { + "epoch": 0.030004063951025065, + "grad_norm": 14.960554849307984, + "learning_rate": 5.996668722693076e-06, + "loss": 0.148828125, + "step": 3470 + }, + { + "epoch": 0.030047297472568332, + "grad_norm": 63.99925596310221, + "learning_rate": 5.996659117337228e-06, + "loss": 0.3944000244140625, + "step": 3475 + }, + { + "epoch": 0.030090530994111596, + "grad_norm": 5.526860071348966, + "learning_rate": 5.996649498161053e-06, + "loss": 0.1851409912109375, + "step": 3480 + }, + { + "epoch": 0.03013376451565486, + "grad_norm": 13.466948340822842, + "learning_rate": 5.996639865164595e-06, + "loss": 0.1194549560546875, + "step": 3485 + }, + { + "epoch": 0.030176998037198123, + "grad_norm": 71.97531973743867, + "learning_rate": 5.996630218347901e-06, + "loss": 0.5230728149414062, + "step": 3490 + }, + { + "epoch": 0.030220231558741387, + "grad_norm": 3.786303546996447, + "learning_rate": 5.996620557711011e-06, + "loss": 0.291363525390625, + "step": 3495 + }, + { + "epoch": 0.03026346508028465, + "grad_norm": 10.169356997900382, + "learning_rate": 5.996610883253972e-06, + "loss": 0.24525299072265624, + "step": 3500 + }, + { + "epoch": 0.030306698601827914, + "grad_norm": 7.361379643105908, + "learning_rate": 5.996601194976829e-06, + "loss": 0.1677398681640625, + "step": 3505 + }, + { + "epoch": 0.030349932123371177, + "grad_norm": 34.19114106995583, + "learning_rate": 5.9965914928796255e-06, + "loss": 0.315673828125, + "step": 3510 + }, + { + "epoch": 0.03039316564491444, + "grad_norm": 7.474679845869253, + "learning_rate": 5.996581776962407e-06, + "loss": 0.178466796875, + "step": 3515 + }, + { + "epoch": 0.030436399166457705, + "grad_norm": 55.85878957245971, + "learning_rate": 5.996572047225219e-06, + "loss": 0.41751861572265625, + "step": 3520 + }, + { + "epoch": 0.030479632688000968, + "grad_norm": 30.728262778387496, + "learning_rate": 5.996562303668104e-06, + "loss": 0.422613525390625, + "step": 3525 + }, + { + "epoch": 0.030522866209544232, + "grad_norm": 2.1002541259604492, + "learning_rate": 5.996552546291109e-06, + "loss": 0.6267059326171875, + "step": 3530 + }, + { + "epoch": 0.030566099731087495, + "grad_norm": 7.382406438801549, + "learning_rate": 5.996542775094278e-06, + "loss": 0.2985992431640625, + "step": 3535 + }, + { + "epoch": 0.03060933325263076, + "grad_norm": 13.167003450670862, + "learning_rate": 5.996532990077657e-06, + "loss": 0.337408447265625, + "step": 3540 + }, + { + "epoch": 0.030652566774174023, + "grad_norm": 3.5641622739762786, + "learning_rate": 5.996523191241291e-06, + "loss": 0.548974609375, + "step": 3545 + }, + { + "epoch": 0.030695800295717286, + "grad_norm": 38.150681950212764, + "learning_rate": 5.9965133785852245e-06, + "loss": 0.1925537109375, + "step": 3550 + }, + { + "epoch": 0.03073903381726055, + "grad_norm": 15.785050746289002, + "learning_rate": 5.996503552109504e-06, + "loss": 0.27313232421875, + "step": 3555 + }, + { + "epoch": 0.030782267338803813, + "grad_norm": 25.19579044174956, + "learning_rate": 5.996493711814172e-06, + "loss": 0.25560302734375, + "step": 3560 + }, + { + "epoch": 0.030825500860347077, + "grad_norm": 45.97132494070094, + "learning_rate": 5.996483857699277e-06, + "loss": 0.178375244140625, + "step": 3565 + }, + { + "epoch": 0.030868734381890344, + "grad_norm": 22.157523663121832, + "learning_rate": 5.996473989764864e-06, + "loss": 0.329949951171875, + "step": 3570 + }, + { + "epoch": 0.030911967903433608, + "grad_norm": 67.84857791799067, + "learning_rate": 5.996464108010977e-06, + "loss": 0.297796630859375, + "step": 3575 + }, + { + "epoch": 0.03095520142497687, + "grad_norm": 69.01287824078044, + "learning_rate": 5.996454212437661e-06, + "loss": 0.138311767578125, + "step": 3580 + }, + { + "epoch": 0.030998434946520135, + "grad_norm": 44.201075468115164, + "learning_rate": 5.996444303044964e-06, + "loss": 0.2831817626953125, + "step": 3585 + }, + { + "epoch": 0.0310416684680634, + "grad_norm": 0.7455600085884804, + "learning_rate": 5.99643437983293e-06, + "loss": 0.24610595703125, + "step": 3590 + }, + { + "epoch": 0.031084901989606662, + "grad_norm": 13.459603478561197, + "learning_rate": 5.996424442801607e-06, + "loss": 0.084808349609375, + "step": 3595 + }, + { + "epoch": 0.031128135511149926, + "grad_norm": 78.60227205271379, + "learning_rate": 5.9964144919510385e-06, + "loss": 0.352679443359375, + "step": 3600 + }, + { + "epoch": 0.03117136903269319, + "grad_norm": 47.039743348789706, + "learning_rate": 5.996404527281271e-06, + "loss": 0.315203857421875, + "step": 3605 + }, + { + "epoch": 0.031214602554236453, + "grad_norm": 27.30091640248483, + "learning_rate": 5.99639454879235e-06, + "loss": 0.143878173828125, + "step": 3610 + }, + { + "epoch": 0.03125783607577972, + "grad_norm": 43.80606283334595, + "learning_rate": 5.996384556484323e-06, + "loss": 0.36147613525390626, + "step": 3615 + }, + { + "epoch": 0.031301069597322984, + "grad_norm": 32.550823367861696, + "learning_rate": 5.996374550357234e-06, + "loss": 0.1284576416015625, + "step": 3620 + }, + { + "epoch": 0.03134430311886625, + "grad_norm": 8.00774248648098, + "learning_rate": 5.996364530411132e-06, + "loss": 0.24020538330078126, + "step": 3625 + }, + { + "epoch": 0.03138753664040951, + "grad_norm": 8.438831616781275, + "learning_rate": 5.99635449664606e-06, + "loss": 0.116778564453125, + "step": 3630 + }, + { + "epoch": 0.031430770161952774, + "grad_norm": 0.9325939931565346, + "learning_rate": 5.996344449062067e-06, + "loss": 0.1499725341796875, + "step": 3635 + }, + { + "epoch": 0.03147400368349604, + "grad_norm": 74.856179640601, + "learning_rate": 5.9963343876591975e-06, + "loss": 0.40295257568359377, + "step": 3640 + }, + { + "epoch": 0.0315172372050393, + "grad_norm": 16.097897890788865, + "learning_rate": 5.996324312437498e-06, + "loss": 0.151629638671875, + "step": 3645 + }, + { + "epoch": 0.031560470726582565, + "grad_norm": 53.937208697367474, + "learning_rate": 5.996314223397016e-06, + "loss": 0.22799072265625, + "step": 3650 + }, + { + "epoch": 0.03160370424812583, + "grad_norm": 9.153258306654859, + "learning_rate": 5.996304120537799e-06, + "loss": 0.0952484130859375, + "step": 3655 + }, + { + "epoch": 0.03164693776966909, + "grad_norm": 97.6688197154626, + "learning_rate": 5.99629400385989e-06, + "loss": 0.3277740478515625, + "step": 3660 + }, + { + "epoch": 0.031690171291212356, + "grad_norm": 100.69605520297381, + "learning_rate": 5.996283873363339e-06, + "loss": 0.24210586547851562, + "step": 3665 + }, + { + "epoch": 0.03173340481275562, + "grad_norm": 24.90850485173193, + "learning_rate": 5.9962737290481915e-06, + "loss": 0.1045928955078125, + "step": 3670 + }, + { + "epoch": 0.03177663833429888, + "grad_norm": 35.15491301520147, + "learning_rate": 5.996263570914494e-06, + "loss": 0.12827911376953124, + "step": 3675 + }, + { + "epoch": 0.03181987185584215, + "grad_norm": 5.307707647452214, + "learning_rate": 5.996253398962294e-06, + "loss": 0.19818878173828125, + "step": 3680 + }, + { + "epoch": 0.03186310537738541, + "grad_norm": 41.353693496392, + "learning_rate": 5.996243213191637e-06, + "loss": 0.495654296875, + "step": 3685 + }, + { + "epoch": 0.031906338898928674, + "grad_norm": 28.269210359500864, + "learning_rate": 5.996233013602572e-06, + "loss": 0.41763916015625, + "step": 3690 + }, + { + "epoch": 0.03194957242047194, + "grad_norm": 9.503840079581016, + "learning_rate": 5.996222800195146e-06, + "loss": 0.3297393798828125, + "step": 3695 + }, + { + "epoch": 0.0319928059420152, + "grad_norm": 10.772436170620892, + "learning_rate": 5.9962125729694035e-06, + "loss": 0.2137908935546875, + "step": 3700 + }, + { + "epoch": 0.032036039463558465, + "grad_norm": 7.980186803469304, + "learning_rate": 5.996202331925395e-06, + "loss": 0.207470703125, + "step": 3705 + }, + { + "epoch": 0.03207927298510173, + "grad_norm": 38.17736152427998, + "learning_rate": 5.996192077063164e-06, + "loss": 0.22467041015625, + "step": 3710 + }, + { + "epoch": 0.03212250650664499, + "grad_norm": 48.32196975277328, + "learning_rate": 5.996181808382761e-06, + "loss": 0.354345703125, + "step": 3715 + }, + { + "epoch": 0.032165740028188256, + "grad_norm": 19.239616885785598, + "learning_rate": 5.996171525884232e-06, + "loss": 0.34300537109375, + "step": 3720 + }, + { + "epoch": 0.03220897354973152, + "grad_norm": 10.037765145300153, + "learning_rate": 5.996161229567626e-06, + "loss": 0.146514892578125, + "step": 3725 + }, + { + "epoch": 0.03225220707127478, + "grad_norm": 9.501342311713584, + "learning_rate": 5.996150919432988e-06, + "loss": 0.3154541015625, + "step": 3730 + }, + { + "epoch": 0.032295440592818046, + "grad_norm": 25.74244587516485, + "learning_rate": 5.996140595480367e-06, + "loss": 0.17959976196289062, + "step": 3735 + }, + { + "epoch": 0.03233867411436131, + "grad_norm": 18.86213633517142, + "learning_rate": 5.996130257709809e-06, + "loss": 0.199072265625, + "step": 3740 + }, + { + "epoch": 0.032381907635904573, + "grad_norm": 9.283095530007518, + "learning_rate": 5.9961199061213655e-06, + "loss": 0.5088531494140625, + "step": 3745 + }, + { + "epoch": 0.03242514115744784, + "grad_norm": 57.48387271604306, + "learning_rate": 5.996109540715081e-06, + "loss": 0.193621826171875, + "step": 3750 + }, + { + "epoch": 0.0324683746789911, + "grad_norm": 6.03157316959364, + "learning_rate": 5.996099161491002e-06, + "loss": 0.16477813720703124, + "step": 3755 + }, + { + "epoch": 0.032511608200534364, + "grad_norm": 49.204356002526595, + "learning_rate": 5.99608876844918e-06, + "loss": 0.267401123046875, + "step": 3760 + }, + { + "epoch": 0.03255484172207763, + "grad_norm": 30.611268751835322, + "learning_rate": 5.996078361589662e-06, + "loss": 0.2640838623046875, + "step": 3765 + }, + { + "epoch": 0.03259807524362089, + "grad_norm": 1.4795347873382605, + "learning_rate": 5.996067940912494e-06, + "loss": 0.108087158203125, + "step": 3770 + }, + { + "epoch": 0.032641308765164155, + "grad_norm": 40.76959993838463, + "learning_rate": 5.996057506417726e-06, + "loss": 0.3080352783203125, + "step": 3775 + }, + { + "epoch": 0.03268454228670742, + "grad_norm": 9.073583894556894, + "learning_rate": 5.996047058105405e-06, + "loss": 0.13972549438476561, + "step": 3780 + }, + { + "epoch": 0.03272777580825068, + "grad_norm": 47.570670249257006, + "learning_rate": 5.99603659597558e-06, + "loss": 0.373297119140625, + "step": 3785 + }, + { + "epoch": 0.032771009329793946, + "grad_norm": 16.93064147004713, + "learning_rate": 5.9960261200283e-06, + "loss": 0.08757171630859376, + "step": 3790 + }, + { + "epoch": 0.03281424285133721, + "grad_norm": 103.26965487037845, + "learning_rate": 5.996015630263611e-06, + "loss": 0.3307310104370117, + "step": 3795 + }, + { + "epoch": 0.03285747637288048, + "grad_norm": 5.827623468364239, + "learning_rate": 5.9960051266815625e-06, + "loss": 0.135699462890625, + "step": 3800 + }, + { + "epoch": 0.032900709894423744, + "grad_norm": 16.148994872190684, + "learning_rate": 5.9959946092822035e-06, + "loss": 0.108258056640625, + "step": 3805 + }, + { + "epoch": 0.03294394341596701, + "grad_norm": 1.274599920401651, + "learning_rate": 5.995984078065582e-06, + "loss": 0.32109375, + "step": 3810 + }, + { + "epoch": 0.03298717693751027, + "grad_norm": 9.451363028835418, + "learning_rate": 5.995973533031748e-06, + "loss": 0.20341339111328124, + "step": 3815 + }, + { + "epoch": 0.033030410459053534, + "grad_norm": 4.807262432117248, + "learning_rate": 5.995962974180747e-06, + "loss": 0.234765625, + "step": 3820 + }, + { + "epoch": 0.0330736439805968, + "grad_norm": 8.760117000556264, + "learning_rate": 5.99595240151263e-06, + "loss": 0.12141799926757812, + "step": 3825 + }, + { + "epoch": 0.03311687750214006, + "grad_norm": 0.3450540742628264, + "learning_rate": 5.995941815027445e-06, + "loss": 0.12026615142822265, + "step": 3830 + }, + { + "epoch": 0.033160111023683325, + "grad_norm": 5.50415989638137, + "learning_rate": 5.995931214725242e-06, + "loss": 0.14060897827148439, + "step": 3835 + }, + { + "epoch": 0.03320334454522659, + "grad_norm": 11.162395753154616, + "learning_rate": 5.995920600606068e-06, + "loss": 0.14638214111328124, + "step": 3840 + }, + { + "epoch": 0.03324657806676985, + "grad_norm": 16.321007137032026, + "learning_rate": 5.995909972669974e-06, + "loss": 0.182421875, + "step": 3845 + }, + { + "epoch": 0.033289811588313116, + "grad_norm": 12.921387105521992, + "learning_rate": 5.995899330917007e-06, + "loss": 0.065997314453125, + "step": 3850 + }, + { + "epoch": 0.03333304510985638, + "grad_norm": 176.99180011197137, + "learning_rate": 5.995888675347217e-06, + "loss": 0.2631500244140625, + "step": 3855 + }, + { + "epoch": 0.03337627863139964, + "grad_norm": 50.49160125820616, + "learning_rate": 5.9958780059606525e-06, + "loss": 0.3395988464355469, + "step": 3860 + }, + { + "epoch": 0.03341951215294291, + "grad_norm": 1.436897077520766, + "learning_rate": 5.995867322757365e-06, + "loss": 0.19686279296875, + "step": 3865 + }, + { + "epoch": 0.03346274567448617, + "grad_norm": 24.657556441885653, + "learning_rate": 5.995856625737401e-06, + "loss": 0.248504638671875, + "step": 3870 + }, + { + "epoch": 0.033505979196029434, + "grad_norm": 41.69669182728267, + "learning_rate": 5.995845914900812e-06, + "loss": 0.28360595703125, + "step": 3875 + }, + { + "epoch": 0.0335492127175727, + "grad_norm": 4.5038803586811405, + "learning_rate": 5.9958351902476455e-06, + "loss": 0.2310791015625, + "step": 3880 + }, + { + "epoch": 0.03359244623911596, + "grad_norm": 3.223273664559293, + "learning_rate": 5.995824451777952e-06, + "loss": 0.23779296875, + "step": 3885 + }, + { + "epoch": 0.033635679760659225, + "grad_norm": 13.386458066318594, + "learning_rate": 5.995813699491781e-06, + "loss": 0.193414306640625, + "step": 3890 + }, + { + "epoch": 0.03367891328220249, + "grad_norm": 30.37142307463635, + "learning_rate": 5.995802933389182e-06, + "loss": 0.77127685546875, + "step": 3895 + }, + { + "epoch": 0.03372214680374575, + "grad_norm": 2.0081063310577623, + "learning_rate": 5.995792153470204e-06, + "loss": 0.113494873046875, + "step": 3900 + }, + { + "epoch": 0.033765380325289016, + "grad_norm": 12.619077373146107, + "learning_rate": 5.995781359734897e-06, + "loss": 0.284185791015625, + "step": 3905 + }, + { + "epoch": 0.03380861384683228, + "grad_norm": 5.799180078191242, + "learning_rate": 5.9957705521833125e-06, + "loss": 0.44736328125, + "step": 3910 + }, + { + "epoch": 0.03385184736837554, + "grad_norm": 34.3165018941741, + "learning_rate": 5.9957597308154976e-06, + "loss": 0.29599609375, + "step": 3915 + }, + { + "epoch": 0.033895080889918806, + "grad_norm": 17.3437632367942, + "learning_rate": 5.995748895631505e-06, + "loss": 0.13062744140625, + "step": 3920 + }, + { + "epoch": 0.03393831441146207, + "grad_norm": 15.094927502520747, + "learning_rate": 5.995738046631381e-06, + "loss": 0.337188720703125, + "step": 3925 + }, + { + "epoch": 0.033981547933005334, + "grad_norm": 81.7067195094423, + "learning_rate": 5.9957271838151795e-06, + "loss": 0.39742279052734375, + "step": 3930 + }, + { + "epoch": 0.0340247814545486, + "grad_norm": 2.9712297235833054, + "learning_rate": 5.995716307182949e-06, + "loss": 0.0953216552734375, + "step": 3935 + }, + { + "epoch": 0.03406801497609186, + "grad_norm": 4.792187178128971, + "learning_rate": 5.995705416734739e-06, + "loss": 0.2487457275390625, + "step": 3940 + }, + { + "epoch": 0.034111248497635124, + "grad_norm": 38.26672749707947, + "learning_rate": 5.9956945124706e-06, + "loss": 0.44716796875, + "step": 3945 + }, + { + "epoch": 0.03415448201917839, + "grad_norm": 11.022559795392336, + "learning_rate": 5.9956835943905834e-06, + "loss": 0.198291015625, + "step": 3950 + }, + { + "epoch": 0.03419771554072165, + "grad_norm": 3.5128613058189146, + "learning_rate": 5.995672662494739e-06, + "loss": 0.1323638916015625, + "step": 3955 + }, + { + "epoch": 0.034240949062264915, + "grad_norm": 7.484802520944113, + "learning_rate": 5.995661716783116e-06, + "loss": 0.19259033203125, + "step": 3960 + }, + { + "epoch": 0.03428418258380818, + "grad_norm": 5.496117367184064, + "learning_rate": 5.995650757255767e-06, + "loss": 0.0825347900390625, + "step": 3965 + }, + { + "epoch": 0.03432741610535144, + "grad_norm": 21.818821288426044, + "learning_rate": 5.99563978391274e-06, + "loss": 0.4431243896484375, + "step": 3970 + }, + { + "epoch": 0.034370649626894706, + "grad_norm": 1.222331596804038, + "learning_rate": 5.995628796754087e-06, + "loss": 0.25610504150390623, + "step": 3975 + }, + { + "epoch": 0.03441388314843797, + "grad_norm": 5.86840785301387, + "learning_rate": 5.99561779577986e-06, + "loss": 0.3103271484375, + "step": 3980 + }, + { + "epoch": 0.03445711666998124, + "grad_norm": 11.169512942208145, + "learning_rate": 5.995606780990108e-06, + "loss": 0.090484619140625, + "step": 3985 + }, + { + "epoch": 0.034500350191524504, + "grad_norm": 50.295168095790125, + "learning_rate": 5.995595752384882e-06, + "loss": 0.24984130859375, + "step": 3990 + }, + { + "epoch": 0.03454358371306777, + "grad_norm": 36.16126167469503, + "learning_rate": 5.995584709964233e-06, + "loss": 0.41045989990234377, + "step": 3995 + }, + { + "epoch": 0.03458681723461103, + "grad_norm": 2.3070076378983555, + "learning_rate": 5.995573653728213e-06, + "loss": 0.15980129241943358, + "step": 4000 + }, + { + "epoch": 0.034630050756154294, + "grad_norm": 28.271745949756458, + "learning_rate": 5.99556258367687e-06, + "loss": 0.40101318359375, + "step": 4005 + }, + { + "epoch": 0.03467328427769756, + "grad_norm": 3.0142183306826853, + "learning_rate": 5.995551499810259e-06, + "loss": 0.413232421875, + "step": 4010 + }, + { + "epoch": 0.03471651779924082, + "grad_norm": 14.252945421441465, + "learning_rate": 5.995540402128428e-06, + "loss": 0.526104736328125, + "step": 4015 + }, + { + "epoch": 0.034759751320784085, + "grad_norm": 53.42470209266853, + "learning_rate": 5.99552929063143e-06, + "loss": 0.35851364135742186, + "step": 4020 + }, + { + "epoch": 0.03480298484232735, + "grad_norm": 7.5429919275569794, + "learning_rate": 5.9955181653193155e-06, + "loss": 0.265460205078125, + "step": 4025 + }, + { + "epoch": 0.03484621836387061, + "grad_norm": 16.04889563171861, + "learning_rate": 5.995507026192135e-06, + "loss": 0.25053253173828127, + "step": 4030 + }, + { + "epoch": 0.034889451885413876, + "grad_norm": 12.860694180435196, + "learning_rate": 5.995495873249943e-06, + "loss": 0.257977294921875, + "step": 4035 + }, + { + "epoch": 0.03493268540695714, + "grad_norm": 39.406316342461665, + "learning_rate": 5.995484706492786e-06, + "loss": 0.2563514709472656, + "step": 4040 + }, + { + "epoch": 0.0349759189285004, + "grad_norm": 11.048324226147916, + "learning_rate": 5.99547352592072e-06, + "loss": 0.1993408203125, + "step": 4045 + }, + { + "epoch": 0.03501915245004367, + "grad_norm": 8.789157547054998, + "learning_rate": 5.995462331533794e-06, + "loss": 0.2191558837890625, + "step": 4050 + }, + { + "epoch": 0.03506238597158693, + "grad_norm": 29.362499319149915, + "learning_rate": 5.99545112333206e-06, + "loss": 0.37391357421875, + "step": 4055 + }, + { + "epoch": 0.035105619493130194, + "grad_norm": 13.7315709643895, + "learning_rate": 5.995439901315571e-06, + "loss": 0.10457763671875, + "step": 4060 + }, + { + "epoch": 0.03514885301467346, + "grad_norm": 12.164208051069446, + "learning_rate": 5.995428665484378e-06, + "loss": 0.14051055908203125, + "step": 4065 + }, + { + "epoch": 0.03519208653621672, + "grad_norm": 19.747270865673354, + "learning_rate": 5.995417415838532e-06, + "loss": 0.255987548828125, + "step": 4070 + }, + { + "epoch": 0.035235320057759985, + "grad_norm": 30.522377722276417, + "learning_rate": 5.995406152378086e-06, + "loss": 0.193780517578125, + "step": 4075 + }, + { + "epoch": 0.03527855357930325, + "grad_norm": 30.494222278578977, + "learning_rate": 5.995394875103091e-06, + "loss": 0.24976806640625, + "step": 4080 + }, + { + "epoch": 0.03532178710084651, + "grad_norm": 3.5323480014337036, + "learning_rate": 5.995383584013601e-06, + "loss": 0.09151153564453125, + "step": 4085 + }, + { + "epoch": 0.035365020622389776, + "grad_norm": 1.9367111939628776, + "learning_rate": 5.995372279109665e-06, + "loss": 0.5802001953125, + "step": 4090 + }, + { + "epoch": 0.03540825414393304, + "grad_norm": 39.09606686780997, + "learning_rate": 5.995360960391338e-06, + "loss": 0.456964111328125, + "step": 4095 + }, + { + "epoch": 0.0354514876654763, + "grad_norm": 45.060525612700765, + "learning_rate": 5.995349627858669e-06, + "loss": 0.18196487426757812, + "step": 4100 + }, + { + "epoch": 0.035494721187019566, + "grad_norm": 80.23403376316354, + "learning_rate": 5.995338281511714e-06, + "loss": 0.4239288330078125, + "step": 4105 + }, + { + "epoch": 0.03553795470856283, + "grad_norm": 8.492781267472752, + "learning_rate": 5.995326921350522e-06, + "loss": 0.2684661865234375, + "step": 4110 + }, + { + "epoch": 0.035581188230106094, + "grad_norm": 23.53936250614929, + "learning_rate": 5.995315547375148e-06, + "loss": 0.11331787109375, + "step": 4115 + }, + { + "epoch": 0.03562442175164936, + "grad_norm": 5.851940740749776, + "learning_rate": 5.995304159585643e-06, + "loss": 0.1777313232421875, + "step": 4120 + }, + { + "epoch": 0.03566765527319262, + "grad_norm": 2.7698787036640744, + "learning_rate": 5.99529275798206e-06, + "loss": 0.09185028076171875, + "step": 4125 + }, + { + "epoch": 0.035710888794735884, + "grad_norm": 0.98107653799218, + "learning_rate": 5.995281342564451e-06, + "loss": 0.297442626953125, + "step": 4130 + }, + { + "epoch": 0.03575412231627915, + "grad_norm": 6.9027086967514455, + "learning_rate": 5.99526991333287e-06, + "loss": 0.3942352294921875, + "step": 4135 + }, + { + "epoch": 0.03579735583782241, + "grad_norm": 16.00259758718176, + "learning_rate": 5.995258470287368e-06, + "loss": 0.1462158203125, + "step": 4140 + }, + { + "epoch": 0.035840589359365675, + "grad_norm": 41.808376123701755, + "learning_rate": 5.995247013427999e-06, + "loss": 0.27618408203125, + "step": 4145 + }, + { + "epoch": 0.03588382288090894, + "grad_norm": 21.13425120816542, + "learning_rate": 5.995235542754816e-06, + "loss": 0.23951873779296876, + "step": 4150 + }, + { + "epoch": 0.0359270564024522, + "grad_norm": 46.18742668805337, + "learning_rate": 5.99522405826787e-06, + "loss": 0.243585205078125, + "step": 4155 + }, + { + "epoch": 0.035970289923995466, + "grad_norm": 6.676243751529632, + "learning_rate": 5.995212559967217e-06, + "loss": 0.095623779296875, + "step": 4160 + }, + { + "epoch": 0.03601352344553873, + "grad_norm": 7.267193411769935, + "learning_rate": 5.995201047852907e-06, + "loss": 0.327703857421875, + "step": 4165 + }, + { + "epoch": 0.036056756967082, + "grad_norm": 58.14550496092327, + "learning_rate": 5.995189521924995e-06, + "loss": 0.2163848876953125, + "step": 4170 + }, + { + "epoch": 0.036099990488625264, + "grad_norm": 37.41087719594199, + "learning_rate": 5.995177982183533e-06, + "loss": 0.312957763671875, + "step": 4175 + }, + { + "epoch": 0.03614322401016853, + "grad_norm": 38.57407160246001, + "learning_rate": 5.995166428628576e-06, + "loss": 0.195635986328125, + "step": 4180 + }, + { + "epoch": 0.03618645753171179, + "grad_norm": 31.751172505063426, + "learning_rate": 5.995154861260175e-06, + "loss": 0.24093399047851563, + "step": 4185 + }, + { + "epoch": 0.036229691053255055, + "grad_norm": 13.673747604664271, + "learning_rate": 5.995143280078385e-06, + "loss": 0.25694580078125, + "step": 4190 + }, + { + "epoch": 0.03627292457479832, + "grad_norm": 36.6824945867649, + "learning_rate": 5.995131685083258e-06, + "loss": 0.404730224609375, + "step": 4195 + }, + { + "epoch": 0.03631615809634158, + "grad_norm": 49.64135052464479, + "learning_rate": 5.9951200762748495e-06, + "loss": 0.5258544921875, + "step": 4200 + }, + { + "epoch": 0.036359391617884845, + "grad_norm": 4.289784565684946, + "learning_rate": 5.995108453653211e-06, + "loss": 0.5320785522460938, + "step": 4205 + }, + { + "epoch": 0.03640262513942811, + "grad_norm": 41.9413398080394, + "learning_rate": 5.9950968172183975e-06, + "loss": 0.22081298828125, + "step": 4210 + }, + { + "epoch": 0.03644585866097137, + "grad_norm": 34.04265657457219, + "learning_rate": 5.995085166970462e-06, + "loss": 0.35426483154296873, + "step": 4215 + }, + { + "epoch": 0.036489092182514636, + "grad_norm": 6.4243604887796915, + "learning_rate": 5.995073502909459e-06, + "loss": 0.1500091552734375, + "step": 4220 + }, + { + "epoch": 0.0365323257040579, + "grad_norm": 92.00911682249468, + "learning_rate": 5.99506182503544e-06, + "loss": 0.57880859375, + "step": 4225 + }, + { + "epoch": 0.03657555922560116, + "grad_norm": 2.2225205262882257, + "learning_rate": 5.9950501333484616e-06, + "loss": 0.2815521240234375, + "step": 4230 + }, + { + "epoch": 0.03661879274714443, + "grad_norm": 6.2165563946036135, + "learning_rate": 5.995038427848576e-06, + "loss": 0.557763671875, + "step": 4235 + }, + { + "epoch": 0.03666202626868769, + "grad_norm": 7.958809880896046, + "learning_rate": 5.995026708535838e-06, + "loss": 0.264080810546875, + "step": 4240 + }, + { + "epoch": 0.036705259790230954, + "grad_norm": 21.64842843730988, + "learning_rate": 5.995014975410302e-06, + "loss": 0.169378662109375, + "step": 4245 + }, + { + "epoch": 0.03674849331177422, + "grad_norm": 0.2466603990702153, + "learning_rate": 5.9950032284720214e-06, + "loss": 0.239898681640625, + "step": 4250 + }, + { + "epoch": 0.03679172683331748, + "grad_norm": 99.69536780338865, + "learning_rate": 5.994991467721051e-06, + "loss": 0.3973114013671875, + "step": 4255 + }, + { + "epoch": 0.036834960354860745, + "grad_norm": 8.62386156410222, + "learning_rate": 5.994979693157444e-06, + "loss": 0.245263671875, + "step": 4260 + }, + { + "epoch": 0.03687819387640401, + "grad_norm": 14.871310332399908, + "learning_rate": 5.994967904781255e-06, + "loss": 0.083660888671875, + "step": 4265 + }, + { + "epoch": 0.03692142739794727, + "grad_norm": 33.056092184193005, + "learning_rate": 5.994956102592538e-06, + "loss": 0.416534423828125, + "step": 4270 + }, + { + "epoch": 0.036964660919490536, + "grad_norm": 23.520053617074144, + "learning_rate": 5.99494428659135e-06, + "loss": 0.3259033203125, + "step": 4275 + }, + { + "epoch": 0.0370078944410338, + "grad_norm": 0.5604644152356079, + "learning_rate": 5.994932456777743e-06, + "loss": 0.36328125, + "step": 4280 + }, + { + "epoch": 0.03705112796257706, + "grad_norm": 41.22115665608996, + "learning_rate": 5.994920613151771e-06, + "loss": 0.5301513671875, + "step": 4285 + }, + { + "epoch": 0.037094361484120326, + "grad_norm": 0.3731622777099602, + "learning_rate": 5.994908755713491e-06, + "loss": 0.1852264404296875, + "step": 4290 + }, + { + "epoch": 0.03713759500566359, + "grad_norm": 0.24186935464193107, + "learning_rate": 5.994896884462955e-06, + "loss": 0.1124237060546875, + "step": 4295 + }, + { + "epoch": 0.037180828527206854, + "grad_norm": 12.151318208501849, + "learning_rate": 5.994884999400221e-06, + "loss": 0.2850738525390625, + "step": 4300 + }, + { + "epoch": 0.03722406204875012, + "grad_norm": 38.15720646828692, + "learning_rate": 5.994873100525342e-06, + "loss": 0.6646209716796875, + "step": 4305 + }, + { + "epoch": 0.03726729557029338, + "grad_norm": 9.72712767759651, + "learning_rate": 5.994861187838371e-06, + "loss": 0.313739013671875, + "step": 4310 + }, + { + "epoch": 0.037310529091836644, + "grad_norm": 10.267355781359553, + "learning_rate": 5.994849261339367e-06, + "loss": 0.0992462158203125, + "step": 4315 + }, + { + "epoch": 0.03735376261337991, + "grad_norm": 8.913842230377622, + "learning_rate": 5.9948373210283815e-06, + "loss": 0.1419525146484375, + "step": 4320 + }, + { + "epoch": 0.03739699613492317, + "grad_norm": 17.733929104904124, + "learning_rate": 5.994825366905472e-06, + "loss": 0.396630859375, + "step": 4325 + }, + { + "epoch": 0.037440229656466435, + "grad_norm": 118.69184443550357, + "learning_rate": 5.994813398970691e-06, + "loss": 0.241351318359375, + "step": 4330 + }, + { + "epoch": 0.0374834631780097, + "grad_norm": 23.679745112963356, + "learning_rate": 5.994801417224096e-06, + "loss": 0.135693359375, + "step": 4335 + }, + { + "epoch": 0.03752669669955296, + "grad_norm": 15.456738432336248, + "learning_rate": 5.994789421665742e-06, + "loss": 0.29403076171875, + "step": 4340 + }, + { + "epoch": 0.037569930221096226, + "grad_norm": 28.783280409642636, + "learning_rate": 5.994777412295683e-06, + "loss": 0.386431884765625, + "step": 4345 + }, + { + "epoch": 0.03761316374263949, + "grad_norm": 7.962058493666641, + "learning_rate": 5.994765389113975e-06, + "loss": 0.1506072998046875, + "step": 4350 + }, + { + "epoch": 0.03765639726418276, + "grad_norm": 8.12992392748416, + "learning_rate": 5.994753352120674e-06, + "loss": 0.3081024169921875, + "step": 4355 + }, + { + "epoch": 0.037699630785726024, + "grad_norm": 11.75772577118677, + "learning_rate": 5.994741301315835e-06, + "loss": 0.3542236328125, + "step": 4360 + }, + { + "epoch": 0.03774286430726929, + "grad_norm": 2.5765245550395512, + "learning_rate": 5.994729236699515e-06, + "loss": 0.1540435791015625, + "step": 4365 + }, + { + "epoch": 0.03778609782881255, + "grad_norm": 9.56003358635681, + "learning_rate": 5.9947171582717664e-06, + "loss": 0.1373687744140625, + "step": 4370 + }, + { + "epoch": 0.037829331350355815, + "grad_norm": 31.17107836488291, + "learning_rate": 5.994705066032648e-06, + "loss": 0.20551223754882814, + "step": 4375 + }, + { + "epoch": 0.03787256487189908, + "grad_norm": 8.405521433018864, + "learning_rate": 5.994692959982214e-06, + "loss": 0.3739898681640625, + "step": 4380 + }, + { + "epoch": 0.03791579839344234, + "grad_norm": 17.26575667851417, + "learning_rate": 5.994680840120522e-06, + "loss": 0.185845947265625, + "step": 4385 + }, + { + "epoch": 0.037959031914985605, + "grad_norm": 21.209471795432652, + "learning_rate": 5.994668706447626e-06, + "loss": 0.2765380859375, + "step": 4390 + }, + { + "epoch": 0.03800226543652887, + "grad_norm": 5.54809458613131, + "learning_rate": 5.994656558963581e-06, + "loss": 0.6253463745117187, + "step": 4395 + }, + { + "epoch": 0.03804549895807213, + "grad_norm": 4.093479060783493, + "learning_rate": 5.994644397668446e-06, + "loss": 0.36170654296875, + "step": 4400 + }, + { + "epoch": 0.038088732479615396, + "grad_norm": 2.1500802854463017, + "learning_rate": 5.994632222562275e-06, + "loss": 0.13331451416015624, + "step": 4405 + }, + { + "epoch": 0.03813196600115866, + "grad_norm": 32.12866754901266, + "learning_rate": 5.994620033645125e-06, + "loss": 0.457867431640625, + "step": 4410 + }, + { + "epoch": 0.03817519952270192, + "grad_norm": 24.033518666579777, + "learning_rate": 5.994607830917053e-06, + "loss": 0.406884765625, + "step": 4415 + }, + { + "epoch": 0.03821843304424519, + "grad_norm": 38.80102932631436, + "learning_rate": 5.9945956143781135e-06, + "loss": 0.29801025390625, + "step": 4420 + }, + { + "epoch": 0.03826166656578845, + "grad_norm": 7.099867123587539, + "learning_rate": 5.994583384028364e-06, + "loss": 0.34847412109375, + "step": 4425 + }, + { + "epoch": 0.038304900087331714, + "grad_norm": 3.2089655559796864, + "learning_rate": 5.9945711398678596e-06, + "loss": 0.37069091796875, + "step": 4430 + }, + { + "epoch": 0.03834813360887498, + "grad_norm": 10.278605050850869, + "learning_rate": 5.9945588818966585e-06, + "loss": 0.14915771484375, + "step": 4435 + }, + { + "epoch": 0.03839136713041824, + "grad_norm": 2.516055889627498, + "learning_rate": 5.9945466101148156e-06, + "loss": 0.12239532470703125, + "step": 4440 + }, + { + "epoch": 0.038434600651961505, + "grad_norm": 15.339954506062654, + "learning_rate": 5.994534324522389e-06, + "loss": 0.6231170654296875, + "step": 4445 + }, + { + "epoch": 0.03847783417350477, + "grad_norm": 57.65145943900323, + "learning_rate": 5.994522025119435e-06, + "loss": 0.24375, + "step": 4450 + }, + { + "epoch": 0.03852106769504803, + "grad_norm": 52.961410379638515, + "learning_rate": 5.9945097119060094e-06, + "loss": 0.2918914794921875, + "step": 4455 + }, + { + "epoch": 0.038564301216591296, + "grad_norm": 2.800343628244671, + "learning_rate": 5.99449738488217e-06, + "loss": 0.1043212890625, + "step": 4460 + }, + { + "epoch": 0.03860753473813456, + "grad_norm": 4.964418544416011, + "learning_rate": 5.994485044047973e-06, + "loss": 0.30429840087890625, + "step": 4465 + }, + { + "epoch": 0.03865076825967782, + "grad_norm": 2.4963975948850625, + "learning_rate": 5.994472689403476e-06, + "loss": 0.3175567626953125, + "step": 4470 + }, + { + "epoch": 0.038694001781221087, + "grad_norm": 47.33326351367324, + "learning_rate": 5.9944603209487345e-06, + "loss": 0.5134368896484375, + "step": 4475 + }, + { + "epoch": 0.03873723530276435, + "grad_norm": 2.2233239761163266, + "learning_rate": 5.994447938683808e-06, + "loss": 0.12955322265625, + "step": 4480 + }, + { + "epoch": 0.038780468824307614, + "grad_norm": 17.322206455003094, + "learning_rate": 5.994435542608752e-06, + "loss": 0.21707305908203126, + "step": 4485 + }, + { + "epoch": 0.03882370234585088, + "grad_norm": 0.8787864778498153, + "learning_rate": 5.994423132723623e-06, + "loss": 0.16429595947265624, + "step": 4490 + }, + { + "epoch": 0.03886693586739414, + "grad_norm": 19.60285141063411, + "learning_rate": 5.994410709028479e-06, + "loss": 0.33509521484375, + "step": 4495 + }, + { + "epoch": 0.038910169388937405, + "grad_norm": 19.405759612825864, + "learning_rate": 5.994398271523377e-06, + "loss": 0.382696533203125, + "step": 4500 + }, + { + "epoch": 0.03895340291048067, + "grad_norm": 4.770075320734937, + "learning_rate": 5.994385820208376e-06, + "loss": 0.24686279296875, + "step": 4505 + }, + { + "epoch": 0.03899663643202393, + "grad_norm": 6.334488243902135, + "learning_rate": 5.9943733550835324e-06, + "loss": 0.1374603271484375, + "step": 4510 + }, + { + "epoch": 0.039039869953567195, + "grad_norm": 11.823753266854526, + "learning_rate": 5.994360876148903e-06, + "loss": 0.1583688735961914, + "step": 4515 + }, + { + "epoch": 0.03908310347511046, + "grad_norm": 12.26931685220789, + "learning_rate": 5.994348383404546e-06, + "loss": 0.0991363525390625, + "step": 4520 + }, + { + "epoch": 0.03912633699665372, + "grad_norm": 0.5005419955985344, + "learning_rate": 5.994335876850518e-06, + "loss": 0.149786376953125, + "step": 4525 + }, + { + "epoch": 0.039169570518196986, + "grad_norm": 1.56661729807816, + "learning_rate": 5.994323356486878e-06, + "loss": 0.2452554702758789, + "step": 4530 + }, + { + "epoch": 0.03921280403974025, + "grad_norm": 3.417951853383692, + "learning_rate": 5.994310822313684e-06, + "loss": 0.43164520263671874, + "step": 4535 + }, + { + "epoch": 0.03925603756128352, + "grad_norm": 3.7882580886872095, + "learning_rate": 5.994298274330992e-06, + "loss": 0.06259613037109375, + "step": 4540 + }, + { + "epoch": 0.039299271082826784, + "grad_norm": 5.872422726034751, + "learning_rate": 5.994285712538862e-06, + "loss": 0.17889938354492188, + "step": 4545 + }, + { + "epoch": 0.03934250460437005, + "grad_norm": 41.950955677848775, + "learning_rate": 5.99427313693735e-06, + "loss": 0.268035888671875, + "step": 4550 + }, + { + "epoch": 0.03938573812591331, + "grad_norm": 12.910878441493791, + "learning_rate": 5.9942605475265145e-06, + "loss": 0.5483016967773438, + "step": 4555 + }, + { + "epoch": 0.039428971647456575, + "grad_norm": 4.103292050543791, + "learning_rate": 5.9942479443064154e-06, + "loss": 0.11714630126953125, + "step": 4560 + }, + { + "epoch": 0.03947220516899984, + "grad_norm": 13.990105820468722, + "learning_rate": 5.994235327277109e-06, + "loss": 0.23053359985351562, + "step": 4565 + }, + { + "epoch": 0.0395154386905431, + "grad_norm": 9.269442538456738, + "learning_rate": 5.9942226964386516e-06, + "loss": 0.21064453125, + "step": 4570 + }, + { + "epoch": 0.039558672212086365, + "grad_norm": 66.6926780459758, + "learning_rate": 5.994210051791106e-06, + "loss": 0.2771453857421875, + "step": 4575 + }, + { + "epoch": 0.03960190573362963, + "grad_norm": 69.77307315415669, + "learning_rate": 5.994197393334528e-06, + "loss": 0.4384033203125, + "step": 4580 + }, + { + "epoch": 0.03964513925517289, + "grad_norm": 14.946370813304894, + "learning_rate": 5.994184721068976e-06, + "loss": 0.37850341796875, + "step": 4585 + }, + { + "epoch": 0.039688372776716156, + "grad_norm": 34.04646390950821, + "learning_rate": 5.994172034994508e-06, + "loss": 0.298089599609375, + "step": 4590 + }, + { + "epoch": 0.03973160629825942, + "grad_norm": 29.849541535697945, + "learning_rate": 5.994159335111184e-06, + "loss": 0.2793212890625, + "step": 4595 + }, + { + "epoch": 0.03977483981980268, + "grad_norm": 9.673641717197478, + "learning_rate": 5.9941466214190615e-06, + "loss": 0.2631103515625, + "step": 4600 + }, + { + "epoch": 0.03981807334134595, + "grad_norm": 12.3440076150117, + "learning_rate": 5.994133893918199e-06, + "loss": 0.126116943359375, + "step": 4605 + }, + { + "epoch": 0.03986130686288921, + "grad_norm": 3.452513682857165, + "learning_rate": 5.994121152608655e-06, + "loss": 0.09528274536132812, + "step": 4610 + }, + { + "epoch": 0.039904540384432474, + "grad_norm": 30.475951315030873, + "learning_rate": 5.9941083974904895e-06, + "loss": 0.19931640625, + "step": 4615 + }, + { + "epoch": 0.03994777390597574, + "grad_norm": 33.38640310863349, + "learning_rate": 5.994095628563761e-06, + "loss": 0.303857421875, + "step": 4620 + }, + { + "epoch": 0.039991007427519, + "grad_norm": 20.5092909551066, + "learning_rate": 5.994082845828527e-06, + "loss": 0.2135589599609375, + "step": 4625 + }, + { + "epoch": 0.040034240949062265, + "grad_norm": 14.53761555813935, + "learning_rate": 5.9940700492848496e-06, + "loss": 0.08797225952148438, + "step": 4630 + }, + { + "epoch": 0.04007747447060553, + "grad_norm": 81.92077623452396, + "learning_rate": 5.994057238932784e-06, + "loss": 0.485015869140625, + "step": 4635 + }, + { + "epoch": 0.04012070799214879, + "grad_norm": 34.62334335512866, + "learning_rate": 5.994044414772392e-06, + "loss": 0.46762237548828123, + "step": 4640 + }, + { + "epoch": 0.040163941513692056, + "grad_norm": 16.12219719539507, + "learning_rate": 5.99403157680373e-06, + "loss": 0.40089874267578124, + "step": 4645 + }, + { + "epoch": 0.04020717503523532, + "grad_norm": 62.00520787669035, + "learning_rate": 5.99401872502686e-06, + "loss": 0.2740570068359375, + "step": 4650 + }, + { + "epoch": 0.04025040855677858, + "grad_norm": 1.6230647684122816, + "learning_rate": 5.99400585944184e-06, + "loss": 0.30257568359375, + "step": 4655 + }, + { + "epoch": 0.04029364207832185, + "grad_norm": 10.673272176274581, + "learning_rate": 5.99399298004873e-06, + "loss": 0.28931884765625, + "step": 4660 + }, + { + "epoch": 0.04033687559986511, + "grad_norm": 12.211329056977942, + "learning_rate": 5.993980086847588e-06, + "loss": 0.14881591796875, + "step": 4665 + }, + { + "epoch": 0.040380109121408374, + "grad_norm": 1.515559360454444, + "learning_rate": 5.993967179838475e-06, + "loss": 0.041943359375, + "step": 4670 + }, + { + "epoch": 0.04042334264295164, + "grad_norm": 4.540144836387348, + "learning_rate": 5.99395425902145e-06, + "loss": 0.357861328125, + "step": 4675 + }, + { + "epoch": 0.0404665761644949, + "grad_norm": 13.532198102702182, + "learning_rate": 5.993941324396572e-06, + "loss": 0.2975288391113281, + "step": 4680 + }, + { + "epoch": 0.040509809686038165, + "grad_norm": 6.435724719075154, + "learning_rate": 5.993928375963901e-06, + "loss": 0.47152118682861327, + "step": 4685 + }, + { + "epoch": 0.04055304320758143, + "grad_norm": 80.95301586915262, + "learning_rate": 5.9939154137234985e-06, + "loss": 0.3964672088623047, + "step": 4690 + }, + { + "epoch": 0.04059627672912469, + "grad_norm": 6.372447645935549, + "learning_rate": 5.993902437675421e-06, + "loss": 0.39435577392578125, + "step": 4695 + }, + { + "epoch": 0.040639510250667955, + "grad_norm": 6.112679560884395, + "learning_rate": 5.99388944781973e-06, + "loss": 0.2014129638671875, + "step": 4700 + }, + { + "epoch": 0.04068274377221122, + "grad_norm": 22.124235492070866, + "learning_rate": 5.993876444156487e-06, + "loss": 0.175213623046875, + "step": 4705 + }, + { + "epoch": 0.04072597729375448, + "grad_norm": 13.03228552397711, + "learning_rate": 5.993863426685749e-06, + "loss": 0.07935676574707032, + "step": 4710 + }, + { + "epoch": 0.040769210815297746, + "grad_norm": 18.89336066708093, + "learning_rate": 5.993850395407578e-06, + "loss": 0.14735260009765624, + "step": 4715 + }, + { + "epoch": 0.04081244433684101, + "grad_norm": 12.099214708331795, + "learning_rate": 5.993837350322034e-06, + "loss": 0.09800338745117188, + "step": 4720 + }, + { + "epoch": 0.04085567785838428, + "grad_norm": 5.087538473428322, + "learning_rate": 5.993824291429176e-06, + "loss": 0.1077911376953125, + "step": 4725 + }, + { + "epoch": 0.040898911379927544, + "grad_norm": 2.110973819964856, + "learning_rate": 5.993811218729064e-06, + "loss": 0.0525787353515625, + "step": 4730 + }, + { + "epoch": 0.04094214490147081, + "grad_norm": 8.585040397964981, + "learning_rate": 5.99379813222176e-06, + "loss": 0.2261474609375, + "step": 4735 + }, + { + "epoch": 0.04098537842301407, + "grad_norm": 8.554218240051915, + "learning_rate": 5.993785031907324e-06, + "loss": 0.34869384765625, + "step": 4740 + }, + { + "epoch": 0.041028611944557335, + "grad_norm": 10.44023276138021, + "learning_rate": 5.993771917785815e-06, + "loss": 0.1299102783203125, + "step": 4745 + }, + { + "epoch": 0.0410718454661006, + "grad_norm": 16.798868807586185, + "learning_rate": 5.993758789857296e-06, + "loss": 0.195208740234375, + "step": 4750 + }, + { + "epoch": 0.04111507898764386, + "grad_norm": 4.1571192011891265, + "learning_rate": 5.993745648121824e-06, + "loss": 0.317626953125, + "step": 4755 + }, + { + "epoch": 0.041158312509187125, + "grad_norm": 13.100074757271473, + "learning_rate": 5.993732492579463e-06, + "loss": 0.32596282958984374, + "step": 4760 + }, + { + "epoch": 0.04120154603073039, + "grad_norm": 44.56117069130143, + "learning_rate": 5.9937193232302725e-06, + "loss": 0.19561767578125, + "step": 4765 + }, + { + "epoch": 0.04124477955227365, + "grad_norm": 11.966540624367545, + "learning_rate": 5.993706140074312e-06, + "loss": 0.10261459350585937, + "step": 4770 + }, + { + "epoch": 0.041288013073816916, + "grad_norm": 15.225701555423582, + "learning_rate": 5.993692943111644e-06, + "loss": 0.34290084838867185, + "step": 4775 + }, + { + "epoch": 0.04133124659536018, + "grad_norm": 3.4573519032108933, + "learning_rate": 5.993679732342328e-06, + "loss": 0.21327247619628906, + "step": 4780 + }, + { + "epoch": 0.04137448011690344, + "grad_norm": 21.67076588539965, + "learning_rate": 5.9936665077664255e-06, + "loss": 0.271734619140625, + "step": 4785 + }, + { + "epoch": 0.04141771363844671, + "grad_norm": 34.647659531185184, + "learning_rate": 5.993653269383999e-06, + "loss": 0.1666330337524414, + "step": 4790 + }, + { + "epoch": 0.04146094715998997, + "grad_norm": 0.8792579173900145, + "learning_rate": 5.993640017195107e-06, + "loss": 0.4184600830078125, + "step": 4795 + }, + { + "epoch": 0.041504180681533234, + "grad_norm": 7.899510826354947, + "learning_rate": 5.993626751199812e-06, + "loss": 0.1684112548828125, + "step": 4800 + }, + { + "epoch": 0.0415474142030765, + "grad_norm": 41.6769869404634, + "learning_rate": 5.993613471398175e-06, + "loss": 0.3797607421875, + "step": 4805 + }, + { + "epoch": 0.04159064772461976, + "grad_norm": 3.5814281106056662, + "learning_rate": 5.993600177790257e-06, + "loss": 0.30099029541015626, + "step": 4810 + }, + { + "epoch": 0.041633881246163025, + "grad_norm": 2.8470029470918896, + "learning_rate": 5.99358687037612e-06, + "loss": 0.04986763000488281, + "step": 4815 + }, + { + "epoch": 0.04167711476770629, + "grad_norm": 17.648942883686036, + "learning_rate": 5.993573549155825e-06, + "loss": 0.22022705078125, + "step": 4820 + }, + { + "epoch": 0.04172034828924955, + "grad_norm": 11.933823256018723, + "learning_rate": 5.993560214129432e-06, + "loss": 0.2338531494140625, + "step": 4825 + }, + { + "epoch": 0.041763581810792816, + "grad_norm": 46.55108732723695, + "learning_rate": 5.993546865297005e-06, + "loss": 0.6072021484375, + "step": 4830 + }, + { + "epoch": 0.04180681533233608, + "grad_norm": 34.556450904363196, + "learning_rate": 5.993533502658604e-06, + "loss": 0.4841796875, + "step": 4835 + }, + { + "epoch": 0.04185004885387934, + "grad_norm": 9.213519977539116, + "learning_rate": 5.993520126214291e-06, + "loss": 0.1169464111328125, + "step": 4840 + }, + { + "epoch": 0.04189328237542261, + "grad_norm": 85.89604003491709, + "learning_rate": 5.9935067359641275e-06, + "loss": 0.6806396484375, + "step": 4845 + }, + { + "epoch": 0.04193651589696587, + "grad_norm": 22.53939894827532, + "learning_rate": 5.993493331908175e-06, + "loss": 0.2947509765625, + "step": 4850 + }, + { + "epoch": 0.041979749418509134, + "grad_norm": 3.6029480712515296, + "learning_rate": 5.993479914046497e-06, + "loss": 0.215484619140625, + "step": 4855 + }, + { + "epoch": 0.0420229829400524, + "grad_norm": 54.98690162687589, + "learning_rate": 5.993466482379154e-06, + "loss": 0.253173828125, + "step": 4860 + }, + { + "epoch": 0.04206621646159566, + "grad_norm": 8.334424708716112, + "learning_rate": 5.993453036906207e-06, + "loss": 0.10166854858398437, + "step": 4865 + }, + { + "epoch": 0.042109449983138925, + "grad_norm": 4.571959244709068, + "learning_rate": 5.99343957762772e-06, + "loss": 0.133306884765625, + "step": 4870 + }, + { + "epoch": 0.04215268350468219, + "grad_norm": 24.153141010992925, + "learning_rate": 5.9934261045437536e-06, + "loss": 0.24415283203125, + "step": 4875 + }, + { + "epoch": 0.04219591702622545, + "grad_norm": 5.865595676329635, + "learning_rate": 5.993412617654371e-06, + "loss": 0.08895416259765625, + "step": 4880 + }, + { + "epoch": 0.042239150547768715, + "grad_norm": 9.956355016757769, + "learning_rate": 5.993399116959633e-06, + "loss": 0.105438232421875, + "step": 4885 + }, + { + "epoch": 0.04228238406931198, + "grad_norm": 6.33196928007966, + "learning_rate": 5.993385602459604e-06, + "loss": 0.13446502685546874, + "step": 4890 + }, + { + "epoch": 0.04232561759085524, + "grad_norm": 16.379555368304572, + "learning_rate": 5.993372074154345e-06, + "loss": 0.120294189453125, + "step": 4895 + }, + { + "epoch": 0.042368851112398506, + "grad_norm": 67.30072882028126, + "learning_rate": 5.993358532043917e-06, + "loss": 0.2014251708984375, + "step": 4900 + }, + { + "epoch": 0.04241208463394177, + "grad_norm": 52.9792176312431, + "learning_rate": 5.993344976128386e-06, + "loss": 0.5816139221191406, + "step": 4905 + }, + { + "epoch": 0.04245531815548504, + "grad_norm": 17.790308728412896, + "learning_rate": 5.9933314064078104e-06, + "loss": 0.16073532104492189, + "step": 4910 + }, + { + "epoch": 0.042498551677028304, + "grad_norm": 27.843100861142734, + "learning_rate": 5.993317822882257e-06, + "loss": 0.3128021240234375, + "step": 4915 + }, + { + "epoch": 0.04254178519857157, + "grad_norm": 18.044559033173872, + "learning_rate": 5.993304225551785e-06, + "loss": 0.24166107177734375, + "step": 4920 + }, + { + "epoch": 0.04258501872011483, + "grad_norm": 2.614241586458319, + "learning_rate": 5.993290614416459e-06, + "loss": 0.186505126953125, + "step": 4925 + }, + { + "epoch": 0.042628252241658095, + "grad_norm": 6.114365715925545, + "learning_rate": 5.99327698947634e-06, + "loss": 0.08980712890625, + "step": 4930 + }, + { + "epoch": 0.04267148576320136, + "grad_norm": 13.631923933634331, + "learning_rate": 5.9932633507314935e-06, + "loss": 0.4137397766113281, + "step": 4935 + }, + { + "epoch": 0.04271471928474462, + "grad_norm": 7.137954640389229, + "learning_rate": 5.99324969818198e-06, + "loss": 0.1073272705078125, + "step": 4940 + }, + { + "epoch": 0.042757952806287886, + "grad_norm": 15.250233784840681, + "learning_rate": 5.993236031827863e-06, + "loss": 0.3600830078125, + "step": 4945 + }, + { + "epoch": 0.04280118632783115, + "grad_norm": 20.84577089297286, + "learning_rate": 5.993222351669207e-06, + "loss": 0.254620361328125, + "step": 4950 + }, + { + "epoch": 0.04284441984937441, + "grad_norm": 36.59003073800181, + "learning_rate": 5.993208657706074e-06, + "loss": 0.229400634765625, + "step": 4955 + }, + { + "epoch": 0.042887653370917676, + "grad_norm": 34.29422728902959, + "learning_rate": 5.993194949938527e-06, + "loss": 0.348944091796875, + "step": 4960 + }, + { + "epoch": 0.04293088689246094, + "grad_norm": 10.201280203481643, + "learning_rate": 5.993181228366629e-06, + "loss": 0.390411376953125, + "step": 4965 + }, + { + "epoch": 0.042974120414004204, + "grad_norm": 11.570971254615149, + "learning_rate": 5.993167492990443e-06, + "loss": 0.287164306640625, + "step": 4970 + }, + { + "epoch": 0.04301735393554747, + "grad_norm": 7.616921487416304, + "learning_rate": 5.993153743810034e-06, + "loss": 0.393536376953125, + "step": 4975 + }, + { + "epoch": 0.04306058745709073, + "grad_norm": 24.93447363246874, + "learning_rate": 5.993139980825464e-06, + "loss": 0.4179534912109375, + "step": 4980 + }, + { + "epoch": 0.043103820978633994, + "grad_norm": 9.219800352229392, + "learning_rate": 5.993126204036797e-06, + "loss": 0.1065185546875, + "step": 4985 + }, + { + "epoch": 0.04314705450017726, + "grad_norm": 4.348020048219271, + "learning_rate": 5.993112413444097e-06, + "loss": 0.29765472412109373, + "step": 4990 + }, + { + "epoch": 0.04319028802172052, + "grad_norm": 26.59185937208567, + "learning_rate": 5.993098609047426e-06, + "loss": 0.9328582763671875, + "step": 4995 + }, + { + "epoch": 0.043233521543263785, + "grad_norm": 63.27637381367987, + "learning_rate": 5.993084790846849e-06, + "loss": 0.6402053833007812, + "step": 5000 + }, + { + "epoch": 0.04327675506480705, + "grad_norm": 59.74239318197652, + "learning_rate": 5.9930709588424295e-06, + "loss": 0.297821044921875, + "step": 5005 + }, + { + "epoch": 0.04331998858635031, + "grad_norm": 9.76460749327522, + "learning_rate": 5.993057113034231e-06, + "loss": 0.1773193359375, + "step": 5010 + }, + { + "epoch": 0.043363222107893576, + "grad_norm": 4.308596522228607, + "learning_rate": 5.993043253422318e-06, + "loss": 0.1598541259765625, + "step": 5015 + }, + { + "epoch": 0.04340645562943684, + "grad_norm": 11.975165289281026, + "learning_rate": 5.993029380006754e-06, + "loss": 0.394476318359375, + "step": 5020 + }, + { + "epoch": 0.0434496891509801, + "grad_norm": 11.415575888797678, + "learning_rate": 5.993015492787603e-06, + "loss": 0.17757225036621094, + "step": 5025 + }, + { + "epoch": 0.04349292267252337, + "grad_norm": 25.258033885329365, + "learning_rate": 5.993001591764929e-06, + "loss": 0.126446533203125, + "step": 5030 + }, + { + "epoch": 0.04353615619406663, + "grad_norm": 7.33836202039701, + "learning_rate": 5.992987676938796e-06, + "loss": 0.12371635437011719, + "step": 5035 + }, + { + "epoch": 0.043579389715609894, + "grad_norm": 1.4519252361671209, + "learning_rate": 5.992973748309268e-06, + "loss": 0.233355712890625, + "step": 5040 + }, + { + "epoch": 0.04362262323715316, + "grad_norm": 3.350938682621421, + "learning_rate": 5.99295980587641e-06, + "loss": 0.2321441650390625, + "step": 5045 + }, + { + "epoch": 0.04366585675869642, + "grad_norm": 28.5946099909493, + "learning_rate": 5.992945849640285e-06, + "loss": 0.3859230041503906, + "step": 5050 + }, + { + "epoch": 0.043709090280239685, + "grad_norm": 9.498073405561168, + "learning_rate": 5.99293187960096e-06, + "loss": 0.15914306640625, + "step": 5055 + }, + { + "epoch": 0.04375232380178295, + "grad_norm": 9.480914938331487, + "learning_rate": 5.992917895758495e-06, + "loss": 0.131756591796875, + "step": 5060 + }, + { + "epoch": 0.04379555732332621, + "grad_norm": 4.744039985214977, + "learning_rate": 5.99290389811296e-06, + "loss": 0.0863311767578125, + "step": 5065 + }, + { + "epoch": 0.043838790844869475, + "grad_norm": 8.173949205638676, + "learning_rate": 5.992889886664414e-06, + "loss": 0.6499267578125, + "step": 5070 + }, + { + "epoch": 0.04388202436641274, + "grad_norm": 20.408329467763554, + "learning_rate": 5.9928758614129254e-06, + "loss": 0.156732177734375, + "step": 5075 + }, + { + "epoch": 0.043925257887956, + "grad_norm": 85.98625797804516, + "learning_rate": 5.992861822358559e-06, + "loss": 0.74190673828125, + "step": 5080 + }, + { + "epoch": 0.043968491409499266, + "grad_norm": 11.299762690256214, + "learning_rate": 5.992847769501377e-06, + "loss": 0.43668212890625, + "step": 5085 + }, + { + "epoch": 0.04401172493104253, + "grad_norm": 18.343114029377908, + "learning_rate": 5.992833702841445e-06, + "loss": 0.3595062255859375, + "step": 5090 + }, + { + "epoch": 0.04405495845258579, + "grad_norm": 33.83477488137649, + "learning_rate": 5.992819622378829e-06, + "loss": 0.496490478515625, + "step": 5095 + }, + { + "epoch": 0.044098191974129064, + "grad_norm": 4.869752356586641, + "learning_rate": 5.992805528113595e-06, + "loss": 0.085418701171875, + "step": 5100 + }, + { + "epoch": 0.04414142549567233, + "grad_norm": 3.7521527882763035, + "learning_rate": 5.992791420045804e-06, + "loss": 0.1541412353515625, + "step": 5105 + }, + { + "epoch": 0.04418465901721559, + "grad_norm": 15.00884459302742, + "learning_rate": 5.992777298175525e-06, + "loss": 0.0536865234375, + "step": 5110 + }, + { + "epoch": 0.044227892538758855, + "grad_norm": 32.23176343251736, + "learning_rate": 5.99276316250282e-06, + "loss": 0.2980194091796875, + "step": 5115 + }, + { + "epoch": 0.04427112606030212, + "grad_norm": 3.1414306445468374, + "learning_rate": 5.9927490130277565e-06, + "loss": 0.263720703125, + "step": 5120 + }, + { + "epoch": 0.04431435958184538, + "grad_norm": 10.837130625722995, + "learning_rate": 5.9927348497504e-06, + "loss": 0.21568527221679687, + "step": 5125 + }, + { + "epoch": 0.044357593103388646, + "grad_norm": 2.1394911999758963, + "learning_rate": 5.9927206726708135e-06, + "loss": 0.028388214111328126, + "step": 5130 + }, + { + "epoch": 0.04440082662493191, + "grad_norm": 30.61065198384019, + "learning_rate": 5.992706481789064e-06, + "loss": 0.15987701416015626, + "step": 5135 + }, + { + "epoch": 0.04444406014647517, + "grad_norm": 8.20353169882918, + "learning_rate": 5.992692277105217e-06, + "loss": 0.22063217163085938, + "step": 5140 + }, + { + "epoch": 0.044487293668018436, + "grad_norm": 53.69913006353032, + "learning_rate": 5.992678058619337e-06, + "loss": 0.501153564453125, + "step": 5145 + }, + { + "epoch": 0.0445305271895617, + "grad_norm": 1.7003684220222839, + "learning_rate": 5.992663826331491e-06, + "loss": 0.14174652099609375, + "step": 5150 + }, + { + "epoch": 0.044573760711104964, + "grad_norm": 82.52837094838806, + "learning_rate": 5.992649580241744e-06, + "loss": 0.276068115234375, + "step": 5155 + }, + { + "epoch": 0.04461699423264823, + "grad_norm": 12.296279319928324, + "learning_rate": 5.992635320350161e-06, + "loss": 0.2113677978515625, + "step": 5160 + }, + { + "epoch": 0.04466022775419149, + "grad_norm": 19.06107608971957, + "learning_rate": 5.992621046656808e-06, + "loss": 0.1692474365234375, + "step": 5165 + }, + { + "epoch": 0.044703461275734754, + "grad_norm": 16.626106329790627, + "learning_rate": 5.992606759161752e-06, + "loss": 0.16316986083984375, + "step": 5170 + }, + { + "epoch": 0.04474669479727802, + "grad_norm": 29.41299020219326, + "learning_rate": 5.992592457865058e-06, + "loss": 0.327783203125, + "step": 5175 + }, + { + "epoch": 0.04478992831882128, + "grad_norm": 5.929458933924725, + "learning_rate": 5.992578142766791e-06, + "loss": 0.1285491943359375, + "step": 5180 + }, + { + "epoch": 0.044833161840364545, + "grad_norm": 30.996325668786863, + "learning_rate": 5.992563813867019e-06, + "loss": 0.5677490234375, + "step": 5185 + }, + { + "epoch": 0.04487639536190781, + "grad_norm": 19.7386448892094, + "learning_rate": 5.992549471165807e-06, + "loss": 0.09358291625976563, + "step": 5190 + }, + { + "epoch": 0.04491962888345107, + "grad_norm": 11.663400389689656, + "learning_rate": 5.992535114663221e-06, + "loss": 0.247900390625, + "step": 5195 + }, + { + "epoch": 0.044962862404994336, + "grad_norm": 28.268835098182137, + "learning_rate": 5.992520744359327e-06, + "loss": 0.2512518882751465, + "step": 5200 + }, + { + "epoch": 0.0450060959265376, + "grad_norm": 12.050801335885017, + "learning_rate": 5.992506360254193e-06, + "loss": 0.209967041015625, + "step": 5205 + }, + { + "epoch": 0.04504932944808086, + "grad_norm": 13.547537880938307, + "learning_rate": 5.992491962347883e-06, + "loss": 0.213275146484375, + "step": 5210 + }, + { + "epoch": 0.04509256296962413, + "grad_norm": 21.22962028969486, + "learning_rate": 5.992477550640465e-06, + "loss": 0.21924591064453125, + "step": 5215 + }, + { + "epoch": 0.04513579649116739, + "grad_norm": 2.3880495633003287, + "learning_rate": 5.992463125132006e-06, + "loss": 0.2731231689453125, + "step": 5220 + }, + { + "epoch": 0.045179030012710654, + "grad_norm": 23.72703570079012, + "learning_rate": 5.992448685822569e-06, + "loss": 0.16650848388671874, + "step": 5225 + }, + { + "epoch": 0.04522226353425392, + "grad_norm": 1.4923288533742756, + "learning_rate": 5.992434232712224e-06, + "loss": 0.09830818176269532, + "step": 5230 + }, + { + "epoch": 0.04526549705579718, + "grad_norm": 2.3291455077376484, + "learning_rate": 5.992419765801037e-06, + "loss": 0.12141876220703125, + "step": 5235 + }, + { + "epoch": 0.045308730577340445, + "grad_norm": 43.78865383683149, + "learning_rate": 5.992405285089076e-06, + "loss": 0.3643310546875, + "step": 5240 + }, + { + "epoch": 0.04535196409888371, + "grad_norm": 41.196027891312845, + "learning_rate": 5.992390790576404e-06, + "loss": 0.31319580078125, + "step": 5245 + }, + { + "epoch": 0.04539519762042697, + "grad_norm": 7.286004280706111, + "learning_rate": 5.9923762822630916e-06, + "loss": 0.11712646484375, + "step": 5250 + }, + { + "epoch": 0.045438431141970236, + "grad_norm": 24.03646061296651, + "learning_rate": 5.992361760149203e-06, + "loss": 0.1716461181640625, + "step": 5255 + }, + { + "epoch": 0.0454816646635135, + "grad_norm": 76.44425899781386, + "learning_rate": 5.992347224234807e-06, + "loss": 0.391839599609375, + "step": 5260 + }, + { + "epoch": 0.04552489818505676, + "grad_norm": 5.172400988660927, + "learning_rate": 5.99233267451997e-06, + "loss": 0.1346435546875, + "step": 5265 + }, + { + "epoch": 0.045568131706600026, + "grad_norm": 10.656034751786777, + "learning_rate": 5.992318111004759e-06, + "loss": 0.43778076171875, + "step": 5270 + }, + { + "epoch": 0.04561136522814329, + "grad_norm": 3.614363966702664, + "learning_rate": 5.992303533689242e-06, + "loss": 0.1264739990234375, + "step": 5275 + }, + { + "epoch": 0.045654598749686554, + "grad_norm": 0.6489531760048832, + "learning_rate": 5.992288942573484e-06, + "loss": 0.31519775390625, + "step": 5280 + }, + { + "epoch": 0.045697832271229824, + "grad_norm": 6.991048476570912, + "learning_rate": 5.992274337657555e-06, + "loss": 0.313214111328125, + "step": 5285 + }, + { + "epoch": 0.04574106579277309, + "grad_norm": 10.927998049848838, + "learning_rate": 5.99225971894152e-06, + "loss": 0.10377197265625, + "step": 5290 + }, + { + "epoch": 0.04578429931431635, + "grad_norm": 56.22420313643989, + "learning_rate": 5.992245086425449e-06, + "loss": 0.30891571044921873, + "step": 5295 + }, + { + "epoch": 0.045827532835859615, + "grad_norm": 5.8063611018682675, + "learning_rate": 5.992230440109407e-06, + "loss": 0.7464569091796875, + "step": 5300 + }, + { + "epoch": 0.04587076635740288, + "grad_norm": 53.681341818267214, + "learning_rate": 5.992215779993463e-06, + "loss": 0.4928955078125, + "step": 5305 + }, + { + "epoch": 0.04591399987894614, + "grad_norm": 15.774968163672975, + "learning_rate": 5.9922011060776835e-06, + "loss": 0.21908111572265626, + "step": 5310 + }, + { + "epoch": 0.045957233400489406, + "grad_norm": 9.005584913827612, + "learning_rate": 5.992186418362138e-06, + "loss": 0.455487060546875, + "step": 5315 + }, + { + "epoch": 0.04600046692203267, + "grad_norm": 3.8503215901245764, + "learning_rate": 5.9921717168468935e-06, + "loss": 0.16568603515625, + "step": 5320 + }, + { + "epoch": 0.04604370044357593, + "grad_norm": 32.431733553595535, + "learning_rate": 5.9921570015320165e-06, + "loss": 0.485693359375, + "step": 5325 + }, + { + "epoch": 0.046086933965119196, + "grad_norm": 19.331090291993405, + "learning_rate": 5.992142272417576e-06, + "loss": 0.144403076171875, + "step": 5330 + }, + { + "epoch": 0.04613016748666246, + "grad_norm": 15.266804679387057, + "learning_rate": 5.99212752950364e-06, + "loss": 0.170318603515625, + "step": 5335 + }, + { + "epoch": 0.046173401008205724, + "grad_norm": 15.969803658710331, + "learning_rate": 5.992112772790275e-06, + "loss": 0.1173583984375, + "step": 5340 + }, + { + "epoch": 0.04621663452974899, + "grad_norm": 19.146502441249865, + "learning_rate": 5.9920980022775515e-06, + "loss": 0.1763641357421875, + "step": 5345 + }, + { + "epoch": 0.04625986805129225, + "grad_norm": 18.39422827460778, + "learning_rate": 5.9920832179655364e-06, + "loss": 0.34879150390625, + "step": 5350 + }, + { + "epoch": 0.046303101572835514, + "grad_norm": 10.636580661274575, + "learning_rate": 5.992068419854298e-06, + "loss": 0.3195556640625, + "step": 5355 + }, + { + "epoch": 0.04634633509437878, + "grad_norm": 7.977010458927949, + "learning_rate": 5.992053607943904e-06, + "loss": 0.1600128173828125, + "step": 5360 + }, + { + "epoch": 0.04638956861592204, + "grad_norm": 11.684389311798721, + "learning_rate": 5.992038782234422e-06, + "loss": 0.21513519287109376, + "step": 5365 + }, + { + "epoch": 0.046432802137465305, + "grad_norm": 51.04389114914081, + "learning_rate": 5.992023942725923e-06, + "loss": 0.21299057006835936, + "step": 5370 + }, + { + "epoch": 0.04647603565900857, + "grad_norm": 31.738481744115898, + "learning_rate": 5.992009089418474e-06, + "loss": 0.215020751953125, + "step": 5375 + }, + { + "epoch": 0.04651926918055183, + "grad_norm": 10.300296918870458, + "learning_rate": 5.9919942223121435e-06, + "loss": 0.211083984375, + "step": 5380 + }, + { + "epoch": 0.046562502702095096, + "grad_norm": 13.558396828959024, + "learning_rate": 5.991979341406999e-06, + "loss": 0.3111904144287109, + "step": 5385 + }, + { + "epoch": 0.04660573622363836, + "grad_norm": 18.805694031070534, + "learning_rate": 5.991964446703111e-06, + "loss": 0.23170166015625, + "step": 5390 + }, + { + "epoch": 0.04664896974518162, + "grad_norm": 3.6887954004992376, + "learning_rate": 5.991949538200547e-06, + "loss": 0.08118820190429688, + "step": 5395 + }, + { + "epoch": 0.04669220326672489, + "grad_norm": 30.93653300793523, + "learning_rate": 5.991934615899376e-06, + "loss": 0.2014617919921875, + "step": 5400 + }, + { + "epoch": 0.04673543678826815, + "grad_norm": 59.46396915921273, + "learning_rate": 5.991919679799668e-06, + "loss": 0.424200439453125, + "step": 5405 + }, + { + "epoch": 0.046778670309811414, + "grad_norm": 5.831104581975946, + "learning_rate": 5.991904729901489e-06, + "loss": 0.11322174072265626, + "step": 5410 + }, + { + "epoch": 0.04682190383135468, + "grad_norm": 72.15678155591795, + "learning_rate": 5.991889766204911e-06, + "loss": 0.524749755859375, + "step": 5415 + }, + { + "epoch": 0.04686513735289794, + "grad_norm": 42.573933232783006, + "learning_rate": 5.991874788710002e-06, + "loss": 0.22782554626464843, + "step": 5420 + }, + { + "epoch": 0.046908370874441205, + "grad_norm": 20.71718982816622, + "learning_rate": 5.99185979741683e-06, + "loss": 0.12984561920166016, + "step": 5425 + }, + { + "epoch": 0.04695160439598447, + "grad_norm": 19.3827918291705, + "learning_rate": 5.9918447923254654e-06, + "loss": 0.1361083984375, + "step": 5430 + }, + { + "epoch": 0.04699483791752773, + "grad_norm": 4.2430425767208115, + "learning_rate": 5.991829773435977e-06, + "loss": 0.2940834045410156, + "step": 5435 + }, + { + "epoch": 0.047038071439070996, + "grad_norm": 11.538460311320542, + "learning_rate": 5.991814740748434e-06, + "loss": 0.1454071044921875, + "step": 5440 + }, + { + "epoch": 0.04708130496061426, + "grad_norm": 25.7406048316848, + "learning_rate": 5.991799694262905e-06, + "loss": 0.18662109375, + "step": 5445 + }, + { + "epoch": 0.04712453848215752, + "grad_norm": 45.58596486166943, + "learning_rate": 5.991784633979461e-06, + "loss": 0.291790771484375, + "step": 5450 + }, + { + "epoch": 0.047167772003700786, + "grad_norm": 1.1139362103304185, + "learning_rate": 5.99176955989817e-06, + "loss": 0.20712509155273437, + "step": 5455 + }, + { + "epoch": 0.04721100552524405, + "grad_norm": 24.78748400350814, + "learning_rate": 5.991754472019104e-06, + "loss": 0.2532470703125, + "step": 5460 + }, + { + "epoch": 0.047254239046787314, + "grad_norm": 10.404738888424596, + "learning_rate": 5.991739370342328e-06, + "loss": 0.33775177001953127, + "step": 5465 + }, + { + "epoch": 0.047297472568330584, + "grad_norm": 36.130658772410946, + "learning_rate": 5.991724254867916e-06, + "loss": 0.3421043395996094, + "step": 5470 + }, + { + "epoch": 0.04734070608987385, + "grad_norm": 3.699947192880029, + "learning_rate": 5.9917091255959365e-06, + "loss": 0.09137840270996093, + "step": 5475 + }, + { + "epoch": 0.04738393961141711, + "grad_norm": 1.3651021648765227, + "learning_rate": 5.991693982526458e-06, + "loss": 0.389752197265625, + "step": 5480 + }, + { + "epoch": 0.047427173132960375, + "grad_norm": 24.599496638222973, + "learning_rate": 5.991678825659551e-06, + "loss": 0.32618408203125, + "step": 5485 + }, + { + "epoch": 0.04747040665450364, + "grad_norm": 53.10010880389464, + "learning_rate": 5.991663654995287e-06, + "loss": 0.1435791015625, + "step": 5490 + }, + { + "epoch": 0.0475136401760469, + "grad_norm": 6.321816640063645, + "learning_rate": 5.9916484705337335e-06, + "loss": 0.2631500244140625, + "step": 5495 + }, + { + "epoch": 0.047556873697590166, + "grad_norm": 0.825682533672026, + "learning_rate": 5.991633272274961e-06, + "loss": 0.13858642578125, + "step": 5500 + }, + { + "epoch": 0.04760010721913343, + "grad_norm": 43.546303101991896, + "learning_rate": 5.991618060219041e-06, + "loss": 0.1105743408203125, + "step": 5505 + }, + { + "epoch": 0.04764334074067669, + "grad_norm": 8.089616225687083, + "learning_rate": 5.991602834366043e-06, + "loss": 0.21411895751953125, + "step": 5510 + }, + { + "epoch": 0.047686574262219956, + "grad_norm": 40.78453281447651, + "learning_rate": 5.991587594716037e-06, + "loss": 0.2440673828125, + "step": 5515 + }, + { + "epoch": 0.04772980778376322, + "grad_norm": 5.49142587040015, + "learning_rate": 5.991572341269093e-06, + "loss": 0.278558349609375, + "step": 5520 + }, + { + "epoch": 0.047773041305306484, + "grad_norm": 3.1797675117649695, + "learning_rate": 5.991557074025282e-06, + "loss": 0.288446044921875, + "step": 5525 + }, + { + "epoch": 0.04781627482684975, + "grad_norm": 5.91685954040495, + "learning_rate": 5.991541792984673e-06, + "loss": 0.18703155517578124, + "step": 5530 + }, + { + "epoch": 0.04785950834839301, + "grad_norm": 14.17854109033618, + "learning_rate": 5.991526498147339e-06, + "loss": 0.09680252075195313, + "step": 5535 + }, + { + "epoch": 0.047902741869936274, + "grad_norm": 7.18749211287506, + "learning_rate": 5.9915111895133485e-06, + "loss": 0.1025054931640625, + "step": 5540 + }, + { + "epoch": 0.04794597539147954, + "grad_norm": 41.930664967060046, + "learning_rate": 5.991495867082773e-06, + "loss": 0.23470458984375, + "step": 5545 + }, + { + "epoch": 0.0479892089130228, + "grad_norm": 6.3102695991435604, + "learning_rate": 5.991480530855683e-06, + "loss": 0.41558685302734377, + "step": 5550 + }, + { + "epoch": 0.048032442434566065, + "grad_norm": 3.1679101135878645, + "learning_rate": 5.9914651808321485e-06, + "loss": 0.19187774658203124, + "step": 5555 + }, + { + "epoch": 0.04807567595610933, + "grad_norm": 24.28019886036319, + "learning_rate": 5.991449817012242e-06, + "loss": 0.44095306396484374, + "step": 5560 + }, + { + "epoch": 0.04811890947765259, + "grad_norm": 30.655298580104557, + "learning_rate": 5.991434439396032e-06, + "loss": 0.091375732421875, + "step": 5565 + }, + { + "epoch": 0.048162142999195856, + "grad_norm": 3.796257391402833, + "learning_rate": 5.991419047983592e-06, + "loss": 0.159130859375, + "step": 5570 + }, + { + "epoch": 0.04820537652073912, + "grad_norm": 1.6794770747020618, + "learning_rate": 5.9914036427749905e-06, + "loss": 0.0536285400390625, + "step": 5575 + }, + { + "epoch": 0.04824861004228238, + "grad_norm": 85.18997877062215, + "learning_rate": 5.9913882237703e-06, + "loss": 0.35983123779296877, + "step": 5580 + }, + { + "epoch": 0.04829184356382565, + "grad_norm": 5.5781935839328805, + "learning_rate": 5.991372790969592e-06, + "loss": 0.24957122802734374, + "step": 5585 + }, + { + "epoch": 0.04833507708536891, + "grad_norm": 20.93719654481842, + "learning_rate": 5.991357344372936e-06, + "loss": 0.40439929962158205, + "step": 5590 + }, + { + "epoch": 0.048378310606912174, + "grad_norm": 7.895897859932411, + "learning_rate": 5.991341883980405e-06, + "loss": 0.3046173095703125, + "step": 5595 + }, + { + "epoch": 0.04842154412845544, + "grad_norm": 0.19736791597281267, + "learning_rate": 5.99132640979207e-06, + "loss": 0.32474365234375, + "step": 5600 + }, + { + "epoch": 0.0484647776499987, + "grad_norm": 5.86574809855355, + "learning_rate": 5.9913109218080015e-06, + "loss": 0.286871337890625, + "step": 5605 + }, + { + "epoch": 0.048508011171541965, + "grad_norm": 56.773430051067194, + "learning_rate": 5.99129542002827e-06, + "loss": 0.438348388671875, + "step": 5610 + }, + { + "epoch": 0.04855124469308523, + "grad_norm": 28.06264718518467, + "learning_rate": 5.99127990445295e-06, + "loss": 0.460107421875, + "step": 5615 + }, + { + "epoch": 0.04859447821462849, + "grad_norm": 25.442460470566175, + "learning_rate": 5.991264375082111e-06, + "loss": 0.29886703491210936, + "step": 5620 + }, + { + "epoch": 0.048637711736171756, + "grad_norm": 36.72605380814695, + "learning_rate": 5.991248831915824e-06, + "loss": 0.162896728515625, + "step": 5625 + }, + { + "epoch": 0.04868094525771502, + "grad_norm": 8.2400746206033, + "learning_rate": 5.991233274954163e-06, + "loss": 0.1352203369140625, + "step": 5630 + }, + { + "epoch": 0.04872417877925828, + "grad_norm": 4.048490252786663, + "learning_rate": 5.991217704197198e-06, + "loss": 0.32560272216796876, + "step": 5635 + }, + { + "epoch": 0.048767412300801546, + "grad_norm": 24.272113959505415, + "learning_rate": 5.991202119645001e-06, + "loss": 0.2224578857421875, + "step": 5640 + }, + { + "epoch": 0.04881064582234481, + "grad_norm": 5.244722339456694, + "learning_rate": 5.991186521297645e-06, + "loss": 0.14744338989257813, + "step": 5645 + }, + { + "epoch": 0.048853879343888074, + "grad_norm": 3.431806786365262, + "learning_rate": 5.9911709091552e-06, + "loss": 0.08762283325195312, + "step": 5650 + }, + { + "epoch": 0.048897112865431344, + "grad_norm": 37.52484072809897, + "learning_rate": 5.99115528321774e-06, + "loss": 0.6543914794921875, + "step": 5655 + }, + { + "epoch": 0.04894034638697461, + "grad_norm": 49.73684596146965, + "learning_rate": 5.991139643485335e-06, + "loss": 0.172369384765625, + "step": 5660 + }, + { + "epoch": 0.04898357990851787, + "grad_norm": 32.99394498138074, + "learning_rate": 5.991123989958059e-06, + "loss": 0.23685989379882813, + "step": 5665 + }, + { + "epoch": 0.049026813430061135, + "grad_norm": 3.7188061968181128, + "learning_rate": 5.991108322635983e-06, + "loss": 0.259844970703125, + "step": 5670 + }, + { + "epoch": 0.0490700469516044, + "grad_norm": 14.523530039604768, + "learning_rate": 5.991092641519181e-06, + "loss": 0.23304443359375, + "step": 5675 + }, + { + "epoch": 0.04911328047314766, + "grad_norm": 9.10207422187583, + "learning_rate": 5.9910769466077226e-06, + "loss": 0.19621810913085938, + "step": 5680 + }, + { + "epoch": 0.049156513994690926, + "grad_norm": 48.702379244331674, + "learning_rate": 5.991061237901683e-06, + "loss": 0.35057373046875, + "step": 5685 + }, + { + "epoch": 0.04919974751623419, + "grad_norm": 7.504215385751538, + "learning_rate": 5.991045515401132e-06, + "loss": 0.2747520446777344, + "step": 5690 + }, + { + "epoch": 0.04924298103777745, + "grad_norm": 11.63539444006882, + "learning_rate": 5.991029779106144e-06, + "loss": 0.264178466796875, + "step": 5695 + }, + { + "epoch": 0.04928621455932072, + "grad_norm": 29.949125999766892, + "learning_rate": 5.991014029016791e-06, + "loss": 0.36942138671875, + "step": 5700 + }, + { + "epoch": 0.04932944808086398, + "grad_norm": 16.236158778376506, + "learning_rate": 5.990998265133144e-06, + "loss": 0.207623291015625, + "step": 5705 + }, + { + "epoch": 0.049372681602407244, + "grad_norm": 22.452961314403222, + "learning_rate": 5.99098248745528e-06, + "loss": 0.180450439453125, + "step": 5710 + }, + { + "epoch": 0.04941591512395051, + "grad_norm": 6.908966416463533, + "learning_rate": 5.990966695983267e-06, + "loss": 0.104534912109375, + "step": 5715 + }, + { + "epoch": 0.04945914864549377, + "grad_norm": 11.675084681068842, + "learning_rate": 5.9909508907171805e-06, + "loss": 0.1356597900390625, + "step": 5720 + }, + { + "epoch": 0.049502382167037035, + "grad_norm": 7.054240029042663, + "learning_rate": 5.990935071657093e-06, + "loss": 0.3472450256347656, + "step": 5725 + }, + { + "epoch": 0.0495456156885803, + "grad_norm": 6.455013016335319, + "learning_rate": 5.990919238803077e-06, + "loss": 0.1850341796875, + "step": 5730 + }, + { + "epoch": 0.04958884921012356, + "grad_norm": 16.41229722438304, + "learning_rate": 5.990903392155206e-06, + "loss": 0.22532958984375, + "step": 5735 + }, + { + "epoch": 0.049632082731666825, + "grad_norm": 46.73642047023574, + "learning_rate": 5.990887531713553e-06, + "loss": 0.37891845703125, + "step": 5740 + }, + { + "epoch": 0.04967531625321009, + "grad_norm": 17.265556344232913, + "learning_rate": 5.9908716574781904e-06, + "loss": 0.192144775390625, + "step": 5745 + }, + { + "epoch": 0.04971854977475335, + "grad_norm": 24.101344443667635, + "learning_rate": 5.990855769449192e-06, + "loss": 0.39649658203125, + "step": 5750 + }, + { + "epoch": 0.049761783296296616, + "grad_norm": 37.935740558840855, + "learning_rate": 5.990839867626631e-06, + "loss": 0.13017425537109376, + "step": 5755 + }, + { + "epoch": 0.04980501681783988, + "grad_norm": 0.3331078932993158, + "learning_rate": 5.990823952010581e-06, + "loss": 0.34348602294921876, + "step": 5760 + }, + { + "epoch": 0.04984825033938314, + "grad_norm": 6.999081598163134, + "learning_rate": 5.9908080226011155e-06, + "loss": 0.11233062744140625, + "step": 5765 + }, + { + "epoch": 0.04989148386092641, + "grad_norm": 2.876159723220014, + "learning_rate": 5.990792079398308e-06, + "loss": 0.07938613891601562, + "step": 5770 + }, + { + "epoch": 0.04993471738246967, + "grad_norm": 25.75930588637063, + "learning_rate": 5.99077612240223e-06, + "loss": 0.38751983642578125, + "step": 5775 + }, + { + "epoch": 0.049977950904012934, + "grad_norm": 32.87061502406354, + "learning_rate": 5.990760151612959e-06, + "loss": 0.20768394470214843, + "step": 5780 + }, + { + "epoch": 0.0500211844255562, + "grad_norm": 6.539087250072669, + "learning_rate": 5.990744167030565e-06, + "loss": 0.4195068359375, + "step": 5785 + }, + { + "epoch": 0.05006441794709946, + "grad_norm": 9.033683939340511, + "learning_rate": 5.990728168655124e-06, + "loss": 0.3314544677734375, + "step": 5790 + }, + { + "epoch": 0.050107651468642725, + "grad_norm": 2.695604992094483, + "learning_rate": 5.990712156486708e-06, + "loss": 0.099456787109375, + "step": 5795 + }, + { + "epoch": 0.05015088499018599, + "grad_norm": 17.42130366673752, + "learning_rate": 5.990696130525393e-06, + "loss": 0.20919189453125, + "step": 5800 + }, + { + "epoch": 0.05019411851172925, + "grad_norm": 1.0673074513614202, + "learning_rate": 5.990680090771251e-06, + "loss": 0.2626213073730469, + "step": 5805 + }, + { + "epoch": 0.050237352033272516, + "grad_norm": 5.848231562693053, + "learning_rate": 5.990664037224356e-06, + "loss": 0.229827880859375, + "step": 5810 + }, + { + "epoch": 0.05028058555481578, + "grad_norm": 15.091987364336436, + "learning_rate": 5.990647969884784e-06, + "loss": 0.31578369140625, + "step": 5815 + }, + { + "epoch": 0.05032381907635904, + "grad_norm": 24.29578666413523, + "learning_rate": 5.990631888752608e-06, + "loss": 0.1866485595703125, + "step": 5820 + }, + { + "epoch": 0.050367052597902306, + "grad_norm": 20.521440300290443, + "learning_rate": 5.990615793827901e-06, + "loss": 0.3220428466796875, + "step": 5825 + }, + { + "epoch": 0.05041028611944557, + "grad_norm": 9.115855009224546, + "learning_rate": 5.990599685110739e-06, + "loss": 0.3164794921875, + "step": 5830 + }, + { + "epoch": 0.050453519640988834, + "grad_norm": 3.456421160098619, + "learning_rate": 5.990583562601194e-06, + "loss": 0.0747650146484375, + "step": 5835 + }, + { + "epoch": 0.050496753162532104, + "grad_norm": 92.04441914495627, + "learning_rate": 5.990567426299343e-06, + "loss": 0.15792236328125, + "step": 5840 + }, + { + "epoch": 0.05053998668407537, + "grad_norm": 26.690643268695805, + "learning_rate": 5.990551276205259e-06, + "loss": 0.281494140625, + "step": 5845 + }, + { + "epoch": 0.05058322020561863, + "grad_norm": 20.133715926919965, + "learning_rate": 5.990535112319017e-06, + "loss": 0.120294189453125, + "step": 5850 + }, + { + "epoch": 0.050626453727161895, + "grad_norm": 5.611150561255582, + "learning_rate": 5.990518934640691e-06, + "loss": 0.334735107421875, + "step": 5855 + }, + { + "epoch": 0.05066968724870516, + "grad_norm": 6.666848445575557, + "learning_rate": 5.990502743170356e-06, + "loss": 0.421923828125, + "step": 5860 + }, + { + "epoch": 0.05071292077024842, + "grad_norm": 17.343028627451904, + "learning_rate": 5.990486537908087e-06, + "loss": 0.24091110229492188, + "step": 5865 + }, + { + "epoch": 0.050756154291791686, + "grad_norm": 1.7524377789082881, + "learning_rate": 5.990470318853958e-06, + "loss": 0.15697021484375, + "step": 5870 + }, + { + "epoch": 0.05079938781333495, + "grad_norm": 21.251502434567726, + "learning_rate": 5.9904540860080444e-06, + "loss": 0.233685302734375, + "step": 5875 + }, + { + "epoch": 0.05084262133487821, + "grad_norm": 23.841522966462893, + "learning_rate": 5.99043783937042e-06, + "loss": 0.3464862823486328, + "step": 5880 + }, + { + "epoch": 0.05088585485642148, + "grad_norm": 5.172520448601026, + "learning_rate": 5.990421578941162e-06, + "loss": 0.2395050048828125, + "step": 5885 + }, + { + "epoch": 0.05092908837796474, + "grad_norm": 21.457342101190452, + "learning_rate": 5.990405304720343e-06, + "loss": 0.2264129638671875, + "step": 5890 + }, + { + "epoch": 0.050972321899508004, + "grad_norm": 14.336523251792206, + "learning_rate": 5.9903890167080385e-06, + "loss": 0.434356689453125, + "step": 5895 + }, + { + "epoch": 0.05101555542105127, + "grad_norm": 2.160130519080903, + "learning_rate": 5.990372714904325e-06, + "loss": 0.16024627685546874, + "step": 5900 + }, + { + "epoch": 0.05105878894259453, + "grad_norm": 6.42332711034847, + "learning_rate": 5.990356399309276e-06, + "loss": 0.12638397216796876, + "step": 5905 + }, + { + "epoch": 0.051102022464137795, + "grad_norm": 14.21354876996501, + "learning_rate": 5.990340069922967e-06, + "loss": 0.12627716064453126, + "step": 5910 + }, + { + "epoch": 0.05114525598568106, + "grad_norm": 44.56353829143995, + "learning_rate": 5.990323726745475e-06, + "loss": 0.2958587646484375, + "step": 5915 + }, + { + "epoch": 0.05118848950722432, + "grad_norm": 10.442643249457534, + "learning_rate": 5.990307369776875e-06, + "loss": 0.277569580078125, + "step": 5920 + }, + { + "epoch": 0.051231723028767585, + "grad_norm": 10.766879159824855, + "learning_rate": 5.99029099901724e-06, + "loss": 0.214593505859375, + "step": 5925 + }, + { + "epoch": 0.05127495655031085, + "grad_norm": 2.093119637848589, + "learning_rate": 5.990274614466648e-06, + "loss": 0.278143310546875, + "step": 5930 + }, + { + "epoch": 0.05131819007185411, + "grad_norm": 34.12719065238651, + "learning_rate": 5.990258216125172e-06, + "loss": 0.3439849853515625, + "step": 5935 + }, + { + "epoch": 0.051361423593397376, + "grad_norm": 4.6050341016256215, + "learning_rate": 5.990241803992891e-06, + "loss": 0.33254852294921877, + "step": 5940 + }, + { + "epoch": 0.05140465711494064, + "grad_norm": 12.571190936327726, + "learning_rate": 5.990225378069879e-06, + "loss": 0.3219451904296875, + "step": 5945 + }, + { + "epoch": 0.0514478906364839, + "grad_norm": 19.123240521513377, + "learning_rate": 5.990208938356212e-06, + "loss": 0.6510658264160156, + "step": 5950 + }, + { + "epoch": 0.05149112415802717, + "grad_norm": 23.504920781271643, + "learning_rate": 5.9901924848519645e-06, + "loss": 0.606884765625, + "step": 5955 + }, + { + "epoch": 0.05153435767957043, + "grad_norm": 11.687394742668694, + "learning_rate": 5.990176017557214e-06, + "loss": 0.34431304931640627, + "step": 5960 + }, + { + "epoch": 0.051577591201113694, + "grad_norm": 5.332216391739885, + "learning_rate": 5.990159536472037e-06, + "loss": 0.298284912109375, + "step": 5965 + }, + { + "epoch": 0.05162082472265696, + "grad_norm": 10.163592623113177, + "learning_rate": 5.990143041596507e-06, + "loss": 0.2527008056640625, + "step": 5970 + }, + { + "epoch": 0.05166405824420022, + "grad_norm": 12.464352836350306, + "learning_rate": 5.990126532930702e-06, + "loss": 0.54053955078125, + "step": 5975 + }, + { + "epoch": 0.051707291765743485, + "grad_norm": 34.527004257059836, + "learning_rate": 5.990110010474698e-06, + "loss": 0.413232421875, + "step": 5980 + }, + { + "epoch": 0.05175052528728675, + "grad_norm": 15.635398806512002, + "learning_rate": 5.99009347422857e-06, + "loss": 0.23136215209960936, + "step": 5985 + }, + { + "epoch": 0.05179375880883001, + "grad_norm": 3.0785669935266453, + "learning_rate": 5.990076924192395e-06, + "loss": 0.13506927490234374, + "step": 5990 + }, + { + "epoch": 0.051836992330373276, + "grad_norm": 7.891222892419634, + "learning_rate": 5.99006036036625e-06, + "loss": 0.1238311767578125, + "step": 5995 + }, + { + "epoch": 0.05188022585191654, + "grad_norm": 57.738130951074915, + "learning_rate": 5.99004378275021e-06, + "loss": 0.1376983642578125, + "step": 6000 + }, + { + "epoch": 0.0519234593734598, + "grad_norm": 0.7079129787351767, + "learning_rate": 5.990027191344354e-06, + "loss": 0.2752525329589844, + "step": 6005 + }, + { + "epoch": 0.05196669289500307, + "grad_norm": 18.606738491616742, + "learning_rate": 5.990010586148756e-06, + "loss": 0.45446929931640623, + "step": 6010 + }, + { + "epoch": 0.05200992641654633, + "grad_norm": 3.25139458974645, + "learning_rate": 5.989993967163493e-06, + "loss": 0.11529083251953125, + "step": 6015 + }, + { + "epoch": 0.052053159938089594, + "grad_norm": 11.836717584086088, + "learning_rate": 5.989977334388642e-06, + "loss": 0.347796630859375, + "step": 6020 + }, + { + "epoch": 0.052096393459632864, + "grad_norm": 3.6793043761522886, + "learning_rate": 5.9899606878242796e-06, + "loss": 0.4477264404296875, + "step": 6025 + }, + { + "epoch": 0.05213962698117613, + "grad_norm": 6.146268959899052, + "learning_rate": 5.989944027470483e-06, + "loss": 0.2206727981567383, + "step": 6030 + }, + { + "epoch": 0.05218286050271939, + "grad_norm": 36.229264262959816, + "learning_rate": 5.9899273533273296e-06, + "loss": 0.13801002502441406, + "step": 6035 + }, + { + "epoch": 0.052226094024262655, + "grad_norm": 11.225451002221515, + "learning_rate": 5.9899106653948945e-06, + "loss": 0.50455322265625, + "step": 6040 + }, + { + "epoch": 0.05226932754580592, + "grad_norm": 81.69961420366981, + "learning_rate": 5.989893963673255e-06, + "loss": 0.33173828125, + "step": 6045 + }, + { + "epoch": 0.05231256106734918, + "grad_norm": 14.664648758279121, + "learning_rate": 5.98987724816249e-06, + "loss": 0.098052978515625, + "step": 6050 + }, + { + "epoch": 0.052355794588892446, + "grad_norm": 21.353530413421552, + "learning_rate": 5.989860518862675e-06, + "loss": 0.087689208984375, + "step": 6055 + }, + { + "epoch": 0.05239902811043571, + "grad_norm": 36.838708737217566, + "learning_rate": 5.989843775773888e-06, + "loss": 0.38897552490234377, + "step": 6060 + }, + { + "epoch": 0.05244226163197897, + "grad_norm": 29.927215055861414, + "learning_rate": 5.989827018896204e-06, + "loss": 0.1000457763671875, + "step": 6065 + }, + { + "epoch": 0.05248549515352224, + "grad_norm": 14.643229734092289, + "learning_rate": 5.989810248229703e-06, + "loss": 0.16366729736328126, + "step": 6070 + }, + { + "epoch": 0.0525287286750655, + "grad_norm": 8.28677780273019, + "learning_rate": 5.989793463774462e-06, + "loss": 0.157647705078125, + "step": 6075 + }, + { + "epoch": 0.052571962196608764, + "grad_norm": 3.453840867798241, + "learning_rate": 5.989776665530556e-06, + "loss": 0.43291549682617186, + "step": 6080 + }, + { + "epoch": 0.05261519571815203, + "grad_norm": 9.682277153386591, + "learning_rate": 5.989759853498066e-06, + "loss": 0.091192626953125, + "step": 6085 + }, + { + "epoch": 0.05265842923969529, + "grad_norm": 2.302759411758226, + "learning_rate": 5.989743027677067e-06, + "loss": 0.155096435546875, + "step": 6090 + }, + { + "epoch": 0.052701662761238555, + "grad_norm": 14.560025962372503, + "learning_rate": 5.989726188067637e-06, + "loss": 0.3204132080078125, + "step": 6095 + }, + { + "epoch": 0.05274489628278182, + "grad_norm": 18.88669447072718, + "learning_rate": 5.9897093346698555e-06, + "loss": 0.15635948181152343, + "step": 6100 + }, + { + "epoch": 0.05278812980432508, + "grad_norm": 78.30057303841335, + "learning_rate": 5.989692467483797e-06, + "loss": 0.19756507873535156, + "step": 6105 + }, + { + "epoch": 0.052831363325868345, + "grad_norm": 8.75111484381708, + "learning_rate": 5.989675586509541e-06, + "loss": 0.5751708984375, + "step": 6110 + }, + { + "epoch": 0.05287459684741161, + "grad_norm": 5.508384506538185, + "learning_rate": 5.989658691747167e-06, + "loss": 0.17559852600097656, + "step": 6115 + }, + { + "epoch": 0.05291783036895487, + "grad_norm": 6.28167687102444, + "learning_rate": 5.9896417831967495e-06, + "loss": 0.093548583984375, + "step": 6120 + }, + { + "epoch": 0.052961063890498136, + "grad_norm": 20.27877942137751, + "learning_rate": 5.989624860858369e-06, + "loss": 0.1420166015625, + "step": 6125 + }, + { + "epoch": 0.0530042974120414, + "grad_norm": 28.769402458993326, + "learning_rate": 5.989607924732102e-06, + "loss": 0.1720062255859375, + "step": 6130 + }, + { + "epoch": 0.05304753093358466, + "grad_norm": 24.46929913276026, + "learning_rate": 5.989590974818029e-06, + "loss": 0.3107269287109375, + "step": 6135 + }, + { + "epoch": 0.05309076445512793, + "grad_norm": 20.885928133833186, + "learning_rate": 5.989574011116226e-06, + "loss": 0.22763824462890625, + "step": 6140 + }, + { + "epoch": 0.05313399797667119, + "grad_norm": 32.93768721310352, + "learning_rate": 5.989557033626771e-06, + "loss": 0.42392578125, + "step": 6145 + }, + { + "epoch": 0.053177231498214454, + "grad_norm": 17.66786074767603, + "learning_rate": 5.9895400423497435e-06, + "loss": 0.432562255859375, + "step": 6150 + }, + { + "epoch": 0.05322046501975772, + "grad_norm": 6.967714139710072, + "learning_rate": 5.989523037285222e-06, + "loss": 0.10483474731445312, + "step": 6155 + }, + { + "epoch": 0.05326369854130098, + "grad_norm": 11.363356603441874, + "learning_rate": 5.989506018433284e-06, + "loss": 0.28016357421875, + "step": 6160 + }, + { + "epoch": 0.053306932062844245, + "grad_norm": 13.949979682513952, + "learning_rate": 5.989488985794008e-06, + "loss": 0.266229248046875, + "step": 6165 + }, + { + "epoch": 0.05335016558438751, + "grad_norm": 9.954585072214199, + "learning_rate": 5.989471939367473e-06, + "loss": 0.29259033203125, + "step": 6170 + }, + { + "epoch": 0.05339339910593077, + "grad_norm": 4.952254486905613, + "learning_rate": 5.989454879153759e-06, + "loss": 0.195562744140625, + "step": 6175 + }, + { + "epoch": 0.053436632627474036, + "grad_norm": 27.02204380859462, + "learning_rate": 5.989437805152942e-06, + "loss": 0.1536041259765625, + "step": 6180 + }, + { + "epoch": 0.0534798661490173, + "grad_norm": 1.287835419748995, + "learning_rate": 5.989420717365102e-06, + "loss": 0.2527374267578125, + "step": 6185 + }, + { + "epoch": 0.05352309967056056, + "grad_norm": 14.200024295715652, + "learning_rate": 5.989403615790319e-06, + "loss": 0.139532470703125, + "step": 6190 + }, + { + "epoch": 0.05356633319210383, + "grad_norm": 14.97194182202907, + "learning_rate": 5.989386500428669e-06, + "loss": 0.20598602294921875, + "step": 6195 + }, + { + "epoch": 0.05360956671364709, + "grad_norm": 16.89910745208828, + "learning_rate": 5.989369371280233e-06, + "loss": 0.30275726318359375, + "step": 6200 + }, + { + "epoch": 0.053652800235190354, + "grad_norm": 34.30553038781233, + "learning_rate": 5.989352228345091e-06, + "loss": 0.5645538330078125, + "step": 6205 + }, + { + "epoch": 0.053696033756733624, + "grad_norm": 4.427732570180271, + "learning_rate": 5.989335071623319e-06, + "loss": 0.4647216796875, + "step": 6210 + }, + { + "epoch": 0.05373926727827689, + "grad_norm": 12.497242718459741, + "learning_rate": 5.989317901114998e-06, + "loss": 0.248040771484375, + "step": 6215 + }, + { + "epoch": 0.05378250079982015, + "grad_norm": 29.69848614067171, + "learning_rate": 5.9893007168202075e-06, + "loss": 0.42506103515625, + "step": 6220 + }, + { + "epoch": 0.053825734321363415, + "grad_norm": 2.5711161866320364, + "learning_rate": 5.989283518739026e-06, + "loss": 0.105108642578125, + "step": 6225 + }, + { + "epoch": 0.05386896784290668, + "grad_norm": 9.550399303974382, + "learning_rate": 5.989266306871533e-06, + "loss": 0.267852783203125, + "step": 6230 + }, + { + "epoch": 0.05391220136444994, + "grad_norm": 4.753423026975241, + "learning_rate": 5.989249081217808e-06, + "loss": 0.1549346923828125, + "step": 6235 + }, + { + "epoch": 0.053955434885993206, + "grad_norm": 20.322147233441363, + "learning_rate": 5.989231841777931e-06, + "loss": 0.177325439453125, + "step": 6240 + }, + { + "epoch": 0.05399866840753647, + "grad_norm": 9.536558367370969, + "learning_rate": 5.98921458855198e-06, + "loss": 0.1953125, + "step": 6245 + }, + { + "epoch": 0.05404190192907973, + "grad_norm": 55.0044181598354, + "learning_rate": 5.989197321540036e-06, + "loss": 0.548486328125, + "step": 6250 + }, + { + "epoch": 0.054085135450623, + "grad_norm": 53.2801623543539, + "learning_rate": 5.989180040742178e-06, + "loss": 0.2865020751953125, + "step": 6255 + }, + { + "epoch": 0.05412836897216626, + "grad_norm": 3.523705196308838, + "learning_rate": 5.989162746158485e-06, + "loss": 0.38496551513671873, + "step": 6260 + }, + { + "epoch": 0.054171602493709524, + "grad_norm": 10.408438482692244, + "learning_rate": 5.989145437789039e-06, + "loss": 0.266949462890625, + "step": 6265 + }, + { + "epoch": 0.05421483601525279, + "grad_norm": 25.218065984650213, + "learning_rate": 5.989128115633917e-06, + "loss": 0.302099609375, + "step": 6270 + }, + { + "epoch": 0.05425806953679605, + "grad_norm": 59.19835419529653, + "learning_rate": 5.989110779693202e-06, + "loss": 0.2971649169921875, + "step": 6275 + }, + { + "epoch": 0.054301303058339315, + "grad_norm": 14.488576859298448, + "learning_rate": 5.98909342996697e-06, + "loss": 0.147906494140625, + "step": 6280 + }, + { + "epoch": 0.05434453657988258, + "grad_norm": 25.87072657069233, + "learning_rate": 5.989076066455305e-06, + "loss": 0.17337646484375, + "step": 6285 + }, + { + "epoch": 0.05438777010142584, + "grad_norm": 3.2541622669511834, + "learning_rate": 5.989058689158284e-06, + "loss": 0.1962677001953125, + "step": 6290 + }, + { + "epoch": 0.054431003622969105, + "grad_norm": 9.85044206372148, + "learning_rate": 5.989041298075989e-06, + "loss": 0.3478302001953125, + "step": 6295 + }, + { + "epoch": 0.05447423714451237, + "grad_norm": 8.232563762227075, + "learning_rate": 5.9890238932085e-06, + "loss": 0.17574462890625, + "step": 6300 + }, + { + "epoch": 0.05451747066605563, + "grad_norm": 12.046053565613068, + "learning_rate": 5.989006474555896e-06, + "loss": 0.1551116943359375, + "step": 6305 + }, + { + "epoch": 0.054560704187598896, + "grad_norm": 92.72066981419472, + "learning_rate": 5.988989042118259e-06, + "loss": 0.7409423828125, + "step": 6310 + }, + { + "epoch": 0.05460393770914216, + "grad_norm": 33.92850917082805, + "learning_rate": 5.988971595895669e-06, + "loss": 0.224169921875, + "step": 6315 + }, + { + "epoch": 0.054647171230685423, + "grad_norm": 11.977649469987155, + "learning_rate": 5.988954135888205e-06, + "loss": 0.092999267578125, + "step": 6320 + }, + { + "epoch": 0.05469040475222869, + "grad_norm": 14.489646128134678, + "learning_rate": 5.988936662095949e-06, + "loss": 0.110205078125, + "step": 6325 + }, + { + "epoch": 0.05473363827377195, + "grad_norm": 27.780938644007694, + "learning_rate": 5.988919174518981e-06, + "loss": 0.1559326171875, + "step": 6330 + }, + { + "epoch": 0.054776871795315214, + "grad_norm": 12.816357924783196, + "learning_rate": 5.988901673157383e-06, + "loss": 0.2402679443359375, + "step": 6335 + }, + { + "epoch": 0.05482010531685848, + "grad_norm": 21.53351294538774, + "learning_rate": 5.988884158011233e-06, + "loss": 0.201678466796875, + "step": 6340 + }, + { + "epoch": 0.05486333883840174, + "grad_norm": 1.488983398202143, + "learning_rate": 5.988866629080615e-06, + "loss": 0.0678802490234375, + "step": 6345 + }, + { + "epoch": 0.054906572359945005, + "grad_norm": 13.883651430799693, + "learning_rate": 5.988849086365607e-06, + "loss": 0.12684783935546876, + "step": 6350 + }, + { + "epoch": 0.05494980588148827, + "grad_norm": 7.654257661921595, + "learning_rate": 5.988831529866292e-06, + "loss": 0.3094602584838867, + "step": 6355 + }, + { + "epoch": 0.05499303940303153, + "grad_norm": 17.827806761619378, + "learning_rate": 5.98881395958275e-06, + "loss": 0.219140625, + "step": 6360 + }, + { + "epoch": 0.055036272924574796, + "grad_norm": 6.873752965897173, + "learning_rate": 5.9887963755150615e-06, + "loss": 0.16688232421875, + "step": 6365 + }, + { + "epoch": 0.05507950644611806, + "grad_norm": 3.7722471811502927, + "learning_rate": 5.988778777663308e-06, + "loss": 0.219549560546875, + "step": 6370 + }, + { + "epoch": 0.05512273996766132, + "grad_norm": 15.43533027352225, + "learning_rate": 5.988761166027572e-06, + "loss": 0.147735595703125, + "step": 6375 + }, + { + "epoch": 0.05516597348920459, + "grad_norm": 2.40434252892306, + "learning_rate": 5.988743540607932e-06, + "loss": 0.11253852844238281, + "step": 6380 + }, + { + "epoch": 0.05520920701074785, + "grad_norm": 25.816662764352493, + "learning_rate": 5.988725901404472e-06, + "loss": 0.2330596923828125, + "step": 6385 + }, + { + "epoch": 0.055252440532291114, + "grad_norm": 50.09758618927913, + "learning_rate": 5.988708248417272e-06, + "loss": 0.239404296875, + "step": 6390 + }, + { + "epoch": 0.055295674053834384, + "grad_norm": 21.878753003739572, + "learning_rate": 5.9886905816464144e-06, + "loss": 0.2404149055480957, + "step": 6395 + }, + { + "epoch": 0.05533890757537765, + "grad_norm": 4.9358826256992465, + "learning_rate": 5.988672901091979e-06, + "loss": 0.3874755859375, + "step": 6400 + }, + { + "epoch": 0.05538214109692091, + "grad_norm": 10.726212627780551, + "learning_rate": 5.9886552067540485e-06, + "loss": 0.7289901733398437, + "step": 6405 + }, + { + "epoch": 0.055425374618464175, + "grad_norm": 13.932212425860774, + "learning_rate": 5.988637498632704e-06, + "loss": 0.285955810546875, + "step": 6410 + }, + { + "epoch": 0.05546860814000744, + "grad_norm": 33.51283504373474, + "learning_rate": 5.988619776728028e-06, + "loss": 0.1954357147216797, + "step": 6415 + }, + { + "epoch": 0.0555118416615507, + "grad_norm": 34.686318075555384, + "learning_rate": 5.988602041040101e-06, + "loss": 0.2401580810546875, + "step": 6420 + }, + { + "epoch": 0.055555075183093966, + "grad_norm": 34.94094454698152, + "learning_rate": 5.988584291569006e-06, + "loss": 0.2020904541015625, + "step": 6425 + }, + { + "epoch": 0.05559830870463723, + "grad_norm": 3.6580438212795245, + "learning_rate": 5.988566528314825e-06, + "loss": 0.238165283203125, + "step": 6430 + }, + { + "epoch": 0.05564154222618049, + "grad_norm": 37.826115684209775, + "learning_rate": 5.988548751277638e-06, + "loss": 0.474310302734375, + "step": 6435 + }, + { + "epoch": 0.05568477574772376, + "grad_norm": 4.947330605450396, + "learning_rate": 5.988530960457527e-06, + "loss": 0.10198974609375, + "step": 6440 + }, + { + "epoch": 0.05572800926926702, + "grad_norm": 28.388981135690262, + "learning_rate": 5.988513155854578e-06, + "loss": 0.24854736328125, + "step": 6445 + }, + { + "epoch": 0.055771242790810284, + "grad_norm": 1.7248619685658226, + "learning_rate": 5.988495337468869e-06, + "loss": 0.28440132141113283, + "step": 6450 + }, + { + "epoch": 0.05581447631235355, + "grad_norm": 14.167829876542866, + "learning_rate": 5.9884775053004845e-06, + "loss": 0.06775741577148438, + "step": 6455 + }, + { + "epoch": 0.05585770983389681, + "grad_norm": 5.566863724977187, + "learning_rate": 5.988459659349505e-06, + "loss": 0.19974365234375, + "step": 6460 + }, + { + "epoch": 0.055900943355440075, + "grad_norm": 8.130881264653238, + "learning_rate": 5.988441799616014e-06, + "loss": 0.12739906311035157, + "step": 6465 + }, + { + "epoch": 0.05594417687698334, + "grad_norm": 3.2425408778778255, + "learning_rate": 5.988423926100093e-06, + "loss": 0.1770050048828125, + "step": 6470 + }, + { + "epoch": 0.0559874103985266, + "grad_norm": 6.658613393337649, + "learning_rate": 5.988406038801825e-06, + "loss": 0.112188720703125, + "step": 6475 + }, + { + "epoch": 0.056030643920069866, + "grad_norm": 6.865032319833133, + "learning_rate": 5.988388137721293e-06, + "loss": 0.11175880432128907, + "step": 6480 + }, + { + "epoch": 0.05607387744161313, + "grad_norm": 2.341715209463163, + "learning_rate": 5.988370222858578e-06, + "loss": 0.602081298828125, + "step": 6485 + }, + { + "epoch": 0.05611711096315639, + "grad_norm": 4.968692743882861, + "learning_rate": 5.988352294213764e-06, + "loss": 0.3009361267089844, + "step": 6490 + }, + { + "epoch": 0.056160344484699656, + "grad_norm": 46.55127239761504, + "learning_rate": 5.988334351786933e-06, + "loss": 0.255133056640625, + "step": 6495 + }, + { + "epoch": 0.05620357800624292, + "grad_norm": 11.785904801307504, + "learning_rate": 5.98831639557817e-06, + "loss": 0.299163818359375, + "step": 6500 + }, + { + "epoch": 0.056246811527786184, + "grad_norm": 15.29892430545126, + "learning_rate": 5.988298425587553e-06, + "loss": 0.531341552734375, + "step": 6505 + }, + { + "epoch": 0.05629004504932945, + "grad_norm": 1.558076322544612, + "learning_rate": 5.988280441815169e-06, + "loss": 0.152618408203125, + "step": 6510 + }, + { + "epoch": 0.05633327857087271, + "grad_norm": 0.6605780861750016, + "learning_rate": 5.9882624442611e-06, + "loss": 0.5343704223632812, + "step": 6515 + }, + { + "epoch": 0.056376512092415974, + "grad_norm": 1.1336219340660192, + "learning_rate": 5.988244432925428e-06, + "loss": 0.023590087890625, + "step": 6520 + }, + { + "epoch": 0.05641974561395924, + "grad_norm": 7.666118900635384, + "learning_rate": 5.9882264078082374e-06, + "loss": 0.4462249755859375, + "step": 6525 + }, + { + "epoch": 0.0564629791355025, + "grad_norm": 26.188034270372185, + "learning_rate": 5.9882083689096104e-06, + "loss": 0.3285179138183594, + "step": 6530 + }, + { + "epoch": 0.056506212657045765, + "grad_norm": 7.892665571560003, + "learning_rate": 5.9881903162296295e-06, + "loss": 0.234942626953125, + "step": 6535 + }, + { + "epoch": 0.05654944617858903, + "grad_norm": 49.56403589455168, + "learning_rate": 5.98817224976838e-06, + "loss": 0.3453369140625, + "step": 6540 + }, + { + "epoch": 0.05659267970013229, + "grad_norm": 2.688103460661735, + "learning_rate": 5.9881541695259446e-06, + "loss": 0.22970733642578126, + "step": 6545 + }, + { + "epoch": 0.056635913221675556, + "grad_norm": 1.3460850039957284, + "learning_rate": 5.988136075502405e-06, + "loss": 0.119464111328125, + "step": 6550 + }, + { + "epoch": 0.05667914674321882, + "grad_norm": 3.4002973631576126, + "learning_rate": 5.988117967697847e-06, + "loss": 0.0648193359375, + "step": 6555 + }, + { + "epoch": 0.05672238026476208, + "grad_norm": 45.31446473736835, + "learning_rate": 5.988099846112351e-06, + "loss": 0.541552734375, + "step": 6560 + }, + { + "epoch": 0.05676561378630535, + "grad_norm": 39.6113286291308, + "learning_rate": 5.988081710746004e-06, + "loss": 0.329705810546875, + "step": 6565 + }, + { + "epoch": 0.05680884730784861, + "grad_norm": 9.995253600749033, + "learning_rate": 5.9880635615988885e-06, + "loss": 0.2397979736328125, + "step": 6570 + }, + { + "epoch": 0.056852080829391874, + "grad_norm": 24.53965571501581, + "learning_rate": 5.988045398671086e-06, + "loss": 0.14229583740234375, + "step": 6575 + }, + { + "epoch": 0.056895314350935144, + "grad_norm": 5.675180309403652, + "learning_rate": 5.988027221962684e-06, + "loss": 0.211798095703125, + "step": 6580 + }, + { + "epoch": 0.05693854787247841, + "grad_norm": 30.25179223003392, + "learning_rate": 5.988009031473765e-06, + "loss": 0.16822509765625, + "step": 6585 + }, + { + "epoch": 0.05698178139402167, + "grad_norm": 6.359835139849915, + "learning_rate": 5.987990827204411e-06, + "loss": 0.207879638671875, + "step": 6590 + }, + { + "epoch": 0.057025014915564935, + "grad_norm": 7.359896503008147, + "learning_rate": 5.987972609154707e-06, + "loss": 0.149853515625, + "step": 6595 + }, + { + "epoch": 0.0570682484371082, + "grad_norm": 33.118384097913214, + "learning_rate": 5.987954377324738e-06, + "loss": 0.5269979476928711, + "step": 6600 + }, + { + "epoch": 0.05711148195865146, + "grad_norm": 9.572802900849098, + "learning_rate": 5.987936131714588e-06, + "loss": 0.11392364501953126, + "step": 6605 + }, + { + "epoch": 0.057154715480194726, + "grad_norm": 7.410497833580914, + "learning_rate": 5.98791787232434e-06, + "loss": 0.4846343994140625, + "step": 6610 + }, + { + "epoch": 0.05719794900173799, + "grad_norm": 25.396422971206746, + "learning_rate": 5.987899599154079e-06, + "loss": 0.5823226928710937, + "step": 6615 + }, + { + "epoch": 0.05724118252328125, + "grad_norm": 7.183569472488958, + "learning_rate": 5.987881312203889e-06, + "loss": 0.49287261962890627, + "step": 6620 + }, + { + "epoch": 0.05728441604482452, + "grad_norm": 20.05153223486897, + "learning_rate": 5.9878630114738544e-06, + "loss": 0.1078155517578125, + "step": 6625 + }, + { + "epoch": 0.05732764956636778, + "grad_norm": 4.373601906320577, + "learning_rate": 5.987844696964059e-06, + "loss": 0.09884796142578126, + "step": 6630 + }, + { + "epoch": 0.057370883087911044, + "grad_norm": 20.69080387017889, + "learning_rate": 5.9878263686745885e-06, + "loss": 0.2669036865234375, + "step": 6635 + }, + { + "epoch": 0.05741411660945431, + "grad_norm": 0.5990554918564157, + "learning_rate": 5.987808026605527e-06, + "loss": 0.122076416015625, + "step": 6640 + }, + { + "epoch": 0.05745735013099757, + "grad_norm": 7.5496614486680995, + "learning_rate": 5.987789670756958e-06, + "loss": 0.34996337890625, + "step": 6645 + }, + { + "epoch": 0.057500583652540835, + "grad_norm": 27.21577802964631, + "learning_rate": 5.987771301128969e-06, + "loss": 0.23111572265625, + "step": 6650 + }, + { + "epoch": 0.0575438171740841, + "grad_norm": 10.389946843376281, + "learning_rate": 5.98775291772164e-06, + "loss": 0.1244140625, + "step": 6655 + }, + { + "epoch": 0.05758705069562736, + "grad_norm": 23.32313712845842, + "learning_rate": 5.987734520535061e-06, + "loss": 0.19346923828125, + "step": 6660 + }, + { + "epoch": 0.057630284217170626, + "grad_norm": 2.0081145734738715, + "learning_rate": 5.987716109569313e-06, + "loss": 0.1716217041015625, + "step": 6665 + }, + { + "epoch": 0.05767351773871389, + "grad_norm": 6.74157986532923, + "learning_rate": 5.987697684824482e-06, + "loss": 0.1510498046875, + "step": 6670 + }, + { + "epoch": 0.05771675126025715, + "grad_norm": 5.714290066465693, + "learning_rate": 5.987679246300654e-06, + "loss": 0.143511962890625, + "step": 6675 + }, + { + "epoch": 0.057759984781800416, + "grad_norm": 6.723565972266086, + "learning_rate": 5.987660793997913e-06, + "loss": 0.21044464111328126, + "step": 6680 + }, + { + "epoch": 0.05780321830334368, + "grad_norm": 5.920591091062968, + "learning_rate": 5.987642327916345e-06, + "loss": 0.0677642822265625, + "step": 6685 + }, + { + "epoch": 0.057846451824886944, + "grad_norm": 6.244442320879515, + "learning_rate": 5.987623848056034e-06, + "loss": 0.37908935546875, + "step": 6690 + }, + { + "epoch": 0.05788968534643021, + "grad_norm": 12.660404752844551, + "learning_rate": 5.987605354417066e-06, + "loss": 0.34984130859375, + "step": 6695 + }, + { + "epoch": 0.05793291886797347, + "grad_norm": 1.0027230201895923, + "learning_rate": 5.987586846999526e-06, + "loss": 0.1604644775390625, + "step": 6700 + }, + { + "epoch": 0.057976152389516734, + "grad_norm": 1.582773853866542, + "learning_rate": 5.9875683258035e-06, + "loss": 0.30054244995117185, + "step": 6705 + }, + { + "epoch": 0.05801938591106, + "grad_norm": 51.22404883002378, + "learning_rate": 5.987549790829072e-06, + "loss": 0.5071060180664062, + "step": 6710 + }, + { + "epoch": 0.05806261943260326, + "grad_norm": 15.883645357779635, + "learning_rate": 5.987531242076329e-06, + "loss": 0.4895347595214844, + "step": 6715 + }, + { + "epoch": 0.058105852954146525, + "grad_norm": 1.4274392355752579, + "learning_rate": 5.987512679545356e-06, + "loss": 0.0433135986328125, + "step": 6720 + }, + { + "epoch": 0.05814908647568979, + "grad_norm": 0.993599524351508, + "learning_rate": 5.987494103236238e-06, + "loss": 0.38721923828125, + "step": 6725 + }, + { + "epoch": 0.05819231999723305, + "grad_norm": 0.6511565287274389, + "learning_rate": 5.98747551314906e-06, + "loss": 0.662127685546875, + "step": 6730 + }, + { + "epoch": 0.058235553518776316, + "grad_norm": 5.178141959265238, + "learning_rate": 5.987456909283911e-06, + "loss": 0.17806396484375, + "step": 6735 + }, + { + "epoch": 0.05827878704031958, + "grad_norm": 26.16033272482549, + "learning_rate": 5.987438291640874e-06, + "loss": 0.2427764892578125, + "step": 6740 + }, + { + "epoch": 0.05832202056186284, + "grad_norm": 6.203256493219313, + "learning_rate": 5.987419660220036e-06, + "loss": 0.064752197265625, + "step": 6745 + }, + { + "epoch": 0.05836525408340611, + "grad_norm": 5.943486265142193, + "learning_rate": 5.987401015021482e-06, + "loss": 0.3089111328125, + "step": 6750 + }, + { + "epoch": 0.05840848760494937, + "grad_norm": 28.315719675106966, + "learning_rate": 5.987382356045298e-06, + "loss": 0.12679443359375, + "step": 6755 + }, + { + "epoch": 0.058451721126492634, + "grad_norm": 3.5145006028764265, + "learning_rate": 5.987363683291571e-06, + "loss": 0.315435791015625, + "step": 6760 + }, + { + "epoch": 0.058494954648035904, + "grad_norm": 18.248146841860038, + "learning_rate": 5.987344996760387e-06, + "loss": 0.0624755859375, + "step": 6765 + }, + { + "epoch": 0.05853818816957917, + "grad_norm": 21.285428468480514, + "learning_rate": 5.987326296451832e-06, + "loss": 0.179833984375, + "step": 6770 + }, + { + "epoch": 0.05858142169112243, + "grad_norm": 12.82691495046103, + "learning_rate": 5.98730758236599e-06, + "loss": 0.2126220703125, + "step": 6775 + }, + { + "epoch": 0.058624655212665695, + "grad_norm": 145.97420612374668, + "learning_rate": 5.987288854502952e-06, + "loss": 0.243115234375, + "step": 6780 + }, + { + "epoch": 0.05866788873420896, + "grad_norm": 15.591003166882542, + "learning_rate": 5.987270112862801e-06, + "loss": 0.226019287109375, + "step": 6785 + }, + { + "epoch": 0.05871112225575222, + "grad_norm": 30.74098427640111, + "learning_rate": 5.987251357445624e-06, + "loss": 0.3072364807128906, + "step": 6790 + }, + { + "epoch": 0.058754355777295486, + "grad_norm": 5.712587277748757, + "learning_rate": 5.987232588251508e-06, + "loss": 0.1674713134765625, + "step": 6795 + }, + { + "epoch": 0.05879758929883875, + "grad_norm": 14.952493813571305, + "learning_rate": 5.987213805280538e-06, + "loss": 0.5604522705078125, + "step": 6800 + }, + { + "epoch": 0.05884082282038201, + "grad_norm": 25.14682960903301, + "learning_rate": 5.987195008532803e-06, + "loss": 0.3945472717285156, + "step": 6805 + }, + { + "epoch": 0.05888405634192528, + "grad_norm": 7.88279068010645, + "learning_rate": 5.9871761980083885e-06, + "loss": 0.2494274139404297, + "step": 6810 + }, + { + "epoch": 0.05892728986346854, + "grad_norm": 5.493631558737745, + "learning_rate": 5.987157373707381e-06, + "loss": 0.180548095703125, + "step": 6815 + }, + { + "epoch": 0.058970523385011804, + "grad_norm": 18.14276630940938, + "learning_rate": 5.987138535629868e-06, + "loss": 0.222314453125, + "step": 6820 + }, + { + "epoch": 0.05901375690655507, + "grad_norm": 66.52483653283005, + "learning_rate": 5.987119683775936e-06, + "loss": 0.2741706848144531, + "step": 6825 + }, + { + "epoch": 0.05905699042809833, + "grad_norm": 44.7318085267996, + "learning_rate": 5.987100818145672e-06, + "loss": 0.6509689331054688, + "step": 6830 + }, + { + "epoch": 0.059100223949641595, + "grad_norm": 5.69876063316472, + "learning_rate": 5.987081938739163e-06, + "loss": 0.18264923095703126, + "step": 6835 + }, + { + "epoch": 0.05914345747118486, + "grad_norm": 29.796075709296517, + "learning_rate": 5.987063045556496e-06, + "loss": 0.1426422119140625, + "step": 6840 + }, + { + "epoch": 0.05918669099272812, + "grad_norm": 36.41809876198827, + "learning_rate": 5.987044138597757e-06, + "loss": 0.2724334716796875, + "step": 6845 + }, + { + "epoch": 0.059229924514271386, + "grad_norm": 21.916121016433063, + "learning_rate": 5.987025217863036e-06, + "loss": 0.226092529296875, + "step": 6850 + }, + { + "epoch": 0.05927315803581465, + "grad_norm": 6.518853428186534, + "learning_rate": 5.987006283352419e-06, + "loss": 0.44632568359375, + "step": 6855 + }, + { + "epoch": 0.05931639155735791, + "grad_norm": 29.580300541759886, + "learning_rate": 5.9869873350659906e-06, + "loss": 0.278997802734375, + "step": 6860 + }, + { + "epoch": 0.059359625078901176, + "grad_norm": 37.286773575844315, + "learning_rate": 5.986968373003842e-06, + "loss": 0.164996337890625, + "step": 6865 + }, + { + "epoch": 0.05940285860044444, + "grad_norm": 2.625162623430265, + "learning_rate": 5.98694939716606e-06, + "loss": 0.1907470703125, + "step": 6870 + }, + { + "epoch": 0.059446092121987704, + "grad_norm": 15.547590460689753, + "learning_rate": 5.98693040755273e-06, + "loss": 0.4313232421875, + "step": 6875 + }, + { + "epoch": 0.05948932564353097, + "grad_norm": 1.0281902766336222, + "learning_rate": 5.986911404163941e-06, + "loss": 0.67850341796875, + "step": 6880 + }, + { + "epoch": 0.05953255916507423, + "grad_norm": 75.7648504675348, + "learning_rate": 5.986892386999781e-06, + "loss": 0.32578125, + "step": 6885 + }, + { + "epoch": 0.059575792686617494, + "grad_norm": 34.03617190586413, + "learning_rate": 5.986873356060337e-06, + "loss": 0.19498367309570314, + "step": 6890 + }, + { + "epoch": 0.05961902620816076, + "grad_norm": 24.739711735986763, + "learning_rate": 5.986854311345697e-06, + "loss": 0.410009765625, + "step": 6895 + }, + { + "epoch": 0.05966225972970402, + "grad_norm": 53.96737715209798, + "learning_rate": 5.986835252855949e-06, + "loss": 0.27275772094726564, + "step": 6900 + }, + { + "epoch": 0.059705493251247285, + "grad_norm": 2.329567918885942, + "learning_rate": 5.986816180591181e-06, + "loss": 0.3850341796875, + "step": 6905 + }, + { + "epoch": 0.05974872677279055, + "grad_norm": 18.96279654697661, + "learning_rate": 5.98679709455148e-06, + "loss": 0.27756195068359374, + "step": 6910 + }, + { + "epoch": 0.05979196029433381, + "grad_norm": 23.29515341785764, + "learning_rate": 5.986777994736935e-06, + "loss": 0.216351318359375, + "step": 6915 + }, + { + "epoch": 0.059835193815877076, + "grad_norm": 21.150072346925423, + "learning_rate": 5.986758881147635e-06, + "loss": 0.66226806640625, + "step": 6920 + }, + { + "epoch": 0.05987842733742034, + "grad_norm": 5.61651434909479, + "learning_rate": 5.986739753783666e-06, + "loss": 0.2956756591796875, + "step": 6925 + }, + { + "epoch": 0.0599216608589636, + "grad_norm": 11.789469191039933, + "learning_rate": 5.986720612645116e-06, + "loss": 0.1623291015625, + "step": 6930 + }, + { + "epoch": 0.05996489438050687, + "grad_norm": 24.06345226237578, + "learning_rate": 5.986701457732076e-06, + "loss": 0.1104766845703125, + "step": 6935 + }, + { + "epoch": 0.06000812790205013, + "grad_norm": 16.36031593574522, + "learning_rate": 5.986682289044632e-06, + "loss": 0.549591064453125, + "step": 6940 + }, + { + "epoch": 0.060051361423593394, + "grad_norm": 9.408094751217053, + "learning_rate": 5.9866631065828734e-06, + "loss": 0.38217697143554685, + "step": 6945 + }, + { + "epoch": 0.060094594945136665, + "grad_norm": 3.597454936042768, + "learning_rate": 5.986643910346889e-06, + "loss": 0.27681121826171873, + "step": 6950 + }, + { + "epoch": 0.06013782846667993, + "grad_norm": 4.625601450259682, + "learning_rate": 5.986624700336766e-06, + "loss": 0.801318359375, + "step": 6955 + }, + { + "epoch": 0.06018106198822319, + "grad_norm": 4.10609376882263, + "learning_rate": 5.9866054765525945e-06, + "loss": 0.070257568359375, + "step": 6960 + }, + { + "epoch": 0.060224295509766455, + "grad_norm": 61.41414931586364, + "learning_rate": 5.986586238994461e-06, + "loss": 0.3594825744628906, + "step": 6965 + }, + { + "epoch": 0.06026752903130972, + "grad_norm": 4.943386454808211, + "learning_rate": 5.986566987662457e-06, + "loss": 0.20334014892578126, + "step": 6970 + }, + { + "epoch": 0.06031076255285298, + "grad_norm": 14.68780913656551, + "learning_rate": 5.98654772255667e-06, + "loss": 0.21892776489257812, + "step": 6975 + }, + { + "epoch": 0.060353996074396246, + "grad_norm": 50.813384932269, + "learning_rate": 5.986528443677189e-06, + "loss": 0.364208984375, + "step": 6980 + }, + { + "epoch": 0.06039722959593951, + "grad_norm": 5.3109431334703405, + "learning_rate": 5.9865091510241015e-06, + "loss": 0.06706695556640625, + "step": 6985 + }, + { + "epoch": 0.06044046311748277, + "grad_norm": 55.70863203241484, + "learning_rate": 5.986489844597498e-06, + "loss": 0.3051422119140625, + "step": 6990 + }, + { + "epoch": 0.06048369663902604, + "grad_norm": 25.196473546276412, + "learning_rate": 5.986470524397467e-06, + "loss": 0.18998565673828124, + "step": 6995 + }, + { + "epoch": 0.0605269301605693, + "grad_norm": 31.688826341215126, + "learning_rate": 5.9864511904241e-06, + "loss": 0.232672119140625, + "step": 7000 + }, + { + "epoch": 0.060570163682112564, + "grad_norm": 6.808285719926633, + "learning_rate": 5.986431842677482e-06, + "loss": 0.100921630859375, + "step": 7005 + }, + { + "epoch": 0.06061339720365583, + "grad_norm": 16.482299816223353, + "learning_rate": 5.986412481157705e-06, + "loss": 0.1092529296875, + "step": 7010 + }, + { + "epoch": 0.06065663072519909, + "grad_norm": 23.18110408568205, + "learning_rate": 5.986393105864856e-06, + "loss": 0.13861541748046874, + "step": 7015 + }, + { + "epoch": 0.060699864246742355, + "grad_norm": 25.157963999325688, + "learning_rate": 5.986373716799027e-06, + "loss": 0.2788177490234375, + "step": 7020 + }, + { + "epoch": 0.06074309776828562, + "grad_norm": 33.999262810071805, + "learning_rate": 5.986354313960307e-06, + "loss": 0.242279052734375, + "step": 7025 + }, + { + "epoch": 0.06078633128982888, + "grad_norm": 65.09712883137937, + "learning_rate": 5.986334897348784e-06, + "loss": 0.388092041015625, + "step": 7030 + }, + { + "epoch": 0.060829564811372146, + "grad_norm": 4.370919860570539, + "learning_rate": 5.986315466964549e-06, + "loss": 0.026455497741699217, + "step": 7035 + }, + { + "epoch": 0.06087279833291541, + "grad_norm": 33.40613728840607, + "learning_rate": 5.98629602280769e-06, + "loss": 0.101605224609375, + "step": 7040 + }, + { + "epoch": 0.06091603185445867, + "grad_norm": 30.85163370106887, + "learning_rate": 5.986276564878298e-06, + "loss": 0.1306488037109375, + "step": 7045 + }, + { + "epoch": 0.060959265376001937, + "grad_norm": 14.359311766121019, + "learning_rate": 5.986257093176462e-06, + "loss": 0.1797882080078125, + "step": 7050 + }, + { + "epoch": 0.0610024988975452, + "grad_norm": 6.478021710359545, + "learning_rate": 5.986237607702273e-06, + "loss": 0.06270790100097656, + "step": 7055 + }, + { + "epoch": 0.061045732419088464, + "grad_norm": 8.691867697198159, + "learning_rate": 5.986218108455819e-06, + "loss": 0.117437744140625, + "step": 7060 + }, + { + "epoch": 0.06108896594063173, + "grad_norm": 12.585726046073168, + "learning_rate": 5.9861985954371915e-06, + "loss": 0.563623046875, + "step": 7065 + }, + { + "epoch": 0.06113219946217499, + "grad_norm": 1.013361214406276, + "learning_rate": 5.9861790686464795e-06, + "loss": 0.2742828369140625, + "step": 7070 + }, + { + "epoch": 0.061175432983718254, + "grad_norm": 7.470681662388342, + "learning_rate": 5.986159528083774e-06, + "loss": 0.0541259765625, + "step": 7075 + }, + { + "epoch": 0.06121866650526152, + "grad_norm": 2.954116502314928, + "learning_rate": 5.986139973749165e-06, + "loss": 0.133001708984375, + "step": 7080 + }, + { + "epoch": 0.06126190002680478, + "grad_norm": 9.26983426256546, + "learning_rate": 5.986120405642742e-06, + "loss": 0.1304534912109375, + "step": 7085 + }, + { + "epoch": 0.061305133548348045, + "grad_norm": 1.3874216999060267, + "learning_rate": 5.986100823764595e-06, + "loss": 0.27458343505859373, + "step": 7090 + }, + { + "epoch": 0.06134836706989131, + "grad_norm": 1.820763691447704, + "learning_rate": 5.9860812281148156e-06, + "loss": 0.3466217041015625, + "step": 7095 + }, + { + "epoch": 0.06139160059143457, + "grad_norm": 4.314446662826769, + "learning_rate": 5.986061618693493e-06, + "loss": 0.1471893310546875, + "step": 7100 + }, + { + "epoch": 0.061434834112977836, + "grad_norm": 33.767013850313, + "learning_rate": 5.986041995500718e-06, + "loss": 0.29635162353515626, + "step": 7105 + }, + { + "epoch": 0.0614780676345211, + "grad_norm": 13.386508595683607, + "learning_rate": 5.986022358536581e-06, + "loss": 0.1512786865234375, + "step": 7110 + }, + { + "epoch": 0.06152130115606436, + "grad_norm": 11.567196995665277, + "learning_rate": 5.986002707801173e-06, + "loss": 0.29996337890625, + "step": 7115 + }, + { + "epoch": 0.06156453467760763, + "grad_norm": 5.706447739307616, + "learning_rate": 5.985983043294585e-06, + "loss": 0.2516754150390625, + "step": 7120 + }, + { + "epoch": 0.06160776819915089, + "grad_norm": 6.085139043805599, + "learning_rate": 5.985963365016906e-06, + "loss": 0.17459716796875, + "step": 7125 + }, + { + "epoch": 0.061651001720694154, + "grad_norm": 48.97016430493129, + "learning_rate": 5.985943672968228e-06, + "loss": 0.5583770751953125, + "step": 7130 + }, + { + "epoch": 0.061694235242237425, + "grad_norm": 9.838540816449404, + "learning_rate": 5.985923967148642e-06, + "loss": 0.108306884765625, + "step": 7135 + }, + { + "epoch": 0.06173746876378069, + "grad_norm": 2.838947599119657, + "learning_rate": 5.985904247558238e-06, + "loss": 0.07516517639160156, + "step": 7140 + }, + { + "epoch": 0.06178070228532395, + "grad_norm": 25.478810691458673, + "learning_rate": 5.985884514197108e-06, + "loss": 0.1749725341796875, + "step": 7145 + }, + { + "epoch": 0.061823935806867215, + "grad_norm": 11.331124858032952, + "learning_rate": 5.985864767065342e-06, + "loss": 0.056810760498046876, + "step": 7150 + }, + { + "epoch": 0.06186716932841048, + "grad_norm": 5.329592813871143, + "learning_rate": 5.985845006163033e-06, + "loss": 0.0609161376953125, + "step": 7155 + }, + { + "epoch": 0.06191040284995374, + "grad_norm": 30.877565197463802, + "learning_rate": 5.985825231490269e-06, + "loss": 0.31368255615234375, + "step": 7160 + }, + { + "epoch": 0.061953636371497006, + "grad_norm": 4.459888853257102, + "learning_rate": 5.985805443047142e-06, + "loss": 0.355816650390625, + "step": 7165 + }, + { + "epoch": 0.06199686989304027, + "grad_norm": 10.842117633767222, + "learning_rate": 5.985785640833747e-06, + "loss": 0.152923583984375, + "step": 7170 + }, + { + "epoch": 0.06204010341458353, + "grad_norm": 47.72719843477032, + "learning_rate": 5.98576582485017e-06, + "loss": 0.654266357421875, + "step": 7175 + }, + { + "epoch": 0.0620833369361268, + "grad_norm": 1.1933777334677997, + "learning_rate": 5.985745995096506e-06, + "loss": 0.0921356201171875, + "step": 7180 + }, + { + "epoch": 0.06212657045767006, + "grad_norm": 7.839790839809891, + "learning_rate": 5.985726151572846e-06, + "loss": 0.211456298828125, + "step": 7185 + }, + { + "epoch": 0.062169803979213324, + "grad_norm": 18.447030548138258, + "learning_rate": 5.985706294279279e-06, + "loss": 0.24698486328125, + "step": 7190 + }, + { + "epoch": 0.06221303750075659, + "grad_norm": 49.971472090471934, + "learning_rate": 5.985686423215899e-06, + "loss": 0.2658660888671875, + "step": 7195 + }, + { + "epoch": 0.06225627102229985, + "grad_norm": 23.335935340353107, + "learning_rate": 5.9856665383827976e-06, + "loss": 0.244464111328125, + "step": 7200 + }, + { + "epoch": 0.062299504543843115, + "grad_norm": 10.284161244655428, + "learning_rate": 5.985646639780066e-06, + "loss": 0.07874603271484375, + "step": 7205 + }, + { + "epoch": 0.06234273806538638, + "grad_norm": 6.343772271115469, + "learning_rate": 5.9856267274077955e-06, + "loss": 0.4363800048828125, + "step": 7210 + }, + { + "epoch": 0.06238597158692964, + "grad_norm": 10.125653358718457, + "learning_rate": 5.985606801266078e-06, + "loss": 0.09927215576171874, + "step": 7215 + }, + { + "epoch": 0.062429205108472906, + "grad_norm": 6.222765246337794, + "learning_rate": 5.985586861355006e-06, + "loss": 0.57882080078125, + "step": 7220 + }, + { + "epoch": 0.06247243863001617, + "grad_norm": 19.721620130201266, + "learning_rate": 5.985566907674671e-06, + "loss": 0.1725433349609375, + "step": 7225 + }, + { + "epoch": 0.06251567215155944, + "grad_norm": 52.366649424487925, + "learning_rate": 5.985546940225167e-06, + "loss": 0.55069580078125, + "step": 7230 + }, + { + "epoch": 0.0625589056731027, + "grad_norm": 22.677428213281107, + "learning_rate": 5.985526959006582e-06, + "loss": 0.16351318359375, + "step": 7235 + }, + { + "epoch": 0.06260213919464597, + "grad_norm": 38.84600192612874, + "learning_rate": 5.985506964019012e-06, + "loss": 0.2023712158203125, + "step": 7240 + }, + { + "epoch": 0.06264537271618922, + "grad_norm": 2.199203756482297, + "learning_rate": 5.985486955262547e-06, + "loss": 0.184771728515625, + "step": 7245 + }, + { + "epoch": 0.0626886062377325, + "grad_norm": 13.192377095138205, + "learning_rate": 5.98546693273728e-06, + "loss": 0.24935302734375, + "step": 7250 + }, + { + "epoch": 0.06273183975927575, + "grad_norm": 10.986085952317021, + "learning_rate": 5.9854468964433025e-06, + "loss": 0.21720123291015625, + "step": 7255 + }, + { + "epoch": 0.06277507328081902, + "grad_norm": 20.274327449518577, + "learning_rate": 5.985426846380709e-06, + "loss": 0.1502838134765625, + "step": 7260 + }, + { + "epoch": 0.06281830680236228, + "grad_norm": 75.67738846069835, + "learning_rate": 5.98540678254959e-06, + "loss": 0.2759857177734375, + "step": 7265 + }, + { + "epoch": 0.06286154032390555, + "grad_norm": 65.57706251321898, + "learning_rate": 5.985386704950039e-06, + "loss": 0.2800537109375, + "step": 7270 + }, + { + "epoch": 0.0629047738454488, + "grad_norm": 15.254493187810425, + "learning_rate": 5.985366613582148e-06, + "loss": 0.330401611328125, + "step": 7275 + }, + { + "epoch": 0.06294800736699208, + "grad_norm": 3.453555460624964, + "learning_rate": 5.98534650844601e-06, + "loss": 0.125189208984375, + "step": 7280 + }, + { + "epoch": 0.06299124088853533, + "grad_norm": 5.830712524527114, + "learning_rate": 5.985326389541718e-06, + "loss": 0.1407470703125, + "step": 7285 + }, + { + "epoch": 0.0630344744100786, + "grad_norm": 14.701237570932655, + "learning_rate": 5.985306256869365e-06, + "loss": 0.28673095703125, + "step": 7290 + }, + { + "epoch": 0.06307770793162186, + "grad_norm": 6.354493859258391, + "learning_rate": 5.9852861104290424e-06, + "loss": 0.0706268310546875, + "step": 7295 + }, + { + "epoch": 0.06312094145316513, + "grad_norm": 40.14101461989431, + "learning_rate": 5.985265950220844e-06, + "loss": 0.1458770751953125, + "step": 7300 + }, + { + "epoch": 0.06316417497470839, + "grad_norm": 54.210915245183735, + "learning_rate": 5.985245776244863e-06, + "loss": 0.12491836547851562, + "step": 7305 + }, + { + "epoch": 0.06320740849625166, + "grad_norm": 1.021568615444756, + "learning_rate": 5.985225588501193e-06, + "loss": 0.36185150146484374, + "step": 7310 + }, + { + "epoch": 0.06325064201779491, + "grad_norm": 20.76145407208608, + "learning_rate": 5.9852053869899254e-06, + "loss": 0.46036376953125, + "step": 7315 + }, + { + "epoch": 0.06329387553933818, + "grad_norm": 20.86465890014067, + "learning_rate": 5.985185171711155e-06, + "loss": 0.2639442443847656, + "step": 7320 + }, + { + "epoch": 0.06333710906088144, + "grad_norm": 70.73905009709362, + "learning_rate": 5.985164942664974e-06, + "loss": 0.3012973785400391, + "step": 7325 + }, + { + "epoch": 0.06338034258242471, + "grad_norm": 40.53125872135531, + "learning_rate": 5.985144699851476e-06, + "loss": 0.50235595703125, + "step": 7330 + }, + { + "epoch": 0.06342357610396797, + "grad_norm": 6.821868639154191, + "learning_rate": 5.985124443270755e-06, + "loss": 0.1679595947265625, + "step": 7335 + }, + { + "epoch": 0.06346680962551124, + "grad_norm": 5.205720844297822, + "learning_rate": 5.985104172922903e-06, + "loss": 0.0992156982421875, + "step": 7340 + }, + { + "epoch": 0.0635100431470545, + "grad_norm": 70.2890089740524, + "learning_rate": 5.985083888808015e-06, + "loss": 0.566650390625, + "step": 7345 + }, + { + "epoch": 0.06355327666859777, + "grad_norm": 34.13238188730644, + "learning_rate": 5.985063590926184e-06, + "loss": 0.61693115234375, + "step": 7350 + }, + { + "epoch": 0.06359651019014102, + "grad_norm": 47.666094376192575, + "learning_rate": 5.985043279277503e-06, + "loss": 0.4473602294921875, + "step": 7355 + }, + { + "epoch": 0.0636397437116843, + "grad_norm": 1.0144815090498815, + "learning_rate": 5.985022953862066e-06, + "loss": 0.13769683837890626, + "step": 7360 + }, + { + "epoch": 0.06368297723322755, + "grad_norm": 3.892896288140947, + "learning_rate": 5.985002614679967e-06, + "loss": 0.2476898193359375, + "step": 7365 + }, + { + "epoch": 0.06372621075477082, + "grad_norm": 2.9463962802694152, + "learning_rate": 5.9849822617313e-06, + "loss": 0.18663330078125, + "step": 7370 + }, + { + "epoch": 0.06376944427631408, + "grad_norm": 7.581763315230119, + "learning_rate": 5.984961895016158e-06, + "loss": 0.35782470703125, + "step": 7375 + }, + { + "epoch": 0.06381267779785735, + "grad_norm": 40.68923583100281, + "learning_rate": 5.984941514534637e-06, + "loss": 0.3649383544921875, + "step": 7380 + }, + { + "epoch": 0.0638559113194006, + "grad_norm": 4.9787174901870745, + "learning_rate": 5.984921120286827e-06, + "loss": 0.3585723876953125, + "step": 7385 + }, + { + "epoch": 0.06389914484094388, + "grad_norm": 10.590416396108138, + "learning_rate": 5.984900712272827e-06, + "loss": 0.1458984375, + "step": 7390 + }, + { + "epoch": 0.06394237836248713, + "grad_norm": 33.941430788762815, + "learning_rate": 5.984880290492727e-06, + "loss": 0.2622802734375, + "step": 7395 + }, + { + "epoch": 0.0639856118840304, + "grad_norm": 34.173552185190815, + "learning_rate": 5.984859854946623e-06, + "loss": 0.44094696044921877, + "step": 7400 + }, + { + "epoch": 0.06402884540557366, + "grad_norm": 17.675553404670467, + "learning_rate": 5.98483940563461e-06, + "loss": 0.30146484375, + "step": 7405 + }, + { + "epoch": 0.06407207892711693, + "grad_norm": 21.54488995523119, + "learning_rate": 5.984818942556781e-06, + "loss": 0.18397064208984376, + "step": 7410 + }, + { + "epoch": 0.0641153124486602, + "grad_norm": 32.77587573333895, + "learning_rate": 5.984798465713231e-06, + "loss": 0.37373046875, + "step": 7415 + }, + { + "epoch": 0.06415854597020346, + "grad_norm": 20.611462629401423, + "learning_rate": 5.984777975104053e-06, + "loss": 0.31929931640625, + "step": 7420 + }, + { + "epoch": 0.06420177949174673, + "grad_norm": 1.7815938750495384, + "learning_rate": 5.9847574707293444e-06, + "loss": 0.305859375, + "step": 7425 + }, + { + "epoch": 0.06424501301328998, + "grad_norm": 22.7156365191401, + "learning_rate": 5.984736952589197e-06, + "loss": 0.2639007568359375, + "step": 7430 + }, + { + "epoch": 0.06428824653483325, + "grad_norm": 13.426348731339662, + "learning_rate": 5.9847164206837075e-06, + "loss": 0.0759429931640625, + "step": 7435 + }, + { + "epoch": 0.06433148005637651, + "grad_norm": 17.09283765270829, + "learning_rate": 5.984695875012969e-06, + "loss": 0.2184661865234375, + "step": 7440 + }, + { + "epoch": 0.06437471357791978, + "grad_norm": 38.318049160855054, + "learning_rate": 5.984675315577076e-06, + "loss": 0.21437225341796876, + "step": 7445 + }, + { + "epoch": 0.06441794709946304, + "grad_norm": 2.501647704315048, + "learning_rate": 5.984654742376125e-06, + "loss": 0.398828125, + "step": 7450 + }, + { + "epoch": 0.06446118062100631, + "grad_norm": 77.21801780619263, + "learning_rate": 5.98463415541021e-06, + "loss": 0.2189300537109375, + "step": 7455 + }, + { + "epoch": 0.06450441414254957, + "grad_norm": 11.295709172320077, + "learning_rate": 5.984613554679426e-06, + "loss": 0.1172760009765625, + "step": 7460 + }, + { + "epoch": 0.06454764766409284, + "grad_norm": 0.4053025354310935, + "learning_rate": 5.9845929401838685e-06, + "loss": 0.13114089965820314, + "step": 7465 + }, + { + "epoch": 0.06459088118563609, + "grad_norm": 17.484352177749738, + "learning_rate": 5.984572311923631e-06, + "loss": 0.30492362976074217, + "step": 7470 + }, + { + "epoch": 0.06463411470717936, + "grad_norm": 19.486732142561596, + "learning_rate": 5.9845516698988106e-06, + "loss": 0.10284881591796875, + "step": 7475 + }, + { + "epoch": 0.06467734822872262, + "grad_norm": 5.15904081897677, + "learning_rate": 5.984531014109502e-06, + "loss": 0.665716552734375, + "step": 7480 + }, + { + "epoch": 0.06472058175026589, + "grad_norm": 20.824932048208506, + "learning_rate": 5.984510344555799e-06, + "loss": 0.23197097778320314, + "step": 7485 + }, + { + "epoch": 0.06476381527180915, + "grad_norm": 33.62214201208377, + "learning_rate": 5.984489661237799e-06, + "loss": 0.528289794921875, + "step": 7490 + }, + { + "epoch": 0.06480704879335242, + "grad_norm": 37.65056540303211, + "learning_rate": 5.984468964155595e-06, + "loss": 0.43573684692382814, + "step": 7495 + }, + { + "epoch": 0.06485028231489567, + "grad_norm": 29.83366514505084, + "learning_rate": 5.984448253309285e-06, + "loss": 0.2563507080078125, + "step": 7500 + }, + { + "epoch": 0.06489351583643894, + "grad_norm": 3.3850276446806022, + "learning_rate": 5.984427528698963e-06, + "loss": 0.2460601806640625, + "step": 7505 + }, + { + "epoch": 0.0649367493579822, + "grad_norm": 22.072598858378704, + "learning_rate": 5.9844067903247254e-06, + "loss": 0.261541748046875, + "step": 7510 + }, + { + "epoch": 0.06497998287952547, + "grad_norm": 1.0323185807023358, + "learning_rate": 5.984386038186667e-06, + "loss": 0.2233612060546875, + "step": 7515 + }, + { + "epoch": 0.06502321640106873, + "grad_norm": 32.68017262817794, + "learning_rate": 5.9843652722848835e-06, + "loss": 0.370751953125, + "step": 7520 + }, + { + "epoch": 0.065066449922612, + "grad_norm": 12.054194972896859, + "learning_rate": 5.984344492619471e-06, + "loss": 0.24647216796875, + "step": 7525 + }, + { + "epoch": 0.06510968344415526, + "grad_norm": 8.588117549477687, + "learning_rate": 5.984323699190526e-06, + "loss": 0.2399139404296875, + "step": 7530 + }, + { + "epoch": 0.06515291696569853, + "grad_norm": 15.341306343122474, + "learning_rate": 5.984302891998144e-06, + "loss": 0.25660400390625, + "step": 7535 + }, + { + "epoch": 0.06519615048724178, + "grad_norm": 51.2554690044937, + "learning_rate": 5.98428207104242e-06, + "loss": 0.2804222106933594, + "step": 7540 + }, + { + "epoch": 0.06523938400878505, + "grad_norm": 34.91796706990348, + "learning_rate": 5.984261236323451e-06, + "loss": 0.10927200317382812, + "step": 7545 + }, + { + "epoch": 0.06528261753032831, + "grad_norm": 37.45341237750162, + "learning_rate": 5.9842403878413336e-06, + "loss": 0.24090576171875, + "step": 7550 + }, + { + "epoch": 0.06532585105187158, + "grad_norm": 2.3494372177288185, + "learning_rate": 5.984219525596163e-06, + "loss": 0.147064208984375, + "step": 7555 + }, + { + "epoch": 0.06536908457341484, + "grad_norm": 19.565159144602912, + "learning_rate": 5.984198649588035e-06, + "loss": 0.5309232711791992, + "step": 7560 + }, + { + "epoch": 0.06541231809495811, + "grad_norm": 7.73638338505827, + "learning_rate": 5.984177759817047e-06, + "loss": 0.0853668212890625, + "step": 7565 + }, + { + "epoch": 0.06545555161650136, + "grad_norm": 11.94442206252975, + "learning_rate": 5.984156856283295e-06, + "loss": 0.1072723388671875, + "step": 7570 + }, + { + "epoch": 0.06549878513804464, + "grad_norm": 16.4943789358292, + "learning_rate": 5.9841359389868744e-06, + "loss": 0.39105224609375, + "step": 7575 + }, + { + "epoch": 0.06554201865958789, + "grad_norm": 1.4442892134888994, + "learning_rate": 5.984115007927883e-06, + "loss": 0.1086395263671875, + "step": 7580 + }, + { + "epoch": 0.06558525218113116, + "grad_norm": 0.18329003287352788, + "learning_rate": 5.984094063106417e-06, + "loss": 0.0332763671875, + "step": 7585 + }, + { + "epoch": 0.06562848570267442, + "grad_norm": 5.805288645715813, + "learning_rate": 5.9840731045225734e-06, + "loss": 0.0970733642578125, + "step": 7590 + }, + { + "epoch": 0.06567171922421769, + "grad_norm": 1.3674483006655709, + "learning_rate": 5.9840521321764476e-06, + "loss": 0.05819854736328125, + "step": 7595 + }, + { + "epoch": 0.06571495274576096, + "grad_norm": 46.39266603302469, + "learning_rate": 5.984031146068137e-06, + "loss": 0.20220565795898438, + "step": 7600 + }, + { + "epoch": 0.06575818626730422, + "grad_norm": 3.6860386479924987, + "learning_rate": 5.984010146197739e-06, + "loss": 0.459466552734375, + "step": 7605 + }, + { + "epoch": 0.06580141978884749, + "grad_norm": 19.01197146955999, + "learning_rate": 5.983989132565349e-06, + "loss": 0.1209228515625, + "step": 7610 + }, + { + "epoch": 0.06584465331039074, + "grad_norm": 35.66932693577322, + "learning_rate": 5.9839681051710655e-06, + "loss": 0.32550888061523436, + "step": 7615 + }, + { + "epoch": 0.06588788683193401, + "grad_norm": 10.224292479033545, + "learning_rate": 5.983947064014984e-06, + "loss": 0.1306365966796875, + "step": 7620 + }, + { + "epoch": 0.06593112035347727, + "grad_norm": 21.78701295915985, + "learning_rate": 5.983926009097203e-06, + "loss": 0.2920555114746094, + "step": 7625 + }, + { + "epoch": 0.06597435387502054, + "grad_norm": 2.503849014850054, + "learning_rate": 5.983904940417819e-06, + "loss": 0.13079376220703126, + "step": 7630 + }, + { + "epoch": 0.0660175873965638, + "grad_norm": 39.00914993962339, + "learning_rate": 5.9838838579769295e-06, + "loss": 0.350653076171875, + "step": 7635 + }, + { + "epoch": 0.06606082091810707, + "grad_norm": 18.98580535328862, + "learning_rate": 5.98386276177463e-06, + "loss": 0.08719329833984375, + "step": 7640 + }, + { + "epoch": 0.06610405443965033, + "grad_norm": 10.616453867606388, + "learning_rate": 5.9838416518110194e-06, + "loss": 0.07613525390625, + "step": 7645 + }, + { + "epoch": 0.0661472879611936, + "grad_norm": 16.270010262077438, + "learning_rate": 5.983820528086196e-06, + "loss": 0.26969757080078127, + "step": 7650 + }, + { + "epoch": 0.06619052148273685, + "grad_norm": 3.4163828068015465, + "learning_rate": 5.983799390600255e-06, + "loss": 0.1771270751953125, + "step": 7655 + }, + { + "epoch": 0.06623375500428012, + "grad_norm": 4.924163370371957, + "learning_rate": 5.983778239353295e-06, + "loss": 0.3638866424560547, + "step": 7660 + }, + { + "epoch": 0.06627698852582338, + "grad_norm": 31.049183344008593, + "learning_rate": 5.983757074345413e-06, + "loss": 0.1159423828125, + "step": 7665 + }, + { + "epoch": 0.06632022204736665, + "grad_norm": 24.719980143781893, + "learning_rate": 5.983735895576707e-06, + "loss": 0.20744781494140624, + "step": 7670 + }, + { + "epoch": 0.06636345556890991, + "grad_norm": 29.257243369226057, + "learning_rate": 5.983714703047276e-06, + "loss": 0.23479766845703126, + "step": 7675 + }, + { + "epoch": 0.06640668909045318, + "grad_norm": 14.497931695350282, + "learning_rate": 5.983693496757215e-06, + "loss": 0.1124542236328125, + "step": 7680 + }, + { + "epoch": 0.06644992261199643, + "grad_norm": 17.156242097993218, + "learning_rate": 5.9836722767066235e-06, + "loss": 0.11964111328125, + "step": 7685 + }, + { + "epoch": 0.0664931561335397, + "grad_norm": 1.3790953165946407, + "learning_rate": 5.983651042895599e-06, + "loss": 0.1022918701171875, + "step": 7690 + }, + { + "epoch": 0.06653638965508296, + "grad_norm": 7.741860498355035, + "learning_rate": 5.98362979532424e-06, + "loss": 0.08522491455078125, + "step": 7695 + }, + { + "epoch": 0.06657962317662623, + "grad_norm": 0.9188409301132905, + "learning_rate": 5.983608533992644e-06, + "loss": 0.28893356323242186, + "step": 7700 + }, + { + "epoch": 0.06662285669816949, + "grad_norm": 35.36519334004482, + "learning_rate": 5.983587258900909e-06, + "loss": 0.4942413330078125, + "step": 7705 + }, + { + "epoch": 0.06666609021971276, + "grad_norm": 20.619604448132648, + "learning_rate": 5.983565970049134e-06, + "loss": 0.122235107421875, + "step": 7710 + }, + { + "epoch": 0.06670932374125602, + "grad_norm": 35.924641640102394, + "learning_rate": 5.983544667437415e-06, + "loss": 0.358245849609375, + "step": 7715 + }, + { + "epoch": 0.06675255726279929, + "grad_norm": 43.90976660022059, + "learning_rate": 5.983523351065853e-06, + "loss": 0.487646484375, + "step": 7720 + }, + { + "epoch": 0.06679579078434254, + "grad_norm": 1.2392657644968261, + "learning_rate": 5.983502020934544e-06, + "loss": 0.5260566711425781, + "step": 7725 + }, + { + "epoch": 0.06683902430588581, + "grad_norm": 35.414989859808074, + "learning_rate": 5.983480677043586e-06, + "loss": 0.144073486328125, + "step": 7730 + }, + { + "epoch": 0.06688225782742907, + "grad_norm": 4.383571580944663, + "learning_rate": 5.983459319393081e-06, + "loss": 0.4257568359375, + "step": 7735 + }, + { + "epoch": 0.06692549134897234, + "grad_norm": 4.019821862550249, + "learning_rate": 5.983437947983125e-06, + "loss": 0.8563232421875, + "step": 7740 + }, + { + "epoch": 0.0669687248705156, + "grad_norm": 8.153475281121246, + "learning_rate": 5.983416562813816e-06, + "loss": 0.054180908203125, + "step": 7745 + }, + { + "epoch": 0.06701195839205887, + "grad_norm": 10.12598252320204, + "learning_rate": 5.983395163885253e-06, + "loss": 0.1010986328125, + "step": 7750 + }, + { + "epoch": 0.06705519191360212, + "grad_norm": 0.5289907215368841, + "learning_rate": 5.983373751197536e-06, + "loss": 0.10904998779296875, + "step": 7755 + }, + { + "epoch": 0.0670984254351454, + "grad_norm": 82.13426182118599, + "learning_rate": 5.983352324750763e-06, + "loss": 0.56873779296875, + "step": 7760 + }, + { + "epoch": 0.06714165895668865, + "grad_norm": 37.68522076892617, + "learning_rate": 5.983330884545032e-06, + "loss": 0.5251953125, + "step": 7765 + }, + { + "epoch": 0.06718489247823192, + "grad_norm": 1.410539236294467, + "learning_rate": 5.9833094305804425e-06, + "loss": 0.24443359375, + "step": 7770 + }, + { + "epoch": 0.06722812599977518, + "grad_norm": 47.74860816941741, + "learning_rate": 5.983287962857094e-06, + "loss": 0.2614349365234375, + "step": 7775 + }, + { + "epoch": 0.06727135952131845, + "grad_norm": 19.04883280336779, + "learning_rate": 5.983266481375085e-06, + "loss": 0.355523681640625, + "step": 7780 + }, + { + "epoch": 0.06731459304286172, + "grad_norm": 22.07582499790419, + "learning_rate": 5.983244986134514e-06, + "loss": 0.5911422729492187, + "step": 7785 + }, + { + "epoch": 0.06735782656440498, + "grad_norm": 12.091379382447379, + "learning_rate": 5.983223477135481e-06, + "loss": 0.36107177734375, + "step": 7790 + }, + { + "epoch": 0.06740106008594825, + "grad_norm": 3.139692512058579, + "learning_rate": 5.983201954378086e-06, + "loss": 0.2193115234375, + "step": 7795 + }, + { + "epoch": 0.0674442936074915, + "grad_norm": 8.11250616086428, + "learning_rate": 5.983180417862426e-06, + "loss": 0.115667724609375, + "step": 7800 + }, + { + "epoch": 0.06748752712903477, + "grad_norm": 2.839733045211475, + "learning_rate": 5.983158867588602e-06, + "loss": 0.21786575317382811, + "step": 7805 + }, + { + "epoch": 0.06753076065057803, + "grad_norm": 5.391922890788551, + "learning_rate": 5.983137303556713e-06, + "loss": 0.1190093994140625, + "step": 7810 + }, + { + "epoch": 0.0675739941721213, + "grad_norm": 43.89186880011146, + "learning_rate": 5.9831157257668584e-06, + "loss": 0.22034912109375, + "step": 7815 + }, + { + "epoch": 0.06761722769366456, + "grad_norm": 6.275233176132006, + "learning_rate": 5.983094134219137e-06, + "loss": 0.3613433837890625, + "step": 7820 + }, + { + "epoch": 0.06766046121520783, + "grad_norm": 4.158301767687799, + "learning_rate": 5.983072528913649e-06, + "loss": 0.16099853515625, + "step": 7825 + }, + { + "epoch": 0.06770369473675109, + "grad_norm": 18.443528180326233, + "learning_rate": 5.983050909850495e-06, + "loss": 0.09251708984375, + "step": 7830 + }, + { + "epoch": 0.06774692825829436, + "grad_norm": 21.31702896940149, + "learning_rate": 5.9830292770297736e-06, + "loss": 0.10356063842773437, + "step": 7835 + }, + { + "epoch": 0.06779016177983761, + "grad_norm": 12.294628727380944, + "learning_rate": 5.983007630451585e-06, + "loss": 0.2249755859375, + "step": 7840 + }, + { + "epoch": 0.06783339530138088, + "grad_norm": 57.17860253157511, + "learning_rate": 5.982985970116027e-06, + "loss": 0.4202880859375, + "step": 7845 + }, + { + "epoch": 0.06787662882292414, + "grad_norm": 31.46114860567691, + "learning_rate": 5.982964296023203e-06, + "loss": 0.567578125, + "step": 7850 + }, + { + "epoch": 0.06791986234446741, + "grad_norm": 0.18004906306977858, + "learning_rate": 5.982942608173211e-06, + "loss": 0.22883453369140624, + "step": 7855 + }, + { + "epoch": 0.06796309586601067, + "grad_norm": 4.358086204837963, + "learning_rate": 5.982920906566151e-06, + "loss": 0.6825614929199219, + "step": 7860 + }, + { + "epoch": 0.06800632938755394, + "grad_norm": 35.35226845385556, + "learning_rate": 5.982899191202123e-06, + "loss": 0.256707763671875, + "step": 7865 + }, + { + "epoch": 0.0680495629090972, + "grad_norm": 27.441967008075725, + "learning_rate": 5.9828774620812285e-06, + "loss": 0.3933349609375, + "step": 7870 + }, + { + "epoch": 0.06809279643064046, + "grad_norm": 14.48610786667105, + "learning_rate": 5.982855719203566e-06, + "loss": 0.5262252807617187, + "step": 7875 + }, + { + "epoch": 0.06813602995218372, + "grad_norm": 7.8981391610342975, + "learning_rate": 5.9828339625692365e-06, + "loss": 0.065887451171875, + "step": 7880 + }, + { + "epoch": 0.06817926347372699, + "grad_norm": 3.731364187816992, + "learning_rate": 5.98281219217834e-06, + "loss": 0.16113967895507814, + "step": 7885 + }, + { + "epoch": 0.06822249699527025, + "grad_norm": 32.27005099893371, + "learning_rate": 5.982790408030977e-06, + "loss": 0.269757080078125, + "step": 7890 + }, + { + "epoch": 0.06826573051681352, + "grad_norm": 16.68472617029849, + "learning_rate": 5.982768610127251e-06, + "loss": 0.1395050048828125, + "step": 7895 + }, + { + "epoch": 0.06830896403835678, + "grad_norm": 14.45075649804219, + "learning_rate": 5.9827467984672565e-06, + "loss": 0.13636474609375, + "step": 7900 + }, + { + "epoch": 0.06835219755990005, + "grad_norm": 7.835406943191326, + "learning_rate": 5.982724973051098e-06, + "loss": 0.4070281982421875, + "step": 7905 + }, + { + "epoch": 0.0683954310814433, + "grad_norm": 43.04365184978359, + "learning_rate": 5.982703133878877e-06, + "loss": 0.4547607421875, + "step": 7910 + }, + { + "epoch": 0.06843866460298657, + "grad_norm": 31.591623741869892, + "learning_rate": 5.9826812809506905e-06, + "loss": 0.173992919921875, + "step": 7915 + }, + { + "epoch": 0.06848189812452983, + "grad_norm": 52.743707739241565, + "learning_rate": 5.982659414266643e-06, + "loss": 0.4704742431640625, + "step": 7920 + }, + { + "epoch": 0.0685251316460731, + "grad_norm": 16.098964018387463, + "learning_rate": 5.982637533826834e-06, + "loss": 0.3514404296875, + "step": 7925 + }, + { + "epoch": 0.06856836516761636, + "grad_norm": 6.901984263404069, + "learning_rate": 5.982615639631364e-06, + "loss": 0.1846832275390625, + "step": 7930 + }, + { + "epoch": 0.06861159868915963, + "grad_norm": 13.441118099421248, + "learning_rate": 5.982593731680334e-06, + "loss": 0.1512451171875, + "step": 7935 + }, + { + "epoch": 0.06865483221070288, + "grad_norm": 2.529961882081544, + "learning_rate": 5.982571809973845e-06, + "loss": 0.1293304443359375, + "step": 7940 + }, + { + "epoch": 0.06869806573224616, + "grad_norm": 12.170030031166355, + "learning_rate": 5.982549874511998e-06, + "loss": 0.318865966796875, + "step": 7945 + }, + { + "epoch": 0.06874129925378941, + "grad_norm": 27.632320558707654, + "learning_rate": 5.982527925294895e-06, + "loss": 0.2369873046875, + "step": 7950 + }, + { + "epoch": 0.06878453277533268, + "grad_norm": 3.454060576961688, + "learning_rate": 5.982505962322637e-06, + "loss": 0.1507598876953125, + "step": 7955 + }, + { + "epoch": 0.06882776629687594, + "grad_norm": 16.487473720247966, + "learning_rate": 5.982483985595324e-06, + "loss": 0.23675537109375, + "step": 7960 + }, + { + "epoch": 0.06887099981841921, + "grad_norm": 1.559741170589323, + "learning_rate": 5.98246199511306e-06, + "loss": 0.0931182861328125, + "step": 7965 + }, + { + "epoch": 0.06891423333996248, + "grad_norm": 16.217738091360406, + "learning_rate": 5.982439990875943e-06, + "loss": 0.5340240478515625, + "step": 7970 + }, + { + "epoch": 0.06895746686150574, + "grad_norm": 6.45516762358546, + "learning_rate": 5.982417972884079e-06, + "loss": 0.205999755859375, + "step": 7975 + }, + { + "epoch": 0.06900070038304901, + "grad_norm": 23.970501663987527, + "learning_rate": 5.982395941137565e-06, + "loss": 0.44271240234375, + "step": 7980 + }, + { + "epoch": 0.06904393390459226, + "grad_norm": 31.607876031415316, + "learning_rate": 5.982373895636505e-06, + "loss": 0.43511962890625, + "step": 7985 + }, + { + "epoch": 0.06908716742613553, + "grad_norm": 7.861821136974702, + "learning_rate": 5.982351836380999e-06, + "loss": 0.409100341796875, + "step": 7990 + }, + { + "epoch": 0.06913040094767879, + "grad_norm": 3.666068273506239, + "learning_rate": 5.98232976337115e-06, + "loss": 0.1139617919921875, + "step": 7995 + }, + { + "epoch": 0.06917363446922206, + "grad_norm": 10.584145790832588, + "learning_rate": 5.982307676607061e-06, + "loss": 0.46164703369140625, + "step": 8000 + }, + { + "epoch": 0.06921686799076532, + "grad_norm": 43.19877108520564, + "learning_rate": 5.982285576088832e-06, + "loss": 0.200830078125, + "step": 8005 + }, + { + "epoch": 0.06926010151230859, + "grad_norm": 25.29206732492556, + "learning_rate": 5.982263461816565e-06, + "loss": 0.26909866333007815, + "step": 8010 + }, + { + "epoch": 0.06930333503385185, + "grad_norm": 1.7355386404718913, + "learning_rate": 5.982241333790362e-06, + "loss": 0.149371337890625, + "step": 8015 + }, + { + "epoch": 0.06934656855539512, + "grad_norm": 26.056560921738043, + "learning_rate": 5.9822191920103264e-06, + "loss": 0.298193359375, + "step": 8020 + }, + { + "epoch": 0.06938980207693837, + "grad_norm": 20.998431187668636, + "learning_rate": 5.982197036476559e-06, + "loss": 0.24342041015625, + "step": 8025 + }, + { + "epoch": 0.06943303559848164, + "grad_norm": 1.5792632470145378, + "learning_rate": 5.982174867189163e-06, + "loss": 0.6676761627197265, + "step": 8030 + }, + { + "epoch": 0.0694762691200249, + "grad_norm": 4.755580294095398, + "learning_rate": 5.982152684148239e-06, + "loss": 0.126837158203125, + "step": 8035 + }, + { + "epoch": 0.06951950264156817, + "grad_norm": 2.6607506019034726, + "learning_rate": 5.982130487353891e-06, + "loss": 0.5063777923583984, + "step": 8040 + }, + { + "epoch": 0.06956273616311143, + "grad_norm": 4.184742831279821, + "learning_rate": 5.98210827680622e-06, + "loss": 0.34389419555664064, + "step": 8045 + }, + { + "epoch": 0.0696059696846547, + "grad_norm": 18.07283489465846, + "learning_rate": 5.98208605250533e-06, + "loss": 0.30757904052734375, + "step": 8050 + }, + { + "epoch": 0.06964920320619795, + "grad_norm": 41.50464103864124, + "learning_rate": 5.982063814451323e-06, + "loss": 0.21404342651367186, + "step": 8055 + }, + { + "epoch": 0.06969243672774122, + "grad_norm": 4.332002103598077, + "learning_rate": 5.9820415626443e-06, + "loss": 0.20894775390625, + "step": 8060 + }, + { + "epoch": 0.06973567024928448, + "grad_norm": 20.518120310976286, + "learning_rate": 5.982019297084366e-06, + "loss": 0.324847412109375, + "step": 8065 + }, + { + "epoch": 0.06977890377082775, + "grad_norm": 1.1558081684738541, + "learning_rate": 5.981997017771621e-06, + "loss": 0.2871795654296875, + "step": 8070 + }, + { + "epoch": 0.06982213729237101, + "grad_norm": 10.370564134055405, + "learning_rate": 5.98197472470617e-06, + "loss": 0.08283843994140624, + "step": 8075 + }, + { + "epoch": 0.06986537081391428, + "grad_norm": 10.004922962447273, + "learning_rate": 5.981952417888116e-06, + "loss": 0.26651611328125, + "step": 8080 + }, + { + "epoch": 0.06990860433545754, + "grad_norm": 16.410953912146795, + "learning_rate": 5.98193009731756e-06, + "loss": 0.324609375, + "step": 8085 + }, + { + "epoch": 0.0699518378570008, + "grad_norm": 33.250737886682174, + "learning_rate": 5.981907762994605e-06, + "loss": 0.7393051147460937, + "step": 8090 + }, + { + "epoch": 0.06999507137854406, + "grad_norm": 11.041957900739206, + "learning_rate": 5.981885414919356e-06, + "loss": 0.145684814453125, + "step": 8095 + }, + { + "epoch": 0.07003830490008733, + "grad_norm": 55.183275552371896, + "learning_rate": 5.981863053091915e-06, + "loss": 0.2662330627441406, + "step": 8100 + }, + { + "epoch": 0.07008153842163059, + "grad_norm": 79.49009703996923, + "learning_rate": 5.981840677512385e-06, + "loss": 0.47430953979492185, + "step": 8105 + }, + { + "epoch": 0.07012477194317386, + "grad_norm": 21.729540160559882, + "learning_rate": 5.981818288180869e-06, + "loss": 0.09823455810546874, + "step": 8110 + }, + { + "epoch": 0.07016800546471712, + "grad_norm": 3.8306554554614527, + "learning_rate": 5.981795885097471e-06, + "loss": 0.22764129638671876, + "step": 8115 + }, + { + "epoch": 0.07021123898626039, + "grad_norm": 10.33081411287576, + "learning_rate": 5.981773468262293e-06, + "loss": 0.144268798828125, + "step": 8120 + }, + { + "epoch": 0.07025447250780364, + "grad_norm": 5.747947322763925, + "learning_rate": 5.9817510376754395e-06, + "loss": 0.144677734375, + "step": 8125 + }, + { + "epoch": 0.07029770602934692, + "grad_norm": 1.8018408894047788, + "learning_rate": 5.981728593337015e-06, + "loss": 0.138592529296875, + "step": 8130 + }, + { + "epoch": 0.07034093955089017, + "grad_norm": 23.593972860980667, + "learning_rate": 5.98170613524712e-06, + "loss": 0.1920440673828125, + "step": 8135 + }, + { + "epoch": 0.07038417307243344, + "grad_norm": 14.416826880078133, + "learning_rate": 5.98168366340586e-06, + "loss": 0.38294677734375, + "step": 8140 + }, + { + "epoch": 0.0704274065939767, + "grad_norm": 23.722746393831887, + "learning_rate": 5.981661177813339e-06, + "loss": 0.387890625, + "step": 8145 + }, + { + "epoch": 0.07047064011551997, + "grad_norm": 19.857626227991297, + "learning_rate": 5.98163867846966e-06, + "loss": 0.17018585205078124, + "step": 8150 + }, + { + "epoch": 0.07051387363706324, + "grad_norm": 4.76262708812557, + "learning_rate": 5.981616165374927e-06, + "loss": 0.286517333984375, + "step": 8155 + }, + { + "epoch": 0.0705571071586065, + "grad_norm": 7.750049387733714, + "learning_rate": 5.981593638529244e-06, + "loss": 0.21763153076171876, + "step": 8160 + }, + { + "epoch": 0.07060034068014977, + "grad_norm": 2.6729398695869246, + "learning_rate": 5.981571097932713e-06, + "loss": 0.44575843811035154, + "step": 8165 + }, + { + "epoch": 0.07064357420169302, + "grad_norm": 2.0024402223314346, + "learning_rate": 5.981548543585441e-06, + "loss": 0.0902069091796875, + "step": 8170 + }, + { + "epoch": 0.0706868077232363, + "grad_norm": 1.706919489062809, + "learning_rate": 5.98152597548753e-06, + "loss": 0.2329345703125, + "step": 8175 + }, + { + "epoch": 0.07073004124477955, + "grad_norm": 8.103285300765922, + "learning_rate": 5.981503393639086e-06, + "loss": 0.11658859252929688, + "step": 8180 + }, + { + "epoch": 0.07077327476632282, + "grad_norm": 8.497273985203831, + "learning_rate": 5.98148079804021e-06, + "loss": 0.2077667236328125, + "step": 8185 + }, + { + "epoch": 0.07081650828786608, + "grad_norm": 5.02916837659049, + "learning_rate": 5.981458188691008e-06, + "loss": 0.12254486083984376, + "step": 8190 + }, + { + "epoch": 0.07085974180940935, + "grad_norm": 6.523947725414758, + "learning_rate": 5.981435565591586e-06, + "loss": 0.3102294921875, + "step": 8195 + }, + { + "epoch": 0.0709029753309526, + "grad_norm": 0.08382796803517993, + "learning_rate": 5.981412928742045e-06, + "loss": 0.4051685333251953, + "step": 8200 + }, + { + "epoch": 0.07094620885249588, + "grad_norm": 59.5853984776661, + "learning_rate": 5.981390278142492e-06, + "loss": 0.55548095703125, + "step": 8205 + }, + { + "epoch": 0.07098944237403913, + "grad_norm": 15.52611220004276, + "learning_rate": 5.98136761379303e-06, + "loss": 0.491363525390625, + "step": 8210 + }, + { + "epoch": 0.0710326758955824, + "grad_norm": 23.45112311882645, + "learning_rate": 5.981344935693764e-06, + "loss": 0.13139190673828124, + "step": 8215 + }, + { + "epoch": 0.07107590941712566, + "grad_norm": 3.079739131347528, + "learning_rate": 5.981322243844799e-06, + "loss": 0.24001846313476563, + "step": 8220 + }, + { + "epoch": 0.07111914293866893, + "grad_norm": 25.92509394012825, + "learning_rate": 5.981299538246238e-06, + "loss": 0.278045654296875, + "step": 8225 + }, + { + "epoch": 0.07116237646021219, + "grad_norm": 15.793702877732578, + "learning_rate": 5.981276818898188e-06, + "loss": 0.14574737548828126, + "step": 8230 + }, + { + "epoch": 0.07120560998175546, + "grad_norm": 42.64136054552176, + "learning_rate": 5.981254085800753e-06, + "loss": 0.409820556640625, + "step": 8235 + }, + { + "epoch": 0.07124884350329871, + "grad_norm": 48.772555644234586, + "learning_rate": 5.9812313389540365e-06, + "loss": 0.339117431640625, + "step": 8240 + }, + { + "epoch": 0.07129207702484198, + "grad_norm": 9.025734240556863, + "learning_rate": 5.981208578358146e-06, + "loss": 0.144683837890625, + "step": 8245 + }, + { + "epoch": 0.07133531054638524, + "grad_norm": 2.129919461323107, + "learning_rate": 5.981185804013184e-06, + "loss": 0.3052398681640625, + "step": 8250 + }, + { + "epoch": 0.07137854406792851, + "grad_norm": 7.436220916187445, + "learning_rate": 5.981163015919257e-06, + "loss": 0.4022735595703125, + "step": 8255 + }, + { + "epoch": 0.07142177758947177, + "grad_norm": 10.104015953275848, + "learning_rate": 5.981140214076469e-06, + "loss": 0.3692626953125, + "step": 8260 + }, + { + "epoch": 0.07146501111101504, + "grad_norm": 1.2899925204267642, + "learning_rate": 5.9811173984849255e-06, + "loss": 0.0333831787109375, + "step": 8265 + }, + { + "epoch": 0.0715082446325583, + "grad_norm": 15.89249378990258, + "learning_rate": 5.981094569144733e-06, + "loss": 0.14394073486328124, + "step": 8270 + }, + { + "epoch": 0.07155147815410157, + "grad_norm": 33.70302413700851, + "learning_rate": 5.981071726055995e-06, + "loss": 0.124896240234375, + "step": 8275 + }, + { + "epoch": 0.07159471167564482, + "grad_norm": 18.907049301197375, + "learning_rate": 5.9810488692188175e-06, + "loss": 0.3084320068359375, + "step": 8280 + }, + { + "epoch": 0.0716379451971881, + "grad_norm": 9.53544064851108, + "learning_rate": 5.981025998633307e-06, + "loss": 0.124700927734375, + "step": 8285 + }, + { + "epoch": 0.07168117871873135, + "grad_norm": 12.29873438935736, + "learning_rate": 5.981003114299567e-06, + "loss": 0.136895751953125, + "step": 8290 + }, + { + "epoch": 0.07172441224027462, + "grad_norm": 41.94627989244765, + "learning_rate": 5.980980216217705e-06, + "loss": 0.09967041015625, + "step": 8295 + }, + { + "epoch": 0.07176764576181788, + "grad_norm": 0.9888885162434278, + "learning_rate": 5.980957304387824e-06, + "loss": 0.17067108154296876, + "step": 8300 + }, + { + "epoch": 0.07181087928336115, + "grad_norm": 11.99870816078589, + "learning_rate": 5.980934378810033e-06, + "loss": 0.1983489990234375, + "step": 8305 + }, + { + "epoch": 0.0718541128049044, + "grad_norm": 24.03197270082046, + "learning_rate": 5.9809114394844345e-06, + "loss": 0.26607666015625, + "step": 8310 + }, + { + "epoch": 0.07189734632644768, + "grad_norm": 8.614444032749875, + "learning_rate": 5.980888486411138e-06, + "loss": 0.3445556640625, + "step": 8315 + }, + { + "epoch": 0.07194057984799093, + "grad_norm": 4.422013995391082, + "learning_rate": 5.9808655195902445e-06, + "loss": 0.3189483642578125, + "step": 8320 + }, + { + "epoch": 0.0719838133695342, + "grad_norm": 12.867217606308973, + "learning_rate": 5.980842539021864e-06, + "loss": 0.197943115234375, + "step": 8325 + }, + { + "epoch": 0.07202704689107746, + "grad_norm": 6.956444112967165, + "learning_rate": 5.980819544706102e-06, + "loss": 0.15053558349609375, + "step": 8330 + }, + { + "epoch": 0.07207028041262073, + "grad_norm": 70.09210344475191, + "learning_rate": 5.980796536643062e-06, + "loss": 0.503436279296875, + "step": 8335 + }, + { + "epoch": 0.072113513934164, + "grad_norm": 26.13661795505296, + "learning_rate": 5.9807735148328534e-06, + "loss": 0.136578369140625, + "step": 8340 + }, + { + "epoch": 0.07215674745570726, + "grad_norm": 0.42794292113017957, + "learning_rate": 5.98075047927558e-06, + "loss": 0.21656036376953125, + "step": 8345 + }, + { + "epoch": 0.07219998097725053, + "grad_norm": 18.902756708818604, + "learning_rate": 5.980727429971348e-06, + "loss": 0.25789566040039064, + "step": 8350 + }, + { + "epoch": 0.07224321449879378, + "grad_norm": 2.2551442825069277, + "learning_rate": 5.980704366920266e-06, + "loss": 0.1163604736328125, + "step": 8355 + }, + { + "epoch": 0.07228644802033705, + "grad_norm": 24.148052568334506, + "learning_rate": 5.980681290122438e-06, + "loss": 0.26406707763671877, + "step": 8360 + }, + { + "epoch": 0.07232968154188031, + "grad_norm": 34.855091930324, + "learning_rate": 5.980658199577971e-06, + "loss": 0.2327789306640625, + "step": 8365 + }, + { + "epoch": 0.07237291506342358, + "grad_norm": 4.2942530298820705, + "learning_rate": 5.980635095286972e-06, + "loss": 0.17288589477539062, + "step": 8370 + }, + { + "epoch": 0.07241614858496684, + "grad_norm": 34.58465214462065, + "learning_rate": 5.9806119772495474e-06, + "loss": 0.250225830078125, + "step": 8375 + }, + { + "epoch": 0.07245938210651011, + "grad_norm": 10.497640962828019, + "learning_rate": 5.980588845465803e-06, + "loss": 0.14734039306640626, + "step": 8380 + }, + { + "epoch": 0.07250261562805337, + "grad_norm": 49.47006250548812, + "learning_rate": 5.980565699935847e-06, + "loss": 0.3252555847167969, + "step": 8385 + }, + { + "epoch": 0.07254584914959664, + "grad_norm": 21.311334980591017, + "learning_rate": 5.980542540659786e-06, + "loss": 0.3034637451171875, + "step": 8390 + }, + { + "epoch": 0.07258908267113989, + "grad_norm": 50.51008572489165, + "learning_rate": 5.9805193676377246e-06, + "loss": 0.272845458984375, + "step": 8395 + }, + { + "epoch": 0.07263231619268316, + "grad_norm": 4.925363413943632, + "learning_rate": 5.980496180869772e-06, + "loss": 0.0630828857421875, + "step": 8400 + }, + { + "epoch": 0.07267554971422642, + "grad_norm": 9.694011137805518, + "learning_rate": 5.980472980356035e-06, + "loss": 0.379205322265625, + "step": 8405 + }, + { + "epoch": 0.07271878323576969, + "grad_norm": 3.530591431303073, + "learning_rate": 5.980449766096619e-06, + "loss": 0.187908935546875, + "step": 8410 + }, + { + "epoch": 0.07276201675731295, + "grad_norm": 28.840830649105246, + "learning_rate": 5.980426538091633e-06, + "loss": 0.25074310302734376, + "step": 8415 + }, + { + "epoch": 0.07280525027885622, + "grad_norm": 30.386412789699012, + "learning_rate": 5.980403296341182e-06, + "loss": 0.411578369140625, + "step": 8420 + }, + { + "epoch": 0.07284848380039947, + "grad_norm": 19.740677985395237, + "learning_rate": 5.980380040845374e-06, + "loss": 0.28632659912109376, + "step": 8425 + }, + { + "epoch": 0.07289171732194275, + "grad_norm": 22.406233973154812, + "learning_rate": 5.9803567716043186e-06, + "loss": 0.1901153564453125, + "step": 8430 + }, + { + "epoch": 0.072934950843486, + "grad_norm": 66.37424900695468, + "learning_rate": 5.98033348861812e-06, + "loss": 0.2841522216796875, + "step": 8435 + }, + { + "epoch": 0.07297818436502927, + "grad_norm": 4.459032588012246, + "learning_rate": 5.980310191886887e-06, + "loss": 0.2472900390625, + "step": 8440 + }, + { + "epoch": 0.07302141788657253, + "grad_norm": 8.765534353740888, + "learning_rate": 5.980286881410727e-06, + "loss": 0.19537353515625, + "step": 8445 + }, + { + "epoch": 0.0730646514081158, + "grad_norm": 1.7899989457024155, + "learning_rate": 5.980263557189746e-06, + "loss": 0.25376052856445314, + "step": 8450 + }, + { + "epoch": 0.07310788492965906, + "grad_norm": 1.3952874896042073, + "learning_rate": 5.980240219224053e-06, + "loss": 0.486309814453125, + "step": 8455 + }, + { + "epoch": 0.07315111845120233, + "grad_norm": 3.140620916436154, + "learning_rate": 5.980216867513756e-06, + "loss": 0.5535568237304688, + "step": 8460 + }, + { + "epoch": 0.07319435197274558, + "grad_norm": 0.8788766004064322, + "learning_rate": 5.980193502058962e-06, + "loss": 0.244537353515625, + "step": 8465 + }, + { + "epoch": 0.07323758549428885, + "grad_norm": 0.32904235351836747, + "learning_rate": 5.98017012285978e-06, + "loss": 0.1657135009765625, + "step": 8470 + }, + { + "epoch": 0.07328081901583211, + "grad_norm": 18.521942157382995, + "learning_rate": 5.980146729916315e-06, + "loss": 0.13960723876953124, + "step": 8475 + }, + { + "epoch": 0.07332405253737538, + "grad_norm": 7.262380024882953, + "learning_rate": 5.980123323228678e-06, + "loss": 0.13132476806640625, + "step": 8480 + }, + { + "epoch": 0.07336728605891864, + "grad_norm": 23.61284234552271, + "learning_rate": 5.9800999027969746e-06, + "loss": 0.48829345703125, + "step": 8485 + }, + { + "epoch": 0.07341051958046191, + "grad_norm": 9.82280166129483, + "learning_rate": 5.980076468621315e-06, + "loss": 0.2700439453125, + "step": 8490 + }, + { + "epoch": 0.07345375310200516, + "grad_norm": 23.868154662778483, + "learning_rate": 5.980053020701805e-06, + "loss": 1.05028076171875, + "step": 8495 + }, + { + "epoch": 0.07349698662354844, + "grad_norm": 28.825116870364386, + "learning_rate": 5.980029559038554e-06, + "loss": 0.2683013916015625, + "step": 8500 + }, + { + "epoch": 0.07354022014509169, + "grad_norm": 31.0153875424708, + "learning_rate": 5.980006083631669e-06, + "loss": 0.20293731689453126, + "step": 8505 + }, + { + "epoch": 0.07358345366663496, + "grad_norm": 1.48541594483469, + "learning_rate": 5.97998259448126e-06, + "loss": 0.24383544921875, + "step": 8510 + }, + { + "epoch": 0.07362668718817822, + "grad_norm": 13.873857653963979, + "learning_rate": 5.979959091587435e-06, + "loss": 0.23358230590820311, + "step": 8515 + }, + { + "epoch": 0.07366992070972149, + "grad_norm": 13.944275845824746, + "learning_rate": 5.979935574950302e-06, + "loss": 0.16703338623046876, + "step": 8520 + }, + { + "epoch": 0.07371315423126476, + "grad_norm": 7.524155901794601, + "learning_rate": 5.9799120445699695e-06, + "loss": 0.2816253662109375, + "step": 8525 + }, + { + "epoch": 0.07375638775280802, + "grad_norm": 42.77926182503163, + "learning_rate": 5.979888500446546e-06, + "loss": 0.28407135009765627, + "step": 8530 + }, + { + "epoch": 0.07379962127435129, + "grad_norm": 29.991381828678367, + "learning_rate": 5.979864942580139e-06, + "loss": 0.276727294921875, + "step": 8535 + }, + { + "epoch": 0.07384285479589454, + "grad_norm": 24.833029996406392, + "learning_rate": 5.97984137097086e-06, + "loss": 0.1551605224609375, + "step": 8540 + }, + { + "epoch": 0.07388608831743781, + "grad_norm": 3.4164219391355397, + "learning_rate": 5.979817785618814e-06, + "loss": 0.15417404174804689, + "step": 8545 + }, + { + "epoch": 0.07392932183898107, + "grad_norm": 4.176854125034674, + "learning_rate": 5.9797941865241124e-06, + "loss": 0.3477264404296875, + "step": 8550 + }, + { + "epoch": 0.07397255536052434, + "grad_norm": 29.940216630937606, + "learning_rate": 5.979770573686863e-06, + "loss": 0.416558837890625, + "step": 8555 + }, + { + "epoch": 0.0740157888820676, + "grad_norm": 3.1097364603081648, + "learning_rate": 5.979746947107176e-06, + "loss": 0.23563690185546876, + "step": 8560 + }, + { + "epoch": 0.07405902240361087, + "grad_norm": 15.887856813376283, + "learning_rate": 5.979723306785158e-06, + "loss": 0.25849609375, + "step": 8565 + }, + { + "epoch": 0.07410225592515413, + "grad_norm": 1.62167190708301, + "learning_rate": 5.9796996527209204e-06, + "loss": 0.1259326934814453, + "step": 8570 + }, + { + "epoch": 0.0741454894466974, + "grad_norm": 9.404633679758016, + "learning_rate": 5.979675984914572e-06, + "loss": 0.12180938720703124, + "step": 8575 + }, + { + "epoch": 0.07418872296824065, + "grad_norm": 49.46099396861432, + "learning_rate": 5.97965230336622e-06, + "loss": 0.307073974609375, + "step": 8580 + }, + { + "epoch": 0.07423195648978392, + "grad_norm": 3.5038644406011126, + "learning_rate": 5.9796286080759745e-06, + "loss": 0.348529052734375, + "step": 8585 + }, + { + "epoch": 0.07427519001132718, + "grad_norm": 66.36948051703378, + "learning_rate": 5.979604899043946e-06, + "loss": 0.20474090576171874, + "step": 8590 + }, + { + "epoch": 0.07431842353287045, + "grad_norm": 4.54789300379334, + "learning_rate": 5.979581176270243e-06, + "loss": 0.085943603515625, + "step": 8595 + }, + { + "epoch": 0.07436165705441371, + "grad_norm": 30.684473829856486, + "learning_rate": 5.979557439754975e-06, + "loss": 0.08620834350585938, + "step": 8600 + }, + { + "epoch": 0.07440489057595698, + "grad_norm": 18.25627432848076, + "learning_rate": 5.979533689498251e-06, + "loss": 0.2137237548828125, + "step": 8605 + }, + { + "epoch": 0.07444812409750023, + "grad_norm": 23.001250763112548, + "learning_rate": 5.979509925500181e-06, + "loss": 0.26997222900390627, + "step": 8610 + }, + { + "epoch": 0.0744913576190435, + "grad_norm": 45.17658456861925, + "learning_rate": 5.979486147760874e-06, + "loss": 0.3775238037109375, + "step": 8615 + }, + { + "epoch": 0.07453459114058676, + "grad_norm": 1.5211602040033727, + "learning_rate": 5.979462356280441e-06, + "loss": 0.4538166046142578, + "step": 8620 + }, + { + "epoch": 0.07457782466213003, + "grad_norm": 7.7748948728654526, + "learning_rate": 5.97943855105899e-06, + "loss": 0.1942058563232422, + "step": 8625 + }, + { + "epoch": 0.07462105818367329, + "grad_norm": 12.383266648927913, + "learning_rate": 5.9794147320966324e-06, + "loss": 0.28917884826660156, + "step": 8630 + }, + { + "epoch": 0.07466429170521656, + "grad_norm": 5.864727299485902, + "learning_rate": 5.9793908993934765e-06, + "loss": 0.058160400390625, + "step": 8635 + }, + { + "epoch": 0.07470752522675982, + "grad_norm": 7.756231108092057, + "learning_rate": 5.979367052949634e-06, + "loss": 0.442388916015625, + "step": 8640 + }, + { + "epoch": 0.07475075874830309, + "grad_norm": 98.92990538095432, + "learning_rate": 5.979343192765214e-06, + "loss": 0.1935577392578125, + "step": 8645 + }, + { + "epoch": 0.07479399226984634, + "grad_norm": 3.078882405351248, + "learning_rate": 5.979319318840326e-06, + "loss": 0.0875335693359375, + "step": 8650 + }, + { + "epoch": 0.07483722579138961, + "grad_norm": 20.562792249841646, + "learning_rate": 5.9792954311750806e-06, + "loss": 0.249945068359375, + "step": 8655 + }, + { + "epoch": 0.07488045931293287, + "grad_norm": 1.717700697938772, + "learning_rate": 5.979271529769587e-06, + "loss": 0.4210693359375, + "step": 8660 + }, + { + "epoch": 0.07492369283447614, + "grad_norm": 4.982152229539973, + "learning_rate": 5.979247614623958e-06, + "loss": 0.4058509826660156, + "step": 8665 + }, + { + "epoch": 0.0749669263560194, + "grad_norm": 13.353766271088958, + "learning_rate": 5.979223685738301e-06, + "loss": 0.22115631103515626, + "step": 8670 + }, + { + "epoch": 0.07501015987756267, + "grad_norm": 2.1529207919464772, + "learning_rate": 5.979199743112728e-06, + "loss": 0.13254241943359374, + "step": 8675 + }, + { + "epoch": 0.07505339339910592, + "grad_norm": 20.153691832846636, + "learning_rate": 5.979175786747349e-06, + "loss": 0.22319812774658204, + "step": 8680 + }, + { + "epoch": 0.0750966269206492, + "grad_norm": 25.164120812975742, + "learning_rate": 5.979151816642275e-06, + "loss": 0.46113739013671873, + "step": 8685 + }, + { + "epoch": 0.07513986044219245, + "grad_norm": 39.12947025869925, + "learning_rate": 5.9791278327976164e-06, + "loss": 0.29049072265625, + "step": 8690 + }, + { + "epoch": 0.07518309396373572, + "grad_norm": 5.67084909882586, + "learning_rate": 5.979103835213482e-06, + "loss": 0.144329833984375, + "step": 8695 + }, + { + "epoch": 0.07522632748527898, + "grad_norm": 0.4171443154666093, + "learning_rate": 5.979079823889985e-06, + "loss": 0.1619537353515625, + "step": 8700 + }, + { + "epoch": 0.07526956100682225, + "grad_norm": 6.108056400282241, + "learning_rate": 5.979055798827234e-06, + "loss": 0.2948272705078125, + "step": 8705 + }, + { + "epoch": 0.07531279452836552, + "grad_norm": 2.4910703768477287, + "learning_rate": 5.979031760025343e-06, + "loss": 0.135479736328125, + "step": 8710 + }, + { + "epoch": 0.07535602804990878, + "grad_norm": 16.409211827634966, + "learning_rate": 5.979007707484419e-06, + "loss": 0.29057159423828127, + "step": 8715 + }, + { + "epoch": 0.07539926157145205, + "grad_norm": 2.636407925360121, + "learning_rate": 5.978983641204575e-06, + "loss": 0.061871337890625, + "step": 8720 + }, + { + "epoch": 0.0754424950929953, + "grad_norm": 28.948328362333765, + "learning_rate": 5.978959561185922e-06, + "loss": 0.29965972900390625, + "step": 8725 + }, + { + "epoch": 0.07548572861453857, + "grad_norm": 7.415788497065855, + "learning_rate": 5.97893546742857e-06, + "loss": 0.2126708984375, + "step": 8730 + }, + { + "epoch": 0.07552896213608183, + "grad_norm": 4.4489457048365155, + "learning_rate": 5.978911359932632e-06, + "loss": 0.16948699951171875, + "step": 8735 + }, + { + "epoch": 0.0755721956576251, + "grad_norm": 18.16599524644508, + "learning_rate": 5.978887238698217e-06, + "loss": 0.298797607421875, + "step": 8740 + }, + { + "epoch": 0.07561542917916836, + "grad_norm": 14.90908714965715, + "learning_rate": 5.978863103725438e-06, + "loss": 0.1343109130859375, + "step": 8745 + }, + { + "epoch": 0.07565866270071163, + "grad_norm": 0.45360825398195065, + "learning_rate": 5.978838955014406e-06, + "loss": 0.2889373779296875, + "step": 8750 + }, + { + "epoch": 0.07570189622225489, + "grad_norm": 26.963020594547434, + "learning_rate": 5.978814792565231e-06, + "loss": 0.5131210327148438, + "step": 8755 + }, + { + "epoch": 0.07574512974379816, + "grad_norm": 1.4334256336322186, + "learning_rate": 5.978790616378026e-06, + "loss": 0.38660888671875, + "step": 8760 + }, + { + "epoch": 0.07578836326534141, + "grad_norm": 9.3738240001971, + "learning_rate": 5.978766426452901e-06, + "loss": 0.38077239990234374, + "step": 8765 + }, + { + "epoch": 0.07583159678688468, + "grad_norm": 15.729166432269185, + "learning_rate": 5.9787422227899684e-06, + "loss": 0.168768310546875, + "step": 8770 + }, + { + "epoch": 0.07587483030842794, + "grad_norm": 40.442895326182644, + "learning_rate": 5.978718005389341e-06, + "loss": 0.384716796875, + "step": 8775 + }, + { + "epoch": 0.07591806382997121, + "grad_norm": 3.011283447772427, + "learning_rate": 5.978693774251129e-06, + "loss": 0.227960205078125, + "step": 8780 + }, + { + "epoch": 0.07596129735151447, + "grad_norm": 19.542844097896786, + "learning_rate": 5.978669529375444e-06, + "loss": 0.1964141845703125, + "step": 8785 + }, + { + "epoch": 0.07600453087305774, + "grad_norm": 27.67942003716519, + "learning_rate": 5.978645270762399e-06, + "loss": 0.7707172393798828, + "step": 8790 + }, + { + "epoch": 0.076047764394601, + "grad_norm": 14.830066255813845, + "learning_rate": 5.978620998412104e-06, + "loss": 0.151080322265625, + "step": 8795 + }, + { + "epoch": 0.07609099791614427, + "grad_norm": 13.174721974608907, + "learning_rate": 5.978596712324673e-06, + "loss": 0.08379669189453125, + "step": 8800 + }, + { + "epoch": 0.07613423143768752, + "grad_norm": 4.428950445410255, + "learning_rate": 5.978572412500218e-06, + "loss": 0.15521240234375, + "step": 8805 + }, + { + "epoch": 0.07617746495923079, + "grad_norm": 69.20541230313432, + "learning_rate": 5.978548098938849e-06, + "loss": 0.361865234375, + "step": 8810 + }, + { + "epoch": 0.07622069848077405, + "grad_norm": 52.33422244223033, + "learning_rate": 5.97852377164068e-06, + "loss": 0.2043285369873047, + "step": 8815 + }, + { + "epoch": 0.07626393200231732, + "grad_norm": 42.82582061429009, + "learning_rate": 5.978499430605822e-06, + "loss": 0.363775634765625, + "step": 8820 + }, + { + "epoch": 0.07630716552386058, + "grad_norm": 22.450689169208253, + "learning_rate": 5.9784750758343875e-06, + "loss": 0.16881103515625, + "step": 8825 + }, + { + "epoch": 0.07635039904540385, + "grad_norm": 12.500135669372403, + "learning_rate": 5.97845070732649e-06, + "loss": 0.4246917724609375, + "step": 8830 + }, + { + "epoch": 0.0763936325669471, + "grad_norm": 12.82917316332589, + "learning_rate": 5.978426325082241e-06, + "loss": 0.31070709228515625, + "step": 8835 + }, + { + "epoch": 0.07643686608849037, + "grad_norm": 20.46978833020878, + "learning_rate": 5.978401929101753e-06, + "loss": 0.23080596923828126, + "step": 8840 + }, + { + "epoch": 0.07648009961003363, + "grad_norm": 15.247873390279445, + "learning_rate": 5.978377519385138e-06, + "loss": 0.235003662109375, + "step": 8845 + }, + { + "epoch": 0.0765233331315769, + "grad_norm": 11.789648337478226, + "learning_rate": 5.97835309593251e-06, + "loss": 0.1722259521484375, + "step": 8850 + }, + { + "epoch": 0.07656656665312016, + "grad_norm": 3.2514290681226203, + "learning_rate": 5.978328658743979e-06, + "loss": 0.22811279296875, + "step": 8855 + }, + { + "epoch": 0.07660980017466343, + "grad_norm": 20.234399745363618, + "learning_rate": 5.978304207819661e-06, + "loss": 0.29125518798828126, + "step": 8860 + }, + { + "epoch": 0.07665303369620668, + "grad_norm": 10.938279127584606, + "learning_rate": 5.9782797431596665e-06, + "loss": 0.25845947265625, + "step": 8865 + }, + { + "epoch": 0.07669626721774996, + "grad_norm": 6.147141616643055, + "learning_rate": 5.978255264764109e-06, + "loss": 0.32234344482421873, + "step": 8870 + }, + { + "epoch": 0.07673950073929321, + "grad_norm": 19.477513503709105, + "learning_rate": 5.978230772633101e-06, + "loss": 0.1149261474609375, + "step": 8875 + }, + { + "epoch": 0.07678273426083648, + "grad_norm": 16.25122899367549, + "learning_rate": 5.978206266766756e-06, + "loss": 0.124334716796875, + "step": 8880 + }, + { + "epoch": 0.07682596778237974, + "grad_norm": 43.58474260544167, + "learning_rate": 5.978181747165187e-06, + "loss": 0.18107337951660157, + "step": 8885 + }, + { + "epoch": 0.07686920130392301, + "grad_norm": 30.14415089438932, + "learning_rate": 5.978157213828508e-06, + "loss": 0.2802093505859375, + "step": 8890 + }, + { + "epoch": 0.07691243482546628, + "grad_norm": 6.981593047827186, + "learning_rate": 5.978132666756829e-06, + "loss": 0.30447025299072267, + "step": 8895 + }, + { + "epoch": 0.07695566834700954, + "grad_norm": 28.191872106722226, + "learning_rate": 5.978108105950266e-06, + "loss": 0.17995796203613282, + "step": 8900 + }, + { + "epoch": 0.07699890186855281, + "grad_norm": 2.13557045401864, + "learning_rate": 5.978083531408932e-06, + "loss": 0.355859375, + "step": 8905 + }, + { + "epoch": 0.07704213539009606, + "grad_norm": 42.666784875551826, + "learning_rate": 5.97805894313294e-06, + "loss": 0.508001708984375, + "step": 8910 + }, + { + "epoch": 0.07708536891163933, + "grad_norm": 4.493419576742584, + "learning_rate": 5.978034341122404e-06, + "loss": 0.2560302734375, + "step": 8915 + }, + { + "epoch": 0.07712860243318259, + "grad_norm": 14.653906601143557, + "learning_rate": 5.978009725377435e-06, + "loss": 0.2644378662109375, + "step": 8920 + }, + { + "epoch": 0.07717183595472586, + "grad_norm": 4.718678565291195, + "learning_rate": 5.97798509589815e-06, + "loss": 0.43170166015625, + "step": 8925 + }, + { + "epoch": 0.07721506947626912, + "grad_norm": 36.65170361835453, + "learning_rate": 5.9779604526846604e-06, + "loss": 0.21855621337890624, + "step": 8930 + }, + { + "epoch": 0.07725830299781239, + "grad_norm": 1.8323101326419664, + "learning_rate": 5.977935795737079e-06, + "loss": 0.098779296875, + "step": 8935 + }, + { + "epoch": 0.07730153651935565, + "grad_norm": 36.52441740270857, + "learning_rate": 5.977911125055522e-06, + "loss": 0.53074951171875, + "step": 8940 + }, + { + "epoch": 0.07734477004089892, + "grad_norm": 8.641270624643445, + "learning_rate": 5.977886440640102e-06, + "loss": 0.373236083984375, + "step": 8945 + }, + { + "epoch": 0.07738800356244217, + "grad_norm": 3.5653051529280217, + "learning_rate": 5.977861742490933e-06, + "loss": 0.203271484375, + "step": 8950 + }, + { + "epoch": 0.07743123708398544, + "grad_norm": 21.460393871005493, + "learning_rate": 5.977837030608129e-06, + "loss": 0.30014724731445314, + "step": 8955 + }, + { + "epoch": 0.0774744706055287, + "grad_norm": 8.837249824997373, + "learning_rate": 5.977812304991803e-06, + "loss": 0.1814178466796875, + "step": 8960 + }, + { + "epoch": 0.07751770412707197, + "grad_norm": 23.67535408017826, + "learning_rate": 5.97778756564207e-06, + "loss": 0.2436126708984375, + "step": 8965 + }, + { + "epoch": 0.07756093764861523, + "grad_norm": 7.3447234758569895, + "learning_rate": 5.977762812559043e-06, + "loss": 0.3354034423828125, + "step": 8970 + }, + { + "epoch": 0.0776041711701585, + "grad_norm": 5.026198470259687, + "learning_rate": 5.977738045742838e-06, + "loss": 0.2921356201171875, + "step": 8975 + }, + { + "epoch": 0.07764740469170175, + "grad_norm": 12.551368454663717, + "learning_rate": 5.977713265193568e-06, + "loss": 0.464459228515625, + "step": 8980 + }, + { + "epoch": 0.07769063821324503, + "grad_norm": 16.194930769804973, + "learning_rate": 5.9776884709113484e-06, + "loss": 0.2688934326171875, + "step": 8985 + }, + { + "epoch": 0.07773387173478828, + "grad_norm": 0.5577456368669366, + "learning_rate": 5.977663662896292e-06, + "loss": 0.1492218017578125, + "step": 8990 + }, + { + "epoch": 0.07777710525633155, + "grad_norm": 39.64723496936373, + "learning_rate": 5.977638841148514e-06, + "loss": 0.2081146240234375, + "step": 8995 + }, + { + "epoch": 0.07782033877787481, + "grad_norm": 13.248801803244453, + "learning_rate": 5.977614005668129e-06, + "loss": 0.2670318603515625, + "step": 9000 + }, + { + "epoch": 0.07786357229941808, + "grad_norm": 10.76050654973934, + "learning_rate": 5.977589156455251e-06, + "loss": 0.19356460571289064, + "step": 9005 + }, + { + "epoch": 0.07790680582096134, + "grad_norm": 16.397702173095745, + "learning_rate": 5.977564293509994e-06, + "loss": 0.12672653198242187, + "step": 9010 + }, + { + "epoch": 0.0779500393425046, + "grad_norm": 11.545776322942757, + "learning_rate": 5.977539416832475e-06, + "loss": 0.27646331787109374, + "step": 9015 + }, + { + "epoch": 0.07799327286404786, + "grad_norm": 31.86688962254, + "learning_rate": 5.977514526422807e-06, + "loss": 0.266162109375, + "step": 9020 + }, + { + "epoch": 0.07803650638559113, + "grad_norm": 4.407300902965463, + "learning_rate": 5.977489622281105e-06, + "loss": 0.1494232177734375, + "step": 9025 + }, + { + "epoch": 0.07807973990713439, + "grad_norm": 22.327229847948917, + "learning_rate": 5.977464704407484e-06, + "loss": 0.1675872802734375, + "step": 9030 + }, + { + "epoch": 0.07812297342867766, + "grad_norm": 14.83473550647777, + "learning_rate": 5.977439772802058e-06, + "loss": 0.146563720703125, + "step": 9035 + }, + { + "epoch": 0.07816620695022092, + "grad_norm": 0.6451148711617317, + "learning_rate": 5.977414827464943e-06, + "loss": 0.34095458984375, + "step": 9040 + }, + { + "epoch": 0.07820944047176419, + "grad_norm": 5.683341215387772, + "learning_rate": 5.9773898683962545e-06, + "loss": 0.1060943603515625, + "step": 9045 + }, + { + "epoch": 0.07825267399330744, + "grad_norm": 9.357794779901326, + "learning_rate": 5.977364895596108e-06, + "loss": 0.10186614990234374, + "step": 9050 + }, + { + "epoch": 0.07829590751485072, + "grad_norm": 2.1711505386724284, + "learning_rate": 5.977339909064616e-06, + "loss": 0.051678466796875, + "step": 9055 + }, + { + "epoch": 0.07833914103639397, + "grad_norm": 11.454413202682943, + "learning_rate": 5.977314908801896e-06, + "loss": 0.26998138427734375, + "step": 9060 + }, + { + "epoch": 0.07838237455793724, + "grad_norm": 11.760251902530268, + "learning_rate": 5.977289894808063e-06, + "loss": 0.29180908203125, + "step": 9065 + }, + { + "epoch": 0.0784256080794805, + "grad_norm": 9.062483283110678, + "learning_rate": 5.977264867083231e-06, + "loss": 0.21548233032226563, + "step": 9070 + }, + { + "epoch": 0.07846884160102377, + "grad_norm": 0.23368670262348853, + "learning_rate": 5.9772398256275185e-06, + "loss": 0.13119354248046874, + "step": 9075 + }, + { + "epoch": 0.07851207512256704, + "grad_norm": 52.495516599136586, + "learning_rate": 5.9772147704410375e-06, + "loss": 0.22706375122070313, + "step": 9080 + }, + { + "epoch": 0.0785553086441103, + "grad_norm": 3.6793451411504146, + "learning_rate": 5.977189701523905e-06, + "loss": 0.02147369384765625, + "step": 9085 + }, + { + "epoch": 0.07859854216565357, + "grad_norm": 28.509108387504142, + "learning_rate": 5.977164618876238e-06, + "loss": 0.169659423828125, + "step": 9090 + }, + { + "epoch": 0.07864177568719682, + "grad_norm": 16.798258839256604, + "learning_rate": 5.977139522498149e-06, + "loss": 0.2101116180419922, + "step": 9095 + }, + { + "epoch": 0.0786850092087401, + "grad_norm": 6.804189042632293, + "learning_rate": 5.977114412389757e-06, + "loss": 0.186328125, + "step": 9100 + }, + { + "epoch": 0.07872824273028335, + "grad_norm": 53.500868962266885, + "learning_rate": 5.977089288551176e-06, + "loss": 0.20690460205078126, + "step": 9105 + }, + { + "epoch": 0.07877147625182662, + "grad_norm": 2.736540454245606, + "learning_rate": 5.977064150982522e-06, + "loss": 0.2102783203125, + "step": 9110 + }, + { + "epoch": 0.07881470977336988, + "grad_norm": 13.447423363319157, + "learning_rate": 5.977038999683912e-06, + "loss": 0.17527732849121094, + "step": 9115 + }, + { + "epoch": 0.07885794329491315, + "grad_norm": 1.0706667790437845, + "learning_rate": 5.97701383465546e-06, + "loss": 0.220318603515625, + "step": 9120 + }, + { + "epoch": 0.0789011768164564, + "grad_norm": 9.92561567755826, + "learning_rate": 5.976988655897284e-06, + "loss": 0.10293426513671874, + "step": 9125 + }, + { + "epoch": 0.07894441033799968, + "grad_norm": 1.7436040374943054, + "learning_rate": 5.976963463409499e-06, + "loss": 0.29380950927734373, + "step": 9130 + }, + { + "epoch": 0.07898764385954293, + "grad_norm": 54.618026513294005, + "learning_rate": 5.976938257192222e-06, + "loss": 0.47209625244140624, + "step": 9135 + }, + { + "epoch": 0.0790308773810862, + "grad_norm": 9.732011848567852, + "learning_rate": 5.976913037245569e-06, + "loss": 0.2024078369140625, + "step": 9140 + }, + { + "epoch": 0.07907411090262946, + "grad_norm": 2.059640014784559, + "learning_rate": 5.976887803569655e-06, + "loss": 0.061496543884277347, + "step": 9145 + }, + { + "epoch": 0.07911734442417273, + "grad_norm": 52.38669168397316, + "learning_rate": 5.9768625561645984e-06, + "loss": 0.64281005859375, + "step": 9150 + }, + { + "epoch": 0.07916057794571599, + "grad_norm": 28.805499172190313, + "learning_rate": 5.976837295030515e-06, + "loss": 0.147247314453125, + "step": 9155 + }, + { + "epoch": 0.07920381146725926, + "grad_norm": 8.614272361852834, + "learning_rate": 5.97681202016752e-06, + "loss": 0.2215118408203125, + "step": 9160 + }, + { + "epoch": 0.07924704498880251, + "grad_norm": 33.109867113241826, + "learning_rate": 5.976786731575731e-06, + "loss": 0.2839042663574219, + "step": 9165 + }, + { + "epoch": 0.07929027851034579, + "grad_norm": 55.39695993060813, + "learning_rate": 5.976761429255266e-06, + "loss": 0.51700439453125, + "step": 9170 + }, + { + "epoch": 0.07933351203188904, + "grad_norm": 17.267428124673867, + "learning_rate": 5.97673611320624e-06, + "loss": 0.1667633056640625, + "step": 9175 + }, + { + "epoch": 0.07937674555343231, + "grad_norm": 16.594501421357727, + "learning_rate": 5.9767107834287695e-06, + "loss": 0.48579559326171873, + "step": 9180 + }, + { + "epoch": 0.07941997907497557, + "grad_norm": 45.331287950883755, + "learning_rate": 5.976685439922971e-06, + "loss": 0.21409759521484376, + "step": 9185 + }, + { + "epoch": 0.07946321259651884, + "grad_norm": 23.112387707814356, + "learning_rate": 5.976660082688964e-06, + "loss": 0.3886474609375, + "step": 9190 + }, + { + "epoch": 0.0795064461180621, + "grad_norm": 0.8785985014497925, + "learning_rate": 5.976634711726863e-06, + "loss": 0.323345947265625, + "step": 9195 + }, + { + "epoch": 0.07954967963960537, + "grad_norm": 1.0311892188814966, + "learning_rate": 5.976609327036785e-06, + "loss": 0.12984771728515626, + "step": 9200 + }, + { + "epoch": 0.07959291316114862, + "grad_norm": 0.10563385890495523, + "learning_rate": 5.976583928618849e-06, + "loss": 0.2065044403076172, + "step": 9205 + }, + { + "epoch": 0.0796361466826919, + "grad_norm": 57.57840451139724, + "learning_rate": 5.976558516473171e-06, + "loss": 0.512060546875, + "step": 9210 + }, + { + "epoch": 0.07967938020423515, + "grad_norm": 31.719299902755548, + "learning_rate": 5.976533090599866e-06, + "loss": 0.27025909423828126, + "step": 9215 + }, + { + "epoch": 0.07972261372577842, + "grad_norm": 10.8571397908985, + "learning_rate": 5.976507650999055e-06, + "loss": 0.0994049072265625, + "step": 9220 + }, + { + "epoch": 0.07976584724732168, + "grad_norm": 2.4392913361879995, + "learning_rate": 5.976482197670854e-06, + "loss": 0.17762680053710939, + "step": 9225 + }, + { + "epoch": 0.07980908076886495, + "grad_norm": 24.57113138845677, + "learning_rate": 5.97645673061538e-06, + "loss": 0.3755096435546875, + "step": 9230 + }, + { + "epoch": 0.0798523142904082, + "grad_norm": 6.970913144712703, + "learning_rate": 5.9764312498327505e-06, + "loss": 0.16662139892578126, + "step": 9235 + }, + { + "epoch": 0.07989554781195148, + "grad_norm": 1.4125379796888209, + "learning_rate": 5.976405755323082e-06, + "loss": 0.187176513671875, + "step": 9240 + }, + { + "epoch": 0.07993878133349473, + "grad_norm": 1.1952120205816192, + "learning_rate": 5.976380247086495e-06, + "loss": 0.23267822265625, + "step": 9245 + }, + { + "epoch": 0.079982014855038, + "grad_norm": 5.19132374557143, + "learning_rate": 5.976354725123104e-06, + "loss": 0.158544921875, + "step": 9250 + }, + { + "epoch": 0.08002524837658126, + "grad_norm": 2.211814429015143, + "learning_rate": 5.976329189433028e-06, + "loss": 0.47084503173828124, + "step": 9255 + }, + { + "epoch": 0.08006848189812453, + "grad_norm": 4.548719208140557, + "learning_rate": 5.976303640016385e-06, + "loss": 0.1621185302734375, + "step": 9260 + }, + { + "epoch": 0.0801117154196678, + "grad_norm": 30.880861680559264, + "learning_rate": 5.976278076873293e-06, + "loss": 0.2343353271484375, + "step": 9265 + }, + { + "epoch": 0.08015494894121106, + "grad_norm": 5.056092715333235, + "learning_rate": 5.97625250000387e-06, + "loss": 0.11478729248046875, + "step": 9270 + }, + { + "epoch": 0.08019818246275433, + "grad_norm": 34.9384524841081, + "learning_rate": 5.976226909408232e-06, + "loss": 0.17811279296875, + "step": 9275 + }, + { + "epoch": 0.08024141598429758, + "grad_norm": 31.988952517940405, + "learning_rate": 5.976201305086499e-06, + "loss": 0.188714599609375, + "step": 9280 + }, + { + "epoch": 0.08028464950584085, + "grad_norm": 5.868914842956357, + "learning_rate": 5.976175687038789e-06, + "loss": 0.11533584594726562, + "step": 9285 + }, + { + "epoch": 0.08032788302738411, + "grad_norm": 20.47261307073547, + "learning_rate": 5.97615005526522e-06, + "loss": 0.20108642578125, + "step": 9290 + }, + { + "epoch": 0.08037111654892738, + "grad_norm": 7.294028996066747, + "learning_rate": 5.9761244097659086e-06, + "loss": 0.219287109375, + "step": 9295 + }, + { + "epoch": 0.08041435007047064, + "grad_norm": 1.365957773538245, + "learning_rate": 5.9760987505409756e-06, + "loss": 0.06087570190429688, + "step": 9300 + }, + { + "epoch": 0.08045758359201391, + "grad_norm": 4.353683518314225, + "learning_rate": 5.976073077590538e-06, + "loss": 0.15711669921875, + "step": 9305 + }, + { + "epoch": 0.08050081711355717, + "grad_norm": 3.293317710216178, + "learning_rate": 5.976047390914714e-06, + "loss": 0.07652359008789063, + "step": 9310 + }, + { + "epoch": 0.08054405063510044, + "grad_norm": 6.282589463760422, + "learning_rate": 5.976021690513622e-06, + "loss": 0.162310791015625, + "step": 9315 + }, + { + "epoch": 0.0805872841566437, + "grad_norm": 14.063266337037469, + "learning_rate": 5.975995976387381e-06, + "loss": 0.0961395263671875, + "step": 9320 + }, + { + "epoch": 0.08063051767818696, + "grad_norm": 2.477228040463217, + "learning_rate": 5.9759702485361106e-06, + "loss": 0.173046875, + "step": 9325 + }, + { + "epoch": 0.08067375119973022, + "grad_norm": 2.4614049796108803, + "learning_rate": 5.975944506959927e-06, + "loss": 0.0711456298828125, + "step": 9330 + }, + { + "epoch": 0.08071698472127349, + "grad_norm": 31.341458650075428, + "learning_rate": 5.975918751658951e-06, + "loss": 0.8036293029785156, + "step": 9335 + }, + { + "epoch": 0.08076021824281675, + "grad_norm": 8.370230753373459, + "learning_rate": 5.975892982633301e-06, + "loss": 0.069342041015625, + "step": 9340 + }, + { + "epoch": 0.08080345176436002, + "grad_norm": 7.101659356881122, + "learning_rate": 5.975867199883095e-06, + "loss": 0.5239959716796875, + "step": 9345 + }, + { + "epoch": 0.08084668528590327, + "grad_norm": 3.193686645982965, + "learning_rate": 5.975841403408453e-06, + "loss": 0.19080657958984376, + "step": 9350 + }, + { + "epoch": 0.08088991880744655, + "grad_norm": 5.56084805397013, + "learning_rate": 5.975815593209492e-06, + "loss": 0.1024169921875, + "step": 9355 + }, + { + "epoch": 0.0809331523289898, + "grad_norm": 3.3546530841617224, + "learning_rate": 5.975789769286334e-06, + "loss": 0.1822998046875, + "step": 9360 + }, + { + "epoch": 0.08097638585053307, + "grad_norm": 24.83795479123996, + "learning_rate": 5.975763931639096e-06, + "loss": 0.6505184173583984, + "step": 9365 + }, + { + "epoch": 0.08101961937207633, + "grad_norm": 88.3480102440235, + "learning_rate": 5.975738080267897e-06, + "loss": 0.32347412109375, + "step": 9370 + }, + { + "epoch": 0.0810628528936196, + "grad_norm": 10.281999771889538, + "learning_rate": 5.9757122151728584e-06, + "loss": 0.080303955078125, + "step": 9375 + }, + { + "epoch": 0.08110608641516286, + "grad_norm": 7.81431248621866, + "learning_rate": 5.975686336354097e-06, + "loss": 0.24095458984375, + "step": 9380 + }, + { + "epoch": 0.08114931993670613, + "grad_norm": 6.572506024081361, + "learning_rate": 5.975660443811733e-06, + "loss": 0.08730316162109375, + "step": 9385 + }, + { + "epoch": 0.08119255345824938, + "grad_norm": 12.388105999976384, + "learning_rate": 5.975634537545886e-06, + "loss": 0.2263214111328125, + "step": 9390 + }, + { + "epoch": 0.08123578697979265, + "grad_norm": 22.256707024718285, + "learning_rate": 5.975608617556675e-06, + "loss": 0.374615478515625, + "step": 9395 + }, + { + "epoch": 0.08127902050133591, + "grad_norm": 9.676545279355881, + "learning_rate": 5.975582683844222e-06, + "loss": 0.24112548828125, + "step": 9400 + }, + { + "epoch": 0.08132225402287918, + "grad_norm": 4.134577773012617, + "learning_rate": 5.975556736408642e-06, + "loss": 0.325030517578125, + "step": 9405 + }, + { + "epoch": 0.08136548754442244, + "grad_norm": 31.285451596550892, + "learning_rate": 5.9755307752500595e-06, + "loss": 0.3108612060546875, + "step": 9410 + }, + { + "epoch": 0.08140872106596571, + "grad_norm": 23.789798237362028, + "learning_rate": 5.975504800368591e-06, + "loss": 0.12539138793945312, + "step": 9415 + }, + { + "epoch": 0.08145195458750897, + "grad_norm": 10.734782695866492, + "learning_rate": 5.9754788117643576e-06, + "loss": 0.2648193359375, + "step": 9420 + }, + { + "epoch": 0.08149518810905224, + "grad_norm": 8.044500153086254, + "learning_rate": 5.975452809437478e-06, + "loss": 0.4643035888671875, + "step": 9425 + }, + { + "epoch": 0.08153842163059549, + "grad_norm": 38.44390351831482, + "learning_rate": 5.975426793388074e-06, + "loss": 0.37254486083984373, + "step": 9430 + }, + { + "epoch": 0.08158165515213876, + "grad_norm": 24.566519814914805, + "learning_rate": 5.975400763616264e-06, + "loss": 0.412890625, + "step": 9435 + }, + { + "epoch": 0.08162488867368202, + "grad_norm": 30.021177434283064, + "learning_rate": 5.975374720122168e-06, + "loss": 0.5390625, + "step": 9440 + }, + { + "epoch": 0.08166812219522529, + "grad_norm": 23.614537287807718, + "learning_rate": 5.975348662905907e-06, + "loss": 0.2928680419921875, + "step": 9445 + }, + { + "epoch": 0.08171135571676856, + "grad_norm": 2.598572793682082, + "learning_rate": 5.975322591967602e-06, + "loss": 0.570050048828125, + "step": 9450 + }, + { + "epoch": 0.08175458923831182, + "grad_norm": 5.271930757531168, + "learning_rate": 5.975296507307371e-06, + "loss": 0.235205078125, + "step": 9455 + }, + { + "epoch": 0.08179782275985509, + "grad_norm": 55.19412288230106, + "learning_rate": 5.975270408925336e-06, + "loss": 0.3811798095703125, + "step": 9460 + }, + { + "epoch": 0.08184105628139834, + "grad_norm": 41.14215311965705, + "learning_rate": 5.975244296821617e-06, + "loss": 0.3150390625, + "step": 9465 + }, + { + "epoch": 0.08188428980294161, + "grad_norm": 27.796531170150665, + "learning_rate": 5.975218170996332e-06, + "loss": 0.3490447998046875, + "step": 9470 + }, + { + "epoch": 0.08192752332448487, + "grad_norm": 8.40684395236496, + "learning_rate": 5.975192031449606e-06, + "loss": 0.28995819091796876, + "step": 9475 + }, + { + "epoch": 0.08197075684602814, + "grad_norm": 5.007642253454993, + "learning_rate": 5.9751658781815565e-06, + "loss": 0.143048095703125, + "step": 9480 + }, + { + "epoch": 0.0820139903675714, + "grad_norm": 1.1714073927533428, + "learning_rate": 5.975139711192305e-06, + "loss": 0.351531982421875, + "step": 9485 + }, + { + "epoch": 0.08205722388911467, + "grad_norm": 3.866397974830824, + "learning_rate": 5.975113530481971e-06, + "loss": 0.0765869140625, + "step": 9490 + }, + { + "epoch": 0.08210045741065793, + "grad_norm": 9.979909779458717, + "learning_rate": 5.975087336050678e-06, + "loss": 0.27613525390625, + "step": 9495 + }, + { + "epoch": 0.0821436909322012, + "grad_norm": 19.273431084067816, + "learning_rate": 5.975061127898544e-06, + "loss": 0.32239837646484376, + "step": 9500 + }, + { + "epoch": 0.08218692445374445, + "grad_norm": 14.276301149651278, + "learning_rate": 5.975034906025692e-06, + "loss": 0.1444793701171875, + "step": 9505 + }, + { + "epoch": 0.08223015797528772, + "grad_norm": 0.9411373263182803, + "learning_rate": 5.975008670432242e-06, + "loss": 0.14062919616699218, + "step": 9510 + }, + { + "epoch": 0.08227339149683098, + "grad_norm": 0.5035483633415042, + "learning_rate": 5.974982421118314e-06, + "loss": 0.18337326049804686, + "step": 9515 + }, + { + "epoch": 0.08231662501837425, + "grad_norm": 5.9860179431210945, + "learning_rate": 5.974956158084029e-06, + "loss": 0.1004150390625, + "step": 9520 + }, + { + "epoch": 0.08235985853991751, + "grad_norm": 26.169139011044834, + "learning_rate": 5.974929881329511e-06, + "loss": 0.17276611328125, + "step": 9525 + }, + { + "epoch": 0.08240309206146078, + "grad_norm": 7.357241833157973, + "learning_rate": 5.974903590854878e-06, + "loss": 0.34443359375, + "step": 9530 + }, + { + "epoch": 0.08244632558300403, + "grad_norm": 15.42446870859426, + "learning_rate": 5.974877286660253e-06, + "loss": 0.06521072387695312, + "step": 9535 + }, + { + "epoch": 0.0824895591045473, + "grad_norm": 31.818723851056763, + "learning_rate": 5.974850968745756e-06, + "loss": 0.1565826416015625, + "step": 9540 + }, + { + "epoch": 0.08253279262609056, + "grad_norm": 0.30942341941658763, + "learning_rate": 5.9748246371115105e-06, + "loss": 0.08712425231933593, + "step": 9545 + }, + { + "epoch": 0.08257602614763383, + "grad_norm": 14.58820331383157, + "learning_rate": 5.974798291757636e-06, + "loss": 0.170220947265625, + "step": 9550 + }, + { + "epoch": 0.08261925966917709, + "grad_norm": 36.68360133770911, + "learning_rate": 5.974771932684255e-06, + "loss": 0.2070587158203125, + "step": 9555 + }, + { + "epoch": 0.08266249319072036, + "grad_norm": 44.35598954057679, + "learning_rate": 5.974745559891488e-06, + "loss": 0.130609130859375, + "step": 9560 + }, + { + "epoch": 0.08270572671226362, + "grad_norm": 1.008583781781222, + "learning_rate": 5.974719173379458e-06, + "loss": 0.288433837890625, + "step": 9565 + }, + { + "epoch": 0.08274896023380689, + "grad_norm": 42.6543640197866, + "learning_rate": 5.9746927731482855e-06, + "loss": 0.19795684814453124, + "step": 9570 + }, + { + "epoch": 0.08279219375535014, + "grad_norm": 13.557399473037734, + "learning_rate": 5.974666359198092e-06, + "loss": 0.275927734375, + "step": 9575 + }, + { + "epoch": 0.08283542727689341, + "grad_norm": 14.994868021795448, + "learning_rate": 5.974639931529002e-06, + "loss": 0.08589019775390624, + "step": 9580 + }, + { + "epoch": 0.08287866079843667, + "grad_norm": 8.354158338968944, + "learning_rate": 5.974613490141135e-06, + "loss": 0.1729248046875, + "step": 9585 + }, + { + "epoch": 0.08292189431997994, + "grad_norm": 3.688733481590501, + "learning_rate": 5.974587035034612e-06, + "loss": 0.1016937255859375, + "step": 9590 + }, + { + "epoch": 0.0829651278415232, + "grad_norm": 9.441072641918902, + "learning_rate": 5.974560566209558e-06, + "loss": 0.2387054443359375, + "step": 9595 + }, + { + "epoch": 0.08300836136306647, + "grad_norm": 10.05073242724147, + "learning_rate": 5.974534083666093e-06, + "loss": 0.207855224609375, + "step": 9600 + }, + { + "epoch": 0.08305159488460973, + "grad_norm": 0.48277034202950403, + "learning_rate": 5.9745075874043395e-06, + "loss": 0.5239532470703125, + "step": 9605 + }, + { + "epoch": 0.083094828406153, + "grad_norm": 13.085196740179505, + "learning_rate": 5.97448107742442e-06, + "loss": 0.19915313720703126, + "step": 9610 + }, + { + "epoch": 0.08313806192769625, + "grad_norm": 92.1802215994842, + "learning_rate": 5.974454553726457e-06, + "loss": 0.16241455078125, + "step": 9615 + }, + { + "epoch": 0.08318129544923952, + "grad_norm": 29.210575365017384, + "learning_rate": 5.974428016310572e-06, + "loss": 0.17126617431640626, + "step": 9620 + }, + { + "epoch": 0.08322452897078278, + "grad_norm": 110.43145697443252, + "learning_rate": 5.974401465176887e-06, + "loss": 0.288037109375, + "step": 9625 + }, + { + "epoch": 0.08326776249232605, + "grad_norm": 25.00460932562376, + "learning_rate": 5.9743749003255265e-06, + "loss": 0.24640960693359376, + "step": 9630 + }, + { + "epoch": 0.08331099601386932, + "grad_norm": 12.682586544799259, + "learning_rate": 5.974348321756611e-06, + "loss": 0.14036407470703124, + "step": 9635 + }, + { + "epoch": 0.08335422953541258, + "grad_norm": 2.069919670258263, + "learning_rate": 5.974321729470264e-06, + "loss": 0.4230831146240234, + "step": 9640 + }, + { + "epoch": 0.08339746305695585, + "grad_norm": 1.5642988759445067, + "learning_rate": 5.974295123466608e-06, + "loss": 0.3356414794921875, + "step": 9645 + }, + { + "epoch": 0.0834406965784991, + "grad_norm": 33.37305228597064, + "learning_rate": 5.974268503745766e-06, + "loss": 0.448419189453125, + "step": 9650 + }, + { + "epoch": 0.08348393010004238, + "grad_norm": 4.437989073590723, + "learning_rate": 5.974241870307861e-06, + "loss": 0.3619110107421875, + "step": 9655 + }, + { + "epoch": 0.08352716362158563, + "grad_norm": 28.48901883686757, + "learning_rate": 5.974215223153014e-06, + "loss": 0.53897705078125, + "step": 9660 + }, + { + "epoch": 0.0835703971431289, + "grad_norm": 47.69863876007573, + "learning_rate": 5.97418856228135e-06, + "loss": 0.4198951721191406, + "step": 9665 + }, + { + "epoch": 0.08361363066467216, + "grad_norm": 7.37734430643273, + "learning_rate": 5.974161887692991e-06, + "loss": 0.17200927734375, + "step": 9670 + }, + { + "epoch": 0.08365686418621543, + "grad_norm": 5.776734384634531, + "learning_rate": 5.97413519938806e-06, + "loss": 0.08606948852539062, + "step": 9675 + }, + { + "epoch": 0.08370009770775869, + "grad_norm": 33.992644844660084, + "learning_rate": 5.9741084973666805e-06, + "loss": 0.3325225830078125, + "step": 9680 + }, + { + "epoch": 0.08374333122930196, + "grad_norm": 19.32988905272157, + "learning_rate": 5.974081781628976e-06, + "loss": 0.21273193359375, + "step": 9685 + }, + { + "epoch": 0.08378656475084521, + "grad_norm": 63.07085630605038, + "learning_rate": 5.974055052175068e-06, + "loss": 0.25467987060546876, + "step": 9690 + }, + { + "epoch": 0.08382979827238848, + "grad_norm": 2.815391853853809, + "learning_rate": 5.974028309005082e-06, + "loss": 0.0325042724609375, + "step": 9695 + }, + { + "epoch": 0.08387303179393174, + "grad_norm": 12.037851811904629, + "learning_rate": 5.974001552119139e-06, + "loss": 0.2252685546875, + "step": 9700 + }, + { + "epoch": 0.08391626531547501, + "grad_norm": 16.24358454066937, + "learning_rate": 5.973974781517364e-06, + "loss": 0.401959228515625, + "step": 9705 + }, + { + "epoch": 0.08395949883701827, + "grad_norm": 3.865316076918934, + "learning_rate": 5.973947997199881e-06, + "loss": 0.21248550415039064, + "step": 9710 + }, + { + "epoch": 0.08400273235856154, + "grad_norm": 6.467413028505928, + "learning_rate": 5.973921199166811e-06, + "loss": 0.173870849609375, + "step": 9715 + }, + { + "epoch": 0.0840459658801048, + "grad_norm": 8.809286031730345, + "learning_rate": 5.973894387418281e-06, + "loss": 0.2296051025390625, + "step": 9720 + }, + { + "epoch": 0.08408919940164807, + "grad_norm": 20.92991849455114, + "learning_rate": 5.973867561954411e-06, + "loss": 0.80751953125, + "step": 9725 + }, + { + "epoch": 0.08413243292319132, + "grad_norm": 12.371843488550692, + "learning_rate": 5.973840722775329e-06, + "loss": 0.2243804931640625, + "step": 9730 + }, + { + "epoch": 0.08417566644473459, + "grad_norm": 5.714039296171383, + "learning_rate": 5.973813869881154e-06, + "loss": 0.24007797241210938, + "step": 9735 + }, + { + "epoch": 0.08421889996627785, + "grad_norm": 53.919366222947296, + "learning_rate": 5.9737870032720135e-06, + "loss": 0.5461578369140625, + "step": 9740 + }, + { + "epoch": 0.08426213348782112, + "grad_norm": 61.206424975567806, + "learning_rate": 5.973760122948029e-06, + "loss": 0.502044677734375, + "step": 9745 + }, + { + "epoch": 0.08430536700936438, + "grad_norm": 41.94801898337293, + "learning_rate": 5.973733228909326e-06, + "loss": 0.2123046875, + "step": 9750 + }, + { + "epoch": 0.08434860053090765, + "grad_norm": 6.115062526467149, + "learning_rate": 5.973706321156029e-06, + "loss": 0.4290557861328125, + "step": 9755 + }, + { + "epoch": 0.0843918340524509, + "grad_norm": 24.94217209590591, + "learning_rate": 5.9736793996882604e-06, + "loss": 0.339776611328125, + "step": 9760 + }, + { + "epoch": 0.08443506757399417, + "grad_norm": 0.14727565435626686, + "learning_rate": 5.973652464506145e-06, + "loss": 0.29270401000976565, + "step": 9765 + }, + { + "epoch": 0.08447830109553743, + "grad_norm": 3.642490622705205, + "learning_rate": 5.973625515609808e-06, + "loss": 0.0985137939453125, + "step": 9770 + }, + { + "epoch": 0.0845215346170807, + "grad_norm": 8.599116293557888, + "learning_rate": 5.973598552999373e-06, + "loss": 0.36296615600585935, + "step": 9775 + }, + { + "epoch": 0.08456476813862396, + "grad_norm": 15.27278127704458, + "learning_rate": 5.973571576674963e-06, + "loss": 0.19447021484375, + "step": 9780 + }, + { + "epoch": 0.08460800166016723, + "grad_norm": 13.031723854622806, + "learning_rate": 5.973544586636705e-06, + "loss": 0.10854034423828125, + "step": 9785 + }, + { + "epoch": 0.08465123518171049, + "grad_norm": 2.2504959782295013, + "learning_rate": 5.973517582884721e-06, + "loss": 0.21313095092773438, + "step": 9790 + }, + { + "epoch": 0.08469446870325376, + "grad_norm": 29.583076067330882, + "learning_rate": 5.973490565419137e-06, + "loss": 0.14772491455078124, + "step": 9795 + }, + { + "epoch": 0.08473770222479701, + "grad_norm": 2.4452177807405024, + "learning_rate": 5.973463534240078e-06, + "loss": 0.13181571960449218, + "step": 9800 + }, + { + "epoch": 0.08478093574634028, + "grad_norm": 24.589433873587694, + "learning_rate": 5.973436489347666e-06, + "loss": 0.18140716552734376, + "step": 9805 + }, + { + "epoch": 0.08482416926788354, + "grad_norm": 0.8506310259281606, + "learning_rate": 5.97340943074203e-06, + "loss": 0.16190185546875, + "step": 9810 + }, + { + "epoch": 0.08486740278942681, + "grad_norm": 20.31376620483646, + "learning_rate": 5.973382358423292e-06, + "loss": 0.37896728515625, + "step": 9815 + }, + { + "epoch": 0.08491063631097008, + "grad_norm": 3.590867293165558, + "learning_rate": 5.9733552723915755e-06, + "loss": 0.550762939453125, + "step": 9820 + }, + { + "epoch": 0.08495386983251334, + "grad_norm": 17.213953026451883, + "learning_rate": 5.973328172647008e-06, + "loss": 0.40640106201171877, + "step": 9825 + }, + { + "epoch": 0.08499710335405661, + "grad_norm": 57.15070074134439, + "learning_rate": 5.973301059189714e-06, + "loss": 0.63076171875, + "step": 9830 + }, + { + "epoch": 0.08504033687559986, + "grad_norm": 13.013714876536765, + "learning_rate": 5.973273932019819e-06, + "loss": 0.28427734375, + "step": 9835 + }, + { + "epoch": 0.08508357039714314, + "grad_norm": 16.475540772593913, + "learning_rate": 5.973246791137446e-06, + "loss": 0.282318115234375, + "step": 9840 + }, + { + "epoch": 0.08512680391868639, + "grad_norm": 10.686986380929255, + "learning_rate": 5.973219636542723e-06, + "loss": 0.19091644287109374, + "step": 9845 + }, + { + "epoch": 0.08517003744022966, + "grad_norm": 24.685458084518686, + "learning_rate": 5.9731924682357725e-06, + "loss": 0.3499237060546875, + "step": 9850 + }, + { + "epoch": 0.08521327096177292, + "grad_norm": 41.36641132905231, + "learning_rate": 5.973165286216722e-06, + "loss": 0.12784423828125, + "step": 9855 + }, + { + "epoch": 0.08525650448331619, + "grad_norm": 17.47168075622494, + "learning_rate": 5.973138090485695e-06, + "loss": 0.15927276611328126, + "step": 9860 + }, + { + "epoch": 0.08529973800485945, + "grad_norm": 13.479222762414784, + "learning_rate": 5.973110881042819e-06, + "loss": 0.27298431396484374, + "step": 9865 + }, + { + "epoch": 0.08534297152640272, + "grad_norm": 77.92500926282116, + "learning_rate": 5.9730836578882165e-06, + "loss": 0.4269927978515625, + "step": 9870 + }, + { + "epoch": 0.08538620504794597, + "grad_norm": 23.14605937575974, + "learning_rate": 5.973056421022016e-06, + "loss": 0.223876953125, + "step": 9875 + }, + { + "epoch": 0.08542943856948924, + "grad_norm": 6.621308871439761, + "learning_rate": 5.973029170444342e-06, + "loss": 0.17919549942016602, + "step": 9880 + }, + { + "epoch": 0.0854726720910325, + "grad_norm": 73.95419132200041, + "learning_rate": 5.97300190615532e-06, + "loss": 0.366058349609375, + "step": 9885 + }, + { + "epoch": 0.08551590561257577, + "grad_norm": 64.1762721309702, + "learning_rate": 5.972974628155076e-06, + "loss": 0.21975059509277345, + "step": 9890 + }, + { + "epoch": 0.08555913913411903, + "grad_norm": 46.64134412080625, + "learning_rate": 5.972947336443736e-06, + "loss": 0.33580322265625, + "step": 9895 + }, + { + "epoch": 0.0856023726556623, + "grad_norm": 10.080650747384404, + "learning_rate": 5.972920031021425e-06, + "loss": 0.11199951171875, + "step": 9900 + }, + { + "epoch": 0.08564560617720555, + "grad_norm": 9.826661490178553, + "learning_rate": 5.972892711888269e-06, + "loss": 0.24309768676757812, + "step": 9905 + }, + { + "epoch": 0.08568883969874883, + "grad_norm": 20.964023429849917, + "learning_rate": 5.972865379044396e-06, + "loss": 0.3934326171875, + "step": 9910 + }, + { + "epoch": 0.08573207322029208, + "grad_norm": 0.8421720560775712, + "learning_rate": 5.9728380324899295e-06, + "loss": 0.50477294921875, + "step": 9915 + }, + { + "epoch": 0.08577530674183535, + "grad_norm": 39.33236036680243, + "learning_rate": 5.972810672224998e-06, + "loss": 0.21436767578125, + "step": 9920 + }, + { + "epoch": 0.08581854026337861, + "grad_norm": 40.67951371621833, + "learning_rate": 5.972783298249725e-06, + "loss": 0.4034721374511719, + "step": 9925 + }, + { + "epoch": 0.08586177378492188, + "grad_norm": 0.4303725135673967, + "learning_rate": 5.9727559105642385e-06, + "loss": 0.35023956298828124, + "step": 9930 + }, + { + "epoch": 0.08590500730646514, + "grad_norm": 5.86546478100666, + "learning_rate": 5.972728509168664e-06, + "loss": 0.10536651611328125, + "step": 9935 + }, + { + "epoch": 0.08594824082800841, + "grad_norm": 1.7463139516775792, + "learning_rate": 5.972701094063129e-06, + "loss": 0.11437835693359374, + "step": 9940 + }, + { + "epoch": 0.08599147434955166, + "grad_norm": 17.153913777475477, + "learning_rate": 5.972673665247759e-06, + "loss": 0.16003799438476562, + "step": 9945 + }, + { + "epoch": 0.08603470787109493, + "grad_norm": 6.73996172059716, + "learning_rate": 5.97264622272268e-06, + "loss": 0.23257293701171874, + "step": 9950 + }, + { + "epoch": 0.08607794139263819, + "grad_norm": 0.44824422414867976, + "learning_rate": 5.9726187664880205e-06, + "loss": 0.3290901184082031, + "step": 9955 + }, + { + "epoch": 0.08612117491418146, + "grad_norm": 13.601189710736488, + "learning_rate": 5.972591296543905e-06, + "loss": 0.1255523681640625, + "step": 9960 + }, + { + "epoch": 0.08616440843572472, + "grad_norm": 23.144510387102997, + "learning_rate": 5.972563812890463e-06, + "loss": 0.201654052734375, + "step": 9965 + }, + { + "epoch": 0.08620764195726799, + "grad_norm": 15.184134004758306, + "learning_rate": 5.972536315527816e-06, + "loss": 0.2532470703125, + "step": 9970 + }, + { + "epoch": 0.08625087547881125, + "grad_norm": 20.404571082902933, + "learning_rate": 5.972508804456097e-06, + "loss": 0.19463577270507812, + "step": 9975 + }, + { + "epoch": 0.08629410900035452, + "grad_norm": 10.634180119399995, + "learning_rate": 5.972481279675429e-06, + "loss": 0.2584075927734375, + "step": 9980 + }, + { + "epoch": 0.08633734252189777, + "grad_norm": 10.299804359001136, + "learning_rate": 5.97245374118594e-06, + "loss": 0.1218048095703125, + "step": 9985 + }, + { + "epoch": 0.08638057604344104, + "grad_norm": 40.06039695958394, + "learning_rate": 5.972426188987756e-06, + "loss": 0.3451530456542969, + "step": 9990 + }, + { + "epoch": 0.0864238095649843, + "grad_norm": 14.766446178382624, + "learning_rate": 5.972398623081007e-06, + "loss": 0.359954833984375, + "step": 9995 + }, + { + "epoch": 0.08646704308652757, + "grad_norm": 3.0176706903033574, + "learning_rate": 5.972371043465817e-06, + "loss": 0.15145263671875, + "step": 10000 + }, + { + "epoch": 0.08651027660807083, + "grad_norm": 35.32943606690357, + "learning_rate": 5.9723434501423145e-06, + "loss": 0.37257537841796873, + "step": 10005 + }, + { + "epoch": 0.0865535101296141, + "grad_norm": 47.40502877752081, + "learning_rate": 5.972315843110627e-06, + "loss": 0.99525146484375, + "step": 10010 + }, + { + "epoch": 0.08659674365115737, + "grad_norm": 14.32018791354528, + "learning_rate": 5.972288222370881e-06, + "loss": 0.194952392578125, + "step": 10015 + }, + { + "epoch": 0.08663997717270062, + "grad_norm": 9.127189339177827, + "learning_rate": 5.972260587923205e-06, + "loss": 0.19522705078125, + "step": 10020 + }, + { + "epoch": 0.0866832106942439, + "grad_norm": 14.64783675616304, + "learning_rate": 5.972232939767726e-06, + "loss": 0.223272705078125, + "step": 10025 + }, + { + "epoch": 0.08672644421578715, + "grad_norm": 4.36702004717657, + "learning_rate": 5.97220527790457e-06, + "loss": 0.09523162841796876, + "step": 10030 + }, + { + "epoch": 0.08676967773733042, + "grad_norm": 20.745207614979222, + "learning_rate": 5.9721776023338665e-06, + "loss": 0.20584602355957032, + "step": 10035 + }, + { + "epoch": 0.08681291125887368, + "grad_norm": 7.427211569077455, + "learning_rate": 5.972149913055743e-06, + "loss": 0.03368072509765625, + "step": 10040 + }, + { + "epoch": 0.08685614478041695, + "grad_norm": 4.90157922698661, + "learning_rate": 5.9721222100703265e-06, + "loss": 0.386602783203125, + "step": 10045 + }, + { + "epoch": 0.0868993783019602, + "grad_norm": 1.1931344515562268, + "learning_rate": 5.972094493377745e-06, + "loss": 0.25673828125, + "step": 10050 + }, + { + "epoch": 0.08694261182350348, + "grad_norm": 27.960964668300143, + "learning_rate": 5.972066762978126e-06, + "loss": 0.3246673583984375, + "step": 10055 + }, + { + "epoch": 0.08698584534504673, + "grad_norm": 2.3474683365735216, + "learning_rate": 5.972039018871597e-06, + "loss": 0.1843536376953125, + "step": 10060 + }, + { + "epoch": 0.08702907886659, + "grad_norm": 17.878658841022347, + "learning_rate": 5.9720112610582876e-06, + "loss": 0.267193603515625, + "step": 10065 + }, + { + "epoch": 0.08707231238813326, + "grad_norm": 69.97185043265739, + "learning_rate": 5.971983489538325e-06, + "loss": 0.517156982421875, + "step": 10070 + }, + { + "epoch": 0.08711554590967653, + "grad_norm": 18.2928886005617, + "learning_rate": 5.971955704311838e-06, + "loss": 0.2397613525390625, + "step": 10075 + }, + { + "epoch": 0.08715877943121979, + "grad_norm": 7.831224431503593, + "learning_rate": 5.971927905378952e-06, + "loss": 0.2041015625, + "step": 10080 + }, + { + "epoch": 0.08720201295276306, + "grad_norm": 11.923354318568586, + "learning_rate": 5.971900092739798e-06, + "loss": 0.423291015625, + "step": 10085 + }, + { + "epoch": 0.08724524647430631, + "grad_norm": 47.00053342877463, + "learning_rate": 5.971872266394503e-06, + "loss": 0.3716579437255859, + "step": 10090 + }, + { + "epoch": 0.08728847999584959, + "grad_norm": 14.008309699221641, + "learning_rate": 5.971844426343197e-06, + "loss": 0.20607452392578124, + "step": 10095 + }, + { + "epoch": 0.08733171351739284, + "grad_norm": 8.34125434199902, + "learning_rate": 5.971816572586005e-06, + "loss": 0.14873046875, + "step": 10100 + }, + { + "epoch": 0.08737494703893611, + "grad_norm": 5.066291995080734, + "learning_rate": 5.97178870512306e-06, + "loss": 0.22953662872314454, + "step": 10105 + }, + { + "epoch": 0.08741818056047937, + "grad_norm": 60.472645414328625, + "learning_rate": 5.971760823954487e-06, + "loss": 0.4205780029296875, + "step": 10110 + }, + { + "epoch": 0.08746141408202264, + "grad_norm": 22.72625089350026, + "learning_rate": 5.971732929080414e-06, + "loss": 0.573431396484375, + "step": 10115 + }, + { + "epoch": 0.0875046476035659, + "grad_norm": 16.622860849239476, + "learning_rate": 5.971705020500973e-06, + "loss": 0.4200439453125, + "step": 10120 + }, + { + "epoch": 0.08754788112510917, + "grad_norm": 2.076305037132445, + "learning_rate": 5.971677098216292e-06, + "loss": 0.23663787841796874, + "step": 10125 + }, + { + "epoch": 0.08759111464665242, + "grad_norm": 10.13733165262601, + "learning_rate": 5.971649162226497e-06, + "loss": 0.11474990844726562, + "step": 10130 + }, + { + "epoch": 0.0876343481681957, + "grad_norm": 1.7005596276599164, + "learning_rate": 5.97162121253172e-06, + "loss": 0.2712577819824219, + "step": 10135 + }, + { + "epoch": 0.08767758168973895, + "grad_norm": 29.320907336423886, + "learning_rate": 5.971593249132087e-06, + "loss": 0.1668304443359375, + "step": 10140 + }, + { + "epoch": 0.08772081521128222, + "grad_norm": 12.420230706983826, + "learning_rate": 5.971565272027729e-06, + "loss": 0.1840057373046875, + "step": 10145 + }, + { + "epoch": 0.08776404873282548, + "grad_norm": 7.051758494966452, + "learning_rate": 5.9715372812187754e-06, + "loss": 0.11503448486328124, + "step": 10150 + }, + { + "epoch": 0.08780728225436875, + "grad_norm": 1.3434258363607503, + "learning_rate": 5.971509276705354e-06, + "loss": 0.11025848388671874, + "step": 10155 + }, + { + "epoch": 0.087850515775912, + "grad_norm": 16.15911062848116, + "learning_rate": 5.9714812584875936e-06, + "loss": 0.3657958984375, + "step": 10160 + }, + { + "epoch": 0.08789374929745528, + "grad_norm": 7.830093630007225, + "learning_rate": 5.971453226565625e-06, + "loss": 0.191192626953125, + "step": 10165 + }, + { + "epoch": 0.08793698281899853, + "grad_norm": 5.067349484267504, + "learning_rate": 5.971425180939577e-06, + "loss": 0.21273021697998046, + "step": 10170 + }, + { + "epoch": 0.0879802163405418, + "grad_norm": 1.9587835427375284, + "learning_rate": 5.971397121609578e-06, + "loss": 0.1759735107421875, + "step": 10175 + }, + { + "epoch": 0.08802344986208506, + "grad_norm": 4.510720845064227, + "learning_rate": 5.9713690485757584e-06, + "loss": 0.0751617431640625, + "step": 10180 + }, + { + "epoch": 0.08806668338362833, + "grad_norm": 30.241902805808596, + "learning_rate": 5.971340961838246e-06, + "loss": 0.18030014038085937, + "step": 10185 + }, + { + "epoch": 0.08810991690517159, + "grad_norm": 4.177871259550969, + "learning_rate": 5.971312861397174e-06, + "loss": 0.07701416015625, + "step": 10190 + }, + { + "epoch": 0.08815315042671486, + "grad_norm": 27.201695273024217, + "learning_rate": 5.971284747252668e-06, + "loss": 0.260186767578125, + "step": 10195 + }, + { + "epoch": 0.08819638394825813, + "grad_norm": 2.057913354949861, + "learning_rate": 5.97125661940486e-06, + "loss": 0.21312026977539061, + "step": 10200 + }, + { + "epoch": 0.08823961746980138, + "grad_norm": 23.182836353461024, + "learning_rate": 5.971228477853878e-06, + "loss": 0.37080230712890627, + "step": 10205 + }, + { + "epoch": 0.08828285099134466, + "grad_norm": 20.18987695488687, + "learning_rate": 5.971200322599854e-06, + "loss": 0.221630859375, + "step": 10210 + }, + { + "epoch": 0.08832608451288791, + "grad_norm": 18.504468025299747, + "learning_rate": 5.9711721536429164e-06, + "loss": 0.1801513671875, + "step": 10215 + }, + { + "epoch": 0.08836931803443118, + "grad_norm": 9.385593107740867, + "learning_rate": 5.971143970983195e-06, + "loss": 0.6923187255859375, + "step": 10220 + }, + { + "epoch": 0.08841255155597444, + "grad_norm": 30.199710921804016, + "learning_rate": 5.97111577462082e-06, + "loss": 0.413983154296875, + "step": 10225 + }, + { + "epoch": 0.08845578507751771, + "grad_norm": 4.248779795650375, + "learning_rate": 5.971087564555922e-06, + "loss": 0.2647735595703125, + "step": 10230 + }, + { + "epoch": 0.08849901859906097, + "grad_norm": 34.51524542264373, + "learning_rate": 5.971059340788631e-06, + "loss": 0.23350830078125, + "step": 10235 + }, + { + "epoch": 0.08854225212060424, + "grad_norm": 9.64036381250708, + "learning_rate": 5.9710311033190756e-06, + "loss": 0.21261749267578126, + "step": 10240 + }, + { + "epoch": 0.0885854856421475, + "grad_norm": 13.149358608669377, + "learning_rate": 5.971002852147389e-06, + "loss": 0.35045928955078126, + "step": 10245 + }, + { + "epoch": 0.08862871916369076, + "grad_norm": 35.41499038986253, + "learning_rate": 5.970974587273697e-06, + "loss": 0.4172607421875, + "step": 10250 + }, + { + "epoch": 0.08867195268523402, + "grad_norm": 26.167377138790563, + "learning_rate": 5.970946308698135e-06, + "loss": 0.23032073974609374, + "step": 10255 + }, + { + "epoch": 0.08871518620677729, + "grad_norm": 0.7543024098859612, + "learning_rate": 5.970918016420831e-06, + "loss": 0.5763046264648437, + "step": 10260 + }, + { + "epoch": 0.08875841972832055, + "grad_norm": 16.109161892471175, + "learning_rate": 5.970889710441916e-06, + "loss": 0.19656982421875, + "step": 10265 + }, + { + "epoch": 0.08880165324986382, + "grad_norm": 40.39310898862554, + "learning_rate": 5.970861390761519e-06, + "loss": 0.35504608154296874, + "step": 10270 + }, + { + "epoch": 0.08884488677140708, + "grad_norm": 18.548843871042838, + "learning_rate": 5.970833057379772e-06, + "loss": 0.307318115234375, + "step": 10275 + }, + { + "epoch": 0.08888812029295035, + "grad_norm": 5.811525246685331, + "learning_rate": 5.9708047102968054e-06, + "loss": 0.1634033203125, + "step": 10280 + }, + { + "epoch": 0.0889313538144936, + "grad_norm": 3.388873684062335, + "learning_rate": 5.970776349512751e-06, + "loss": 0.042919921875, + "step": 10285 + }, + { + "epoch": 0.08897458733603687, + "grad_norm": 5.875403118225356, + "learning_rate": 5.970747975027737e-06, + "loss": 0.1646240234375, + "step": 10290 + }, + { + "epoch": 0.08901782085758013, + "grad_norm": 1.5534584187433615, + "learning_rate": 5.970719586841897e-06, + "loss": 0.22379150390625, + "step": 10295 + }, + { + "epoch": 0.0890610543791234, + "grad_norm": 2.5422521436747383, + "learning_rate": 5.9706911849553605e-06, + "loss": 0.1654500961303711, + "step": 10300 + }, + { + "epoch": 0.08910428790066666, + "grad_norm": 5.008481481567721, + "learning_rate": 5.970662769368259e-06, + "loss": 0.0755126953125, + "step": 10305 + }, + { + "epoch": 0.08914752142220993, + "grad_norm": 65.08397969505269, + "learning_rate": 5.970634340080723e-06, + "loss": 0.49951629638671874, + "step": 10310 + }, + { + "epoch": 0.08919075494375318, + "grad_norm": 7.522829332836227, + "learning_rate": 5.970605897092884e-06, + "loss": 0.58680419921875, + "step": 10315 + }, + { + "epoch": 0.08923398846529645, + "grad_norm": 6.6399654946916, + "learning_rate": 5.970577440404873e-06, + "loss": 0.30081787109375, + "step": 10320 + }, + { + "epoch": 0.08927722198683971, + "grad_norm": 44.120023945513864, + "learning_rate": 5.970548970016821e-06, + "loss": 0.38620529174804685, + "step": 10325 + }, + { + "epoch": 0.08932045550838298, + "grad_norm": 36.130810880146434, + "learning_rate": 5.97052048592886e-06, + "loss": 0.21912384033203125, + "step": 10330 + }, + { + "epoch": 0.08936368902992624, + "grad_norm": 39.5038506562415, + "learning_rate": 5.970491988141121e-06, + "loss": 0.34988861083984374, + "step": 10335 + }, + { + "epoch": 0.08940692255146951, + "grad_norm": 8.074803399859046, + "learning_rate": 5.970463476653736e-06, + "loss": 0.09844970703125, + "step": 10340 + }, + { + "epoch": 0.08945015607301277, + "grad_norm": 4.637900067444965, + "learning_rate": 5.9704349514668345e-06, + "loss": 0.09890823364257813, + "step": 10345 + }, + { + "epoch": 0.08949338959455604, + "grad_norm": 7.514684237120038, + "learning_rate": 5.97040641258055e-06, + "loss": 0.08438873291015625, + "step": 10350 + }, + { + "epoch": 0.08953662311609929, + "grad_norm": 9.60019389650197, + "learning_rate": 5.970377859995014e-06, + "loss": 0.39909896850585935, + "step": 10355 + }, + { + "epoch": 0.08957985663764256, + "grad_norm": 12.889128118206099, + "learning_rate": 5.970349293710358e-06, + "loss": 0.22690658569335936, + "step": 10360 + }, + { + "epoch": 0.08962309015918582, + "grad_norm": 7.5347843409252695, + "learning_rate": 5.970320713726712e-06, + "loss": 0.09138336181640624, + "step": 10365 + }, + { + "epoch": 0.08966632368072909, + "grad_norm": 14.395895497592173, + "learning_rate": 5.9702921200442105e-06, + "loss": 0.3865966796875, + "step": 10370 + }, + { + "epoch": 0.08970955720227235, + "grad_norm": 10.681270272223031, + "learning_rate": 5.9702635126629846e-06, + "loss": 0.248828125, + "step": 10375 + }, + { + "epoch": 0.08975279072381562, + "grad_norm": 21.26451611967302, + "learning_rate": 5.970234891583166e-06, + "loss": 0.2202484130859375, + "step": 10380 + }, + { + "epoch": 0.08979602424535889, + "grad_norm": 40.345423514409305, + "learning_rate": 5.970206256804885e-06, + "loss": 0.44039382934570315, + "step": 10385 + }, + { + "epoch": 0.08983925776690214, + "grad_norm": 6.9368008569774355, + "learning_rate": 5.970177608328277e-06, + "loss": 0.075677490234375, + "step": 10390 + }, + { + "epoch": 0.08988249128844542, + "grad_norm": 17.8173304255902, + "learning_rate": 5.9701489461534715e-06, + "loss": 0.457183837890625, + "step": 10395 + }, + { + "epoch": 0.08992572480998867, + "grad_norm": 7.797718176290588, + "learning_rate": 5.970120270280601e-06, + "loss": 0.24974746704101564, + "step": 10400 + }, + { + "epoch": 0.08996895833153194, + "grad_norm": 38.62216957069026, + "learning_rate": 5.970091580709799e-06, + "loss": 0.348394775390625, + "step": 10405 + }, + { + "epoch": 0.0900121918530752, + "grad_norm": 50.440045096447776, + "learning_rate": 5.970062877441197e-06, + "loss": 0.3813629150390625, + "step": 10410 + }, + { + "epoch": 0.09005542537461847, + "grad_norm": 17.06038654121812, + "learning_rate": 5.970034160474927e-06, + "loss": 0.0702667236328125, + "step": 10415 + }, + { + "epoch": 0.09009865889616173, + "grad_norm": 0.5489078390311333, + "learning_rate": 5.970005429811122e-06, + "loss": 0.1612548828125, + "step": 10420 + }, + { + "epoch": 0.090141892417705, + "grad_norm": 31.19305630712008, + "learning_rate": 5.969976685449915e-06, + "loss": 0.11552963256835938, + "step": 10425 + }, + { + "epoch": 0.09018512593924825, + "grad_norm": 21.737735484558215, + "learning_rate": 5.969947927391437e-06, + "loss": 0.18717803955078124, + "step": 10430 + }, + { + "epoch": 0.09022835946079152, + "grad_norm": 22.225014226003523, + "learning_rate": 5.969919155635822e-06, + "loss": 0.24605560302734375, + "step": 10435 + }, + { + "epoch": 0.09027159298233478, + "grad_norm": 0.2892320693059339, + "learning_rate": 5.969890370183203e-06, + "loss": 0.13109664916992186, + "step": 10440 + }, + { + "epoch": 0.09031482650387805, + "grad_norm": 2.84529327586152, + "learning_rate": 5.969861571033711e-06, + "loss": 0.6265625, + "step": 10445 + }, + { + "epoch": 0.09035806002542131, + "grad_norm": 5.332570636330933, + "learning_rate": 5.969832758187481e-06, + "loss": 0.1914306640625, + "step": 10450 + }, + { + "epoch": 0.09040129354696458, + "grad_norm": 5.092519359858804, + "learning_rate": 5.969803931644644e-06, + "loss": 0.25055580139160155, + "step": 10455 + }, + { + "epoch": 0.09044452706850784, + "grad_norm": 41.86736106785221, + "learning_rate": 5.969775091405333e-06, + "loss": 0.4351310729980469, + "step": 10460 + }, + { + "epoch": 0.0904877605900511, + "grad_norm": 7.374065007592485, + "learning_rate": 5.969746237469683e-06, + "loss": 0.22297210693359376, + "step": 10465 + }, + { + "epoch": 0.09053099411159436, + "grad_norm": 4.662727751300974, + "learning_rate": 5.9697173698378244e-06, + "loss": 0.18662109375, + "step": 10470 + }, + { + "epoch": 0.09057422763313763, + "grad_norm": 18.21174666427264, + "learning_rate": 5.969688488509892e-06, + "loss": 0.1083953857421875, + "step": 10475 + }, + { + "epoch": 0.09061746115468089, + "grad_norm": 3.388179419089267, + "learning_rate": 5.96965959348602e-06, + "loss": 0.164361572265625, + "step": 10480 + }, + { + "epoch": 0.09066069467622416, + "grad_norm": 4.113757103549368, + "learning_rate": 5.969630684766339e-06, + "loss": 0.20550918579101562, + "step": 10485 + }, + { + "epoch": 0.09070392819776742, + "grad_norm": 12.1004781788531, + "learning_rate": 5.969601762350985e-06, + "loss": 0.2097198486328125, + "step": 10490 + }, + { + "epoch": 0.09074716171931069, + "grad_norm": 4.763761862600791, + "learning_rate": 5.969572826240089e-06, + "loss": 0.1769989013671875, + "step": 10495 + }, + { + "epoch": 0.09079039524085394, + "grad_norm": 34.3534914502328, + "learning_rate": 5.969543876433785e-06, + "loss": 0.526513671875, + "step": 10500 + }, + { + "epoch": 0.09083362876239721, + "grad_norm": 11.544015176844438, + "learning_rate": 5.969514912932208e-06, + "loss": 0.7408660888671875, + "step": 10505 + }, + { + "epoch": 0.09087686228394047, + "grad_norm": 1.0467327150134424, + "learning_rate": 5.96948593573549e-06, + "loss": 0.1937835693359375, + "step": 10510 + }, + { + "epoch": 0.09092009580548374, + "grad_norm": 5.9279457302367735, + "learning_rate": 5.969456944843767e-06, + "loss": 0.227593994140625, + "step": 10515 + }, + { + "epoch": 0.090963329327027, + "grad_norm": 18.707923215459623, + "learning_rate": 5.969427940257169e-06, + "loss": 0.11622314453125, + "step": 10520 + }, + { + "epoch": 0.09100656284857027, + "grad_norm": 22.567887782078575, + "learning_rate": 5.9693989219758325e-06, + "loss": 0.162158203125, + "step": 10525 + }, + { + "epoch": 0.09104979637011353, + "grad_norm": 1.0878813890719359, + "learning_rate": 5.9693698899998905e-06, + "loss": 0.324066162109375, + "step": 10530 + }, + { + "epoch": 0.0910930298916568, + "grad_norm": 10.502579866553743, + "learning_rate": 5.9693408443294764e-06, + "loss": 0.28399658203125, + "step": 10535 + }, + { + "epoch": 0.09113626341320005, + "grad_norm": 5.602423146247638, + "learning_rate": 5.969311784964725e-06, + "loss": 0.14930496215820313, + "step": 10540 + }, + { + "epoch": 0.09117949693474332, + "grad_norm": 1.5788455517202047, + "learning_rate": 5.969282711905771e-06, + "loss": 0.0379486083984375, + "step": 10545 + }, + { + "epoch": 0.09122273045628658, + "grad_norm": 4.6409996435549115, + "learning_rate": 5.969253625152746e-06, + "loss": 0.28699302673339844, + "step": 10550 + }, + { + "epoch": 0.09126596397782985, + "grad_norm": 49.357151487082064, + "learning_rate": 5.969224524705786e-06, + "loss": 0.1654449462890625, + "step": 10555 + }, + { + "epoch": 0.09130919749937311, + "grad_norm": 16.403506475934968, + "learning_rate": 5.969195410565026e-06, + "loss": 0.109478759765625, + "step": 10560 + }, + { + "epoch": 0.09135243102091638, + "grad_norm": 3.544123381708635, + "learning_rate": 5.969166282730598e-06, + "loss": 0.07073974609375, + "step": 10565 + }, + { + "epoch": 0.09139566454245965, + "grad_norm": 31.692136245364388, + "learning_rate": 5.9691371412026375e-06, + "loss": 0.597576904296875, + "step": 10570 + }, + { + "epoch": 0.0914388980640029, + "grad_norm": 12.307433383365717, + "learning_rate": 5.969107985981279e-06, + "loss": 0.2949951171875, + "step": 10575 + }, + { + "epoch": 0.09148213158554618, + "grad_norm": 11.552336355223755, + "learning_rate": 5.969078817066657e-06, + "loss": 0.26842041015625, + "step": 10580 + }, + { + "epoch": 0.09152536510708943, + "grad_norm": 16.561364328211823, + "learning_rate": 5.969049634458906e-06, + "loss": 0.3057373046875, + "step": 10585 + }, + { + "epoch": 0.0915685986286327, + "grad_norm": 128.6052387458721, + "learning_rate": 5.96902043815816e-06, + "loss": 0.70699462890625, + "step": 10590 + }, + { + "epoch": 0.09161183215017596, + "grad_norm": 15.958080191483708, + "learning_rate": 5.968991228164554e-06, + "loss": 0.06835403442382812, + "step": 10595 + }, + { + "epoch": 0.09165506567171923, + "grad_norm": 19.73277635721897, + "learning_rate": 5.9689620044782235e-06, + "loss": 0.07478866577148438, + "step": 10600 + }, + { + "epoch": 0.09169829919326249, + "grad_norm": 1.8497161105042994, + "learning_rate": 5.9689327670993024e-06, + "loss": 0.09700469970703125, + "step": 10605 + }, + { + "epoch": 0.09174153271480576, + "grad_norm": 20.851842354326536, + "learning_rate": 5.968903516027925e-06, + "loss": 0.394384765625, + "step": 10610 + }, + { + "epoch": 0.09178476623634901, + "grad_norm": 3.6623099848358005, + "learning_rate": 5.968874251264227e-06, + "loss": 0.19924850463867189, + "step": 10615 + }, + { + "epoch": 0.09182799975789228, + "grad_norm": 8.587309169893203, + "learning_rate": 5.968844972808344e-06, + "loss": 0.156756591796875, + "step": 10620 + }, + { + "epoch": 0.09187123327943554, + "grad_norm": 5.903600646733417, + "learning_rate": 5.96881568066041e-06, + "loss": 0.2144317626953125, + "step": 10625 + }, + { + "epoch": 0.09191446680097881, + "grad_norm": 18.676568521936968, + "learning_rate": 5.96878637482056e-06, + "loss": 0.333056640625, + "step": 10630 + }, + { + "epoch": 0.09195770032252207, + "grad_norm": 8.828216394336314, + "learning_rate": 5.968757055288931e-06, + "loss": 0.369610595703125, + "step": 10635 + }, + { + "epoch": 0.09200093384406534, + "grad_norm": 21.754575400051575, + "learning_rate": 5.968727722065655e-06, + "loss": 0.1058135986328125, + "step": 10640 + }, + { + "epoch": 0.0920441673656086, + "grad_norm": 34.24555158836114, + "learning_rate": 5.96869837515087e-06, + "loss": 0.2478759765625, + "step": 10645 + }, + { + "epoch": 0.09208740088715187, + "grad_norm": 34.35463748100826, + "learning_rate": 5.9686690145447105e-06, + "loss": 0.17297897338867188, + "step": 10650 + }, + { + "epoch": 0.09213063440869512, + "grad_norm": 8.431864730998306, + "learning_rate": 5.968639640247311e-06, + "loss": 0.18477935791015626, + "step": 10655 + }, + { + "epoch": 0.09217386793023839, + "grad_norm": 3.799310931901873, + "learning_rate": 5.9686102522588095e-06, + "loss": 0.08853378295898437, + "step": 10660 + }, + { + "epoch": 0.09221710145178165, + "grad_norm": 1.3598348992890146, + "learning_rate": 5.968580850579338e-06, + "loss": 0.16551971435546875, + "step": 10665 + }, + { + "epoch": 0.09226033497332492, + "grad_norm": 3.337427674029632, + "learning_rate": 5.968551435209035e-06, + "loss": 0.1767547607421875, + "step": 10670 + }, + { + "epoch": 0.09230356849486818, + "grad_norm": 8.940915047412103, + "learning_rate": 5.968522006148034e-06, + "loss": 0.174560546875, + "step": 10675 + }, + { + "epoch": 0.09234680201641145, + "grad_norm": 0.21012209760363365, + "learning_rate": 5.968492563396472e-06, + "loss": 0.17929229736328126, + "step": 10680 + }, + { + "epoch": 0.0923900355379547, + "grad_norm": 8.370111299742403, + "learning_rate": 5.968463106954486e-06, + "loss": 0.290179443359375, + "step": 10685 + }, + { + "epoch": 0.09243326905949797, + "grad_norm": 18.50348726176823, + "learning_rate": 5.96843363682221e-06, + "loss": 0.194915771484375, + "step": 10690 + }, + { + "epoch": 0.09247650258104123, + "grad_norm": 25.660947949936673, + "learning_rate": 5.968404152999779e-06, + "loss": 0.27781829833984373, + "step": 10695 + }, + { + "epoch": 0.0925197361025845, + "grad_norm": 18.91639808554413, + "learning_rate": 5.968374655487332e-06, + "loss": 0.274603271484375, + "step": 10700 + }, + { + "epoch": 0.09256296962412776, + "grad_norm": 25.24935944289936, + "learning_rate": 5.968345144285002e-06, + "loss": 0.13546142578125, + "step": 10705 + }, + { + "epoch": 0.09260620314567103, + "grad_norm": 11.179174629904738, + "learning_rate": 5.968315619392928e-06, + "loss": 0.151739501953125, + "step": 10710 + }, + { + "epoch": 0.09264943666721429, + "grad_norm": 5.139165077414567, + "learning_rate": 5.968286080811244e-06, + "loss": 0.182611083984375, + "step": 10715 + }, + { + "epoch": 0.09269267018875756, + "grad_norm": 4.976584301107463, + "learning_rate": 5.968256528540086e-06, + "loss": 0.09311370849609375, + "step": 10720 + }, + { + "epoch": 0.09273590371030081, + "grad_norm": 4.06625203268348, + "learning_rate": 5.968226962579592e-06, + "loss": 0.19109039306640624, + "step": 10725 + }, + { + "epoch": 0.09277913723184408, + "grad_norm": 5.408940778081732, + "learning_rate": 5.968197382929898e-06, + "loss": 0.3177040100097656, + "step": 10730 + }, + { + "epoch": 0.09282237075338734, + "grad_norm": 37.84503343087337, + "learning_rate": 5.968167789591139e-06, + "loss": 0.414642333984375, + "step": 10735 + }, + { + "epoch": 0.09286560427493061, + "grad_norm": 12.77348728113622, + "learning_rate": 5.9681381825634526e-06, + "loss": 0.09077301025390624, + "step": 10740 + }, + { + "epoch": 0.09290883779647387, + "grad_norm": 5.9875831917922255, + "learning_rate": 5.968108561846975e-06, + "loss": 0.365380859375, + "step": 10745 + }, + { + "epoch": 0.09295207131801714, + "grad_norm": 1.3336020042932037, + "learning_rate": 5.968078927441843e-06, + "loss": 0.186395263671875, + "step": 10750 + }, + { + "epoch": 0.09299530483956041, + "grad_norm": 57.74844853491077, + "learning_rate": 5.968049279348194e-06, + "loss": 0.405126953125, + "step": 10755 + }, + { + "epoch": 0.09303853836110366, + "grad_norm": 1.4098635686782715, + "learning_rate": 5.968019617566163e-06, + "loss": 0.22788848876953124, + "step": 10760 + }, + { + "epoch": 0.09308177188264694, + "grad_norm": 12.11563577771485, + "learning_rate": 5.967989942095889e-06, + "loss": 0.118548583984375, + "step": 10765 + }, + { + "epoch": 0.09312500540419019, + "grad_norm": 21.18175878599244, + "learning_rate": 5.967960252937507e-06, + "loss": 0.320819091796875, + "step": 10770 + }, + { + "epoch": 0.09316823892573346, + "grad_norm": 14.596562906008522, + "learning_rate": 5.9679305500911544e-06, + "loss": 0.108636474609375, + "step": 10775 + }, + { + "epoch": 0.09321147244727672, + "grad_norm": 118.8760705862863, + "learning_rate": 5.967900833556967e-06, + "loss": 0.21087188720703126, + "step": 10780 + }, + { + "epoch": 0.09325470596881999, + "grad_norm": 14.530107090488645, + "learning_rate": 5.967871103335086e-06, + "loss": 0.256646728515625, + "step": 10785 + }, + { + "epoch": 0.09329793949036325, + "grad_norm": 1.2444148256417595, + "learning_rate": 5.967841359425644e-06, + "loss": 0.25800285339355467, + "step": 10790 + }, + { + "epoch": 0.09334117301190652, + "grad_norm": 3.297646663645346, + "learning_rate": 5.96781160182878e-06, + "loss": 0.1445526123046875, + "step": 10795 + }, + { + "epoch": 0.09338440653344977, + "grad_norm": 11.329252305968776, + "learning_rate": 5.967781830544631e-06, + "loss": 0.2526611328125, + "step": 10800 + }, + { + "epoch": 0.09342764005499304, + "grad_norm": 18.02388189659987, + "learning_rate": 5.967752045573336e-06, + "loss": 0.3261688232421875, + "step": 10805 + }, + { + "epoch": 0.0934708735765363, + "grad_norm": 4.662629584702919, + "learning_rate": 5.9677222469150294e-06, + "loss": 0.219061279296875, + "step": 10810 + }, + { + "epoch": 0.09351410709807957, + "grad_norm": 11.995702499573596, + "learning_rate": 5.96769243456985e-06, + "loss": 0.1838623046875, + "step": 10815 + }, + { + "epoch": 0.09355734061962283, + "grad_norm": 21.93086782372639, + "learning_rate": 5.967662608537936e-06, + "loss": 0.2133087158203125, + "step": 10820 + }, + { + "epoch": 0.0936005741411661, + "grad_norm": 2.701559316373073, + "learning_rate": 5.967632768819424e-06, + "loss": 0.1073883056640625, + "step": 10825 + }, + { + "epoch": 0.09364380766270936, + "grad_norm": 51.07935863690244, + "learning_rate": 5.967602915414451e-06, + "loss": 0.3155517578125, + "step": 10830 + }, + { + "epoch": 0.09368704118425263, + "grad_norm": 0.5855393830985224, + "learning_rate": 5.9675730483231565e-06, + "loss": 0.2326202392578125, + "step": 10835 + }, + { + "epoch": 0.09373027470579588, + "grad_norm": 35.69536798217053, + "learning_rate": 5.967543167545677e-06, + "loss": 0.26532821655273436, + "step": 10840 + }, + { + "epoch": 0.09377350822733915, + "grad_norm": 18.641763848456748, + "learning_rate": 5.967513273082151e-06, + "loss": 0.3183837890625, + "step": 10845 + }, + { + "epoch": 0.09381674174888241, + "grad_norm": 14.682726127858501, + "learning_rate": 5.967483364932716e-06, + "loss": 0.08067779541015625, + "step": 10850 + }, + { + "epoch": 0.09385997527042568, + "grad_norm": 27.337566364464823, + "learning_rate": 5.96745344309751e-06, + "loss": 0.2781364440917969, + "step": 10855 + }, + { + "epoch": 0.09390320879196894, + "grad_norm": 17.93366420939978, + "learning_rate": 5.967423507576671e-06, + "loss": 0.3287506103515625, + "step": 10860 + }, + { + "epoch": 0.09394644231351221, + "grad_norm": 32.092037174153035, + "learning_rate": 5.967393558370335e-06, + "loss": 0.36141357421875, + "step": 10865 + }, + { + "epoch": 0.09398967583505546, + "grad_norm": 0.35643304407525445, + "learning_rate": 5.967363595478645e-06, + "loss": 0.18786392211914063, + "step": 10870 + }, + { + "epoch": 0.09403290935659873, + "grad_norm": 13.894300870054485, + "learning_rate": 5.9673336189017345e-06, + "loss": 0.1823028564453125, + "step": 10875 + }, + { + "epoch": 0.09407614287814199, + "grad_norm": 29.339778815123818, + "learning_rate": 5.967303628639744e-06, + "loss": 0.2994140625, + "step": 10880 + }, + { + "epoch": 0.09411937639968526, + "grad_norm": 15.004984083338309, + "learning_rate": 5.967273624692812e-06, + "loss": 0.32806396484375, + "step": 10885 + }, + { + "epoch": 0.09416260992122852, + "grad_norm": 2.7506828572645454, + "learning_rate": 5.967243607061075e-06, + "loss": 0.0924163818359375, + "step": 10890 + }, + { + "epoch": 0.09420584344277179, + "grad_norm": 20.34950760900633, + "learning_rate": 5.967213575744673e-06, + "loss": 0.375299072265625, + "step": 10895 + }, + { + "epoch": 0.09424907696431505, + "grad_norm": 6.086109686303235, + "learning_rate": 5.967183530743745e-06, + "loss": 0.20730743408203126, + "step": 10900 + }, + { + "epoch": 0.09429231048585832, + "grad_norm": 0.8425020707480004, + "learning_rate": 5.967153472058428e-06, + "loss": 0.18608016967773439, + "step": 10905 + }, + { + "epoch": 0.09433554400740157, + "grad_norm": 1.6271440861961926, + "learning_rate": 5.967123399688861e-06, + "loss": 0.24061737060546876, + "step": 10910 + }, + { + "epoch": 0.09437877752894484, + "grad_norm": 22.074333666445487, + "learning_rate": 5.967093313635185e-06, + "loss": 0.2891571044921875, + "step": 10915 + }, + { + "epoch": 0.0944220110504881, + "grad_norm": 11.436213619111916, + "learning_rate": 5.967063213897535e-06, + "loss": 0.09263458251953124, + "step": 10920 + }, + { + "epoch": 0.09446524457203137, + "grad_norm": 46.23190384933622, + "learning_rate": 5.967033100476053e-06, + "loss": 0.21239013671875, + "step": 10925 + }, + { + "epoch": 0.09450847809357463, + "grad_norm": 43.059544206517174, + "learning_rate": 5.9670029733708745e-06, + "loss": 0.2544219970703125, + "step": 10930 + }, + { + "epoch": 0.0945517116151179, + "grad_norm": 25.000070035244253, + "learning_rate": 5.9669728325821415e-06, + "loss": 0.1285003662109375, + "step": 10935 + }, + { + "epoch": 0.09459494513666117, + "grad_norm": 2.2330345300568313, + "learning_rate": 5.966942678109993e-06, + "loss": 0.092156982421875, + "step": 10940 + }, + { + "epoch": 0.09463817865820442, + "grad_norm": 8.624466288046658, + "learning_rate": 5.966912509954566e-06, + "loss": 0.19739990234375, + "step": 10945 + }, + { + "epoch": 0.0946814121797477, + "grad_norm": 42.59232120463804, + "learning_rate": 5.966882328116e-06, + "loss": 0.32579345703125, + "step": 10950 + }, + { + "epoch": 0.09472464570129095, + "grad_norm": 14.995741852092388, + "learning_rate": 5.966852132594436e-06, + "loss": 0.46714324951171876, + "step": 10955 + }, + { + "epoch": 0.09476787922283422, + "grad_norm": 18.45897391163704, + "learning_rate": 5.9668219233900114e-06, + "loss": 0.1115570068359375, + "step": 10960 + }, + { + "epoch": 0.09481111274437748, + "grad_norm": 6.323016974735605, + "learning_rate": 5.9667917005028675e-06, + "loss": 0.11275482177734375, + "step": 10965 + }, + { + "epoch": 0.09485434626592075, + "grad_norm": 21.76244314944868, + "learning_rate": 5.966761463933141e-06, + "loss": 0.304351806640625, + "step": 10970 + }, + { + "epoch": 0.094897579787464, + "grad_norm": 27.740532471306395, + "learning_rate": 5.9667312136809734e-06, + "loss": 0.340191650390625, + "step": 10975 + }, + { + "epoch": 0.09494081330900728, + "grad_norm": 31.18226210171813, + "learning_rate": 5.966700949746504e-06, + "loss": 0.1060211181640625, + "step": 10980 + }, + { + "epoch": 0.09498404683055053, + "grad_norm": 14.486319120234988, + "learning_rate": 5.966670672129871e-06, + "loss": 0.1795745849609375, + "step": 10985 + }, + { + "epoch": 0.0950272803520938, + "grad_norm": 10.085734096495747, + "learning_rate": 5.966640380831216e-06, + "loss": 0.116619873046875, + "step": 10990 + }, + { + "epoch": 0.09507051387363706, + "grad_norm": 2.217603104508091, + "learning_rate": 5.966610075850676e-06, + "loss": 0.2626251220703125, + "step": 10995 + }, + { + "epoch": 0.09511374739518033, + "grad_norm": 19.08447800401769, + "learning_rate": 5.9665797571883944e-06, + "loss": 0.17601394653320312, + "step": 11000 + }, + { + "epoch": 0.09515698091672359, + "grad_norm": 2.8136060183188802, + "learning_rate": 5.966549424844508e-06, + "loss": 0.14471054077148438, + "step": 11005 + }, + { + "epoch": 0.09520021443826686, + "grad_norm": 9.91647264579428, + "learning_rate": 5.966519078819158e-06, + "loss": 0.40673828125, + "step": 11010 + }, + { + "epoch": 0.09524344795981012, + "grad_norm": 3.052262003671378, + "learning_rate": 5.966488719112484e-06, + "loss": 0.17502288818359374, + "step": 11015 + }, + { + "epoch": 0.09528668148135339, + "grad_norm": 14.021603892203572, + "learning_rate": 5.966458345724626e-06, + "loss": 0.383050537109375, + "step": 11020 + }, + { + "epoch": 0.09532991500289664, + "grad_norm": 4.381139778195937, + "learning_rate": 5.966427958655725e-06, + "loss": 0.026935577392578125, + "step": 11025 + }, + { + "epoch": 0.09537314852443991, + "grad_norm": 13.715796100567646, + "learning_rate": 5.96639755790592e-06, + "loss": 0.1532867431640625, + "step": 11030 + }, + { + "epoch": 0.09541638204598317, + "grad_norm": 6.141865838614443, + "learning_rate": 5.9663671434753524e-06, + "loss": 0.366839599609375, + "step": 11035 + }, + { + "epoch": 0.09545961556752644, + "grad_norm": 4.0209271272695535, + "learning_rate": 5.96633671536416e-06, + "loss": 0.2506229400634766, + "step": 11040 + }, + { + "epoch": 0.0955028490890697, + "grad_norm": 0.9672819720733797, + "learning_rate": 5.966306273572486e-06, + "loss": 0.311517333984375, + "step": 11045 + }, + { + "epoch": 0.09554608261061297, + "grad_norm": 34.20023428344363, + "learning_rate": 5.966275818100468e-06, + "loss": 0.24703369140625, + "step": 11050 + }, + { + "epoch": 0.09558931613215622, + "grad_norm": 3.9334120364827063, + "learning_rate": 5.9662453489482495e-06, + "loss": 0.0628173828125, + "step": 11055 + }, + { + "epoch": 0.0956325496536995, + "grad_norm": 0.36081490230755603, + "learning_rate": 5.96621486611597e-06, + "loss": 0.22961769104003907, + "step": 11060 + }, + { + "epoch": 0.09567578317524275, + "grad_norm": 4.217244888740181, + "learning_rate": 5.9661843696037686e-06, + "loss": 0.3397979736328125, + "step": 11065 + }, + { + "epoch": 0.09571901669678602, + "grad_norm": 34.98007200094545, + "learning_rate": 5.966153859411787e-06, + "loss": 0.798895263671875, + "step": 11070 + }, + { + "epoch": 0.09576225021832928, + "grad_norm": 3.529745163198932, + "learning_rate": 5.9661233355401664e-06, + "loss": 0.193487548828125, + "step": 11075 + }, + { + "epoch": 0.09580548373987255, + "grad_norm": 6.26437981295316, + "learning_rate": 5.966092797989046e-06, + "loss": 0.212127685546875, + "step": 11080 + }, + { + "epoch": 0.0958487172614158, + "grad_norm": 30.057568650744606, + "learning_rate": 5.966062246758569e-06, + "loss": 0.227362060546875, + "step": 11085 + }, + { + "epoch": 0.09589195078295908, + "grad_norm": 17.948528650364448, + "learning_rate": 5.966031681848875e-06, + "loss": 0.36948089599609374, + "step": 11090 + }, + { + "epoch": 0.09593518430450233, + "grad_norm": 14.476412587216323, + "learning_rate": 5.966001103260105e-06, + "loss": 0.08181724548339844, + "step": 11095 + }, + { + "epoch": 0.0959784178260456, + "grad_norm": 0.5691132919724332, + "learning_rate": 5.965970510992399e-06, + "loss": 0.4029083251953125, + "step": 11100 + }, + { + "epoch": 0.09602165134758886, + "grad_norm": 6.969025835617112, + "learning_rate": 5.965939905045899e-06, + "loss": 0.07400360107421874, + "step": 11105 + }, + { + "epoch": 0.09606488486913213, + "grad_norm": 14.283911486244486, + "learning_rate": 5.965909285420747e-06, + "loss": 0.46168212890625, + "step": 11110 + }, + { + "epoch": 0.09610811839067539, + "grad_norm": 6.122472483664171, + "learning_rate": 5.965878652117083e-06, + "loss": 0.29747314453125, + "step": 11115 + }, + { + "epoch": 0.09615135191221866, + "grad_norm": 5.315216228179876, + "learning_rate": 5.965848005135049e-06, + "loss": 0.056634521484375, + "step": 11120 + }, + { + "epoch": 0.09619458543376193, + "grad_norm": 16.252205663044247, + "learning_rate": 5.9658173444747865e-06, + "loss": 0.4088165283203125, + "step": 11125 + }, + { + "epoch": 0.09623781895530518, + "grad_norm": 9.6761521958295, + "learning_rate": 5.965786670136436e-06, + "loss": 0.110498046875, + "step": 11130 + }, + { + "epoch": 0.09628105247684846, + "grad_norm": 24.973494138348695, + "learning_rate": 5.965755982120139e-06, + "loss": 0.3236083984375, + "step": 11135 + }, + { + "epoch": 0.09632428599839171, + "grad_norm": 0.9163245763054789, + "learning_rate": 5.965725280426038e-06, + "loss": 0.07764701843261719, + "step": 11140 + }, + { + "epoch": 0.09636751951993498, + "grad_norm": 8.31240659501708, + "learning_rate": 5.965694565054274e-06, + "loss": 0.10275726318359375, + "step": 11145 + }, + { + "epoch": 0.09641075304147824, + "grad_norm": 10.158700087783274, + "learning_rate": 5.965663836004989e-06, + "loss": 0.21644134521484376, + "step": 11150 + }, + { + "epoch": 0.09645398656302151, + "grad_norm": 5.214776278912615, + "learning_rate": 5.965633093278324e-06, + "loss": 0.339447021484375, + "step": 11155 + }, + { + "epoch": 0.09649722008456477, + "grad_norm": 20.37899060243129, + "learning_rate": 5.96560233687442e-06, + "loss": 0.114215087890625, + "step": 11160 + }, + { + "epoch": 0.09654045360610804, + "grad_norm": 50.78445699764798, + "learning_rate": 5.965571566793423e-06, + "loss": 0.1928955078125, + "step": 11165 + }, + { + "epoch": 0.0965836871276513, + "grad_norm": 9.639242029709798, + "learning_rate": 5.96554078303547e-06, + "loss": 0.1246307373046875, + "step": 11170 + }, + { + "epoch": 0.09662692064919456, + "grad_norm": 12.56267411365289, + "learning_rate": 5.965509985600706e-06, + "loss": 0.10624847412109376, + "step": 11175 + }, + { + "epoch": 0.09667015417073782, + "grad_norm": 2.3871792793586883, + "learning_rate": 5.96547917448927e-06, + "loss": 0.062945556640625, + "step": 11180 + }, + { + "epoch": 0.09671338769228109, + "grad_norm": 4.184982214132474, + "learning_rate": 5.965448349701308e-06, + "loss": 0.554241943359375, + "step": 11185 + }, + { + "epoch": 0.09675662121382435, + "grad_norm": 4.959647968038856, + "learning_rate": 5.965417511236959e-06, + "loss": 0.1071044921875, + "step": 11190 + }, + { + "epoch": 0.09679985473536762, + "grad_norm": 16.290214695165517, + "learning_rate": 5.9653866590963674e-06, + "loss": 0.457354736328125, + "step": 11195 + }, + { + "epoch": 0.09684308825691088, + "grad_norm": 74.46927028692775, + "learning_rate": 5.965355793279674e-06, + "loss": 0.44307861328125, + "step": 11200 + }, + { + "epoch": 0.09688632177845415, + "grad_norm": 3.392167966348929, + "learning_rate": 5.965324913787022e-06, + "loss": 0.13047447204589843, + "step": 11205 + }, + { + "epoch": 0.0969295552999974, + "grad_norm": 31.41912185076217, + "learning_rate": 5.965294020618554e-06, + "loss": 0.4222076416015625, + "step": 11210 + }, + { + "epoch": 0.09697278882154067, + "grad_norm": 9.2048456590762, + "learning_rate": 5.9652631137744115e-06, + "loss": 0.09909515380859375, + "step": 11215 + }, + { + "epoch": 0.09701602234308393, + "grad_norm": 41.670796528735586, + "learning_rate": 5.965232193254737e-06, + "loss": 0.23180694580078126, + "step": 11220 + }, + { + "epoch": 0.0970592558646272, + "grad_norm": 3.27275427185236, + "learning_rate": 5.965201259059675e-06, + "loss": 0.229095458984375, + "step": 11225 + }, + { + "epoch": 0.09710248938617046, + "grad_norm": 5.396139480014384, + "learning_rate": 5.965170311189366e-06, + "loss": 0.1114654541015625, + "step": 11230 + }, + { + "epoch": 0.09714572290771373, + "grad_norm": 11.233904686052798, + "learning_rate": 5.965139349643954e-06, + "loss": 0.07423553466796876, + "step": 11235 + }, + { + "epoch": 0.09718895642925698, + "grad_norm": 30.741625230980027, + "learning_rate": 5.965108374423579e-06, + "loss": 0.17598876953125, + "step": 11240 + }, + { + "epoch": 0.09723218995080025, + "grad_norm": 0.7712586309736568, + "learning_rate": 5.965077385528389e-06, + "loss": 0.32967529296875, + "step": 11245 + }, + { + "epoch": 0.09727542347234351, + "grad_norm": 1.4020050685158312, + "learning_rate": 5.965046382958522e-06, + "loss": 0.253424072265625, + "step": 11250 + }, + { + "epoch": 0.09731865699388678, + "grad_norm": 1.4878613531832356, + "learning_rate": 5.9650153667141255e-06, + "loss": 0.2963531494140625, + "step": 11255 + }, + { + "epoch": 0.09736189051543004, + "grad_norm": 19.46391358808716, + "learning_rate": 5.964984336795338e-06, + "loss": 0.27333221435546873, + "step": 11260 + }, + { + "epoch": 0.09740512403697331, + "grad_norm": 12.871955234348793, + "learning_rate": 5.964953293202306e-06, + "loss": 0.07375640869140625, + "step": 11265 + }, + { + "epoch": 0.09744835755851657, + "grad_norm": 13.69838145903411, + "learning_rate": 5.964922235935171e-06, + "loss": 0.12761688232421875, + "step": 11270 + }, + { + "epoch": 0.09749159108005984, + "grad_norm": 21.08231282159423, + "learning_rate": 5.964891164994076e-06, + "loss": 0.15586166381835936, + "step": 11275 + }, + { + "epoch": 0.09753482460160309, + "grad_norm": 24.656234744876976, + "learning_rate": 5.964860080379166e-06, + "loss": 0.214501953125, + "step": 11280 + }, + { + "epoch": 0.09757805812314636, + "grad_norm": 4.936137447211636, + "learning_rate": 5.964828982090582e-06, + "loss": 0.10721435546875, + "step": 11285 + }, + { + "epoch": 0.09762129164468962, + "grad_norm": 28.301307584331344, + "learning_rate": 5.9647978701284705e-06, + "loss": 0.45546875, + "step": 11290 + }, + { + "epoch": 0.09766452516623289, + "grad_norm": 38.27651878032326, + "learning_rate": 5.964766744492972e-06, + "loss": 0.3871337890625, + "step": 11295 + }, + { + "epoch": 0.09770775868777615, + "grad_norm": 9.832346583555532, + "learning_rate": 5.964735605184231e-06, + "loss": 0.12260055541992188, + "step": 11300 + }, + { + "epoch": 0.09775099220931942, + "grad_norm": 0.8740099454980151, + "learning_rate": 5.964704452202391e-06, + "loss": 0.522760009765625, + "step": 11305 + }, + { + "epoch": 0.09779422573086269, + "grad_norm": 43.94663870358126, + "learning_rate": 5.964673285547597e-06, + "loss": 0.38824462890625, + "step": 11310 + }, + { + "epoch": 0.09783745925240594, + "grad_norm": 9.46035777735044, + "learning_rate": 5.964642105219992e-06, + "loss": 0.4113250732421875, + "step": 11315 + }, + { + "epoch": 0.09788069277394922, + "grad_norm": 34.31436477622707, + "learning_rate": 5.964610911219719e-06, + "loss": 0.518487548828125, + "step": 11320 + }, + { + "epoch": 0.09792392629549247, + "grad_norm": 10.72095041833851, + "learning_rate": 5.9645797035469234e-06, + "loss": 0.2099395751953125, + "step": 11325 + }, + { + "epoch": 0.09796715981703574, + "grad_norm": 8.34882642226623, + "learning_rate": 5.964548482201747e-06, + "loss": 0.2945098876953125, + "step": 11330 + }, + { + "epoch": 0.098010393338579, + "grad_norm": 7.18979275319046, + "learning_rate": 5.9645172471843345e-06, + "loss": 0.2141021728515625, + "step": 11335 + }, + { + "epoch": 0.09805362686012227, + "grad_norm": 8.244947278198522, + "learning_rate": 5.964485998494831e-06, + "loss": 0.167913818359375, + "step": 11340 + }, + { + "epoch": 0.09809686038166553, + "grad_norm": 18.453706202960202, + "learning_rate": 5.964454736133381e-06, + "loss": 0.48656463623046875, + "step": 11345 + }, + { + "epoch": 0.0981400939032088, + "grad_norm": 17.806523395524568, + "learning_rate": 5.964423460100127e-06, + "loss": 0.195294189453125, + "step": 11350 + }, + { + "epoch": 0.09818332742475205, + "grad_norm": 8.387358352246638, + "learning_rate": 5.964392170395214e-06, + "loss": 0.21206512451171874, + "step": 11355 + }, + { + "epoch": 0.09822656094629532, + "grad_norm": 22.22498089071214, + "learning_rate": 5.964360867018786e-06, + "loss": 0.34420166015625, + "step": 11360 + }, + { + "epoch": 0.09826979446783858, + "grad_norm": 4.960428992591674, + "learning_rate": 5.964329549970987e-06, + "loss": 0.17017822265625, + "step": 11365 + }, + { + "epoch": 0.09831302798938185, + "grad_norm": 9.238831116963397, + "learning_rate": 5.964298219251963e-06, + "loss": 0.219183349609375, + "step": 11370 + }, + { + "epoch": 0.09835626151092511, + "grad_norm": 2.633742063538309, + "learning_rate": 5.964266874861857e-06, + "loss": 0.11542205810546875, + "step": 11375 + }, + { + "epoch": 0.09839949503246838, + "grad_norm": 447.6339852007769, + "learning_rate": 5.9642355168008155e-06, + "loss": 0.4739105224609375, + "step": 11380 + }, + { + "epoch": 0.09844272855401164, + "grad_norm": 5.640248366076667, + "learning_rate": 5.96420414506898e-06, + "loss": 0.5085693359375, + "step": 11385 + }, + { + "epoch": 0.0984859620755549, + "grad_norm": 6.170318486215109, + "learning_rate": 5.964172759666498e-06, + "loss": 0.14119873046875, + "step": 11390 + }, + { + "epoch": 0.09852919559709816, + "grad_norm": 28.17107682086224, + "learning_rate": 5.964141360593512e-06, + "loss": 0.45689697265625, + "step": 11395 + }, + { + "epoch": 0.09857242911864143, + "grad_norm": 13.600134868871894, + "learning_rate": 5.964109947850169e-06, + "loss": 0.2526100158691406, + "step": 11400 + }, + { + "epoch": 0.09861566264018469, + "grad_norm": 6.230089181118798, + "learning_rate": 5.964078521436612e-06, + "loss": 0.31651153564453127, + "step": 11405 + }, + { + "epoch": 0.09865889616172796, + "grad_norm": 13.19223877544901, + "learning_rate": 5.964047081352987e-06, + "loss": 0.3903076171875, + "step": 11410 + }, + { + "epoch": 0.09870212968327122, + "grad_norm": 29.938598138995523, + "learning_rate": 5.96401562759944e-06, + "loss": 0.2411041259765625, + "step": 11415 + }, + { + "epoch": 0.09874536320481449, + "grad_norm": 51.43461178308336, + "learning_rate": 5.963984160176113e-06, + "loss": 0.2597381591796875, + "step": 11420 + }, + { + "epoch": 0.09878859672635774, + "grad_norm": 0.6354162718441687, + "learning_rate": 5.963952679083154e-06, + "loss": 0.1461456298828125, + "step": 11425 + }, + { + "epoch": 0.09883183024790101, + "grad_norm": 40.956612742644644, + "learning_rate": 5.963921184320707e-06, + "loss": 0.3413330078125, + "step": 11430 + }, + { + "epoch": 0.09887506376944427, + "grad_norm": 6.731686269419159, + "learning_rate": 5.9638896758889176e-06, + "loss": 0.19240951538085938, + "step": 11435 + }, + { + "epoch": 0.09891829729098754, + "grad_norm": 16.556068163705582, + "learning_rate": 5.96385815378793e-06, + "loss": 0.17764892578125, + "step": 11440 + }, + { + "epoch": 0.0989615308125308, + "grad_norm": 14.916319041167007, + "learning_rate": 5.963826618017891e-06, + "loss": 0.327984619140625, + "step": 11445 + }, + { + "epoch": 0.09900476433407407, + "grad_norm": 2.4986846299917405, + "learning_rate": 5.963795068578946e-06, + "loss": 0.154840087890625, + "step": 11450 + }, + { + "epoch": 0.09904799785561733, + "grad_norm": 6.997906285328299, + "learning_rate": 5.96376350547124e-06, + "loss": 0.20682621002197266, + "step": 11455 + }, + { + "epoch": 0.0990912313771606, + "grad_norm": 6.031324785937273, + "learning_rate": 5.9637319286949185e-06, + "loss": 0.15311698913574218, + "step": 11460 + }, + { + "epoch": 0.09913446489870385, + "grad_norm": 26.606564216405367, + "learning_rate": 5.963700338250127e-06, + "loss": 0.385247802734375, + "step": 11465 + }, + { + "epoch": 0.09917769842024712, + "grad_norm": 4.0171633408301, + "learning_rate": 5.9636687341370114e-06, + "loss": 0.19349822998046876, + "step": 11470 + }, + { + "epoch": 0.09922093194179038, + "grad_norm": 50.48041002386682, + "learning_rate": 5.963637116355717e-06, + "loss": 0.5343170166015625, + "step": 11475 + }, + { + "epoch": 0.09926416546333365, + "grad_norm": 29.12556895405444, + "learning_rate": 5.96360548490639e-06, + "loss": 0.222705078125, + "step": 11480 + }, + { + "epoch": 0.09930739898487691, + "grad_norm": 2.6027265506063295, + "learning_rate": 5.963573839789178e-06, + "loss": 0.3203216552734375, + "step": 11485 + }, + { + "epoch": 0.09935063250642018, + "grad_norm": 16.58017714287846, + "learning_rate": 5.9635421810042235e-06, + "loss": 0.355908203125, + "step": 11490 + }, + { + "epoch": 0.09939386602796345, + "grad_norm": 27.578400058147697, + "learning_rate": 5.963510508551675e-06, + "loss": 0.2780754089355469, + "step": 11495 + }, + { + "epoch": 0.0994370995495067, + "grad_norm": 5.74769411242234, + "learning_rate": 5.963478822431679e-06, + "loss": 0.22577285766601562, + "step": 11500 + }, + { + "epoch": 0.09948033307104998, + "grad_norm": 11.22076856671369, + "learning_rate": 5.963447122644379e-06, + "loss": 0.061647796630859376, + "step": 11505 + }, + { + "epoch": 0.09952356659259323, + "grad_norm": 6.877039803383222, + "learning_rate": 5.963415409189923e-06, + "loss": 0.14226951599121093, + "step": 11510 + }, + { + "epoch": 0.0995668001141365, + "grad_norm": 6.508111388451497, + "learning_rate": 5.9633836820684575e-06, + "loss": 0.085333251953125, + "step": 11515 + }, + { + "epoch": 0.09961003363567976, + "grad_norm": 5.647468283362691, + "learning_rate": 5.963351941280129e-06, + "loss": 0.1194976806640625, + "step": 11520 + }, + { + "epoch": 0.09965326715722303, + "grad_norm": 10.714478110389798, + "learning_rate": 5.9633201868250816e-06, + "loss": 0.100177001953125, + "step": 11525 + }, + { + "epoch": 0.09969650067876629, + "grad_norm": 7.717793071978543, + "learning_rate": 5.963288418703464e-06, + "loss": 0.30455322265625, + "step": 11530 + }, + { + "epoch": 0.09973973420030956, + "grad_norm": 1.765557216470102, + "learning_rate": 5.9632566369154215e-06, + "loss": 0.131201171875, + "step": 11535 + }, + { + "epoch": 0.09978296772185281, + "grad_norm": 18.6428730752494, + "learning_rate": 5.963224841461102e-06, + "loss": 0.2486328125, + "step": 11540 + }, + { + "epoch": 0.09982620124339608, + "grad_norm": 9.710149279014562, + "learning_rate": 5.9631930323406505e-06, + "loss": 0.19013671875, + "step": 11545 + }, + { + "epoch": 0.09986943476493934, + "grad_norm": 3.211320167420063, + "learning_rate": 5.9631612095542155e-06, + "loss": 0.39586944580078126, + "step": 11550 + }, + { + "epoch": 0.09991266828648261, + "grad_norm": 3.911151582444777, + "learning_rate": 5.963129373101942e-06, + "loss": 0.074945068359375, + "step": 11555 + }, + { + "epoch": 0.09995590180802587, + "grad_norm": 2.2543190740731474, + "learning_rate": 5.963097522983979e-06, + "loss": 0.05845947265625, + "step": 11560 + }, + { + "epoch": 0.09999913532956914, + "grad_norm": 9.83108162370555, + "learning_rate": 5.963065659200471e-06, + "loss": 0.20525054931640624, + "step": 11565 + }, + { + "epoch": 0.1000423688511124, + "grad_norm": 16.760766760258026, + "learning_rate": 5.963033781751566e-06, + "loss": 0.202923583984375, + "step": 11570 + }, + { + "epoch": 0.10008560237265567, + "grad_norm": 32.865535576247815, + "learning_rate": 5.963001890637411e-06, + "loss": 0.150592041015625, + "step": 11575 + }, + { + "epoch": 0.10012883589419892, + "grad_norm": 1.6594371398304077, + "learning_rate": 5.962969985858154e-06, + "loss": 0.18271408081054688, + "step": 11580 + }, + { + "epoch": 0.1001720694157422, + "grad_norm": 6.986121578481402, + "learning_rate": 5.962938067413941e-06, + "loss": 0.11644287109375, + "step": 11585 + }, + { + "epoch": 0.10021530293728545, + "grad_norm": 8.465924109768578, + "learning_rate": 5.962906135304918e-06, + "loss": 0.40005836486816404, + "step": 11590 + }, + { + "epoch": 0.10025853645882872, + "grad_norm": 2.4312392884938046, + "learning_rate": 5.962874189531235e-06, + "loss": 0.226409912109375, + "step": 11595 + }, + { + "epoch": 0.10030176998037198, + "grad_norm": 10.623793599014498, + "learning_rate": 5.962842230093037e-06, + "loss": 0.18336029052734376, + "step": 11600 + }, + { + "epoch": 0.10034500350191525, + "grad_norm": 1.3599961389284991, + "learning_rate": 5.9628102569904736e-06, + "loss": 0.24530181884765626, + "step": 11605 + }, + { + "epoch": 0.1003882370234585, + "grad_norm": 36.99854286525842, + "learning_rate": 5.962778270223691e-06, + "loss": 0.3929107666015625, + "step": 11610 + }, + { + "epoch": 0.10043147054500177, + "grad_norm": 3.774808672257255, + "learning_rate": 5.962746269792837e-06, + "loss": 0.07096405029296875, + "step": 11615 + }, + { + "epoch": 0.10047470406654503, + "grad_norm": 1.650418220207846, + "learning_rate": 5.962714255698058e-06, + "loss": 0.406475830078125, + "step": 11620 + }, + { + "epoch": 0.1005179375880883, + "grad_norm": 12.688173123193945, + "learning_rate": 5.962682227939503e-06, + "loss": 0.110980224609375, + "step": 11625 + }, + { + "epoch": 0.10056117110963156, + "grad_norm": 30.843642758582373, + "learning_rate": 5.9626501865173195e-06, + "loss": 0.3956298828125, + "step": 11630 + }, + { + "epoch": 0.10060440463117483, + "grad_norm": 27.83335931064527, + "learning_rate": 5.962618131431655e-06, + "loss": 0.139202880859375, + "step": 11635 + }, + { + "epoch": 0.10064763815271809, + "grad_norm": 5.1889605841998065, + "learning_rate": 5.962586062682658e-06, + "loss": 0.26844940185546873, + "step": 11640 + }, + { + "epoch": 0.10069087167426136, + "grad_norm": 0.7248540713682983, + "learning_rate": 5.962553980270475e-06, + "loss": 0.137066650390625, + "step": 11645 + }, + { + "epoch": 0.10073410519580461, + "grad_norm": 19.44805175594182, + "learning_rate": 5.962521884195256e-06, + "loss": 0.309515380859375, + "step": 11650 + }, + { + "epoch": 0.10077733871734788, + "grad_norm": 1.1694489388201315, + "learning_rate": 5.962489774457147e-06, + "loss": 0.5263885498046875, + "step": 11655 + }, + { + "epoch": 0.10082057223889114, + "grad_norm": 36.6272559611016, + "learning_rate": 5.962457651056297e-06, + "loss": 0.140673828125, + "step": 11660 + }, + { + "epoch": 0.10086380576043441, + "grad_norm": 3.1015341370923903, + "learning_rate": 5.9624255139928535e-06, + "loss": 0.48671875, + "step": 11665 + }, + { + "epoch": 0.10090703928197767, + "grad_norm": 9.00063167788232, + "learning_rate": 5.962393363266967e-06, + "loss": 0.06314697265625, + "step": 11670 + }, + { + "epoch": 0.10095027280352094, + "grad_norm": 35.7817573531049, + "learning_rate": 5.962361198878781e-06, + "loss": 0.24818572998046876, + "step": 11675 + }, + { + "epoch": 0.10099350632506421, + "grad_norm": 22.26534552548358, + "learning_rate": 5.96232902082845e-06, + "loss": 0.26058197021484375, + "step": 11680 + }, + { + "epoch": 0.10103673984660747, + "grad_norm": 2.946322483763616, + "learning_rate": 5.962296829116118e-06, + "loss": 0.23836898803710938, + "step": 11685 + }, + { + "epoch": 0.10107997336815074, + "grad_norm": 5.077245868675985, + "learning_rate": 5.962264623741935e-06, + "loss": 0.1366912841796875, + "step": 11690 + }, + { + "epoch": 0.10112320688969399, + "grad_norm": 0.09004250762199657, + "learning_rate": 5.962232404706049e-06, + "loss": 0.1607666015625, + "step": 11695 + }, + { + "epoch": 0.10116644041123726, + "grad_norm": 6.69466389375152, + "learning_rate": 5.96220017200861e-06, + "loss": 0.20267410278320314, + "step": 11700 + }, + { + "epoch": 0.10120967393278052, + "grad_norm": 8.814666801521572, + "learning_rate": 5.962167925649765e-06, + "loss": 0.152728271484375, + "step": 11705 + }, + { + "epoch": 0.10125290745432379, + "grad_norm": 3.6405115765548097, + "learning_rate": 5.9621356656296624e-06, + "loss": 0.22076187133789063, + "step": 11710 + }, + { + "epoch": 0.10129614097586705, + "grad_norm": 6.120857152865463, + "learning_rate": 5.962103391948453e-06, + "loss": 0.15755615234375, + "step": 11715 + }, + { + "epoch": 0.10133937449741032, + "grad_norm": 9.01349669195407, + "learning_rate": 5.962071104606284e-06, + "loss": 0.39583892822265626, + "step": 11720 + }, + { + "epoch": 0.10138260801895357, + "grad_norm": 10.943696854767362, + "learning_rate": 5.962038803603304e-06, + "loss": 0.072772216796875, + "step": 11725 + }, + { + "epoch": 0.10142584154049684, + "grad_norm": 18.932222501256096, + "learning_rate": 5.962006488939663e-06, + "loss": 0.60211181640625, + "step": 11730 + }, + { + "epoch": 0.1014690750620401, + "grad_norm": 23.324841717039174, + "learning_rate": 5.961974160615511e-06, + "loss": 0.219091796875, + "step": 11735 + }, + { + "epoch": 0.10151230858358337, + "grad_norm": 9.651473192863492, + "learning_rate": 5.961941818630995e-06, + "loss": 0.15439071655273437, + "step": 11740 + }, + { + "epoch": 0.10155554210512663, + "grad_norm": 7.795204543914089, + "learning_rate": 5.961909462986265e-06, + "loss": 0.130181884765625, + "step": 11745 + }, + { + "epoch": 0.1015987756266699, + "grad_norm": 24.44352098757401, + "learning_rate": 5.961877093681471e-06, + "loss": 0.191790771484375, + "step": 11750 + }, + { + "epoch": 0.10164200914821316, + "grad_norm": 24.312274118332805, + "learning_rate": 5.961844710716761e-06, + "loss": 0.1782958984375, + "step": 11755 + }, + { + "epoch": 0.10168524266975643, + "grad_norm": 32.53836324152453, + "learning_rate": 5.961812314092285e-06, + "loss": 0.202874755859375, + "step": 11760 + }, + { + "epoch": 0.10172847619129968, + "grad_norm": 35.67485736263889, + "learning_rate": 5.961779903808192e-06, + "loss": 0.201483154296875, + "step": 11765 + }, + { + "epoch": 0.10177170971284295, + "grad_norm": 3.6992471434811303, + "learning_rate": 5.961747479864632e-06, + "loss": 0.11949462890625, + "step": 11770 + }, + { + "epoch": 0.10181494323438621, + "grad_norm": 18.64646026049998, + "learning_rate": 5.9617150422617545e-06, + "loss": 0.160845947265625, + "step": 11775 + }, + { + "epoch": 0.10185817675592948, + "grad_norm": 23.60589093764736, + "learning_rate": 5.961682590999709e-06, + "loss": 0.2531005859375, + "step": 11780 + }, + { + "epoch": 0.10190141027747274, + "grad_norm": 4.70084856524608, + "learning_rate": 5.961650126078644e-06, + "loss": 0.1257863998413086, + "step": 11785 + }, + { + "epoch": 0.10194464379901601, + "grad_norm": 59.19169039921359, + "learning_rate": 5.961617647498712e-06, + "loss": 0.3987457275390625, + "step": 11790 + }, + { + "epoch": 0.10198787732055926, + "grad_norm": 12.121379659990552, + "learning_rate": 5.9615851552600606e-06, + "loss": 0.13670654296875, + "step": 11795 + }, + { + "epoch": 0.10203111084210253, + "grad_norm": 11.183582643014798, + "learning_rate": 5.9615526493628396e-06, + "loss": 0.16344451904296875, + "step": 11800 + }, + { + "epoch": 0.10207434436364579, + "grad_norm": 2.800980876453373, + "learning_rate": 5.9615201298072e-06, + "loss": 0.26824951171875, + "step": 11805 + }, + { + "epoch": 0.10211757788518906, + "grad_norm": 35.20799837754747, + "learning_rate": 5.961487596593291e-06, + "loss": 0.28357696533203125, + "step": 11810 + }, + { + "epoch": 0.10216081140673232, + "grad_norm": 6.529259035006286, + "learning_rate": 5.9614550497212624e-06, + "loss": 0.280517578125, + "step": 11815 + }, + { + "epoch": 0.10220404492827559, + "grad_norm": 23.697494767303073, + "learning_rate": 5.961422489191266e-06, + "loss": 0.171405029296875, + "step": 11820 + }, + { + "epoch": 0.10224727844981885, + "grad_norm": 15.306023387514125, + "learning_rate": 5.961389915003449e-06, + "loss": 0.16708221435546874, + "step": 11825 + }, + { + "epoch": 0.10229051197136212, + "grad_norm": 17.041585321877857, + "learning_rate": 5.961357327157965e-06, + "loss": 0.11093215942382813, + "step": 11830 + }, + { + "epoch": 0.10233374549290537, + "grad_norm": 9.5820216463447, + "learning_rate": 5.961324725654962e-06, + "loss": 0.6031723022460938, + "step": 11835 + }, + { + "epoch": 0.10237697901444864, + "grad_norm": 58.13788334889465, + "learning_rate": 5.9612921104945905e-06, + "loss": 0.18541259765625, + "step": 11840 + }, + { + "epoch": 0.1024202125359919, + "grad_norm": 0.4028053981577167, + "learning_rate": 5.961259481677003e-06, + "loss": 0.12896957397460937, + "step": 11845 + }, + { + "epoch": 0.10246344605753517, + "grad_norm": 20.72937578600682, + "learning_rate": 5.961226839202348e-06, + "loss": 0.310650634765625, + "step": 11850 + }, + { + "epoch": 0.10250667957907843, + "grad_norm": 6.664439104800788, + "learning_rate": 5.961194183070775e-06, + "loss": 0.18602142333984376, + "step": 11855 + }, + { + "epoch": 0.1025499131006217, + "grad_norm": 37.41292771082435, + "learning_rate": 5.961161513282437e-06, + "loss": 0.5351669311523437, + "step": 11860 + }, + { + "epoch": 0.10259314662216497, + "grad_norm": 9.860174381361556, + "learning_rate": 5.961128829837483e-06, + "loss": 0.16126480102539062, + "step": 11865 + }, + { + "epoch": 0.10263638014370823, + "grad_norm": 9.110328066974127, + "learning_rate": 5.961096132736067e-06, + "loss": 0.14526748657226562, + "step": 11870 + }, + { + "epoch": 0.1026796136652515, + "grad_norm": 7.973950535869868, + "learning_rate": 5.961063421978335e-06, + "loss": 0.1557159423828125, + "step": 11875 + }, + { + "epoch": 0.10272284718679475, + "grad_norm": 0.503471457783695, + "learning_rate": 5.961030697564441e-06, + "loss": 0.17359619140625, + "step": 11880 + }, + { + "epoch": 0.10276608070833802, + "grad_norm": 13.301017881352925, + "learning_rate": 5.960997959494534e-06, + "loss": 0.267620849609375, + "step": 11885 + }, + { + "epoch": 0.10280931422988128, + "grad_norm": 36.00957855447813, + "learning_rate": 5.960965207768767e-06, + "loss": 0.27962646484375, + "step": 11890 + }, + { + "epoch": 0.10285254775142455, + "grad_norm": 14.97624415335922, + "learning_rate": 5.96093244238729e-06, + "loss": 0.2462158203125, + "step": 11895 + }, + { + "epoch": 0.1028957812729678, + "grad_norm": 0.8727577124573204, + "learning_rate": 5.960899663350254e-06, + "loss": 0.253350830078125, + "step": 11900 + }, + { + "epoch": 0.10293901479451108, + "grad_norm": 18.212615763473597, + "learning_rate": 5.960866870657811e-06, + "loss": 0.48667449951171876, + "step": 11905 + }, + { + "epoch": 0.10298224831605433, + "grad_norm": 30.90080974080515, + "learning_rate": 5.960834064310111e-06, + "loss": 0.174053955078125, + "step": 11910 + }, + { + "epoch": 0.1030254818375976, + "grad_norm": 1.4235719719727162, + "learning_rate": 5.9608012443073065e-06, + "loss": 0.1605010986328125, + "step": 11915 + }, + { + "epoch": 0.10306871535914086, + "grad_norm": 1.619071364279314, + "learning_rate": 5.960768410649547e-06, + "loss": 0.262548828125, + "step": 11920 + }, + { + "epoch": 0.10311194888068413, + "grad_norm": 82.87874116730684, + "learning_rate": 5.960735563336987e-06, + "loss": 0.435772705078125, + "step": 11925 + }, + { + "epoch": 0.10315518240222739, + "grad_norm": 8.488219276706921, + "learning_rate": 5.960702702369775e-06, + "loss": 0.2079833984375, + "step": 11930 + }, + { + "epoch": 0.10319841592377066, + "grad_norm": 1.5669815090475796, + "learning_rate": 5.960669827748064e-06, + "loss": 0.2022064208984375, + "step": 11935 + }, + { + "epoch": 0.10324164944531392, + "grad_norm": 5.941731132865248, + "learning_rate": 5.960636939472005e-06, + "loss": 0.2036346435546875, + "step": 11940 + }, + { + "epoch": 0.10328488296685719, + "grad_norm": 1.3957341170792443, + "learning_rate": 5.960604037541751e-06, + "loss": 0.07937164306640625, + "step": 11945 + }, + { + "epoch": 0.10332811648840044, + "grad_norm": 19.11011598855508, + "learning_rate": 5.960571121957452e-06, + "loss": 0.220263671875, + "step": 11950 + }, + { + "epoch": 0.10337135000994371, + "grad_norm": 3.454016177238377, + "learning_rate": 5.96053819271926e-06, + "loss": 0.06893768310546874, + "step": 11955 + }, + { + "epoch": 0.10341458353148697, + "grad_norm": 20.93127886627543, + "learning_rate": 5.960505249827329e-06, + "loss": 0.3408447265625, + "step": 11960 + }, + { + "epoch": 0.10345781705303024, + "grad_norm": 9.177934511649198, + "learning_rate": 5.960472293281808e-06, + "loss": 0.1470123291015625, + "step": 11965 + }, + { + "epoch": 0.1035010505745735, + "grad_norm": 23.593874862109985, + "learning_rate": 5.960439323082852e-06, + "loss": 0.138134765625, + "step": 11970 + }, + { + "epoch": 0.10354428409611677, + "grad_norm": 37.837175150460915, + "learning_rate": 5.960406339230611e-06, + "loss": 0.21967010498046874, + "step": 11975 + }, + { + "epoch": 0.10358751761766002, + "grad_norm": 0.8941325068027005, + "learning_rate": 5.960373341725236e-06, + "loss": 0.682196044921875, + "step": 11980 + }, + { + "epoch": 0.1036307511392033, + "grad_norm": 3.7650703801845866, + "learning_rate": 5.960340330566882e-06, + "loss": 0.07244873046875, + "step": 11985 + }, + { + "epoch": 0.10367398466074655, + "grad_norm": 2.3848627214522975, + "learning_rate": 5.960307305755699e-06, + "loss": 0.2591064453125, + "step": 11990 + }, + { + "epoch": 0.10371721818228982, + "grad_norm": 32.751146128087605, + "learning_rate": 5.960274267291841e-06, + "loss": 0.710107421875, + "step": 11995 + }, + { + "epoch": 0.10376045170383308, + "grad_norm": 21.634478284257067, + "learning_rate": 5.96024121517546e-06, + "loss": 0.2479400634765625, + "step": 12000 + }, + { + "epoch": 0.10380368522537635, + "grad_norm": 5.953707013181535, + "learning_rate": 5.960208149406707e-06, + "loss": 0.5508621215820313, + "step": 12005 + }, + { + "epoch": 0.1038469187469196, + "grad_norm": 2.6921828613207723, + "learning_rate": 5.9601750699857365e-06, + "loss": 0.1415863037109375, + "step": 12010 + }, + { + "epoch": 0.10389015226846288, + "grad_norm": 4.262682496092229, + "learning_rate": 5.960141976912701e-06, + "loss": 0.10245513916015625, + "step": 12015 + }, + { + "epoch": 0.10393338579000613, + "grad_norm": 0.4752715518472241, + "learning_rate": 5.960108870187751e-06, + "loss": 0.13344573974609375, + "step": 12020 + }, + { + "epoch": 0.1039766193115494, + "grad_norm": 20.40452510011408, + "learning_rate": 5.96007574981104e-06, + "loss": 0.0983154296875, + "step": 12025 + }, + { + "epoch": 0.10401985283309266, + "grad_norm": 21.69925870041268, + "learning_rate": 5.960042615782722e-06, + "loss": 0.13265380859375, + "step": 12030 + }, + { + "epoch": 0.10406308635463593, + "grad_norm": 10.505496579406984, + "learning_rate": 5.960009468102949e-06, + "loss": 0.055791473388671874, + "step": 12035 + }, + { + "epoch": 0.10410631987617919, + "grad_norm": 1.2120700610659398, + "learning_rate": 5.959976306771873e-06, + "loss": 0.5840423583984375, + "step": 12040 + }, + { + "epoch": 0.10414955339772246, + "grad_norm": 30.043172468822522, + "learning_rate": 5.959943131789649e-06, + "loss": 0.41475982666015626, + "step": 12045 + }, + { + "epoch": 0.10419278691926573, + "grad_norm": 35.990571663111865, + "learning_rate": 5.959909943156429e-06, + "loss": 0.170721435546875, + "step": 12050 + }, + { + "epoch": 0.10423602044080899, + "grad_norm": 0.4992814655198375, + "learning_rate": 5.9598767408723645e-06, + "loss": 0.28007659912109373, + "step": 12055 + }, + { + "epoch": 0.10427925396235226, + "grad_norm": 36.41704063161946, + "learning_rate": 5.95984352493761e-06, + "loss": 0.3255279541015625, + "step": 12060 + }, + { + "epoch": 0.10432248748389551, + "grad_norm": 8.48226596130869, + "learning_rate": 5.959810295352321e-06, + "loss": 0.5492156982421875, + "step": 12065 + }, + { + "epoch": 0.10436572100543878, + "grad_norm": 52.56244616267764, + "learning_rate": 5.959777052116646e-06, + "loss": 0.6015678405761719, + "step": 12070 + }, + { + "epoch": 0.10440895452698204, + "grad_norm": 3.067494226207724, + "learning_rate": 5.9597437952307415e-06, + "loss": 0.240966796875, + "step": 12075 + }, + { + "epoch": 0.10445218804852531, + "grad_norm": 15.066147034660668, + "learning_rate": 5.959710524694761e-06, + "loss": 0.167388916015625, + "step": 12080 + }, + { + "epoch": 0.10449542157006857, + "grad_norm": 14.561716748054355, + "learning_rate": 5.959677240508856e-06, + "loss": 0.4870597839355469, + "step": 12085 + }, + { + "epoch": 0.10453865509161184, + "grad_norm": 3.2698546507098953, + "learning_rate": 5.959643942673182e-06, + "loss": 0.218017578125, + "step": 12090 + }, + { + "epoch": 0.1045818886131551, + "grad_norm": 12.19101787332391, + "learning_rate": 5.95961063118789e-06, + "loss": 0.14471435546875, + "step": 12095 + }, + { + "epoch": 0.10462512213469836, + "grad_norm": 0.7723888910248614, + "learning_rate": 5.959577306053138e-06, + "loss": 0.12786865234375, + "step": 12100 + }, + { + "epoch": 0.10466835565624162, + "grad_norm": 2.473249905508043, + "learning_rate": 5.959543967269075e-06, + "loss": 0.15948715209960937, + "step": 12105 + }, + { + "epoch": 0.10471158917778489, + "grad_norm": 11.849522080665047, + "learning_rate": 5.959510614835857e-06, + "loss": 0.09007759094238281, + "step": 12110 + }, + { + "epoch": 0.10475482269932815, + "grad_norm": 0.8781729230585562, + "learning_rate": 5.959477248753637e-06, + "loss": 0.1706329345703125, + "step": 12115 + }, + { + "epoch": 0.10479805622087142, + "grad_norm": 24.828494889456667, + "learning_rate": 5.959443869022571e-06, + "loss": 0.192535400390625, + "step": 12120 + }, + { + "epoch": 0.10484128974241468, + "grad_norm": 18.94239813312701, + "learning_rate": 5.95941047564281e-06, + "loss": 0.21183929443359376, + "step": 12125 + }, + { + "epoch": 0.10488452326395795, + "grad_norm": 27.06060932850381, + "learning_rate": 5.95937706861451e-06, + "loss": 0.355206298828125, + "step": 12130 + }, + { + "epoch": 0.1049277567855012, + "grad_norm": 6.752373881425471, + "learning_rate": 5.959343647937824e-06, + "loss": 0.04214019775390625, + "step": 12135 + }, + { + "epoch": 0.10497099030704447, + "grad_norm": 24.587518650556312, + "learning_rate": 5.9593102136129075e-06, + "loss": 0.06853179931640625, + "step": 12140 + }, + { + "epoch": 0.10501422382858773, + "grad_norm": 13.762748490364713, + "learning_rate": 5.959276765639913e-06, + "loss": 0.1755218505859375, + "step": 12145 + }, + { + "epoch": 0.105057457350131, + "grad_norm": 14.93963276212016, + "learning_rate": 5.9592433040189956e-06, + "loss": 0.28036956787109374, + "step": 12150 + }, + { + "epoch": 0.10510069087167426, + "grad_norm": 35.48816712635642, + "learning_rate": 5.95920982875031e-06, + "loss": 0.4624656677246094, + "step": 12155 + }, + { + "epoch": 0.10514392439321753, + "grad_norm": 10.59375877846719, + "learning_rate": 5.95917633983401e-06, + "loss": 0.215380859375, + "step": 12160 + }, + { + "epoch": 0.10518715791476078, + "grad_norm": 4.3855620219812, + "learning_rate": 5.95914283727025e-06, + "loss": 0.1049041748046875, + "step": 12165 + }, + { + "epoch": 0.10523039143630405, + "grad_norm": 29.917914910431282, + "learning_rate": 5.959109321059184e-06, + "loss": 0.265399169921875, + "step": 12170 + }, + { + "epoch": 0.10527362495784731, + "grad_norm": 23.001774705294594, + "learning_rate": 5.959075791200969e-06, + "loss": 0.2013641357421875, + "step": 12175 + }, + { + "epoch": 0.10531685847939058, + "grad_norm": 24.16745038210624, + "learning_rate": 5.959042247695757e-06, + "loss": 0.4964012145996094, + "step": 12180 + }, + { + "epoch": 0.10536009200093384, + "grad_norm": 4.051811837383696, + "learning_rate": 5.959008690543704e-06, + "loss": 0.0985260009765625, + "step": 12185 + }, + { + "epoch": 0.10540332552247711, + "grad_norm": 5.3599138152472, + "learning_rate": 5.958975119744963e-06, + "loss": 0.3102745056152344, + "step": 12190 + }, + { + "epoch": 0.10544655904402037, + "grad_norm": 8.277449210870289, + "learning_rate": 5.9589415352996915e-06, + "loss": 0.1022735595703125, + "step": 12195 + }, + { + "epoch": 0.10548979256556364, + "grad_norm": 44.85459372892811, + "learning_rate": 5.9589079372080424e-06, + "loss": 0.49610595703125, + "step": 12200 + }, + { + "epoch": 0.1055330260871069, + "grad_norm": 14.308739621201003, + "learning_rate": 5.958874325470172e-06, + "loss": 0.154315185546875, + "step": 12205 + }, + { + "epoch": 0.10557625960865016, + "grad_norm": 4.190644558069299, + "learning_rate": 5.958840700086234e-06, + "loss": 0.2715972900390625, + "step": 12210 + }, + { + "epoch": 0.10561949313019342, + "grad_norm": 0.8828122476536281, + "learning_rate": 5.958807061056385e-06, + "loss": 0.16395111083984376, + "step": 12215 + }, + { + "epoch": 0.10566272665173669, + "grad_norm": 11.810342218519935, + "learning_rate": 5.958773408380779e-06, + "loss": 0.328179931640625, + "step": 12220 + }, + { + "epoch": 0.10570596017327995, + "grad_norm": 16.00879163708655, + "learning_rate": 5.958739742059572e-06, + "loss": 0.42861328125, + "step": 12225 + }, + { + "epoch": 0.10574919369482322, + "grad_norm": 6.573204508044176, + "learning_rate": 5.958706062092917e-06, + "loss": 0.1602386474609375, + "step": 12230 + }, + { + "epoch": 0.10579242721636649, + "grad_norm": 15.044751468474809, + "learning_rate": 5.958672368480972e-06, + "loss": 0.1012603759765625, + "step": 12235 + }, + { + "epoch": 0.10583566073790975, + "grad_norm": 36.3934077789277, + "learning_rate": 5.9586386612238915e-06, + "loss": 0.22796630859375, + "step": 12240 + }, + { + "epoch": 0.10587889425945302, + "grad_norm": 36.49242414005973, + "learning_rate": 5.958604940321831e-06, + "loss": 0.2229095458984375, + "step": 12245 + }, + { + "epoch": 0.10592212778099627, + "grad_norm": 9.626079131878504, + "learning_rate": 5.958571205774946e-06, + "loss": 0.21823959350585936, + "step": 12250 + }, + { + "epoch": 0.10596536130253954, + "grad_norm": 0.11411544684355648, + "learning_rate": 5.958537457583393e-06, + "loss": 0.09373550415039063, + "step": 12255 + }, + { + "epoch": 0.1060085948240828, + "grad_norm": 28.20137108651056, + "learning_rate": 5.958503695747325e-06, + "loss": 0.3178466796875, + "step": 12260 + }, + { + "epoch": 0.10605182834562607, + "grad_norm": 27.87750633428315, + "learning_rate": 5.9584699202669e-06, + "loss": 0.41124839782714845, + "step": 12265 + }, + { + "epoch": 0.10609506186716933, + "grad_norm": 3.0005554828768743, + "learning_rate": 5.958436131142273e-06, + "loss": 0.11150436401367188, + "step": 12270 + }, + { + "epoch": 0.1061382953887126, + "grad_norm": 0.7371447957346965, + "learning_rate": 5.9584023283736e-06, + "loss": 0.14984664916992188, + "step": 12275 + }, + { + "epoch": 0.10618152891025585, + "grad_norm": 5.956270694093625, + "learning_rate": 5.958368511961037e-06, + "loss": 0.28236236572265627, + "step": 12280 + }, + { + "epoch": 0.10622476243179912, + "grad_norm": 2.3147256021217353, + "learning_rate": 5.9583346819047406e-06, + "loss": 0.15153579711914061, + "step": 12285 + }, + { + "epoch": 0.10626799595334238, + "grad_norm": 30.11179813985326, + "learning_rate": 5.958300838204864e-06, + "loss": 0.41706695556640627, + "step": 12290 + }, + { + "epoch": 0.10631122947488565, + "grad_norm": 0.4313864330899434, + "learning_rate": 5.9582669808615675e-06, + "loss": 0.1411346435546875, + "step": 12295 + }, + { + "epoch": 0.10635446299642891, + "grad_norm": 1.0609615584035768, + "learning_rate": 5.958233109875004e-06, + "loss": 0.21763687133789061, + "step": 12300 + }, + { + "epoch": 0.10639769651797218, + "grad_norm": 3.920586313495099, + "learning_rate": 5.958199225245331e-06, + "loss": 0.48209228515625, + "step": 12305 + }, + { + "epoch": 0.10644093003951544, + "grad_norm": 3.1171578793629258, + "learning_rate": 5.9581653269727045e-06, + "loss": 0.4880340576171875, + "step": 12310 + }, + { + "epoch": 0.1064841635610587, + "grad_norm": 9.126546486334457, + "learning_rate": 5.958131415057281e-06, + "loss": 0.0609405517578125, + "step": 12315 + }, + { + "epoch": 0.10652739708260196, + "grad_norm": 9.937274317174825, + "learning_rate": 5.958097489499217e-06, + "loss": 0.223760986328125, + "step": 12320 + }, + { + "epoch": 0.10657063060414523, + "grad_norm": 6.394570377122387, + "learning_rate": 5.958063550298668e-06, + "loss": 0.0718231201171875, + "step": 12325 + }, + { + "epoch": 0.10661386412568849, + "grad_norm": 1.2085728596143102, + "learning_rate": 5.958029597455792e-06, + "loss": 0.1781494140625, + "step": 12330 + }, + { + "epoch": 0.10665709764723176, + "grad_norm": 6.523385759032611, + "learning_rate": 5.957995630970745e-06, + "loss": 0.0662384033203125, + "step": 12335 + }, + { + "epoch": 0.10670033116877502, + "grad_norm": 2.7030185141520193, + "learning_rate": 5.957961650843682e-06, + "loss": 0.38602294921875, + "step": 12340 + }, + { + "epoch": 0.10674356469031829, + "grad_norm": 1.3360826085828774, + "learning_rate": 5.9579276570747625e-06, + "loss": 0.236474609375, + "step": 12345 + }, + { + "epoch": 0.10678679821186154, + "grad_norm": 24.53701458841381, + "learning_rate": 5.957893649664142e-06, + "loss": 0.1592376708984375, + "step": 12350 + }, + { + "epoch": 0.10683003173340481, + "grad_norm": 8.552910755504078, + "learning_rate": 5.957859628611978e-06, + "loss": 0.11676025390625, + "step": 12355 + }, + { + "epoch": 0.10687326525494807, + "grad_norm": 6.299685430696489, + "learning_rate": 5.957825593918425e-06, + "loss": 0.146917724609375, + "step": 12360 + }, + { + "epoch": 0.10691649877649134, + "grad_norm": 17.996226495054124, + "learning_rate": 5.957791545583643e-06, + "loss": 0.092889404296875, + "step": 12365 + }, + { + "epoch": 0.1069597322980346, + "grad_norm": 15.679079448887125, + "learning_rate": 5.957757483607788e-06, + "loss": 0.072808837890625, + "step": 12370 + }, + { + "epoch": 0.10700296581957787, + "grad_norm": 31.721682269329662, + "learning_rate": 5.957723407991016e-06, + "loss": 0.1587890625, + "step": 12375 + }, + { + "epoch": 0.10704619934112113, + "grad_norm": 33.7753845655353, + "learning_rate": 5.957689318733486e-06, + "loss": 0.2508056640625, + "step": 12380 + }, + { + "epoch": 0.1070894328626644, + "grad_norm": 72.31117831014895, + "learning_rate": 5.957655215835353e-06, + "loss": 0.295135498046875, + "step": 12385 + }, + { + "epoch": 0.10713266638420765, + "grad_norm": 16.504647355771368, + "learning_rate": 5.957621099296776e-06, + "loss": 0.0836944580078125, + "step": 12390 + }, + { + "epoch": 0.10717589990575092, + "grad_norm": 20.346117217353413, + "learning_rate": 5.957586969117912e-06, + "loss": 0.11571807861328125, + "step": 12395 + }, + { + "epoch": 0.10721913342729418, + "grad_norm": 3.196159736406, + "learning_rate": 5.957552825298918e-06, + "loss": 0.12113494873046875, + "step": 12400 + }, + { + "epoch": 0.10726236694883745, + "grad_norm": 42.74099302135066, + "learning_rate": 5.957518667839951e-06, + "loss": 0.3410614013671875, + "step": 12405 + }, + { + "epoch": 0.10730560047038071, + "grad_norm": 16.07511230129486, + "learning_rate": 5.9574844967411706e-06, + "loss": 0.077789306640625, + "step": 12410 + }, + { + "epoch": 0.10734883399192398, + "grad_norm": 1.1583250078570326, + "learning_rate": 5.957450312002733e-06, + "loss": 0.04654998779296875, + "step": 12415 + }, + { + "epoch": 0.10739206751346725, + "grad_norm": 31.266923400183146, + "learning_rate": 5.9574161136247955e-06, + "loss": 0.17808990478515624, + "step": 12420 + }, + { + "epoch": 0.1074353010350105, + "grad_norm": 11.482250227107055, + "learning_rate": 5.957381901607515e-06, + "loss": 0.20477294921875, + "step": 12425 + }, + { + "epoch": 0.10747853455655378, + "grad_norm": 13.952767729892717, + "learning_rate": 5.957347675951053e-06, + "loss": 0.08818206787109376, + "step": 12430 + }, + { + "epoch": 0.10752176807809703, + "grad_norm": 2.336001895196664, + "learning_rate": 5.9573134366555626e-06, + "loss": 0.23535194396972656, + "step": 12435 + }, + { + "epoch": 0.1075650015996403, + "grad_norm": 8.634428940366302, + "learning_rate": 5.957279183721205e-06, + "loss": 0.15908203125, + "step": 12440 + }, + { + "epoch": 0.10760823512118356, + "grad_norm": 2.078556174059676, + "learning_rate": 5.957244917148136e-06, + "loss": 0.28960342407226564, + "step": 12445 + }, + { + "epoch": 0.10765146864272683, + "grad_norm": 12.096028590542831, + "learning_rate": 5.9572106369365156e-06, + "loss": 0.1212432861328125, + "step": 12450 + }, + { + "epoch": 0.10769470216427009, + "grad_norm": 51.33479915902762, + "learning_rate": 5.957176343086501e-06, + "loss": 0.472064208984375, + "step": 12455 + }, + { + "epoch": 0.10773793568581336, + "grad_norm": 20.39294906284195, + "learning_rate": 5.95714203559825e-06, + "loss": 0.4736961364746094, + "step": 12460 + }, + { + "epoch": 0.10778116920735661, + "grad_norm": 59.84375507576566, + "learning_rate": 5.957107714471923e-06, + "loss": 0.2668975830078125, + "step": 12465 + }, + { + "epoch": 0.10782440272889988, + "grad_norm": 2.5451675979797668, + "learning_rate": 5.957073379707675e-06, + "loss": 0.08891487121582031, + "step": 12470 + }, + { + "epoch": 0.10786763625044314, + "grad_norm": 6.48659838969534, + "learning_rate": 5.957039031305666e-06, + "loss": 0.282940673828125, + "step": 12475 + }, + { + "epoch": 0.10791086977198641, + "grad_norm": 20.38357722331134, + "learning_rate": 5.957004669266054e-06, + "loss": 0.15087890625, + "step": 12480 + }, + { + "epoch": 0.10795410329352967, + "grad_norm": 7.179524377043161, + "learning_rate": 5.9569702935889975e-06, + "loss": 0.07814178466796876, + "step": 12485 + }, + { + "epoch": 0.10799733681507294, + "grad_norm": 4.706046388873745, + "learning_rate": 5.956935904274657e-06, + "loss": 0.29364013671875, + "step": 12490 + }, + { + "epoch": 0.1080405703366162, + "grad_norm": 0.08786924052385708, + "learning_rate": 5.956901501323187e-06, + "loss": 0.114227294921875, + "step": 12495 + }, + { + "epoch": 0.10808380385815947, + "grad_norm": 1.4450857135649273, + "learning_rate": 5.95686708473475e-06, + "loss": 0.10728759765625, + "step": 12500 + }, + { + "epoch": 0.10812703737970272, + "grad_norm": 0.6928847642280505, + "learning_rate": 5.956832654509503e-06, + "loss": 0.10894775390625, + "step": 12505 + }, + { + "epoch": 0.108170270901246, + "grad_norm": 27.289021600538877, + "learning_rate": 5.956798210647605e-06, + "loss": 0.28443145751953125, + "step": 12510 + }, + { + "epoch": 0.10821350442278925, + "grad_norm": 9.423562793497144, + "learning_rate": 5.9567637531492154e-06, + "loss": 0.17379150390625, + "step": 12515 + }, + { + "epoch": 0.10825673794433252, + "grad_norm": 8.538362927638415, + "learning_rate": 5.956729282014492e-06, + "loss": 0.17109375, + "step": 12520 + }, + { + "epoch": 0.10829997146587578, + "grad_norm": 0.3103194473092233, + "learning_rate": 5.956694797243595e-06, + "loss": 0.13835525512695312, + "step": 12525 + }, + { + "epoch": 0.10834320498741905, + "grad_norm": 5.266395274416729, + "learning_rate": 5.956660298836683e-06, + "loss": 0.1847991943359375, + "step": 12530 + }, + { + "epoch": 0.1083864385089623, + "grad_norm": 46.65997208744191, + "learning_rate": 5.956625786793915e-06, + "loss": 0.2995170593261719, + "step": 12535 + }, + { + "epoch": 0.10842967203050558, + "grad_norm": 3.1885977639755367, + "learning_rate": 5.956591261115449e-06, + "loss": 0.30315093994140624, + "step": 12540 + }, + { + "epoch": 0.10847290555204883, + "grad_norm": 0.7378064894475093, + "learning_rate": 5.9565567218014455e-06, + "loss": 0.24802703857421876, + "step": 12545 + }, + { + "epoch": 0.1085161390735921, + "grad_norm": 22.45409069148834, + "learning_rate": 5.956522168852065e-06, + "loss": 0.21468887329101563, + "step": 12550 + }, + { + "epoch": 0.10855937259513536, + "grad_norm": 1.052770267266094, + "learning_rate": 5.9564876022674635e-06, + "loss": 0.077783203125, + "step": 12555 + }, + { + "epoch": 0.10860260611667863, + "grad_norm": 53.918374713335055, + "learning_rate": 5.956453022047804e-06, + "loss": 0.3211334228515625, + "step": 12560 + }, + { + "epoch": 0.10864583963822189, + "grad_norm": 5.970496781643749, + "learning_rate": 5.956418428193244e-06, + "loss": 0.4175506591796875, + "step": 12565 + }, + { + "epoch": 0.10868907315976516, + "grad_norm": 1.335286267926237, + "learning_rate": 5.956383820703943e-06, + "loss": 0.10736198425292968, + "step": 12570 + }, + { + "epoch": 0.10873230668130841, + "grad_norm": 2.0404533508397598, + "learning_rate": 5.956349199580062e-06, + "loss": 0.17579193115234376, + "step": 12575 + }, + { + "epoch": 0.10877554020285168, + "grad_norm": 16.117703705334815, + "learning_rate": 5.956314564821759e-06, + "loss": 0.1603271484375, + "step": 12580 + }, + { + "epoch": 0.10881877372439494, + "grad_norm": 5.870443483672114, + "learning_rate": 5.956279916429195e-06, + "loss": 0.230963134765625, + "step": 12585 + }, + { + "epoch": 0.10886200724593821, + "grad_norm": 8.56297868469695, + "learning_rate": 5.9562452544025286e-06, + "loss": 0.18236083984375, + "step": 12590 + }, + { + "epoch": 0.10890524076748147, + "grad_norm": 17.978527180315396, + "learning_rate": 5.95621057874192e-06, + "loss": 0.53734130859375, + "step": 12595 + }, + { + "epoch": 0.10894847428902474, + "grad_norm": 53.00567562450656, + "learning_rate": 5.956175889447531e-06, + "loss": 0.32432861328125, + "step": 12600 + }, + { + "epoch": 0.10899170781056801, + "grad_norm": 15.34224689442147, + "learning_rate": 5.956141186519518e-06, + "loss": 0.256964111328125, + "step": 12605 + }, + { + "epoch": 0.10903494133211127, + "grad_norm": 30.58082486451185, + "learning_rate": 5.956106469958044e-06, + "loss": 0.1087646484375, + "step": 12610 + }, + { + "epoch": 0.10907817485365454, + "grad_norm": 23.686175980635237, + "learning_rate": 5.956071739763268e-06, + "loss": 0.59501953125, + "step": 12615 + }, + { + "epoch": 0.10912140837519779, + "grad_norm": 12.67598260499715, + "learning_rate": 5.956036995935351e-06, + "loss": 0.48013916015625, + "step": 12620 + }, + { + "epoch": 0.10916464189674106, + "grad_norm": 34.099720383380046, + "learning_rate": 5.956002238474452e-06, + "loss": 0.265283203125, + "step": 12625 + }, + { + "epoch": 0.10920787541828432, + "grad_norm": 19.376068607820905, + "learning_rate": 5.955967467380732e-06, + "loss": 0.1585784912109375, + "step": 12630 + }, + { + "epoch": 0.10925110893982759, + "grad_norm": 55.62838823250057, + "learning_rate": 5.955932682654351e-06, + "loss": 0.213922119140625, + "step": 12635 + }, + { + "epoch": 0.10929434246137085, + "grad_norm": 40.17435256713296, + "learning_rate": 5.95589788429547e-06, + "loss": 0.1549224853515625, + "step": 12640 + }, + { + "epoch": 0.10933757598291412, + "grad_norm": 15.48210166978832, + "learning_rate": 5.955863072304249e-06, + "loss": 0.1380706787109375, + "step": 12645 + }, + { + "epoch": 0.10938080950445737, + "grad_norm": 12.476592418087575, + "learning_rate": 5.9558282466808485e-06, + "loss": 0.2277923583984375, + "step": 12650 + }, + { + "epoch": 0.10942404302600064, + "grad_norm": 49.94069099662398, + "learning_rate": 5.95579340742543e-06, + "loss": 0.30487060546875, + "step": 12655 + }, + { + "epoch": 0.1094672765475439, + "grad_norm": 23.309299654085205, + "learning_rate": 5.955758554538153e-06, + "loss": 0.5010498046875, + "step": 12660 + }, + { + "epoch": 0.10951051006908717, + "grad_norm": 1.001101954814316, + "learning_rate": 5.9557236880191785e-06, + "loss": 0.09618682861328125, + "step": 12665 + }, + { + "epoch": 0.10955374359063043, + "grad_norm": 10.877623447495749, + "learning_rate": 5.955688807868668e-06, + "loss": 0.08385009765625, + "step": 12670 + }, + { + "epoch": 0.1095969771121737, + "grad_norm": 2.3444019343282356, + "learning_rate": 5.955653914086782e-06, + "loss": 0.082427978515625, + "step": 12675 + }, + { + "epoch": 0.10964021063371696, + "grad_norm": 30.624796745510032, + "learning_rate": 5.955619006673681e-06, + "loss": 0.43690338134765627, + "step": 12680 + }, + { + "epoch": 0.10968344415526023, + "grad_norm": 3.709695123438368, + "learning_rate": 5.955584085629526e-06, + "loss": 0.37744903564453125, + "step": 12685 + }, + { + "epoch": 0.10972667767680348, + "grad_norm": 17.089967471078364, + "learning_rate": 5.955549150954479e-06, + "loss": 0.3346588134765625, + "step": 12690 + }, + { + "epoch": 0.10976991119834675, + "grad_norm": 5.174322950565259, + "learning_rate": 5.9555142026487e-06, + "loss": 0.2467529296875, + "step": 12695 + }, + { + "epoch": 0.10981314471989001, + "grad_norm": 8.830534933543657, + "learning_rate": 5.9554792407123505e-06, + "loss": 0.08062286376953125, + "step": 12700 + }, + { + "epoch": 0.10985637824143328, + "grad_norm": 26.893801403781012, + "learning_rate": 5.955444265145592e-06, + "loss": 0.164227294921875, + "step": 12705 + }, + { + "epoch": 0.10989961176297654, + "grad_norm": 13.738680478165781, + "learning_rate": 5.955409275948586e-06, + "loss": 0.68111572265625, + "step": 12710 + }, + { + "epoch": 0.10994284528451981, + "grad_norm": 8.767334511549956, + "learning_rate": 5.955374273121493e-06, + "loss": 0.09996185302734376, + "step": 12715 + }, + { + "epoch": 0.10998607880606306, + "grad_norm": 22.111734158027698, + "learning_rate": 5.955339256664476e-06, + "loss": 0.22621574401855468, + "step": 12720 + }, + { + "epoch": 0.11002931232760634, + "grad_norm": 17.882124353338636, + "learning_rate": 5.9553042265776945e-06, + "loss": 0.422393798828125, + "step": 12725 + }, + { + "epoch": 0.11007254584914959, + "grad_norm": 15.720525343813446, + "learning_rate": 5.955269182861311e-06, + "loss": 0.3568267822265625, + "step": 12730 + }, + { + "epoch": 0.11011577937069286, + "grad_norm": 27.163420637431567, + "learning_rate": 5.9552341255154875e-06, + "loss": 0.7912689208984375, + "step": 12735 + }, + { + "epoch": 0.11015901289223612, + "grad_norm": 0.18705285938453725, + "learning_rate": 5.955199054540386e-06, + "loss": 0.03962554931640625, + "step": 12740 + }, + { + "epoch": 0.11020224641377939, + "grad_norm": 5.969066903984986, + "learning_rate": 5.9551639699361665e-06, + "loss": 0.13333740234375, + "step": 12745 + }, + { + "epoch": 0.11024547993532265, + "grad_norm": 4.977989317542804, + "learning_rate": 5.955128871702993e-06, + "loss": 0.20752983093261718, + "step": 12750 + }, + { + "epoch": 0.11028871345686592, + "grad_norm": 15.886228031407356, + "learning_rate": 5.955093759841026e-06, + "loss": 0.720733642578125, + "step": 12755 + }, + { + "epoch": 0.11033194697840917, + "grad_norm": 40.10741426734803, + "learning_rate": 5.9550586343504265e-06, + "loss": 0.23146896362304686, + "step": 12760 + }, + { + "epoch": 0.11037518049995244, + "grad_norm": 52.27637953870251, + "learning_rate": 5.9550234952313585e-06, + "loss": 0.293731689453125, + "step": 12765 + }, + { + "epoch": 0.1104184140214957, + "grad_norm": 20.476200744222442, + "learning_rate": 5.954988342483984e-06, + "loss": 0.279083251953125, + "step": 12770 + }, + { + "epoch": 0.11046164754303897, + "grad_norm": 29.26137247187255, + "learning_rate": 5.954953176108463e-06, + "loss": 0.19505996704101564, + "step": 12775 + }, + { + "epoch": 0.11050488106458223, + "grad_norm": 1.6781801747611185, + "learning_rate": 5.95491799610496e-06, + "loss": 0.30159759521484375, + "step": 12780 + }, + { + "epoch": 0.1105481145861255, + "grad_norm": 17.699323540146572, + "learning_rate": 5.954882802473636e-06, + "loss": 0.23468017578125, + "step": 12785 + }, + { + "epoch": 0.11059134810766877, + "grad_norm": 15.334231044657379, + "learning_rate": 5.9548475952146535e-06, + "loss": 0.2248046875, + "step": 12790 + }, + { + "epoch": 0.11063458162921203, + "grad_norm": 51.89925909236781, + "learning_rate": 5.954812374328176e-06, + "loss": 0.6193328857421875, + "step": 12795 + }, + { + "epoch": 0.1106778151507553, + "grad_norm": 22.846369587142696, + "learning_rate": 5.954777139814363e-06, + "loss": 0.36855087280273435, + "step": 12800 + }, + { + "epoch": 0.11072104867229855, + "grad_norm": 0.9623682658067356, + "learning_rate": 5.95474189167338e-06, + "loss": 0.14196395874023438, + "step": 12805 + }, + { + "epoch": 0.11076428219384182, + "grad_norm": 2.1836780494082144, + "learning_rate": 5.954706629905389e-06, + "loss": 0.0643096923828125, + "step": 12810 + }, + { + "epoch": 0.11080751571538508, + "grad_norm": 13.996384661729834, + "learning_rate": 5.954671354510551e-06, + "loss": 0.5015289306640625, + "step": 12815 + }, + { + "epoch": 0.11085074923692835, + "grad_norm": 12.742310639179955, + "learning_rate": 5.954636065489031e-06, + "loss": 0.2370758056640625, + "step": 12820 + }, + { + "epoch": 0.11089398275847161, + "grad_norm": 4.480968396765759, + "learning_rate": 5.954600762840989e-06, + "loss": 0.165960693359375, + "step": 12825 + }, + { + "epoch": 0.11093721628001488, + "grad_norm": 0.3466477294430131, + "learning_rate": 5.9545654465665904e-06, + "loss": 0.118267822265625, + "step": 12830 + }, + { + "epoch": 0.11098044980155813, + "grad_norm": 1.766762844309071, + "learning_rate": 5.9545301166659965e-06, + "loss": 0.15927734375, + "step": 12835 + }, + { + "epoch": 0.1110236833231014, + "grad_norm": 16.744277948340468, + "learning_rate": 5.9544947731393715e-06, + "loss": 0.262847900390625, + "step": 12840 + }, + { + "epoch": 0.11106691684464466, + "grad_norm": 29.759975904138326, + "learning_rate": 5.954459415986877e-06, + "loss": 0.269940185546875, + "step": 12845 + }, + { + "epoch": 0.11111015036618793, + "grad_norm": 32.71798075480026, + "learning_rate": 5.9544240452086776e-06, + "loss": 0.153082275390625, + "step": 12850 + }, + { + "epoch": 0.11115338388773119, + "grad_norm": 13.676415568032331, + "learning_rate": 5.954388660804935e-06, + "loss": 0.092047119140625, + "step": 12855 + }, + { + "epoch": 0.11119661740927446, + "grad_norm": 2.034282913677631, + "learning_rate": 5.954353262775813e-06, + "loss": 0.1228546142578125, + "step": 12860 + }, + { + "epoch": 0.11123985093081772, + "grad_norm": 14.864706215220183, + "learning_rate": 5.954317851121475e-06, + "loss": 0.21545333862304689, + "step": 12865 + }, + { + "epoch": 0.11128308445236099, + "grad_norm": 24.990393084372457, + "learning_rate": 5.954282425842084e-06, + "loss": 0.2750885009765625, + "step": 12870 + }, + { + "epoch": 0.11132631797390424, + "grad_norm": 10.091687928112272, + "learning_rate": 5.954246986937803e-06, + "loss": 0.1601593017578125, + "step": 12875 + }, + { + "epoch": 0.11136955149544751, + "grad_norm": 43.45282246279171, + "learning_rate": 5.954211534408796e-06, + "loss": 0.17961273193359376, + "step": 12880 + }, + { + "epoch": 0.11141278501699077, + "grad_norm": 6.40559060508729, + "learning_rate": 5.954176068255228e-06, + "loss": 0.275341796875, + "step": 12885 + }, + { + "epoch": 0.11145601853853404, + "grad_norm": 24.316235667351638, + "learning_rate": 5.954140588477259e-06, + "loss": 0.39622802734375, + "step": 12890 + }, + { + "epoch": 0.1114992520600773, + "grad_norm": 3.920977040390832, + "learning_rate": 5.954105095075055e-06, + "loss": 0.2016357421875, + "step": 12895 + }, + { + "epoch": 0.11154248558162057, + "grad_norm": 3.4327067960228823, + "learning_rate": 5.9540695880487795e-06, + "loss": 0.1202484130859375, + "step": 12900 + }, + { + "epoch": 0.11158571910316382, + "grad_norm": 22.633794922218925, + "learning_rate": 5.954034067398597e-06, + "loss": 0.33217849731445315, + "step": 12905 + }, + { + "epoch": 0.1116289526247071, + "grad_norm": 0.044876444870319, + "learning_rate": 5.9539985331246694e-06, + "loss": 0.3057838439941406, + "step": 12910 + }, + { + "epoch": 0.11167218614625035, + "grad_norm": 3.7449405601402956, + "learning_rate": 5.9539629852271615e-06, + "loss": 0.30372467041015627, + "step": 12915 + }, + { + "epoch": 0.11171541966779362, + "grad_norm": 13.696605522561608, + "learning_rate": 5.953927423706238e-06, + "loss": 0.3780029296875, + "step": 12920 + }, + { + "epoch": 0.11175865318933688, + "grad_norm": 1.0064721165782835, + "learning_rate": 5.953891848562062e-06, + "loss": 0.25426902770996096, + "step": 12925 + }, + { + "epoch": 0.11180188671088015, + "grad_norm": 5.0942145766208204, + "learning_rate": 5.953856259794798e-06, + "loss": 0.04747772216796875, + "step": 12930 + }, + { + "epoch": 0.1118451202324234, + "grad_norm": 46.19033370040082, + "learning_rate": 5.953820657404609e-06, + "loss": 0.5302947998046875, + "step": 12935 + }, + { + "epoch": 0.11188835375396668, + "grad_norm": 11.78249435246842, + "learning_rate": 5.953785041391661e-06, + "loss": 0.1372955322265625, + "step": 12940 + }, + { + "epoch": 0.11193158727550993, + "grad_norm": 3.311445109198975, + "learning_rate": 5.9537494117561176e-06, + "loss": 0.2192138671875, + "step": 12945 + }, + { + "epoch": 0.1119748207970532, + "grad_norm": 19.48549309410215, + "learning_rate": 5.953713768498142e-06, + "loss": 0.35552978515625, + "step": 12950 + }, + { + "epoch": 0.11201805431859646, + "grad_norm": 3.5624229324000694, + "learning_rate": 5.9536781116179005e-06, + "loss": 0.09289016723632812, + "step": 12955 + }, + { + "epoch": 0.11206128784013973, + "grad_norm": 12.812068539478478, + "learning_rate": 5.953642441115556e-06, + "loss": 0.09473648071289062, + "step": 12960 + }, + { + "epoch": 0.11210452136168299, + "grad_norm": 37.90175639386, + "learning_rate": 5.953606756991273e-06, + "loss": 0.27135009765625, + "step": 12965 + }, + { + "epoch": 0.11214775488322626, + "grad_norm": 0.24273297730175786, + "learning_rate": 5.953571059245218e-06, + "loss": 0.050922393798828125, + "step": 12970 + }, + { + "epoch": 0.11219098840476953, + "grad_norm": 12.497402204335213, + "learning_rate": 5.9535353478775535e-06, + "loss": 0.086322021484375, + "step": 12975 + }, + { + "epoch": 0.11223422192631279, + "grad_norm": 27.764730710876055, + "learning_rate": 5.953499622888445e-06, + "loss": 0.2442047119140625, + "step": 12980 + }, + { + "epoch": 0.11227745544785606, + "grad_norm": 44.18545003741528, + "learning_rate": 5.953463884278057e-06, + "loss": 0.220220947265625, + "step": 12985 + }, + { + "epoch": 0.11232068896939931, + "grad_norm": 27.611129140992325, + "learning_rate": 5.953428132046555e-06, + "loss": 0.4059318542480469, + "step": 12990 + }, + { + "epoch": 0.11236392249094258, + "grad_norm": 6.045093285545886, + "learning_rate": 5.953392366194104e-06, + "loss": 0.290118408203125, + "step": 12995 + }, + { + "epoch": 0.11240715601248584, + "grad_norm": 6.760442356205357, + "learning_rate": 5.953356586720866e-06, + "loss": 0.18145751953125, + "step": 13000 + }, + { + "epoch": 0.11245038953402911, + "grad_norm": 12.511903948739192, + "learning_rate": 5.95332079362701e-06, + "loss": 0.264361572265625, + "step": 13005 + }, + { + "epoch": 0.11249362305557237, + "grad_norm": 27.216801757382534, + "learning_rate": 5.953284986912699e-06, + "loss": 0.360552978515625, + "step": 13010 + }, + { + "epoch": 0.11253685657711564, + "grad_norm": 1.5328996139717566, + "learning_rate": 5.9532491665781e-06, + "loss": 0.32605743408203125, + "step": 13015 + }, + { + "epoch": 0.1125800900986589, + "grad_norm": 7.912911842165785, + "learning_rate": 5.9532133326233744e-06, + "loss": 0.08908920288085938, + "step": 13020 + }, + { + "epoch": 0.11262332362020216, + "grad_norm": 6.527245859035964, + "learning_rate": 5.953177485048692e-06, + "loss": 0.24391555786132812, + "step": 13025 + }, + { + "epoch": 0.11266655714174542, + "grad_norm": 29.59196440490035, + "learning_rate": 5.953141623854215e-06, + "loss": 0.170025634765625, + "step": 13030 + }, + { + "epoch": 0.11270979066328869, + "grad_norm": 6.31455110952459, + "learning_rate": 5.95310574904011e-06, + "loss": 0.14867095947265624, + "step": 13035 + }, + { + "epoch": 0.11275302418483195, + "grad_norm": 4.1941868117037995, + "learning_rate": 5.953069860606543e-06, + "loss": 0.0916290283203125, + "step": 13040 + }, + { + "epoch": 0.11279625770637522, + "grad_norm": 23.32977623050154, + "learning_rate": 5.953033958553677e-06, + "loss": 0.13050155639648436, + "step": 13045 + }, + { + "epoch": 0.11283949122791848, + "grad_norm": 2.82259738083482, + "learning_rate": 5.952998042881681e-06, + "loss": 0.03600006103515625, + "step": 13050 + }, + { + "epoch": 0.11288272474946175, + "grad_norm": 10.093261978660511, + "learning_rate": 5.952962113590718e-06, + "loss": 0.225238037109375, + "step": 13055 + }, + { + "epoch": 0.112925958271005, + "grad_norm": 1.2548791681649716, + "learning_rate": 5.952926170680954e-06, + "loss": 0.2664459228515625, + "step": 13060 + }, + { + "epoch": 0.11296919179254827, + "grad_norm": 0.9943338843382233, + "learning_rate": 5.952890214152556e-06, + "loss": 0.0475067138671875, + "step": 13065 + }, + { + "epoch": 0.11301242531409153, + "grad_norm": 10.720345067818391, + "learning_rate": 5.95285424400569e-06, + "loss": 0.23472900390625, + "step": 13070 + }, + { + "epoch": 0.1130556588356348, + "grad_norm": 14.383465492916935, + "learning_rate": 5.952818260240521e-06, + "loss": 0.2162933349609375, + "step": 13075 + }, + { + "epoch": 0.11309889235717806, + "grad_norm": 2.6613890529131923, + "learning_rate": 5.9527822628572145e-06, + "loss": 0.02064361572265625, + "step": 13080 + }, + { + "epoch": 0.11314212587872133, + "grad_norm": 22.596600363862418, + "learning_rate": 5.952746251855938e-06, + "loss": 0.326739501953125, + "step": 13085 + }, + { + "epoch": 0.11318535940026458, + "grad_norm": 1.2710077268234055, + "learning_rate": 5.9527102272368555e-06, + "loss": 0.07425689697265625, + "step": 13090 + }, + { + "epoch": 0.11322859292180786, + "grad_norm": 0.23201983392223952, + "learning_rate": 5.952674189000136e-06, + "loss": 0.22129364013671876, + "step": 13095 + }, + { + "epoch": 0.11327182644335111, + "grad_norm": 5.780660633096228, + "learning_rate": 5.952638137145942e-06, + "loss": 0.24470291137695313, + "step": 13100 + }, + { + "epoch": 0.11331505996489438, + "grad_norm": 30.255044958911046, + "learning_rate": 5.952602071674442e-06, + "loss": 0.238568115234375, + "step": 13105 + }, + { + "epoch": 0.11335829348643764, + "grad_norm": 5.848887302252407, + "learning_rate": 5.952565992585804e-06, + "loss": 0.15269775390625, + "step": 13110 + }, + { + "epoch": 0.11340152700798091, + "grad_norm": 25.345377779285414, + "learning_rate": 5.952529899880191e-06, + "loss": 0.078436279296875, + "step": 13115 + }, + { + "epoch": 0.11344476052952417, + "grad_norm": 32.80904740692052, + "learning_rate": 5.952493793557771e-06, + "loss": 0.210040283203125, + "step": 13120 + }, + { + "epoch": 0.11348799405106744, + "grad_norm": 18.27826762453483, + "learning_rate": 5.952457673618711e-06, + "loss": 0.4144561767578125, + "step": 13125 + }, + { + "epoch": 0.1135312275726107, + "grad_norm": 18.950378752527797, + "learning_rate": 5.952421540063177e-06, + "loss": 0.22125091552734374, + "step": 13130 + }, + { + "epoch": 0.11357446109415396, + "grad_norm": 31.13113085245855, + "learning_rate": 5.9523853928913365e-06, + "loss": 0.40699615478515627, + "step": 13135 + }, + { + "epoch": 0.11361769461569722, + "grad_norm": 0.14954078799188278, + "learning_rate": 5.9523492321033546e-06, + "loss": 0.17119140625, + "step": 13140 + }, + { + "epoch": 0.11366092813724049, + "grad_norm": 13.260425592100422, + "learning_rate": 5.952313057699399e-06, + "loss": 0.11300048828125, + "step": 13145 + }, + { + "epoch": 0.11370416165878375, + "grad_norm": 6.200713329179805, + "learning_rate": 5.952276869679636e-06, + "loss": 0.41906585693359377, + "step": 13150 + }, + { + "epoch": 0.11374739518032702, + "grad_norm": 20.615863047162772, + "learning_rate": 5.952240668044234e-06, + "loss": 0.5018600463867188, + "step": 13155 + }, + { + "epoch": 0.11379062870187029, + "grad_norm": 9.635565822904216, + "learning_rate": 5.952204452793358e-06, + "loss": 0.11567611694335937, + "step": 13160 + }, + { + "epoch": 0.11383386222341355, + "grad_norm": 22.05208128920924, + "learning_rate": 5.952168223927177e-06, + "loss": 0.5306877136230469, + "step": 13165 + }, + { + "epoch": 0.11387709574495682, + "grad_norm": 0.7585443419931235, + "learning_rate": 5.952131981445856e-06, + "loss": 0.5132888793945313, + "step": 13170 + }, + { + "epoch": 0.11392032926650007, + "grad_norm": 1.0722643181379086, + "learning_rate": 5.9520957253495635e-06, + "loss": 0.16079025268554686, + "step": 13175 + }, + { + "epoch": 0.11396356278804334, + "grad_norm": 33.81598290370876, + "learning_rate": 5.952059455638466e-06, + "loss": 0.451275634765625, + "step": 13180 + }, + { + "epoch": 0.1140067963095866, + "grad_norm": 25.92116384071318, + "learning_rate": 5.952023172312731e-06, + "loss": 0.14677734375, + "step": 13185 + }, + { + "epoch": 0.11405002983112987, + "grad_norm": 18.112895237976016, + "learning_rate": 5.9519868753725275e-06, + "loss": 0.14207916259765624, + "step": 13190 + }, + { + "epoch": 0.11409326335267313, + "grad_norm": 3.3900082247105896, + "learning_rate": 5.95195056481802e-06, + "loss": 0.0764373779296875, + "step": 13195 + }, + { + "epoch": 0.1141364968742164, + "grad_norm": 1.4500098361067297, + "learning_rate": 5.9519142406493775e-06, + "loss": 0.144830322265625, + "step": 13200 + }, + { + "epoch": 0.11417973039575965, + "grad_norm": 6.4737510634839115, + "learning_rate": 5.9518779028667665e-06, + "loss": 0.12152099609375, + "step": 13205 + }, + { + "epoch": 0.11422296391730292, + "grad_norm": 23.060380116270753, + "learning_rate": 5.951841551470357e-06, + "loss": 0.14736328125, + "step": 13210 + }, + { + "epoch": 0.11426619743884618, + "grad_norm": 2.4828072109754022, + "learning_rate": 5.9518051864603144e-06, + "loss": 0.43280029296875, + "step": 13215 + }, + { + "epoch": 0.11430943096038945, + "grad_norm": 2.3239494672489993, + "learning_rate": 5.951768807836808e-06, + "loss": 0.30771484375, + "step": 13220 + }, + { + "epoch": 0.11435266448193271, + "grad_norm": 12.650303806808838, + "learning_rate": 5.951732415600004e-06, + "loss": 0.27330322265625, + "step": 13225 + }, + { + "epoch": 0.11439589800347598, + "grad_norm": 34.04212441775144, + "learning_rate": 5.95169600975007e-06, + "loss": 0.2455352783203125, + "step": 13230 + }, + { + "epoch": 0.11443913152501924, + "grad_norm": 17.61570596030048, + "learning_rate": 5.951659590287176e-06, + "loss": 0.12211990356445312, + "step": 13235 + }, + { + "epoch": 0.1144823650465625, + "grad_norm": 38.04164242004995, + "learning_rate": 5.951623157211489e-06, + "loss": 0.32288818359375, + "step": 13240 + }, + { + "epoch": 0.11452559856810576, + "grad_norm": 3.965211371924213, + "learning_rate": 5.951586710523176e-06, + "loss": 0.057689666748046875, + "step": 13245 + }, + { + "epoch": 0.11456883208964903, + "grad_norm": 1.7081870976947051, + "learning_rate": 5.951550250222405e-06, + "loss": 0.13840789794921876, + "step": 13250 + }, + { + "epoch": 0.11461206561119229, + "grad_norm": 1.1424957890472929, + "learning_rate": 5.951513776309347e-06, + "loss": 0.16192626953125, + "step": 13255 + }, + { + "epoch": 0.11465529913273556, + "grad_norm": 60.6265810809121, + "learning_rate": 5.951477288784167e-06, + "loss": 0.329803466796875, + "step": 13260 + }, + { + "epoch": 0.11469853265427882, + "grad_norm": 9.034787375619835, + "learning_rate": 5.951440787647035e-06, + "loss": 0.084234619140625, + "step": 13265 + }, + { + "epoch": 0.11474176617582209, + "grad_norm": 8.890598063830044, + "learning_rate": 5.95140427289812e-06, + "loss": 0.427374267578125, + "step": 13270 + }, + { + "epoch": 0.11478499969736534, + "grad_norm": 14.679932649377523, + "learning_rate": 5.9513677445375874e-06, + "loss": 0.25331573486328124, + "step": 13275 + }, + { + "epoch": 0.11482823321890862, + "grad_norm": 25.36348748645272, + "learning_rate": 5.951331202565608e-06, + "loss": 0.390069580078125, + "step": 13280 + }, + { + "epoch": 0.11487146674045187, + "grad_norm": 23.65873079585341, + "learning_rate": 5.95129464698235e-06, + "loss": 0.089276123046875, + "step": 13285 + }, + { + "epoch": 0.11491470026199514, + "grad_norm": 1.9099432003992973, + "learning_rate": 5.951258077787982e-06, + "loss": 0.16594619750976564, + "step": 13290 + }, + { + "epoch": 0.1149579337835384, + "grad_norm": 1.3670667622422181, + "learning_rate": 5.951221494982673e-06, + "loss": 0.1529205322265625, + "step": 13295 + }, + { + "epoch": 0.11500116730508167, + "grad_norm": 23.348887554814365, + "learning_rate": 5.951184898566591e-06, + "loss": 0.2866851806640625, + "step": 13300 + }, + { + "epoch": 0.11504440082662493, + "grad_norm": 23.156000215969563, + "learning_rate": 5.951148288539905e-06, + "loss": 0.4106536865234375, + "step": 13305 + }, + { + "epoch": 0.1150876343481682, + "grad_norm": 5.2703355395925175, + "learning_rate": 5.951111664902783e-06, + "loss": 0.484307861328125, + "step": 13310 + }, + { + "epoch": 0.11513086786971145, + "grad_norm": 37.72704758444498, + "learning_rate": 5.951075027655395e-06, + "loss": 0.1615081787109375, + "step": 13315 + }, + { + "epoch": 0.11517410139125472, + "grad_norm": 34.96714032105794, + "learning_rate": 5.951038376797911e-06, + "loss": 0.3218505859375, + "step": 13320 + }, + { + "epoch": 0.11521733491279798, + "grad_norm": 34.51811401269841, + "learning_rate": 5.9510017123304976e-06, + "loss": 0.812158203125, + "step": 13325 + }, + { + "epoch": 0.11526056843434125, + "grad_norm": 8.101129558112467, + "learning_rate": 5.950965034253326e-06, + "loss": 0.4771484375, + "step": 13330 + }, + { + "epoch": 0.11530380195588451, + "grad_norm": 68.69360152516997, + "learning_rate": 5.950928342566563e-06, + "loss": 0.37457275390625, + "step": 13335 + }, + { + "epoch": 0.11534703547742778, + "grad_norm": 12.153654215816339, + "learning_rate": 5.95089163727038e-06, + "loss": 0.3000244140625, + "step": 13340 + }, + { + "epoch": 0.11539026899897105, + "grad_norm": 17.90314381418786, + "learning_rate": 5.950854918364946e-06, + "loss": 0.5798828125, + "step": 13345 + }, + { + "epoch": 0.1154335025205143, + "grad_norm": 43.26473710968844, + "learning_rate": 5.95081818585043e-06, + "loss": 0.47142333984375, + "step": 13350 + }, + { + "epoch": 0.11547673604205758, + "grad_norm": 12.813537655155478, + "learning_rate": 5.950781439727001e-06, + "loss": 0.3190399169921875, + "step": 13355 + }, + { + "epoch": 0.11551996956360083, + "grad_norm": 12.980772707202837, + "learning_rate": 5.950744679994828e-06, + "loss": 0.19301605224609375, + "step": 13360 + }, + { + "epoch": 0.1155632030851441, + "grad_norm": 8.370792603465182, + "learning_rate": 5.9507079066540815e-06, + "loss": 0.0705596923828125, + "step": 13365 + }, + { + "epoch": 0.11560643660668736, + "grad_norm": 17.75758948702746, + "learning_rate": 5.950671119704931e-06, + "loss": 0.36999359130859377, + "step": 13370 + }, + { + "epoch": 0.11564967012823063, + "grad_norm": 9.46179890431785, + "learning_rate": 5.9506343191475454e-06, + "loss": 0.509527587890625, + "step": 13375 + }, + { + "epoch": 0.11569290364977389, + "grad_norm": 4.0928550891456625, + "learning_rate": 5.950597504982096e-06, + "loss": 0.187103271484375, + "step": 13380 + }, + { + "epoch": 0.11573613717131716, + "grad_norm": 10.746195986024556, + "learning_rate": 5.950560677208752e-06, + "loss": 0.11446189880371094, + "step": 13385 + }, + { + "epoch": 0.11577937069286041, + "grad_norm": 11.819796992403763, + "learning_rate": 5.950523835827682e-06, + "loss": 0.153131103515625, + "step": 13390 + }, + { + "epoch": 0.11582260421440368, + "grad_norm": 2.066051057296743, + "learning_rate": 5.950486980839057e-06, + "loss": 0.1314788818359375, + "step": 13395 + }, + { + "epoch": 0.11586583773594694, + "grad_norm": 25.85283940289774, + "learning_rate": 5.950450112243046e-06, + "loss": 0.15613861083984376, + "step": 13400 + }, + { + "epoch": 0.11590907125749021, + "grad_norm": 8.299113568241706, + "learning_rate": 5.950413230039821e-06, + "loss": 0.3007293701171875, + "step": 13405 + }, + { + "epoch": 0.11595230477903347, + "grad_norm": 128.19845261176147, + "learning_rate": 5.95037633422955e-06, + "loss": 0.2497406005859375, + "step": 13410 + }, + { + "epoch": 0.11599553830057674, + "grad_norm": 25.00122727517647, + "learning_rate": 5.950339424812405e-06, + "loss": 0.1418609619140625, + "step": 13415 + }, + { + "epoch": 0.11603877182212, + "grad_norm": 9.244621258677848, + "learning_rate": 5.950302501788555e-06, + "loss": 0.078594970703125, + "step": 13420 + }, + { + "epoch": 0.11608200534366327, + "grad_norm": 23.255400932200615, + "learning_rate": 5.95026556515817e-06, + "loss": 0.1132720947265625, + "step": 13425 + }, + { + "epoch": 0.11612523886520652, + "grad_norm": 8.02338768663515, + "learning_rate": 5.95022861492142e-06, + "loss": 0.308233642578125, + "step": 13430 + }, + { + "epoch": 0.1161684723867498, + "grad_norm": 14.991222836933915, + "learning_rate": 5.950191651078478e-06, + "loss": 0.168505859375, + "step": 13435 + }, + { + "epoch": 0.11621170590829305, + "grad_norm": 20.378370028054324, + "learning_rate": 5.950154673629513e-06, + "loss": 0.1874176025390625, + "step": 13440 + }, + { + "epoch": 0.11625493942983632, + "grad_norm": 18.09655933946649, + "learning_rate": 5.9501176825746945e-06, + "loss": 0.154779052734375, + "step": 13445 + }, + { + "epoch": 0.11629817295137958, + "grad_norm": 15.766242591382222, + "learning_rate": 5.9500806779141934e-06, + "loss": 0.24238967895507812, + "step": 13450 + }, + { + "epoch": 0.11634140647292285, + "grad_norm": 1.2608222541043714, + "learning_rate": 5.9500436596481825e-06, + "loss": 0.241510009765625, + "step": 13455 + }, + { + "epoch": 0.1163846399944661, + "grad_norm": 7.37590767209123, + "learning_rate": 5.950006627776829e-06, + "loss": 0.219110107421875, + "step": 13460 + }, + { + "epoch": 0.11642787351600938, + "grad_norm": 16.15850681435082, + "learning_rate": 5.949969582300307e-06, + "loss": 0.23169403076171874, + "step": 13465 + }, + { + "epoch": 0.11647110703755263, + "grad_norm": 12.56484942272245, + "learning_rate": 5.949932523218786e-06, + "loss": 0.09032745361328125, + "step": 13470 + }, + { + "epoch": 0.1165143405590959, + "grad_norm": 1.6699268166981707, + "learning_rate": 5.949895450532435e-06, + "loss": 0.10404815673828124, + "step": 13475 + }, + { + "epoch": 0.11655757408063916, + "grad_norm": 3.4073612771361588, + "learning_rate": 5.9498583642414295e-06, + "loss": 0.080938720703125, + "step": 13480 + }, + { + "epoch": 0.11660080760218243, + "grad_norm": 43.91224897876333, + "learning_rate": 5.949821264345936e-06, + "loss": 0.3005401611328125, + "step": 13485 + }, + { + "epoch": 0.11664404112372569, + "grad_norm": 2.4578992212647774, + "learning_rate": 5.949784150846128e-06, + "loss": 0.57674560546875, + "step": 13490 + }, + { + "epoch": 0.11668727464526896, + "grad_norm": 10.78978094737179, + "learning_rate": 5.949747023742176e-06, + "loss": 0.31781005859375, + "step": 13495 + }, + { + "epoch": 0.11673050816681221, + "grad_norm": 28.408191414219598, + "learning_rate": 5.949709883034252e-06, + "loss": 0.182061767578125, + "step": 13500 + }, + { + "epoch": 0.11677374168835548, + "grad_norm": 0.5044550669022092, + "learning_rate": 5.949672728722526e-06, + "loss": 0.30053863525390623, + "step": 13505 + }, + { + "epoch": 0.11681697520989874, + "grad_norm": 43.036058244344915, + "learning_rate": 5.94963556080717e-06, + "loss": 0.2886322021484375, + "step": 13510 + }, + { + "epoch": 0.11686020873144201, + "grad_norm": 50.84204816703431, + "learning_rate": 5.949598379288356e-06, + "loss": 0.5115150451660156, + "step": 13515 + }, + { + "epoch": 0.11690344225298527, + "grad_norm": 21.678503853366966, + "learning_rate": 5.949561184166254e-06, + "loss": 0.12724456787109376, + "step": 13520 + }, + { + "epoch": 0.11694667577452854, + "grad_norm": 27.06631877417214, + "learning_rate": 5.949523975441037e-06, + "loss": 0.25278167724609374, + "step": 13525 + }, + { + "epoch": 0.11698990929607181, + "grad_norm": 19.622010506497478, + "learning_rate": 5.9494867531128765e-06, + "loss": 0.3244762420654297, + "step": 13530 + }, + { + "epoch": 0.11703314281761507, + "grad_norm": 13.415140822181002, + "learning_rate": 5.949449517181943e-06, + "loss": 0.13793859481811524, + "step": 13535 + }, + { + "epoch": 0.11707637633915834, + "grad_norm": 17.871631070823238, + "learning_rate": 5.9494122676484095e-06, + "loss": 0.3647552490234375, + "step": 13540 + }, + { + "epoch": 0.11711960986070159, + "grad_norm": 24.708506869574684, + "learning_rate": 5.949375004512447e-06, + "loss": 0.16846923828125, + "step": 13545 + }, + { + "epoch": 0.11716284338224486, + "grad_norm": 34.02261216651143, + "learning_rate": 5.949337727774228e-06, + "loss": 0.252703857421875, + "step": 13550 + }, + { + "epoch": 0.11720607690378812, + "grad_norm": 0.8196414180604751, + "learning_rate": 5.949300437433924e-06, + "loss": 0.0651947021484375, + "step": 13555 + }, + { + "epoch": 0.11724931042533139, + "grad_norm": 19.354596320242567, + "learning_rate": 5.949263133491706e-06, + "loss": 0.61287841796875, + "step": 13560 + }, + { + "epoch": 0.11729254394687465, + "grad_norm": 2.9881438221250383, + "learning_rate": 5.949225815947748e-06, + "loss": 0.295343017578125, + "step": 13565 + }, + { + "epoch": 0.11733577746841792, + "grad_norm": 1.5235343058685624, + "learning_rate": 5.94918848480222e-06, + "loss": 0.07382888793945312, + "step": 13570 + }, + { + "epoch": 0.11737901098996117, + "grad_norm": 10.614219480289117, + "learning_rate": 5.949151140055296e-06, + "loss": 0.29315185546875, + "step": 13575 + }, + { + "epoch": 0.11742224451150444, + "grad_norm": 4.417950195371763, + "learning_rate": 5.949113781707147e-06, + "loss": 0.206976318359375, + "step": 13580 + }, + { + "epoch": 0.1174654780330477, + "grad_norm": 26.006551243472764, + "learning_rate": 5.949076409757947e-06, + "loss": 0.49956550598144533, + "step": 13585 + }, + { + "epoch": 0.11750871155459097, + "grad_norm": 0.4685326031678949, + "learning_rate": 5.949039024207867e-06, + "loss": 0.07391128540039063, + "step": 13590 + }, + { + "epoch": 0.11755194507613423, + "grad_norm": 0.5773808243903257, + "learning_rate": 5.949001625057079e-06, + "loss": 0.25977783203125, + "step": 13595 + }, + { + "epoch": 0.1175951785976775, + "grad_norm": 0.6390091463925853, + "learning_rate": 5.948964212305756e-06, + "loss": 0.1332843780517578, + "step": 13600 + }, + { + "epoch": 0.11763841211922076, + "grad_norm": 2.4119825730972484, + "learning_rate": 5.948926785954071e-06, + "loss": 0.1530517578125, + "step": 13605 + }, + { + "epoch": 0.11768164564076403, + "grad_norm": 14.533662485773736, + "learning_rate": 5.9488893460021955e-06, + "loss": 0.0958282470703125, + "step": 13610 + }, + { + "epoch": 0.11772487916230728, + "grad_norm": 33.263169952602325, + "learning_rate": 5.948851892450304e-06, + "loss": 0.196099853515625, + "step": 13615 + }, + { + "epoch": 0.11776811268385055, + "grad_norm": 0.8364355114670619, + "learning_rate": 5.948814425298567e-06, + "loss": 0.2344635009765625, + "step": 13620 + }, + { + "epoch": 0.11781134620539381, + "grad_norm": 6.092294870168676, + "learning_rate": 5.9487769445471595e-06, + "loss": 0.3187976837158203, + "step": 13625 + }, + { + "epoch": 0.11785457972693708, + "grad_norm": 6.198622122356674, + "learning_rate": 5.9487394501962525e-06, + "loss": 0.2656768798828125, + "step": 13630 + }, + { + "epoch": 0.11789781324848034, + "grad_norm": 4.034893335415473, + "learning_rate": 5.948701942246021e-06, + "loss": 0.156646728515625, + "step": 13635 + }, + { + "epoch": 0.11794104677002361, + "grad_norm": 14.77443975407244, + "learning_rate": 5.948664420696635e-06, + "loss": 0.1833770751953125, + "step": 13640 + }, + { + "epoch": 0.11798428029156686, + "grad_norm": 18.73526096950516, + "learning_rate": 5.94862688554827e-06, + "loss": 0.30411376953125, + "step": 13645 + }, + { + "epoch": 0.11802751381311014, + "grad_norm": 5.979221185424452, + "learning_rate": 5.948589336801099e-06, + "loss": 0.45250473022460935, + "step": 13650 + }, + { + "epoch": 0.11807074733465339, + "grad_norm": 24.03896559237069, + "learning_rate": 5.948551774455294e-06, + "loss": 0.286236572265625, + "step": 13655 + }, + { + "epoch": 0.11811398085619666, + "grad_norm": 17.134483505062683, + "learning_rate": 5.9485141985110285e-06, + "loss": 0.1561767578125, + "step": 13660 + }, + { + "epoch": 0.11815721437773992, + "grad_norm": 3.021760715698225, + "learning_rate": 5.948476608968476e-06, + "loss": 0.08900222778320313, + "step": 13665 + }, + { + "epoch": 0.11820044789928319, + "grad_norm": 0.7224295062000115, + "learning_rate": 5.948439005827809e-06, + "loss": 0.04128341674804688, + "step": 13670 + }, + { + "epoch": 0.11824368142082645, + "grad_norm": 42.81612698030383, + "learning_rate": 5.948401389089204e-06, + "loss": 0.20619964599609375, + "step": 13675 + }, + { + "epoch": 0.11828691494236972, + "grad_norm": 4.333093077196887, + "learning_rate": 5.9483637587528316e-06, + "loss": 0.08975830078125, + "step": 13680 + }, + { + "epoch": 0.11833014846391297, + "grad_norm": 54.3226340558362, + "learning_rate": 5.948326114818866e-06, + "loss": 0.5503662109375, + "step": 13685 + }, + { + "epoch": 0.11837338198545624, + "grad_norm": 31.572950946451797, + "learning_rate": 5.948288457287479e-06, + "loss": 0.316851806640625, + "step": 13690 + }, + { + "epoch": 0.1184166155069995, + "grad_norm": 13.647870192661562, + "learning_rate": 5.948250786158849e-06, + "loss": 0.227532958984375, + "step": 13695 + }, + { + "epoch": 0.11845984902854277, + "grad_norm": 32.534328311551924, + "learning_rate": 5.948213101433146e-06, + "loss": 0.209228515625, + "step": 13700 + }, + { + "epoch": 0.11850308255008603, + "grad_norm": 5.79812160396483, + "learning_rate": 5.948175403110544e-06, + "loss": 0.02569427490234375, + "step": 13705 + }, + { + "epoch": 0.1185463160716293, + "grad_norm": 14.415694456087168, + "learning_rate": 5.948137691191217e-06, + "loss": 0.1663299560546875, + "step": 13710 + }, + { + "epoch": 0.11858954959317257, + "grad_norm": 3.6888600794957855, + "learning_rate": 5.948099965675341e-06, + "loss": 0.21384124755859374, + "step": 13715 + }, + { + "epoch": 0.11863278311471583, + "grad_norm": 9.935897589632269, + "learning_rate": 5.948062226563088e-06, + "loss": 0.1625335693359375, + "step": 13720 + }, + { + "epoch": 0.1186760166362591, + "grad_norm": 4.248459018010456, + "learning_rate": 5.9480244738546315e-06, + "loss": 0.17657394409179689, + "step": 13725 + }, + { + "epoch": 0.11871925015780235, + "grad_norm": 33.57655902280627, + "learning_rate": 5.947986707550148e-06, + "loss": 0.31884765625, + "step": 13730 + }, + { + "epoch": 0.11876248367934562, + "grad_norm": 3.2563050806020213, + "learning_rate": 5.9479489276498085e-06, + "loss": 0.149017333984375, + "step": 13735 + }, + { + "epoch": 0.11880571720088888, + "grad_norm": 6.135157343192234, + "learning_rate": 5.947911134153791e-06, + "loss": 0.5144332885742188, + "step": 13740 + }, + { + "epoch": 0.11884895072243215, + "grad_norm": 13.677224094128656, + "learning_rate": 5.947873327062267e-06, + "loss": 0.37874755859375, + "step": 13745 + }, + { + "epoch": 0.11889218424397541, + "grad_norm": 2.475153506219695, + "learning_rate": 5.947835506375412e-06, + "loss": 0.1259521484375, + "step": 13750 + }, + { + "epoch": 0.11893541776551868, + "grad_norm": 32.71727962575067, + "learning_rate": 5.9477976720934e-06, + "loss": 0.192919921875, + "step": 13755 + }, + { + "epoch": 0.11897865128706193, + "grad_norm": 38.971154528130064, + "learning_rate": 5.947759824216404e-06, + "loss": 0.2991546630859375, + "step": 13760 + }, + { + "epoch": 0.1190218848086052, + "grad_norm": 16.5333439722211, + "learning_rate": 5.947721962744603e-06, + "loss": 0.1004638671875, + "step": 13765 + }, + { + "epoch": 0.11906511833014846, + "grad_norm": 0.8750285115742303, + "learning_rate": 5.947684087678167e-06, + "loss": 0.2514251708984375, + "step": 13770 + }, + { + "epoch": 0.11910835185169173, + "grad_norm": 14.820688816161748, + "learning_rate": 5.947646199017273e-06, + "loss": 0.3454315185546875, + "step": 13775 + }, + { + "epoch": 0.11915158537323499, + "grad_norm": 7.598727932841654, + "learning_rate": 5.947608296762095e-06, + "loss": 0.28279571533203124, + "step": 13780 + }, + { + "epoch": 0.11919481889477826, + "grad_norm": 6.922588595906437, + "learning_rate": 5.947570380912809e-06, + "loss": 0.05346488952636719, + "step": 13785 + }, + { + "epoch": 0.11923805241632152, + "grad_norm": 6.09169959842249, + "learning_rate": 5.947532451469587e-06, + "loss": 0.07196197509765626, + "step": 13790 + }, + { + "epoch": 0.11928128593786479, + "grad_norm": 26.027437294512424, + "learning_rate": 5.947494508432606e-06, + "loss": 0.5828323364257812, + "step": 13795 + }, + { + "epoch": 0.11932451945940804, + "grad_norm": 14.624170114282718, + "learning_rate": 5.947456551802042e-06, + "loss": 0.3032264709472656, + "step": 13800 + }, + { + "epoch": 0.11936775298095131, + "grad_norm": 9.32711998993455, + "learning_rate": 5.947418581578068e-06, + "loss": 0.1793914794921875, + "step": 13805 + }, + { + "epoch": 0.11941098650249457, + "grad_norm": 0.505901138069074, + "learning_rate": 5.947380597760861e-06, + "loss": 0.29556808471679685, + "step": 13810 + }, + { + "epoch": 0.11945422002403784, + "grad_norm": 8.972097296701921, + "learning_rate": 5.947342600350594e-06, + "loss": 0.1718414306640625, + "step": 13815 + }, + { + "epoch": 0.1194974535455811, + "grad_norm": 4.94322760586074, + "learning_rate": 5.947304589347443e-06, + "loss": 0.133123779296875, + "step": 13820 + }, + { + "epoch": 0.11954068706712437, + "grad_norm": 0.7444262171600347, + "learning_rate": 5.947266564751585e-06, + "loss": 0.1864593505859375, + "step": 13825 + }, + { + "epoch": 0.11958392058866762, + "grad_norm": 21.474215885844057, + "learning_rate": 5.947228526563194e-06, + "loss": 0.09166259765625, + "step": 13830 + }, + { + "epoch": 0.1196271541102109, + "grad_norm": 1.0983818381959507, + "learning_rate": 5.947190474782444e-06, + "loss": 0.568304443359375, + "step": 13835 + }, + { + "epoch": 0.11967038763175415, + "grad_norm": 7.830113519702243, + "learning_rate": 5.9471524094095125e-06, + "loss": 0.354473876953125, + "step": 13840 + }, + { + "epoch": 0.11971362115329742, + "grad_norm": 18.262485602276143, + "learning_rate": 5.947114330444575e-06, + "loss": 0.482257080078125, + "step": 13845 + }, + { + "epoch": 0.11975685467484068, + "grad_norm": 15.788433577705089, + "learning_rate": 5.947076237887806e-06, + "loss": 0.1284271240234375, + "step": 13850 + }, + { + "epoch": 0.11980008819638395, + "grad_norm": 10.959715845787535, + "learning_rate": 5.947038131739383e-06, + "loss": 0.1308868408203125, + "step": 13855 + }, + { + "epoch": 0.1198433217179272, + "grad_norm": 1.487958195989044, + "learning_rate": 5.9470000119994795e-06, + "loss": 0.21090087890625, + "step": 13860 + }, + { + "epoch": 0.11988655523947048, + "grad_norm": 4.913285852083083, + "learning_rate": 5.946961878668272e-06, + "loss": 0.1037994384765625, + "step": 13865 + }, + { + "epoch": 0.11992978876101373, + "grad_norm": 1.7212382775449908, + "learning_rate": 5.9469237317459374e-06, + "loss": 0.1819610595703125, + "step": 13870 + }, + { + "epoch": 0.119973022282557, + "grad_norm": 21.146189964715788, + "learning_rate": 5.946885571232651e-06, + "loss": 0.457391357421875, + "step": 13875 + }, + { + "epoch": 0.12001625580410026, + "grad_norm": 1.8054010359644168, + "learning_rate": 5.946847397128589e-06, + "loss": 0.21758880615234374, + "step": 13880 + }, + { + "epoch": 0.12005948932564353, + "grad_norm": 30.843777507861628, + "learning_rate": 5.946809209433927e-06, + "loss": 0.23138885498046874, + "step": 13885 + }, + { + "epoch": 0.12010272284718679, + "grad_norm": 4.5336327832419565, + "learning_rate": 5.946771008148841e-06, + "loss": 0.06295166015625, + "step": 13890 + }, + { + "epoch": 0.12014595636873006, + "grad_norm": 4.519232075079036, + "learning_rate": 5.946732793273508e-06, + "loss": 0.139404296875, + "step": 13895 + }, + { + "epoch": 0.12018918989027333, + "grad_norm": 36.719509959112486, + "learning_rate": 5.946694564808102e-06, + "loss": 0.288482666015625, + "step": 13900 + }, + { + "epoch": 0.12023242341181659, + "grad_norm": 4.257066357736013, + "learning_rate": 5.946656322752803e-06, + "loss": 0.6545852661132813, + "step": 13905 + }, + { + "epoch": 0.12027565693335986, + "grad_norm": 8.01332357532078, + "learning_rate": 5.9466180671077845e-06, + "loss": 0.2817626953125, + "step": 13910 + }, + { + "epoch": 0.12031889045490311, + "grad_norm": 11.724294791626079, + "learning_rate": 5.946579797873224e-06, + "loss": 0.375640869140625, + "step": 13915 + }, + { + "epoch": 0.12036212397644638, + "grad_norm": 1.7013535879422232, + "learning_rate": 5.946541515049299e-06, + "loss": 0.28953399658203127, + "step": 13920 + }, + { + "epoch": 0.12040535749798964, + "grad_norm": 28.395337631529653, + "learning_rate": 5.946503218636183e-06, + "loss": 0.31351318359375, + "step": 13925 + }, + { + "epoch": 0.12044859101953291, + "grad_norm": 22.84625654646657, + "learning_rate": 5.946464908634055e-06, + "loss": 0.596551513671875, + "step": 13930 + }, + { + "epoch": 0.12049182454107617, + "grad_norm": 6.409056679569571, + "learning_rate": 5.946426585043092e-06, + "loss": 0.2879669189453125, + "step": 13935 + }, + { + "epoch": 0.12053505806261944, + "grad_norm": 41.44914911530297, + "learning_rate": 5.946388247863469e-06, + "loss": 0.424017333984375, + "step": 13940 + }, + { + "epoch": 0.1205782915841627, + "grad_norm": 5.977753429087851, + "learning_rate": 5.946349897095365e-06, + "loss": 0.2609130859375, + "step": 13945 + }, + { + "epoch": 0.12062152510570597, + "grad_norm": 14.85951176126837, + "learning_rate": 5.946311532738955e-06, + "loss": 0.05631103515625, + "step": 13950 + }, + { + "epoch": 0.12066475862724922, + "grad_norm": 18.243505495725394, + "learning_rate": 5.946273154794416e-06, + "loss": 0.103759765625, + "step": 13955 + }, + { + "epoch": 0.12070799214879249, + "grad_norm": 4.40255115581594, + "learning_rate": 5.946234763261926e-06, + "loss": 0.18023300170898438, + "step": 13960 + }, + { + "epoch": 0.12075122567033575, + "grad_norm": 2.56668628227149, + "learning_rate": 5.946196358141661e-06, + "loss": 0.23841552734375, + "step": 13965 + }, + { + "epoch": 0.12079445919187902, + "grad_norm": 5.228896143101051, + "learning_rate": 5.946157939433799e-06, + "loss": 0.11327590942382812, + "step": 13970 + }, + { + "epoch": 0.12083769271342228, + "grad_norm": 6.492272368594153, + "learning_rate": 5.946119507138518e-06, + "loss": 0.24498481750488282, + "step": 13975 + }, + { + "epoch": 0.12088092623496555, + "grad_norm": 0.5858632907264495, + "learning_rate": 5.946081061255993e-06, + "loss": 0.107135009765625, + "step": 13980 + }, + { + "epoch": 0.1209241597565088, + "grad_norm": 42.99721019381831, + "learning_rate": 5.9460426017864025e-06, + "loss": 0.13059816360473633, + "step": 13985 + }, + { + "epoch": 0.12096739327805207, + "grad_norm": 9.601177400021204, + "learning_rate": 5.946004128729924e-06, + "loss": 0.0852294921875, + "step": 13990 + }, + { + "epoch": 0.12101062679959533, + "grad_norm": 22.78620998707532, + "learning_rate": 5.945965642086735e-06, + "loss": 0.130377197265625, + "step": 13995 + }, + { + "epoch": 0.1210538603211386, + "grad_norm": 1.7646694313429403, + "learning_rate": 5.945927141857013e-06, + "loss": 0.12686614990234374, + "step": 14000 + }, + { + "epoch": 0.12109709384268186, + "grad_norm": 17.832134883259766, + "learning_rate": 5.945888628040935e-06, + "loss": 0.270196533203125, + "step": 14005 + }, + { + "epoch": 0.12114032736422513, + "grad_norm": 4.393115090201885, + "learning_rate": 5.945850100638679e-06, + "loss": 0.1709197998046875, + "step": 14010 + }, + { + "epoch": 0.12118356088576838, + "grad_norm": 45.93782094888231, + "learning_rate": 5.945811559650423e-06, + "loss": 0.36376495361328126, + "step": 14015 + }, + { + "epoch": 0.12122679440731166, + "grad_norm": 89.04475977200924, + "learning_rate": 5.945773005076344e-06, + "loss": 0.326300048828125, + "step": 14020 + }, + { + "epoch": 0.12127002792885491, + "grad_norm": 4.914577283271504, + "learning_rate": 5.94573443691662e-06, + "loss": 0.1940399169921875, + "step": 14025 + }, + { + "epoch": 0.12131326145039818, + "grad_norm": 2.287573880805592, + "learning_rate": 5.945695855171429e-06, + "loss": 0.0759185791015625, + "step": 14030 + }, + { + "epoch": 0.12135649497194144, + "grad_norm": 16.39400681281723, + "learning_rate": 5.945657259840949e-06, + "loss": 0.17054672241210939, + "step": 14035 + }, + { + "epoch": 0.12139972849348471, + "grad_norm": 4.868495579834552, + "learning_rate": 5.945618650925358e-06, + "loss": 0.28471832275390624, + "step": 14040 + }, + { + "epoch": 0.12144296201502797, + "grad_norm": 6.5747306204959575, + "learning_rate": 5.945580028424836e-06, + "loss": 0.11967620849609376, + "step": 14045 + }, + { + "epoch": 0.12148619553657124, + "grad_norm": 22.89474121708083, + "learning_rate": 5.945541392339556e-06, + "loss": 0.17161407470703124, + "step": 14050 + }, + { + "epoch": 0.1215294290581145, + "grad_norm": 0.7466861599188523, + "learning_rate": 5.945502742669701e-06, + "loss": 0.09027481079101562, + "step": 14055 + }, + { + "epoch": 0.12157266257965776, + "grad_norm": 13.696477961930016, + "learning_rate": 5.945464079415448e-06, + "loss": 0.10335311889648438, + "step": 14060 + }, + { + "epoch": 0.12161589610120102, + "grad_norm": 8.52395565052339, + "learning_rate": 5.945425402576974e-06, + "loss": 0.4436500549316406, + "step": 14065 + }, + { + "epoch": 0.12165912962274429, + "grad_norm": 7.6662038555214655, + "learning_rate": 5.945386712154458e-06, + "loss": 0.138739013671875, + "step": 14070 + }, + { + "epoch": 0.12170236314428755, + "grad_norm": 6.764118045359735, + "learning_rate": 5.94534800814808e-06, + "loss": 0.0570587158203125, + "step": 14075 + }, + { + "epoch": 0.12174559666583082, + "grad_norm": 1.7658117242486726, + "learning_rate": 5.945309290558016e-06, + "loss": 0.206060791015625, + "step": 14080 + }, + { + "epoch": 0.12178883018737409, + "grad_norm": 5.722249830802554, + "learning_rate": 5.945270559384446e-06, + "loss": 0.1626708984375, + "step": 14085 + }, + { + "epoch": 0.12183206370891735, + "grad_norm": 3.384728874852423, + "learning_rate": 5.9452318146275505e-06, + "loss": 0.124310302734375, + "step": 14090 + }, + { + "epoch": 0.12187529723046062, + "grad_norm": 15.87947984644811, + "learning_rate": 5.945193056287504e-06, + "loss": 0.08677978515625, + "step": 14095 + }, + { + "epoch": 0.12191853075200387, + "grad_norm": 13.31999612953753, + "learning_rate": 5.945154284364488e-06, + "loss": 0.045554733276367186, + "step": 14100 + }, + { + "epoch": 0.12196176427354714, + "grad_norm": 4.350399448601812, + "learning_rate": 5.945115498858681e-06, + "loss": 0.04995651245117187, + "step": 14105 + }, + { + "epoch": 0.1220049977950904, + "grad_norm": 24.152198977417466, + "learning_rate": 5.945076699770262e-06, + "loss": 0.1757843017578125, + "step": 14110 + }, + { + "epoch": 0.12204823131663367, + "grad_norm": 30.987170670906792, + "learning_rate": 5.9450378870994075e-06, + "loss": 0.21940345764160157, + "step": 14115 + }, + { + "epoch": 0.12209146483817693, + "grad_norm": 39.98918854874564, + "learning_rate": 5.9449990608463e-06, + "loss": 0.4396339416503906, + "step": 14120 + }, + { + "epoch": 0.1221346983597202, + "grad_norm": 34.61751927061697, + "learning_rate": 5.944960221011117e-06, + "loss": 0.3210426330566406, + "step": 14125 + }, + { + "epoch": 0.12217793188126345, + "grad_norm": 4.511703125348702, + "learning_rate": 5.944921367594037e-06, + "loss": 0.0866973876953125, + "step": 14130 + }, + { + "epoch": 0.12222116540280673, + "grad_norm": 6.924623536803289, + "learning_rate": 5.944882500595241e-06, + "loss": 0.17210006713867188, + "step": 14135 + }, + { + "epoch": 0.12226439892434998, + "grad_norm": 5.282313984231836, + "learning_rate": 5.944843620014905e-06, + "loss": 0.2490570068359375, + "step": 14140 + }, + { + "epoch": 0.12230763244589325, + "grad_norm": 26.293164300472416, + "learning_rate": 5.944804725853212e-06, + "loss": 0.3813201904296875, + "step": 14145 + }, + { + "epoch": 0.12235086596743651, + "grad_norm": 21.448928177702427, + "learning_rate": 5.944765818110339e-06, + "loss": 0.3851470947265625, + "step": 14150 + }, + { + "epoch": 0.12239409948897978, + "grad_norm": 2.8354048037804125, + "learning_rate": 5.944726896786468e-06, + "loss": 0.3708740234375, + "step": 14155 + }, + { + "epoch": 0.12243733301052304, + "grad_norm": 0.46894644983186423, + "learning_rate": 5.9446879618817756e-06, + "loss": 0.09022445678710937, + "step": 14160 + }, + { + "epoch": 0.1224805665320663, + "grad_norm": 16.65241334007879, + "learning_rate": 5.9446490133964415e-06, + "loss": 0.30999755859375, + "step": 14165 + }, + { + "epoch": 0.12252380005360956, + "grad_norm": 21.65848181499693, + "learning_rate": 5.944610051330648e-06, + "loss": 0.20634994506835938, + "step": 14170 + }, + { + "epoch": 0.12256703357515283, + "grad_norm": 13.452759703986398, + "learning_rate": 5.944571075684572e-06, + "loss": 0.09903564453125, + "step": 14175 + }, + { + "epoch": 0.12261026709669609, + "grad_norm": 2.966725504059389, + "learning_rate": 5.944532086458395e-06, + "loss": 0.08476715087890625, + "step": 14180 + }, + { + "epoch": 0.12265350061823936, + "grad_norm": 2.1712966022629874, + "learning_rate": 5.944493083652297e-06, + "loss": 0.8437990188598633, + "step": 14185 + }, + { + "epoch": 0.12269673413978262, + "grad_norm": 10.643388335488833, + "learning_rate": 5.944454067266455e-06, + "loss": 0.10001068115234375, + "step": 14190 + }, + { + "epoch": 0.12273996766132589, + "grad_norm": 3.5321097622729822, + "learning_rate": 5.944415037301053e-06, + "loss": 0.540966796875, + "step": 14195 + }, + { + "epoch": 0.12278320118286914, + "grad_norm": 1.688962710808847, + "learning_rate": 5.944375993756268e-06, + "loss": 0.2068866729736328, + "step": 14200 + }, + { + "epoch": 0.12282643470441242, + "grad_norm": 11.485655958511698, + "learning_rate": 5.9443369366322814e-06, + "loss": 0.28828125, + "step": 14205 + }, + { + "epoch": 0.12286966822595567, + "grad_norm": 11.74497222482383, + "learning_rate": 5.944297865929272e-06, + "loss": 0.163525390625, + "step": 14210 + }, + { + "epoch": 0.12291290174749894, + "grad_norm": 4.3218977811639885, + "learning_rate": 5.944258781647422e-06, + "loss": 0.221624755859375, + "step": 14215 + }, + { + "epoch": 0.1229561352690422, + "grad_norm": 1.3011222900605488, + "learning_rate": 5.944219683786911e-06, + "loss": 0.015833282470703126, + "step": 14220 + }, + { + "epoch": 0.12299936879058547, + "grad_norm": 7.775517633502911, + "learning_rate": 5.944180572347919e-06, + "loss": 0.13116188049316407, + "step": 14225 + }, + { + "epoch": 0.12304260231212873, + "grad_norm": 5.000744884249007, + "learning_rate": 5.944141447330625e-06, + "loss": 0.1968994140625, + "step": 14230 + }, + { + "epoch": 0.123085835833672, + "grad_norm": 12.829238680305117, + "learning_rate": 5.944102308735212e-06, + "loss": 0.1867431640625, + "step": 14235 + }, + { + "epoch": 0.12312906935521525, + "grad_norm": 8.210735955261901, + "learning_rate": 5.944063156561858e-06, + "loss": 0.089190673828125, + "step": 14240 + }, + { + "epoch": 0.12317230287675852, + "grad_norm": 29.772436599701507, + "learning_rate": 5.944023990810747e-06, + "loss": 0.18637542724609374, + "step": 14245 + }, + { + "epoch": 0.12321553639830178, + "grad_norm": 11.696387555896818, + "learning_rate": 5.943984811482056e-06, + "loss": 0.31256103515625, + "step": 14250 + }, + { + "epoch": 0.12325876991984505, + "grad_norm": 0.1306761216576402, + "learning_rate": 5.943945618575968e-06, + "loss": 0.11428794860839844, + "step": 14255 + }, + { + "epoch": 0.12330200344138831, + "grad_norm": 29.689230602454945, + "learning_rate": 5.943906412092663e-06, + "loss": 0.4296539306640625, + "step": 14260 + }, + { + "epoch": 0.12334523696293158, + "grad_norm": 8.25858867908835, + "learning_rate": 5.943867192032321e-06, + "loss": 0.20177230834960938, + "step": 14265 + }, + { + "epoch": 0.12338847048447485, + "grad_norm": 4.3195821617981975, + "learning_rate": 5.943827958395123e-06, + "loss": 0.07075958251953125, + "step": 14270 + }, + { + "epoch": 0.1234317040060181, + "grad_norm": 10.419381002720124, + "learning_rate": 5.9437887111812515e-06, + "loss": 0.0815521240234375, + "step": 14275 + }, + { + "epoch": 0.12347493752756138, + "grad_norm": 3.1034310786983945, + "learning_rate": 5.943749450390886e-06, + "loss": 0.2497100830078125, + "step": 14280 + }, + { + "epoch": 0.12351817104910463, + "grad_norm": 6.043356443412548, + "learning_rate": 5.943710176024209e-06, + "loss": 0.431103515625, + "step": 14285 + }, + { + "epoch": 0.1235614045706479, + "grad_norm": 14.846422438879197, + "learning_rate": 5.9436708880814005e-06, + "loss": 0.2263916015625, + "step": 14290 + }, + { + "epoch": 0.12360463809219116, + "grad_norm": 29.515311922202326, + "learning_rate": 5.943631586562642e-06, + "loss": 0.2375457763671875, + "step": 14295 + }, + { + "epoch": 0.12364787161373443, + "grad_norm": 3.643733898983005, + "learning_rate": 5.943592271468114e-06, + "loss": 0.2208740234375, + "step": 14300 + }, + { + "epoch": 0.12369110513527769, + "grad_norm": 1.271275644976522, + "learning_rate": 5.943552942797999e-06, + "loss": 0.1202484130859375, + "step": 14305 + }, + { + "epoch": 0.12373433865682096, + "grad_norm": 20.149386602835786, + "learning_rate": 5.943513600552479e-06, + "loss": 0.1759765625, + "step": 14310 + }, + { + "epoch": 0.12377757217836421, + "grad_norm": 15.427288877764397, + "learning_rate": 5.943474244731733e-06, + "loss": 0.247943115234375, + "step": 14315 + }, + { + "epoch": 0.12382080569990749, + "grad_norm": 13.236735250155636, + "learning_rate": 5.943434875335944e-06, + "loss": 0.2180450439453125, + "step": 14320 + }, + { + "epoch": 0.12386403922145074, + "grad_norm": 34.064460563329185, + "learning_rate": 5.943395492365294e-06, + "loss": 0.16140613555908204, + "step": 14325 + }, + { + "epoch": 0.12390727274299401, + "grad_norm": 17.520125422233537, + "learning_rate": 5.9433560958199645e-06, + "loss": 0.38096160888671876, + "step": 14330 + }, + { + "epoch": 0.12395050626453727, + "grad_norm": 21.11130028412074, + "learning_rate": 5.943316685700136e-06, + "loss": 0.126458740234375, + "step": 14335 + }, + { + "epoch": 0.12399373978608054, + "grad_norm": 2.753541619714424, + "learning_rate": 5.943277262005991e-06, + "loss": 0.0809967041015625, + "step": 14340 + }, + { + "epoch": 0.1240369733076238, + "grad_norm": 1.816143615032317, + "learning_rate": 5.943237824737711e-06, + "loss": 0.1245391845703125, + "step": 14345 + }, + { + "epoch": 0.12408020682916707, + "grad_norm": 1.0046148561397505, + "learning_rate": 5.943198373895479e-06, + "loss": 0.40610198974609374, + "step": 14350 + }, + { + "epoch": 0.12412344035071032, + "grad_norm": 6.2965237759622, + "learning_rate": 5.943158909479477e-06, + "loss": 0.13099212646484376, + "step": 14355 + }, + { + "epoch": 0.1241666738722536, + "grad_norm": 15.39707474483752, + "learning_rate": 5.943119431489885e-06, + "loss": 0.10918426513671875, + "step": 14360 + }, + { + "epoch": 0.12420990739379685, + "grad_norm": 3.3392740409377577, + "learning_rate": 5.943079939926885e-06, + "loss": 0.18876953125, + "step": 14365 + }, + { + "epoch": 0.12425314091534012, + "grad_norm": 33.95323001891189, + "learning_rate": 5.9430404347906625e-06, + "loss": 0.46908111572265626, + "step": 14370 + }, + { + "epoch": 0.12429637443688338, + "grad_norm": 5.856507579211151, + "learning_rate": 5.943000916081396e-06, + "loss": 0.028191375732421874, + "step": 14375 + }, + { + "epoch": 0.12433960795842665, + "grad_norm": 8.254955364787575, + "learning_rate": 5.94296138379927e-06, + "loss": 0.241326904296875, + "step": 14380 + }, + { + "epoch": 0.1243828414799699, + "grad_norm": 20.81218076786984, + "learning_rate": 5.942921837944467e-06, + "loss": 0.1828582763671875, + "step": 14385 + }, + { + "epoch": 0.12442607500151318, + "grad_norm": 0.5562274647378451, + "learning_rate": 5.942882278517168e-06, + "loss": 0.2639739990234375, + "step": 14390 + }, + { + "epoch": 0.12446930852305643, + "grad_norm": 13.714585396659524, + "learning_rate": 5.942842705517554e-06, + "loss": 0.180950927734375, + "step": 14395 + }, + { + "epoch": 0.1245125420445997, + "grad_norm": 0.7163156808339507, + "learning_rate": 5.942803118945812e-06, + "loss": 0.22217483520507814, + "step": 14400 + }, + { + "epoch": 0.12455577556614296, + "grad_norm": 4.438726941821817, + "learning_rate": 5.942763518802121e-06, + "loss": 0.329962158203125, + "step": 14405 + }, + { + "epoch": 0.12459900908768623, + "grad_norm": 0.8918994147715151, + "learning_rate": 5.942723905086665e-06, + "loss": 0.15106964111328125, + "step": 14410 + }, + { + "epoch": 0.12464224260922949, + "grad_norm": 4.232742232877931, + "learning_rate": 5.942684277799626e-06, + "loss": 0.15064697265625, + "step": 14415 + }, + { + "epoch": 0.12468547613077276, + "grad_norm": 1.9832336636554564, + "learning_rate": 5.942644636941187e-06, + "loss": 0.2016357421875, + "step": 14420 + }, + { + "epoch": 0.12472870965231601, + "grad_norm": 1.2864918307021984, + "learning_rate": 5.942604982511531e-06, + "loss": 0.1395294189453125, + "step": 14425 + }, + { + "epoch": 0.12477194317385928, + "grad_norm": 41.313693468762594, + "learning_rate": 5.942565314510842e-06, + "loss": 0.3254058837890625, + "step": 14430 + }, + { + "epoch": 0.12481517669540254, + "grad_norm": 0.6343026231982956, + "learning_rate": 5.942525632939301e-06, + "loss": 0.18028106689453124, + "step": 14435 + }, + { + "epoch": 0.12485841021694581, + "grad_norm": 14.56879526104336, + "learning_rate": 5.9424859377970914e-06, + "loss": 0.128277587890625, + "step": 14440 + }, + { + "epoch": 0.12490164373848907, + "grad_norm": 5.0355956683382805, + "learning_rate": 5.9424462290843975e-06, + "loss": 0.29796142578125, + "step": 14445 + }, + { + "epoch": 0.12494487726003234, + "grad_norm": 30.046545241632494, + "learning_rate": 5.9424065068014005e-06, + "loss": 0.696868896484375, + "step": 14450 + }, + { + "epoch": 0.12498811078157561, + "grad_norm": 1.9420318368382494, + "learning_rate": 5.942366770948286e-06, + "loss": 0.103851318359375, + "step": 14455 + }, + { + "epoch": 0.12503134430311888, + "grad_norm": 3.8193174476238676, + "learning_rate": 5.9423270215252345e-06, + "loss": 0.186785888671875, + "step": 14460 + }, + { + "epoch": 0.12507457782466214, + "grad_norm": 6.332401855624656, + "learning_rate": 5.942287258532431e-06, + "loss": 0.23977279663085938, + "step": 14465 + }, + { + "epoch": 0.1251178113462054, + "grad_norm": 39.70347408771592, + "learning_rate": 5.94224748197006e-06, + "loss": 0.758319091796875, + "step": 14470 + }, + { + "epoch": 0.12516104486774865, + "grad_norm": 10.954179735808623, + "learning_rate": 5.942207691838302e-06, + "loss": 0.29161376953125, + "step": 14475 + }, + { + "epoch": 0.12520427838929193, + "grad_norm": 33.14283396326635, + "learning_rate": 5.942167888137343e-06, + "loss": 0.550439453125, + "step": 14480 + }, + { + "epoch": 0.1252475119108352, + "grad_norm": 0.6044096577402993, + "learning_rate": 5.942128070867366e-06, + "loss": 0.15700302124023438, + "step": 14485 + }, + { + "epoch": 0.12529074543237845, + "grad_norm": 29.608661764293274, + "learning_rate": 5.942088240028553e-06, + "loss": 0.2116485595703125, + "step": 14490 + }, + { + "epoch": 0.1253339789539217, + "grad_norm": 23.65958613339211, + "learning_rate": 5.942048395621091e-06, + "loss": 0.5627792358398438, + "step": 14495 + }, + { + "epoch": 0.125377212475465, + "grad_norm": 42.675499346574924, + "learning_rate": 5.94200853764516e-06, + "loss": 0.5800552368164062, + "step": 14500 + }, + { + "epoch": 0.12542044599700825, + "grad_norm": 4.220429630135452, + "learning_rate": 5.941968666100946e-06, + "loss": 0.1303497314453125, + "step": 14505 + }, + { + "epoch": 0.1254636795185515, + "grad_norm": 25.702971804157524, + "learning_rate": 5.941928780988632e-06, + "loss": 0.11903419494628906, + "step": 14510 + }, + { + "epoch": 0.12550691304009476, + "grad_norm": 19.677237111272817, + "learning_rate": 5.941888882308403e-06, + "loss": 0.168817138671875, + "step": 14515 + }, + { + "epoch": 0.12555014656163804, + "grad_norm": 2.0723227847141197, + "learning_rate": 5.9418489700604426e-06, + "loss": 0.36098175048828124, + "step": 14520 + }, + { + "epoch": 0.1255933800831813, + "grad_norm": 21.092150201049055, + "learning_rate": 5.941809044244935e-06, + "loss": 0.27558555603027346, + "step": 14525 + }, + { + "epoch": 0.12563661360472456, + "grad_norm": 27.151648654120113, + "learning_rate": 5.941769104862063e-06, + "loss": 0.34094085693359377, + "step": 14530 + }, + { + "epoch": 0.1256798471262678, + "grad_norm": 3.2477024321102563, + "learning_rate": 5.941729151912013e-06, + "loss": 0.464703369140625, + "step": 14535 + }, + { + "epoch": 0.1257230806478111, + "grad_norm": 0.85159506073136, + "learning_rate": 5.9416891853949675e-06, + "loss": 0.17237548828125, + "step": 14540 + }, + { + "epoch": 0.12576631416935435, + "grad_norm": 49.1201630109314, + "learning_rate": 5.941649205311111e-06, + "loss": 0.337689208984375, + "step": 14545 + }, + { + "epoch": 0.1258095476908976, + "grad_norm": 0.8260037454996431, + "learning_rate": 5.941609211660629e-06, + "loss": 0.14111061096191407, + "step": 14550 + }, + { + "epoch": 0.12585278121244087, + "grad_norm": 61.01327134708362, + "learning_rate": 5.941569204443704e-06, + "loss": 0.28720703125, + "step": 14555 + }, + { + "epoch": 0.12589601473398415, + "grad_norm": 70.60954339261139, + "learning_rate": 5.941529183660523e-06, + "loss": 0.2680530548095703, + "step": 14560 + }, + { + "epoch": 0.1259392482555274, + "grad_norm": 9.28644581574521, + "learning_rate": 5.9414891493112695e-06, + "loss": 0.085516357421875, + "step": 14565 + }, + { + "epoch": 0.12598248177707067, + "grad_norm": 0.9693976839387545, + "learning_rate": 5.941449101396127e-06, + "loss": 0.15120925903320312, + "step": 14570 + }, + { + "epoch": 0.12602571529861392, + "grad_norm": 12.47008188279637, + "learning_rate": 5.941409039915282e-06, + "loss": 0.1227783203125, + "step": 14575 + }, + { + "epoch": 0.1260689488201572, + "grad_norm": 1.9246570749611254, + "learning_rate": 5.941368964868918e-06, + "loss": 0.55291748046875, + "step": 14580 + }, + { + "epoch": 0.12611218234170046, + "grad_norm": 7.621468248456648, + "learning_rate": 5.94132887625722e-06, + "loss": 0.059600830078125, + "step": 14585 + }, + { + "epoch": 0.12615541586324372, + "grad_norm": 15.913100924460794, + "learning_rate": 5.941288774080374e-06, + "loss": 0.24041748046875, + "step": 14590 + }, + { + "epoch": 0.12619864938478698, + "grad_norm": 54.85298183213921, + "learning_rate": 5.941248658338563e-06, + "loss": 0.41272697448730467, + "step": 14595 + }, + { + "epoch": 0.12624188290633026, + "grad_norm": 23.52875625196378, + "learning_rate": 5.941208529031974e-06, + "loss": 0.150457763671875, + "step": 14600 + }, + { + "epoch": 0.12628511642787352, + "grad_norm": 0.5572826351883255, + "learning_rate": 5.94116838616079e-06, + "loss": 0.09504985809326172, + "step": 14605 + }, + { + "epoch": 0.12632834994941677, + "grad_norm": 54.80219600482582, + "learning_rate": 5.941128229725198e-06, + "loss": 0.547332763671875, + "step": 14610 + }, + { + "epoch": 0.12637158347096003, + "grad_norm": 23.371456856340984, + "learning_rate": 5.941088059725383e-06, + "loss": 0.249908447265625, + "step": 14615 + }, + { + "epoch": 0.12641481699250331, + "grad_norm": 0.44674391248650325, + "learning_rate": 5.941047876161528e-06, + "loss": 0.13267440795898439, + "step": 14620 + }, + { + "epoch": 0.12645805051404657, + "grad_norm": 13.666461546305326, + "learning_rate": 5.941007679033822e-06, + "loss": 0.31432571411132815, + "step": 14625 + }, + { + "epoch": 0.12650128403558983, + "grad_norm": 1.3950414668225721, + "learning_rate": 5.940967468342448e-06, + "loss": 0.18843994140625, + "step": 14630 + }, + { + "epoch": 0.12654451755713308, + "grad_norm": 25.20466570919541, + "learning_rate": 5.940927244087591e-06, + "loss": 0.17603759765625, + "step": 14635 + }, + { + "epoch": 0.12658775107867637, + "grad_norm": 9.131994629498793, + "learning_rate": 5.9408870062694385e-06, + "loss": 0.15153045654296876, + "step": 14640 + }, + { + "epoch": 0.12663098460021963, + "grad_norm": 0.2500165556578089, + "learning_rate": 5.940846754888174e-06, + "loss": 0.18265838623046876, + "step": 14645 + }, + { + "epoch": 0.12667421812176288, + "grad_norm": 14.21455492430344, + "learning_rate": 5.940806489943985e-06, + "loss": 0.2299163818359375, + "step": 14650 + }, + { + "epoch": 0.12671745164330617, + "grad_norm": 2.33519610079509, + "learning_rate": 5.940766211437057e-06, + "loss": 0.037164306640625, + "step": 14655 + }, + { + "epoch": 0.12676068516484942, + "grad_norm": 19.145816112334245, + "learning_rate": 5.940725919367573e-06, + "loss": 0.15041961669921874, + "step": 14660 + }, + { + "epoch": 0.12680391868639268, + "grad_norm": 22.74153153072243, + "learning_rate": 5.940685613735722e-06, + "loss": 0.246539306640625, + "step": 14665 + }, + { + "epoch": 0.12684715220793594, + "grad_norm": 1.6158968136185476, + "learning_rate": 5.940645294541689e-06, + "loss": 0.20788230895996093, + "step": 14670 + }, + { + "epoch": 0.12689038572947922, + "grad_norm": 21.036226599077594, + "learning_rate": 5.9406049617856595e-06, + "loss": 0.17583770751953126, + "step": 14675 + }, + { + "epoch": 0.12693361925102248, + "grad_norm": 0.4134413919843054, + "learning_rate": 5.94056461546782e-06, + "loss": 0.2562347412109375, + "step": 14680 + }, + { + "epoch": 0.12697685277256573, + "grad_norm": 46.84681599887378, + "learning_rate": 5.9405242555883556e-06, + "loss": 0.2282867431640625, + "step": 14685 + }, + { + "epoch": 0.127020086294109, + "grad_norm": 11.431561804037612, + "learning_rate": 5.940483882147453e-06, + "loss": 0.3000679016113281, + "step": 14690 + }, + { + "epoch": 0.12706331981565228, + "grad_norm": 17.496922559829628, + "learning_rate": 5.9404434951453e-06, + "loss": 0.2761512756347656, + "step": 14695 + }, + { + "epoch": 0.12710655333719553, + "grad_norm": 18.611998393179697, + "learning_rate": 5.9404030945820805e-06, + "loss": 0.10152587890625, + "step": 14700 + }, + { + "epoch": 0.1271497868587388, + "grad_norm": 8.50667576646865, + "learning_rate": 5.940362680457982e-06, + "loss": 0.24488372802734376, + "step": 14705 + }, + { + "epoch": 0.12719302038028205, + "grad_norm": 2.9203808937950875, + "learning_rate": 5.94032225277319e-06, + "loss": 0.0431549072265625, + "step": 14710 + }, + { + "epoch": 0.12723625390182533, + "grad_norm": 13.129073104120945, + "learning_rate": 5.940281811527892e-06, + "loss": 0.1070098876953125, + "step": 14715 + }, + { + "epoch": 0.1272794874233686, + "grad_norm": 35.38266885628262, + "learning_rate": 5.940241356722275e-06, + "loss": 0.4693634033203125, + "step": 14720 + }, + { + "epoch": 0.12732272094491184, + "grad_norm": 4.0023114185021775, + "learning_rate": 5.940200888356524e-06, + "loss": 0.08883628845214844, + "step": 14725 + }, + { + "epoch": 0.1273659544664551, + "grad_norm": 16.2990481972515, + "learning_rate": 5.940160406430826e-06, + "loss": 0.2516265869140625, + "step": 14730 + }, + { + "epoch": 0.12740918798799838, + "grad_norm": 0.40383109664983224, + "learning_rate": 5.940119910945368e-06, + "loss": 0.09950103759765624, + "step": 14735 + }, + { + "epoch": 0.12745242150954164, + "grad_norm": 0.362470804466614, + "learning_rate": 5.940079401900337e-06, + "loss": 0.34006805419921876, + "step": 14740 + }, + { + "epoch": 0.1274956550310849, + "grad_norm": 6.001341302340899, + "learning_rate": 5.940038879295919e-06, + "loss": 0.101568603515625, + "step": 14745 + }, + { + "epoch": 0.12753888855262815, + "grad_norm": 3.178945478759964, + "learning_rate": 5.939998343132303e-06, + "loss": 0.2732666015625, + "step": 14750 + }, + { + "epoch": 0.12758212207417144, + "grad_norm": 8.898561011385189, + "learning_rate": 5.939957793409673e-06, + "loss": 0.27951202392578123, + "step": 14755 + }, + { + "epoch": 0.1276253555957147, + "grad_norm": 7.00568814733897, + "learning_rate": 5.939917230128218e-06, + "loss": 0.1552490234375, + "step": 14760 + }, + { + "epoch": 0.12766858911725795, + "grad_norm": 19.259246801643076, + "learning_rate": 5.939876653288124e-06, + "loss": 0.361688232421875, + "step": 14765 + }, + { + "epoch": 0.1277118226388012, + "grad_norm": 6.889745686484791, + "learning_rate": 5.939836062889579e-06, + "loss": 0.3530998229980469, + "step": 14770 + }, + { + "epoch": 0.1277550561603445, + "grad_norm": 33.953654028297066, + "learning_rate": 5.93979545893277e-06, + "loss": 0.169354248046875, + "step": 14775 + }, + { + "epoch": 0.12779828968188775, + "grad_norm": 15.610378312038257, + "learning_rate": 5.939754841417884e-06, + "loss": 0.1358856201171875, + "step": 14780 + }, + { + "epoch": 0.127841523203431, + "grad_norm": 2.972347658392523, + "learning_rate": 5.939714210345108e-06, + "loss": 0.1329986572265625, + "step": 14785 + }, + { + "epoch": 0.12788475672497426, + "grad_norm": 29.854168985608492, + "learning_rate": 5.9396735657146305e-06, + "loss": 0.26641387939453126, + "step": 14790 + }, + { + "epoch": 0.12792799024651755, + "grad_norm": 1.1372693927812765, + "learning_rate": 5.9396329075266375e-06, + "loss": 0.09199085235595703, + "step": 14795 + }, + { + "epoch": 0.1279712237680608, + "grad_norm": 36.67232909022184, + "learning_rate": 5.939592235781318e-06, + "loss": 0.432611083984375, + "step": 14800 + }, + { + "epoch": 0.12801445728960406, + "grad_norm": 4.1654959316363085, + "learning_rate": 5.939551550478859e-06, + "loss": 0.20751628875732422, + "step": 14805 + }, + { + "epoch": 0.12805769081114732, + "grad_norm": 22.147139955363652, + "learning_rate": 5.939510851619447e-06, + "loss": 0.1642852783203125, + "step": 14810 + }, + { + "epoch": 0.1281009243326906, + "grad_norm": 4.495696872550088, + "learning_rate": 5.939470139203272e-06, + "loss": 0.076910400390625, + "step": 14815 + }, + { + "epoch": 0.12814415785423386, + "grad_norm": 19.02393276496489, + "learning_rate": 5.939429413230519e-06, + "loss": 0.129901123046875, + "step": 14820 + }, + { + "epoch": 0.12818739137577712, + "grad_norm": 1.7416011162636469, + "learning_rate": 5.939388673701378e-06, + "loss": 0.1289337158203125, + "step": 14825 + }, + { + "epoch": 0.1282306248973204, + "grad_norm": 9.822327243137677, + "learning_rate": 5.939347920616036e-06, + "loss": 0.30933837890625, + "step": 14830 + }, + { + "epoch": 0.12827385841886366, + "grad_norm": 9.083017204327287, + "learning_rate": 5.939307153974682e-06, + "loss": 0.4836090087890625, + "step": 14835 + }, + { + "epoch": 0.1283170919404069, + "grad_norm": 13.550695783388427, + "learning_rate": 5.9392663737775025e-06, + "loss": 0.19510459899902344, + "step": 14840 + }, + { + "epoch": 0.12836032546195017, + "grad_norm": 2.2290238197863723, + "learning_rate": 5.939225580024686e-06, + "loss": 0.2306610107421875, + "step": 14845 + }, + { + "epoch": 0.12840355898349345, + "grad_norm": 19.193761925392476, + "learning_rate": 5.9391847727164205e-06, + "loss": 0.19164886474609374, + "step": 14850 + }, + { + "epoch": 0.1284467925050367, + "grad_norm": 32.10423203017397, + "learning_rate": 5.939143951852896e-06, + "loss": 0.3881561279296875, + "step": 14855 + }, + { + "epoch": 0.12849002602657997, + "grad_norm": 0.49938760463588705, + "learning_rate": 5.939103117434297e-06, + "loss": 0.22271728515625, + "step": 14860 + }, + { + "epoch": 0.12853325954812322, + "grad_norm": 3.660707652538978, + "learning_rate": 5.939062269460816e-06, + "loss": 0.1581329345703125, + "step": 14865 + }, + { + "epoch": 0.1285764930696665, + "grad_norm": 11.743578083902154, + "learning_rate": 5.939021407932639e-06, + "loss": 0.07793655395507812, + "step": 14870 + }, + { + "epoch": 0.12861972659120977, + "grad_norm": 11.40097337686374, + "learning_rate": 5.938980532849955e-06, + "loss": 0.16099853515625, + "step": 14875 + }, + { + "epoch": 0.12866296011275302, + "grad_norm": 9.48329604469777, + "learning_rate": 5.938939644212952e-06, + "loss": 0.3118743896484375, + "step": 14880 + }, + { + "epoch": 0.12870619363429628, + "grad_norm": 59.6846329899196, + "learning_rate": 5.9388987420218195e-06, + "loss": 0.1582183837890625, + "step": 14885 + }, + { + "epoch": 0.12874942715583956, + "grad_norm": 4.268502374571657, + "learning_rate": 5.938857826276746e-06, + "loss": 0.21883468627929686, + "step": 14890 + }, + { + "epoch": 0.12879266067738282, + "grad_norm": 0.508537353146423, + "learning_rate": 5.93881689697792e-06, + "loss": 0.1600860595703125, + "step": 14895 + }, + { + "epoch": 0.12883589419892608, + "grad_norm": 0.9516034163715622, + "learning_rate": 5.938775954125529e-06, + "loss": 0.17303237915039063, + "step": 14900 + }, + { + "epoch": 0.12887912772046933, + "grad_norm": 20.752389607645945, + "learning_rate": 5.938734997719763e-06, + "loss": 0.352178955078125, + "step": 14905 + }, + { + "epoch": 0.12892236124201262, + "grad_norm": 18.49706956116733, + "learning_rate": 5.938694027760813e-06, + "loss": 0.051035308837890626, + "step": 14910 + }, + { + "epoch": 0.12896559476355587, + "grad_norm": 44.59642222517665, + "learning_rate": 5.938653044248863e-06, + "loss": 0.5751373291015625, + "step": 14915 + }, + { + "epoch": 0.12900882828509913, + "grad_norm": 36.750458724650855, + "learning_rate": 5.938612047184106e-06, + "loss": 0.2181884765625, + "step": 14920 + }, + { + "epoch": 0.1290520618066424, + "grad_norm": 9.521953053562221, + "learning_rate": 5.938571036566731e-06, + "loss": 0.1797637939453125, + "step": 14925 + }, + { + "epoch": 0.12909529532818567, + "grad_norm": 11.27920600735301, + "learning_rate": 5.938530012396925e-06, + "loss": 0.39505615234375, + "step": 14930 + }, + { + "epoch": 0.12913852884972893, + "grad_norm": 23.824932062318226, + "learning_rate": 5.938488974674878e-06, + "loss": 0.29955368041992186, + "step": 14935 + }, + { + "epoch": 0.12918176237127219, + "grad_norm": 18.458108548473618, + "learning_rate": 5.93844792340078e-06, + "loss": 0.28008270263671875, + "step": 14940 + }, + { + "epoch": 0.12922499589281544, + "grad_norm": 35.48771089245462, + "learning_rate": 5.938406858574819e-06, + "loss": 0.3861419677734375, + "step": 14945 + }, + { + "epoch": 0.12926822941435873, + "grad_norm": 4.740401615587166, + "learning_rate": 5.9383657801971864e-06, + "loss": 0.0633392333984375, + "step": 14950 + }, + { + "epoch": 0.12931146293590198, + "grad_norm": 8.630893778022195, + "learning_rate": 5.938324688268069e-06, + "loss": 0.373040771484375, + "step": 14955 + }, + { + "epoch": 0.12935469645744524, + "grad_norm": 16.57815140074755, + "learning_rate": 5.93828358278766e-06, + "loss": 0.15899658203125, + "step": 14960 + }, + { + "epoch": 0.1293979299789885, + "grad_norm": 28.522924700947943, + "learning_rate": 5.938242463756145e-06, + "loss": 0.25382080078125, + "step": 14965 + }, + { + "epoch": 0.12944116350053178, + "grad_norm": 41.634463477245895, + "learning_rate": 5.9382013311737155e-06, + "loss": 0.2377105712890625, + "step": 14970 + }, + { + "epoch": 0.12948439702207504, + "grad_norm": 8.792056912212463, + "learning_rate": 5.938160185040562e-06, + "loss": 0.2664947509765625, + "step": 14975 + }, + { + "epoch": 0.1295276305436183, + "grad_norm": 28.67456145363823, + "learning_rate": 5.938119025356873e-06, + "loss": 0.35765533447265624, + "step": 14980 + }, + { + "epoch": 0.12957086406516155, + "grad_norm": 3.604994875535681, + "learning_rate": 5.938077852122837e-06, + "loss": 0.31644287109375, + "step": 14985 + }, + { + "epoch": 0.12961409758670484, + "grad_norm": 8.131180700689058, + "learning_rate": 5.938036665338647e-06, + "loss": 0.206829833984375, + "step": 14990 + }, + { + "epoch": 0.1296573311082481, + "grad_norm": 0.0716086918703926, + "learning_rate": 5.9379954650044915e-06, + "loss": 0.19271621704101563, + "step": 14995 + }, + { + "epoch": 0.12970056462979135, + "grad_norm": 32.02451943115926, + "learning_rate": 5.937954251120561e-06, + "loss": 0.3239013671875, + "step": 15000 + }, + { + "epoch": 0.1297437981513346, + "grad_norm": 5.49727172607535, + "learning_rate": 5.937913023687044e-06, + "loss": 0.2987030029296875, + "step": 15005 + }, + { + "epoch": 0.1297870316728779, + "grad_norm": 11.672411192351593, + "learning_rate": 5.937871782704133e-06, + "loss": 0.15284576416015624, + "step": 15010 + }, + { + "epoch": 0.12983026519442115, + "grad_norm": 68.78436567959058, + "learning_rate": 5.937830528172016e-06, + "loss": 0.681463623046875, + "step": 15015 + }, + { + "epoch": 0.1298734987159644, + "grad_norm": 31.71393894281273, + "learning_rate": 5.937789260090885e-06, + "loss": 0.2838008880615234, + "step": 15020 + }, + { + "epoch": 0.1299167322375077, + "grad_norm": 37.26310286166677, + "learning_rate": 5.9377479784609275e-06, + "loss": 0.17973823547363282, + "step": 15025 + }, + { + "epoch": 0.12995996575905094, + "grad_norm": 2.516787648052432, + "learning_rate": 5.937706683282338e-06, + "loss": 0.19639434814453124, + "step": 15030 + }, + { + "epoch": 0.1300031992805942, + "grad_norm": 6.442524102423849, + "learning_rate": 5.9376653745553045e-06, + "loss": 0.227093505859375, + "step": 15035 + }, + { + "epoch": 0.13004643280213746, + "grad_norm": 19.509080968515708, + "learning_rate": 5.937624052280017e-06, + "loss": 0.1394439697265625, + "step": 15040 + }, + { + "epoch": 0.13008966632368074, + "grad_norm": 0.19119882989369516, + "learning_rate": 5.937582716456668e-06, + "loss": 0.19120216369628906, + "step": 15045 + }, + { + "epoch": 0.130132899845224, + "grad_norm": 12.323833459126641, + "learning_rate": 5.937541367085446e-06, + "loss": 0.20343017578125, + "step": 15050 + }, + { + "epoch": 0.13017613336676725, + "grad_norm": 3.1496467324290336, + "learning_rate": 5.937500004166544e-06, + "loss": 0.20940837860107422, + "step": 15055 + }, + { + "epoch": 0.1302193668883105, + "grad_norm": 19.864901630531136, + "learning_rate": 5.937458627700151e-06, + "loss": 0.142822265625, + "step": 15060 + }, + { + "epoch": 0.1302626004098538, + "grad_norm": 8.175174069294785, + "learning_rate": 5.937417237686458e-06, + "loss": 0.30141448974609375, + "step": 15065 + }, + { + "epoch": 0.13030583393139705, + "grad_norm": 33.862014654201815, + "learning_rate": 5.937375834125657e-06, + "loss": 0.26781234741210935, + "step": 15070 + }, + { + "epoch": 0.1303490674529403, + "grad_norm": 9.588187547272032, + "learning_rate": 5.937334417017938e-06, + "loss": 0.2890869140625, + "step": 15075 + }, + { + "epoch": 0.13039230097448357, + "grad_norm": 0.9296761257318884, + "learning_rate": 5.937292986363491e-06, + "loss": 0.07383499145507813, + "step": 15080 + }, + { + "epoch": 0.13043553449602685, + "grad_norm": 43.053825362415225, + "learning_rate": 5.93725154216251e-06, + "loss": 0.25117645263671873, + "step": 15085 + }, + { + "epoch": 0.1304787680175701, + "grad_norm": 12.737585021101207, + "learning_rate": 5.937210084415184e-06, + "loss": 0.15650634765625, + "step": 15090 + }, + { + "epoch": 0.13052200153911336, + "grad_norm": 4.488774936163862, + "learning_rate": 5.937168613121704e-06, + "loss": 0.08349456787109374, + "step": 15095 + }, + { + "epoch": 0.13056523506065662, + "grad_norm": 38.46137137521181, + "learning_rate": 5.9371271282822615e-06, + "loss": 0.386328125, + "step": 15100 + }, + { + "epoch": 0.1306084685821999, + "grad_norm": 11.52856182455365, + "learning_rate": 5.937085629897049e-06, + "loss": 0.2709053039550781, + "step": 15105 + }, + { + "epoch": 0.13065170210374316, + "grad_norm": 39.52710222869654, + "learning_rate": 5.937044117966257e-06, + "loss": 0.62762451171875, + "step": 15110 + }, + { + "epoch": 0.13069493562528642, + "grad_norm": 31.8890257329816, + "learning_rate": 5.937002592490077e-06, + "loss": 0.361993408203125, + "step": 15115 + }, + { + "epoch": 0.13073816914682967, + "grad_norm": 49.46446676179151, + "learning_rate": 5.936961053468701e-06, + "loss": 0.418359375, + "step": 15120 + }, + { + "epoch": 0.13078140266837296, + "grad_norm": 10.320194992279205, + "learning_rate": 5.9369195009023195e-06, + "loss": 0.3207275390625, + "step": 15125 + }, + { + "epoch": 0.13082463618991622, + "grad_norm": 5.736516292283234, + "learning_rate": 5.936877934791125e-06, + "loss": 0.10708465576171874, + "step": 15130 + }, + { + "epoch": 0.13086786971145947, + "grad_norm": 2.3163668606180585, + "learning_rate": 5.93683635513531e-06, + "loss": 0.028679656982421874, + "step": 15135 + }, + { + "epoch": 0.13091110323300273, + "grad_norm": 11.644058884416106, + "learning_rate": 5.936794761935064e-06, + "loss": 0.5105178833007813, + "step": 15140 + }, + { + "epoch": 0.130954336754546, + "grad_norm": 5.232861144350562, + "learning_rate": 5.93675315519058e-06, + "loss": 0.32423038482666017, + "step": 15145 + }, + { + "epoch": 0.13099757027608927, + "grad_norm": 40.702313079006174, + "learning_rate": 5.93671153490205e-06, + "loss": 0.2764610290527344, + "step": 15150 + }, + { + "epoch": 0.13104080379763253, + "grad_norm": 5.514199051162708, + "learning_rate": 5.936669901069666e-06, + "loss": 0.0660888671875, + "step": 15155 + }, + { + "epoch": 0.13108403731917578, + "grad_norm": 2.0548508973597404, + "learning_rate": 5.936628253693621e-06, + "loss": 0.09753265380859374, + "step": 15160 + }, + { + "epoch": 0.13112727084071907, + "grad_norm": 30.54686866847536, + "learning_rate": 5.936586592774104e-06, + "loss": 0.231353759765625, + "step": 15165 + }, + { + "epoch": 0.13117050436226232, + "grad_norm": 33.17199038153279, + "learning_rate": 5.93654491831131e-06, + "loss": 0.1747802734375, + "step": 15170 + }, + { + "epoch": 0.13121373788380558, + "grad_norm": 4.041999750688126, + "learning_rate": 5.936503230305431e-06, + "loss": 0.13719482421875, + "step": 15175 + }, + { + "epoch": 0.13125697140534884, + "grad_norm": 34.04024394949842, + "learning_rate": 5.936461528756658e-06, + "loss": 0.51494140625, + "step": 15180 + }, + { + "epoch": 0.13130020492689212, + "grad_norm": 6.656457845149924, + "learning_rate": 5.936419813665184e-06, + "loss": 0.11875, + "step": 15185 + }, + { + "epoch": 0.13134343844843538, + "grad_norm": 15.802273431243554, + "learning_rate": 5.936378085031201e-06, + "loss": 0.1290191650390625, + "step": 15190 + }, + { + "epoch": 0.13138667196997864, + "grad_norm": 11.033824974211745, + "learning_rate": 5.936336342854902e-06, + "loss": 0.07615203857421875, + "step": 15195 + }, + { + "epoch": 0.13142990549152192, + "grad_norm": 17.16677903698663, + "learning_rate": 5.936294587136479e-06, + "loss": 0.20645751953125, + "step": 15200 + }, + { + "epoch": 0.13147313901306518, + "grad_norm": 5.082290531706724, + "learning_rate": 5.936252817876126e-06, + "loss": 0.0605255126953125, + "step": 15205 + }, + { + "epoch": 0.13151637253460843, + "grad_norm": 4.126248310362294, + "learning_rate": 5.936211035074033e-06, + "loss": 0.4830024719238281, + "step": 15210 + }, + { + "epoch": 0.1315596060561517, + "grad_norm": 0.10266548381880179, + "learning_rate": 5.936169238730394e-06, + "loss": 0.05181045532226562, + "step": 15215 + }, + { + "epoch": 0.13160283957769497, + "grad_norm": 5.6391077710777635, + "learning_rate": 5.936127428845403e-06, + "loss": 0.18089141845703124, + "step": 15220 + }, + { + "epoch": 0.13164607309923823, + "grad_norm": 1.3842933853393218, + "learning_rate": 5.936085605419251e-06, + "loss": 0.01595001220703125, + "step": 15225 + }, + { + "epoch": 0.1316893066207815, + "grad_norm": 13.567914444894331, + "learning_rate": 5.936043768452133e-06, + "loss": 0.416168212890625, + "step": 15230 + }, + { + "epoch": 0.13173254014232474, + "grad_norm": 13.161855574353337, + "learning_rate": 5.936001917944239e-06, + "loss": 0.13546791076660156, + "step": 15235 + }, + { + "epoch": 0.13177577366386803, + "grad_norm": 45.4044100656237, + "learning_rate": 5.935960053895764e-06, + "loss": 0.2619823455810547, + "step": 15240 + }, + { + "epoch": 0.13181900718541129, + "grad_norm": 6.549388322635006, + "learning_rate": 5.9359181763069004e-06, + "loss": 0.21012096405029296, + "step": 15245 + }, + { + "epoch": 0.13186224070695454, + "grad_norm": 4.941944586547778, + "learning_rate": 5.935876285177842e-06, + "loss": 0.6717132568359375, + "step": 15250 + }, + { + "epoch": 0.1319054742284978, + "grad_norm": 17.596950519869633, + "learning_rate": 5.935834380508781e-06, + "loss": 0.212548828125, + "step": 15255 + }, + { + "epoch": 0.13194870775004108, + "grad_norm": 49.51613181488686, + "learning_rate": 5.9357924622999115e-06, + "loss": 0.3044281005859375, + "step": 15260 + }, + { + "epoch": 0.13199194127158434, + "grad_norm": 14.992500041416713, + "learning_rate": 5.935750530551427e-06, + "loss": 0.19398193359375, + "step": 15265 + }, + { + "epoch": 0.1320351747931276, + "grad_norm": 18.20473346931192, + "learning_rate": 5.93570858526352e-06, + "loss": 0.19782638549804688, + "step": 15270 + }, + { + "epoch": 0.13207840831467085, + "grad_norm": 11.41723393670201, + "learning_rate": 5.935666626436385e-06, + "loss": 0.229693603515625, + "step": 15275 + }, + { + "epoch": 0.13212164183621414, + "grad_norm": 5.186328444972898, + "learning_rate": 5.935624654070215e-06, + "loss": 0.14853057861328126, + "step": 15280 + }, + { + "epoch": 0.1321648753577574, + "grad_norm": 11.389448436830547, + "learning_rate": 5.9355826681652025e-06, + "loss": 0.0634185791015625, + "step": 15285 + }, + { + "epoch": 0.13220810887930065, + "grad_norm": 0.7341542036261167, + "learning_rate": 5.935540668721543e-06, + "loss": 0.21934967041015624, + "step": 15290 + }, + { + "epoch": 0.1322513424008439, + "grad_norm": 45.76675934501357, + "learning_rate": 5.935498655739429e-06, + "loss": 0.16610946655273437, + "step": 15295 + }, + { + "epoch": 0.1322945759223872, + "grad_norm": 19.542493295236316, + "learning_rate": 5.935456629219055e-06, + "loss": 0.3954559326171875, + "step": 15300 + }, + { + "epoch": 0.13233780944393045, + "grad_norm": 34.23953341993522, + "learning_rate": 5.935414589160614e-06, + "loss": 0.258636474609375, + "step": 15305 + }, + { + "epoch": 0.1323810429654737, + "grad_norm": 22.13720597262187, + "learning_rate": 5.9353725355643e-06, + "loss": 0.6404861450195313, + "step": 15310 + }, + { + "epoch": 0.13242427648701696, + "grad_norm": 41.29272498536659, + "learning_rate": 5.935330468430308e-06, + "loss": 0.3713348388671875, + "step": 15315 + }, + { + "epoch": 0.13246751000856025, + "grad_norm": 0.4326387059259602, + "learning_rate": 5.935288387758831e-06, + "loss": 0.2120513916015625, + "step": 15320 + }, + { + "epoch": 0.1325107435301035, + "grad_norm": 21.17126988239507, + "learning_rate": 5.935246293550063e-06, + "loss": 0.399847412109375, + "step": 15325 + }, + { + "epoch": 0.13255397705164676, + "grad_norm": 2.472163104457559, + "learning_rate": 5.935204185804198e-06, + "loss": 0.199761962890625, + "step": 15330 + }, + { + "epoch": 0.13259721057319002, + "grad_norm": 9.324808478777523, + "learning_rate": 5.935162064521432e-06, + "loss": 0.11522216796875, + "step": 15335 + }, + { + "epoch": 0.1326404440947333, + "grad_norm": 9.766546674173993, + "learning_rate": 5.9351199297019564e-06, + "loss": 0.18369903564453124, + "step": 15340 + }, + { + "epoch": 0.13268367761627656, + "grad_norm": 3.2042361772737826, + "learning_rate": 5.935077781345968e-06, + "loss": 0.270953369140625, + "step": 15345 + }, + { + "epoch": 0.13272691113781981, + "grad_norm": 15.975381317318574, + "learning_rate": 5.93503561945366e-06, + "loss": 0.40148162841796875, + "step": 15350 + }, + { + "epoch": 0.13277014465936307, + "grad_norm": 34.819953575807034, + "learning_rate": 5.934993444025227e-06, + "loss": 0.15597381591796874, + "step": 15355 + }, + { + "epoch": 0.13281337818090636, + "grad_norm": 2.399194794301932, + "learning_rate": 5.934951255060863e-06, + "loss": 0.10696563720703126, + "step": 15360 + }, + { + "epoch": 0.1328566117024496, + "grad_norm": 36.77873462924236, + "learning_rate": 5.934909052560765e-06, + "loss": 0.2050323486328125, + "step": 15365 + }, + { + "epoch": 0.13289984522399287, + "grad_norm": 26.36970105545054, + "learning_rate": 5.934866836525124e-06, + "loss": 0.15618972778320311, + "step": 15370 + }, + { + "epoch": 0.13294307874553613, + "grad_norm": 47.58546921306091, + "learning_rate": 5.934824606954138e-06, + "loss": 0.20328826904296876, + "step": 15375 + }, + { + "epoch": 0.1329863122670794, + "grad_norm": 7.150733954642342, + "learning_rate": 5.9347823638479986e-06, + "loss": 0.4600830078125, + "step": 15380 + }, + { + "epoch": 0.13302954578862267, + "grad_norm": 4.754441475229637, + "learning_rate": 5.934740107206903e-06, + "loss": 0.3172271728515625, + "step": 15385 + }, + { + "epoch": 0.13307277931016592, + "grad_norm": 16.519270309222104, + "learning_rate": 5.934697837031045e-06, + "loss": 0.1487396240234375, + "step": 15390 + }, + { + "epoch": 0.1331160128317092, + "grad_norm": 38.87595107444872, + "learning_rate": 5.934655553320621e-06, + "loss": 0.3196857452392578, + "step": 15395 + }, + { + "epoch": 0.13315924635325246, + "grad_norm": 19.129983630876012, + "learning_rate": 5.9346132560758236e-06, + "loss": 0.4632072448730469, + "step": 15400 + }, + { + "epoch": 0.13320247987479572, + "grad_norm": 0.21834703219443757, + "learning_rate": 5.934570945296849e-06, + "loss": 0.15872535705566407, + "step": 15405 + }, + { + "epoch": 0.13324571339633898, + "grad_norm": 1.564027772625229, + "learning_rate": 5.934528620983894e-06, + "loss": 0.146734619140625, + "step": 15410 + }, + { + "epoch": 0.13328894691788226, + "grad_norm": 7.7215092244247705, + "learning_rate": 5.934486283137151e-06, + "loss": 0.2658195495605469, + "step": 15415 + }, + { + "epoch": 0.13333218043942552, + "grad_norm": 5.844314609778957, + "learning_rate": 5.934443931756817e-06, + "loss": 0.18275222778320313, + "step": 15420 + }, + { + "epoch": 0.13337541396096877, + "grad_norm": 7.564838444226029, + "learning_rate": 5.934401566843088e-06, + "loss": 0.307757568359375, + "step": 15425 + }, + { + "epoch": 0.13341864748251203, + "grad_norm": 44.69388646348608, + "learning_rate": 5.9343591883961565e-06, + "loss": 0.3287841796875, + "step": 15430 + }, + { + "epoch": 0.13346188100405532, + "grad_norm": 16.805163512169965, + "learning_rate": 5.934316796416221e-06, + "loss": 0.20107421875, + "step": 15435 + }, + { + "epoch": 0.13350511452559857, + "grad_norm": 27.749568421514944, + "learning_rate": 5.934274390903474e-06, + "loss": 0.180999755859375, + "step": 15440 + }, + { + "epoch": 0.13354834804714183, + "grad_norm": 13.53937239492805, + "learning_rate": 5.934231971858115e-06, + "loss": 0.19996185302734376, + "step": 15445 + }, + { + "epoch": 0.13359158156868509, + "grad_norm": 44.78678352389129, + "learning_rate": 5.934189539280336e-06, + "loss": 1.1053466796875, + "step": 15450 + }, + { + "epoch": 0.13363481509022837, + "grad_norm": 9.885230915184357, + "learning_rate": 5.934147093170334e-06, + "loss": 0.1235382080078125, + "step": 15455 + }, + { + "epoch": 0.13367804861177163, + "grad_norm": 26.224916059361448, + "learning_rate": 5.934104633528305e-06, + "loss": 0.3006500244140625, + "step": 15460 + }, + { + "epoch": 0.13372128213331488, + "grad_norm": 24.318265075833775, + "learning_rate": 5.934062160354445e-06, + "loss": 0.40423126220703126, + "step": 15465 + }, + { + "epoch": 0.13376451565485814, + "grad_norm": 7.672528419658072, + "learning_rate": 5.934019673648949e-06, + "loss": 0.041790771484375, + "step": 15470 + }, + { + "epoch": 0.13380774917640142, + "grad_norm": 10.220776182552768, + "learning_rate": 5.933977173412014e-06, + "loss": 0.20699005126953124, + "step": 15475 + }, + { + "epoch": 0.13385098269794468, + "grad_norm": 24.099027280268693, + "learning_rate": 5.933934659643836e-06, + "loss": 0.258648681640625, + "step": 15480 + }, + { + "epoch": 0.13389421621948794, + "grad_norm": 5.061578065270894, + "learning_rate": 5.93389213234461e-06, + "loss": 0.1931884765625, + "step": 15485 + }, + { + "epoch": 0.1339374497410312, + "grad_norm": 6.450684984518139, + "learning_rate": 5.933849591514533e-06, + "loss": 0.18192291259765625, + "step": 15490 + }, + { + "epoch": 0.13398068326257448, + "grad_norm": 0.9909678852845774, + "learning_rate": 5.9338070371538015e-06, + "loss": 0.4262786865234375, + "step": 15495 + }, + { + "epoch": 0.13402391678411774, + "grad_norm": 6.694081170846877, + "learning_rate": 5.9337644692626095e-06, + "loss": 0.1013916015625, + "step": 15500 + }, + { + "epoch": 0.134067150305661, + "grad_norm": 71.59444565799073, + "learning_rate": 5.933721887841157e-06, + "loss": 0.25119705200195314, + "step": 15505 + }, + { + "epoch": 0.13411038382720425, + "grad_norm": 0.5332824619763, + "learning_rate": 5.933679292889637e-06, + "loss": 0.32389678955078127, + "step": 15510 + }, + { + "epoch": 0.13415361734874753, + "grad_norm": 1.866593665906778, + "learning_rate": 5.933636684408248e-06, + "loss": 0.0759002685546875, + "step": 15515 + }, + { + "epoch": 0.1341968508702908, + "grad_norm": 13.980281026055318, + "learning_rate": 5.933594062397186e-06, + "loss": 0.18091964721679688, + "step": 15520 + }, + { + "epoch": 0.13424008439183405, + "grad_norm": 31.00615333780484, + "learning_rate": 5.933551426856647e-06, + "loss": 0.6096641540527343, + "step": 15525 + }, + { + "epoch": 0.1342833179133773, + "grad_norm": 8.46209130705364, + "learning_rate": 5.9335087777868275e-06, + "loss": 0.1901580810546875, + "step": 15530 + }, + { + "epoch": 0.1343265514349206, + "grad_norm": 1.266938237395735, + "learning_rate": 5.933466115187925e-06, + "loss": 0.056536865234375, + "step": 15535 + }, + { + "epoch": 0.13436978495646384, + "grad_norm": 19.653735675259636, + "learning_rate": 5.933423439060137e-06, + "loss": 0.1139129638671875, + "step": 15540 + }, + { + "epoch": 0.1344130184780071, + "grad_norm": 23.87584588294796, + "learning_rate": 5.933380749403658e-06, + "loss": 0.322296142578125, + "step": 15545 + }, + { + "epoch": 0.13445625199955036, + "grad_norm": 28.01233847712946, + "learning_rate": 5.933338046218687e-06, + "loss": 0.22877979278564453, + "step": 15550 + }, + { + "epoch": 0.13449948552109364, + "grad_norm": 4.482236252974542, + "learning_rate": 5.933295329505419e-06, + "loss": 0.17980728149414063, + "step": 15555 + }, + { + "epoch": 0.1345427190426369, + "grad_norm": 6.463858732820348, + "learning_rate": 5.933252599264053e-06, + "loss": 0.13226699829101562, + "step": 15560 + }, + { + "epoch": 0.13458595256418016, + "grad_norm": 4.024807706156505, + "learning_rate": 5.933209855494785e-06, + "loss": 0.27309112548828124, + "step": 15565 + }, + { + "epoch": 0.13462918608572344, + "grad_norm": 26.581897653913146, + "learning_rate": 5.933167098197813e-06, + "loss": 0.131353759765625, + "step": 15570 + }, + { + "epoch": 0.1346724196072667, + "grad_norm": 14.3654949708258, + "learning_rate": 5.933124327373333e-06, + "loss": 0.0975830078125, + "step": 15575 + }, + { + "epoch": 0.13471565312880995, + "grad_norm": 1.4443143307111441, + "learning_rate": 5.933081543021542e-06, + "loss": 0.11409912109375, + "step": 15580 + }, + { + "epoch": 0.1347588866503532, + "grad_norm": 9.68238966540108, + "learning_rate": 5.933038745142639e-06, + "loss": 0.09222412109375, + "step": 15585 + }, + { + "epoch": 0.1348021201718965, + "grad_norm": 0.3534070869287052, + "learning_rate": 5.9329959337368206e-06, + "loss": 0.0878662109375, + "step": 15590 + }, + { + "epoch": 0.13484535369343975, + "grad_norm": 30.27729183722176, + "learning_rate": 5.9329531088042835e-06, + "loss": 0.435089111328125, + "step": 15595 + }, + { + "epoch": 0.134888587214983, + "grad_norm": 14.205223716290298, + "learning_rate": 5.932910270345227e-06, + "loss": 0.17787246704101561, + "step": 15600 + }, + { + "epoch": 0.13493182073652626, + "grad_norm": 12.038808128933622, + "learning_rate": 5.9328674183598464e-06, + "loss": 0.22528305053710937, + "step": 15605 + }, + { + "epoch": 0.13497505425806955, + "grad_norm": 29.651304180040487, + "learning_rate": 5.932824552848341e-06, + "loss": 0.20831298828125, + "step": 15610 + }, + { + "epoch": 0.1350182877796128, + "grad_norm": 3.4862828169032185, + "learning_rate": 5.932781673810907e-06, + "loss": 0.13038330078125, + "step": 15615 + }, + { + "epoch": 0.13506152130115606, + "grad_norm": 6.666184976443255, + "learning_rate": 5.932738781247744e-06, + "loss": 0.059466552734375, + "step": 15620 + }, + { + "epoch": 0.13510475482269932, + "grad_norm": 3.7273253664554944, + "learning_rate": 5.932695875159049e-06, + "loss": 0.33057689666748047, + "step": 15625 + }, + { + "epoch": 0.1351479883442426, + "grad_norm": 6.366180567708482, + "learning_rate": 5.932652955545019e-06, + "loss": 0.1455230712890625, + "step": 15630 + }, + { + "epoch": 0.13519122186578586, + "grad_norm": 47.055455666538144, + "learning_rate": 5.932610022405853e-06, + "loss": 0.3109344482421875, + "step": 15635 + }, + { + "epoch": 0.13523445538732912, + "grad_norm": 3.059848564235419, + "learning_rate": 5.932567075741748e-06, + "loss": 0.43603515625, + "step": 15640 + }, + { + "epoch": 0.13527768890887237, + "grad_norm": 15.626767694280492, + "learning_rate": 5.932524115552904e-06, + "loss": 0.0814239501953125, + "step": 15645 + }, + { + "epoch": 0.13532092243041566, + "grad_norm": 29.48335669346052, + "learning_rate": 5.932481141839517e-06, + "loss": 0.172369384765625, + "step": 15650 + }, + { + "epoch": 0.13536415595195891, + "grad_norm": 21.938407106176015, + "learning_rate": 5.932438154601787e-06, + "loss": 0.23369140625, + "step": 15655 + }, + { + "epoch": 0.13540738947350217, + "grad_norm": 5.48813750560348, + "learning_rate": 5.93239515383991e-06, + "loss": 0.3010498046875, + "step": 15660 + }, + { + "epoch": 0.13545062299504543, + "grad_norm": 5.980967843263891, + "learning_rate": 5.932352139554087e-06, + "loss": 0.1413818359375, + "step": 15665 + }, + { + "epoch": 0.1354938565165887, + "grad_norm": 18.99669689727083, + "learning_rate": 5.9323091117445145e-06, + "loss": 0.2655494689941406, + "step": 15670 + }, + { + "epoch": 0.13553709003813197, + "grad_norm": 4.557641907258935, + "learning_rate": 5.932266070411392e-06, + "loss": 0.06163978576660156, + "step": 15675 + }, + { + "epoch": 0.13558032355967523, + "grad_norm": 8.37004854738529, + "learning_rate": 5.9322230155549166e-06, + "loss": 0.21973876953125, + "step": 15680 + }, + { + "epoch": 0.13562355708121848, + "grad_norm": 37.34484472539925, + "learning_rate": 5.932179947175288e-06, + "loss": 0.37808837890625, + "step": 15685 + }, + { + "epoch": 0.13566679060276177, + "grad_norm": 39.451363500351405, + "learning_rate": 5.9321368652727045e-06, + "loss": 0.25066680908203126, + "step": 15690 + }, + { + "epoch": 0.13571002412430502, + "grad_norm": 1.1809204566583442, + "learning_rate": 5.932093769847365e-06, + "loss": 0.3513397216796875, + "step": 15695 + }, + { + "epoch": 0.13575325764584828, + "grad_norm": 0.6035623894909119, + "learning_rate": 5.932050660899468e-06, + "loss": 0.1471038818359375, + "step": 15700 + }, + { + "epoch": 0.13579649116739154, + "grad_norm": 10.926758572643863, + "learning_rate": 5.932007538429213e-06, + "loss": 0.36240234375, + "step": 15705 + }, + { + "epoch": 0.13583972468893482, + "grad_norm": 2.41712483950103, + "learning_rate": 5.9319644024367975e-06, + "loss": 0.28682937622070315, + "step": 15710 + }, + { + "epoch": 0.13588295821047808, + "grad_norm": 1.451463949308235, + "learning_rate": 5.931921252922423e-06, + "loss": 0.156048583984375, + "step": 15715 + }, + { + "epoch": 0.13592619173202133, + "grad_norm": 3.0857090915531984, + "learning_rate": 5.931878089886285e-06, + "loss": 0.04258575439453125, + "step": 15720 + }, + { + "epoch": 0.1359694252535646, + "grad_norm": 2.9674497164064184, + "learning_rate": 5.931834913328584e-06, + "loss": 0.21537017822265625, + "step": 15725 + }, + { + "epoch": 0.13601265877510788, + "grad_norm": 7.612723820064911, + "learning_rate": 5.93179172324952e-06, + "loss": 0.2261016845703125, + "step": 15730 + }, + { + "epoch": 0.13605589229665113, + "grad_norm": 26.738863984261485, + "learning_rate": 5.931748519649292e-06, + "loss": 0.30914306640625, + "step": 15735 + }, + { + "epoch": 0.1360991258181944, + "grad_norm": 9.034034138907382, + "learning_rate": 5.931705302528099e-06, + "loss": 0.275811767578125, + "step": 15740 + }, + { + "epoch": 0.13614235933973765, + "grad_norm": 27.172806558136678, + "learning_rate": 5.93166207188614e-06, + "loss": 0.4097728729248047, + "step": 15745 + }, + { + "epoch": 0.13618559286128093, + "grad_norm": 6.8522997365812515, + "learning_rate": 5.931618827723614e-06, + "loss": 0.47980194091796874, + "step": 15750 + }, + { + "epoch": 0.1362288263828242, + "grad_norm": 10.674914536078747, + "learning_rate": 5.931575570040721e-06, + "loss": 0.6157363891601563, + "step": 15755 + }, + { + "epoch": 0.13627205990436744, + "grad_norm": 22.393639709139947, + "learning_rate": 5.931532298837662e-06, + "loss": 0.3499176025390625, + "step": 15760 + }, + { + "epoch": 0.13631529342591073, + "grad_norm": 7.37490937945627, + "learning_rate": 5.931489014114633e-06, + "loss": 0.05860595703125, + "step": 15765 + }, + { + "epoch": 0.13635852694745398, + "grad_norm": 26.349726639208612, + "learning_rate": 5.931445715871837e-06, + "loss": 0.38985748291015626, + "step": 15770 + }, + { + "epoch": 0.13640176046899724, + "grad_norm": 0.604530235294314, + "learning_rate": 5.9314024041094726e-06, + "loss": 0.254718017578125, + "step": 15775 + }, + { + "epoch": 0.1364449939905405, + "grad_norm": 3.5769299325951858, + "learning_rate": 5.931359078827739e-06, + "loss": 0.05955810546875, + "step": 15780 + }, + { + "epoch": 0.13648822751208378, + "grad_norm": 10.965991104005571, + "learning_rate": 5.931315740026836e-06, + "loss": 0.1360992431640625, + "step": 15785 + }, + { + "epoch": 0.13653146103362704, + "grad_norm": 6.006617366905231, + "learning_rate": 5.931272387706964e-06, + "loss": 0.16025009155273437, + "step": 15790 + }, + { + "epoch": 0.1365746945551703, + "grad_norm": 22.74723144341473, + "learning_rate": 5.931229021868323e-06, + "loss": 0.1923187255859375, + "step": 15795 + }, + { + "epoch": 0.13661792807671355, + "grad_norm": 39.01097206357572, + "learning_rate": 5.931185642511113e-06, + "loss": 0.40423583984375, + "step": 15800 + }, + { + "epoch": 0.13666116159825684, + "grad_norm": 1.7318346185467786, + "learning_rate": 5.931142249635533e-06, + "loss": 0.1259765625, + "step": 15805 + }, + { + "epoch": 0.1367043951198001, + "grad_norm": 61.873920840046296, + "learning_rate": 5.931098843241785e-06, + "loss": 0.2799896240234375, + "step": 15810 + }, + { + "epoch": 0.13674762864134335, + "grad_norm": 41.486359670945006, + "learning_rate": 5.931055423330066e-06, + "loss": 0.386541748046875, + "step": 15815 + }, + { + "epoch": 0.1367908621628866, + "grad_norm": 5.389614349132367, + "learning_rate": 5.9310119899005805e-06, + "loss": 0.0724761962890625, + "step": 15820 + }, + { + "epoch": 0.1368340956844299, + "grad_norm": 3.536927235571073, + "learning_rate": 5.930968542953525e-06, + "loss": 0.06963653564453125, + "step": 15825 + }, + { + "epoch": 0.13687732920597315, + "grad_norm": 20.447443062298643, + "learning_rate": 5.9309250824891035e-06, + "loss": 0.39294281005859377, + "step": 15830 + }, + { + "epoch": 0.1369205627275164, + "grad_norm": 22.988345587300255, + "learning_rate": 5.930881608507514e-06, + "loss": 0.23015899658203126, + "step": 15835 + }, + { + "epoch": 0.13696379624905966, + "grad_norm": 13.480414857399817, + "learning_rate": 5.930838121008956e-06, + "loss": 0.5200691223144531, + "step": 15840 + }, + { + "epoch": 0.13700702977060294, + "grad_norm": 140.52498114472826, + "learning_rate": 5.9307946199936324e-06, + "loss": 0.45081253051757814, + "step": 15845 + }, + { + "epoch": 0.1370502632921462, + "grad_norm": 19.21325342562093, + "learning_rate": 5.930751105461744e-06, + "loss": 0.3641845703125, + "step": 15850 + }, + { + "epoch": 0.13709349681368946, + "grad_norm": 21.174556104500564, + "learning_rate": 5.93070757741349e-06, + "loss": 0.14376220703125, + "step": 15855 + }, + { + "epoch": 0.13713673033523271, + "grad_norm": 29.55721823671225, + "learning_rate": 5.930664035849071e-06, + "loss": 0.16005859375, + "step": 15860 + }, + { + "epoch": 0.137179963856776, + "grad_norm": 2.917217916907111, + "learning_rate": 5.9306204807686885e-06, + "loss": 0.1173248291015625, + "step": 15865 + }, + { + "epoch": 0.13722319737831926, + "grad_norm": 8.842560086239352, + "learning_rate": 5.930576912172543e-06, + "loss": 0.05250396728515625, + "step": 15870 + }, + { + "epoch": 0.1372664308998625, + "grad_norm": 49.456311814651464, + "learning_rate": 5.930533330060837e-06, + "loss": 0.3007080078125, + "step": 15875 + }, + { + "epoch": 0.13730966442140577, + "grad_norm": 2.542768000995804, + "learning_rate": 5.9304897344337694e-06, + "loss": 0.15013427734375, + "step": 15880 + }, + { + "epoch": 0.13735289794294905, + "grad_norm": 2.5343737386869543, + "learning_rate": 5.930446125291542e-06, + "loss": 0.4632080078125, + "step": 15885 + }, + { + "epoch": 0.1373961314644923, + "grad_norm": 14.06930178283949, + "learning_rate": 5.9304025026343565e-06, + "loss": 0.31496429443359375, + "step": 15890 + }, + { + "epoch": 0.13743936498603557, + "grad_norm": 6.061535593276188, + "learning_rate": 5.930358866462413e-06, + "loss": 0.458917236328125, + "step": 15895 + }, + { + "epoch": 0.13748259850757882, + "grad_norm": 7.70183579621195, + "learning_rate": 5.9303152167759134e-06, + "loss": 0.051873779296875, + "step": 15900 + }, + { + "epoch": 0.1375258320291221, + "grad_norm": 19.11026631270834, + "learning_rate": 5.930271553575059e-06, + "loss": 0.12526092529296876, + "step": 15905 + }, + { + "epoch": 0.13756906555066536, + "grad_norm": 2.226112487993049, + "learning_rate": 5.930227876860051e-06, + "loss": 0.38644561767578123, + "step": 15910 + }, + { + "epoch": 0.13761229907220862, + "grad_norm": 10.249009380697546, + "learning_rate": 5.9301841866310925e-06, + "loss": 0.2422607421875, + "step": 15915 + }, + { + "epoch": 0.13765553259375188, + "grad_norm": 2.8294278868989893, + "learning_rate": 5.930140482888381e-06, + "loss": 0.16834564208984376, + "step": 15920 + }, + { + "epoch": 0.13769876611529516, + "grad_norm": 267.3516201443124, + "learning_rate": 5.930096765632122e-06, + "loss": 0.3294044494628906, + "step": 15925 + }, + { + "epoch": 0.13774199963683842, + "grad_norm": 18.95939217508487, + "learning_rate": 5.930053034862516e-06, + "loss": 0.21285400390625, + "step": 15930 + }, + { + "epoch": 0.13778523315838168, + "grad_norm": 13.038470424923387, + "learning_rate": 5.930009290579762e-06, + "loss": 0.090423583984375, + "step": 15935 + }, + { + "epoch": 0.13782846667992496, + "grad_norm": 0.5770472060549248, + "learning_rate": 5.929965532784066e-06, + "loss": 0.03912811279296875, + "step": 15940 + }, + { + "epoch": 0.13787170020146822, + "grad_norm": 5.749127773981404, + "learning_rate": 5.929921761475629e-06, + "loss": 0.15803604125976561, + "step": 15945 + }, + { + "epoch": 0.13791493372301147, + "grad_norm": 16.567993701861763, + "learning_rate": 5.929877976654649e-06, + "loss": 0.434539794921875, + "step": 15950 + }, + { + "epoch": 0.13795816724455473, + "grad_norm": 28.168863884127706, + "learning_rate": 5.929834178321333e-06, + "loss": 0.6388336181640625, + "step": 15955 + }, + { + "epoch": 0.13800140076609801, + "grad_norm": 1.8385092191101366, + "learning_rate": 5.929790366475879e-06, + "loss": 0.03444671630859375, + "step": 15960 + }, + { + "epoch": 0.13804463428764127, + "grad_norm": 17.57948369448117, + "learning_rate": 5.929746541118491e-06, + "loss": 0.13470458984375, + "step": 15965 + }, + { + "epoch": 0.13808786780918453, + "grad_norm": 1.2685397058852752, + "learning_rate": 5.929702702249372e-06, + "loss": 0.3978759765625, + "step": 15970 + }, + { + "epoch": 0.13813110133072778, + "grad_norm": 20.08927233692365, + "learning_rate": 5.9296588498687215e-06, + "loss": 0.2524147033691406, + "step": 15975 + }, + { + "epoch": 0.13817433485227107, + "grad_norm": 9.943484166176637, + "learning_rate": 5.929614983976743e-06, + "loss": 0.3594505310058594, + "step": 15980 + }, + { + "epoch": 0.13821756837381433, + "grad_norm": 17.444143784483714, + "learning_rate": 5.9295711045736405e-06, + "loss": 0.28685302734375, + "step": 15985 + }, + { + "epoch": 0.13826080189535758, + "grad_norm": 33.255345926948095, + "learning_rate": 5.9295272116596145e-06, + "loss": 0.47296905517578125, + "step": 15990 + }, + { + "epoch": 0.13830403541690084, + "grad_norm": 12.082653790691447, + "learning_rate": 5.9294833052348675e-06, + "loss": 0.2573150634765625, + "step": 15995 + }, + { + "epoch": 0.13834726893844412, + "grad_norm": 2.867313926930228, + "learning_rate": 5.929439385299603e-06, + "loss": 0.0577728271484375, + "step": 16000 + }, + { + "epoch": 0.13839050245998738, + "grad_norm": 33.968714135990076, + "learning_rate": 5.929395451854022e-06, + "loss": 0.1415008544921875, + "step": 16005 + }, + { + "epoch": 0.13843373598153064, + "grad_norm": 10.223132731540097, + "learning_rate": 5.929351504898328e-06, + "loss": 0.34122772216796876, + "step": 16010 + }, + { + "epoch": 0.1384769695030739, + "grad_norm": 2.5693991692618456, + "learning_rate": 5.929307544432724e-06, + "loss": 0.10935821533203124, + "step": 16015 + }, + { + "epoch": 0.13852020302461718, + "grad_norm": 4.503715743858449, + "learning_rate": 5.929263570457414e-06, + "loss": 0.199749755859375, + "step": 16020 + }, + { + "epoch": 0.13856343654616043, + "grad_norm": 15.682090690356924, + "learning_rate": 5.929219582972597e-06, + "loss": 0.095672607421875, + "step": 16025 + }, + { + "epoch": 0.1386066700677037, + "grad_norm": 5.567588619527575, + "learning_rate": 5.929175581978479e-06, + "loss": 0.1223663330078125, + "step": 16030 + }, + { + "epoch": 0.13864990358924695, + "grad_norm": 1.8891743707715172, + "learning_rate": 5.929131567475262e-06, + "loss": 0.0906585693359375, + "step": 16035 + }, + { + "epoch": 0.13869313711079023, + "grad_norm": 31.42320748034939, + "learning_rate": 5.929087539463149e-06, + "loss": 0.32440185546875, + "step": 16040 + }, + { + "epoch": 0.1387363706323335, + "grad_norm": 1.3139066840561868, + "learning_rate": 5.929043497942344e-06, + "loss": 0.26340179443359374, + "step": 16045 + }, + { + "epoch": 0.13877960415387675, + "grad_norm": 0.3352707828042887, + "learning_rate": 5.928999442913047e-06, + "loss": 0.13485107421875, + "step": 16050 + }, + { + "epoch": 0.13882283767542, + "grad_norm": 23.00240791431845, + "learning_rate": 5.928955374375466e-06, + "loss": 0.2256561279296875, + "step": 16055 + }, + { + "epoch": 0.1388660711969633, + "grad_norm": 45.41803419175751, + "learning_rate": 5.9289112923298e-06, + "loss": 0.3120361328125, + "step": 16060 + }, + { + "epoch": 0.13890930471850654, + "grad_norm": 1.3984208889413399, + "learning_rate": 5.928867196776254e-06, + "loss": 0.5205375671386718, + "step": 16065 + }, + { + "epoch": 0.1389525382400498, + "grad_norm": 26.85033955946842, + "learning_rate": 5.928823087715032e-06, + "loss": 0.168804931640625, + "step": 16070 + }, + { + "epoch": 0.13899577176159306, + "grad_norm": 19.63647737449243, + "learning_rate": 5.928778965146336e-06, + "loss": 0.060573577880859375, + "step": 16075 + }, + { + "epoch": 0.13903900528313634, + "grad_norm": 2.774718418792585, + "learning_rate": 5.9287348290703695e-06, + "loss": 0.08153228759765625, + "step": 16080 + }, + { + "epoch": 0.1390822388046796, + "grad_norm": 10.820705730678387, + "learning_rate": 5.928690679487339e-06, + "loss": 0.28583335876464844, + "step": 16085 + }, + { + "epoch": 0.13912547232622285, + "grad_norm": 2.9123407610999505, + "learning_rate": 5.928646516397444e-06, + "loss": 0.450689697265625, + "step": 16090 + }, + { + "epoch": 0.1391687058477661, + "grad_norm": 25.274515133906043, + "learning_rate": 5.92860233980089e-06, + "loss": 0.21689777374267577, + "step": 16095 + }, + { + "epoch": 0.1392119393693094, + "grad_norm": 15.777489078246619, + "learning_rate": 5.928558149697882e-06, + "loss": 0.04420013427734375, + "step": 16100 + }, + { + "epoch": 0.13925517289085265, + "grad_norm": 3.4455291663470367, + "learning_rate": 5.928513946088622e-06, + "loss": 0.157989501953125, + "step": 16105 + }, + { + "epoch": 0.1392984064123959, + "grad_norm": 54.583792276727735, + "learning_rate": 5.928469728973315e-06, + "loss": 0.22408599853515626, + "step": 16110 + }, + { + "epoch": 0.13934163993393917, + "grad_norm": 1.4576993673215357, + "learning_rate": 5.928425498352163e-06, + "loss": 0.33008346557617185, + "step": 16115 + }, + { + "epoch": 0.13938487345548245, + "grad_norm": 19.125552021814922, + "learning_rate": 5.9283812542253724e-06, + "loss": 0.373394775390625, + "step": 16120 + }, + { + "epoch": 0.1394281069770257, + "grad_norm": 61.95778809203529, + "learning_rate": 5.928336996593145e-06, + "loss": 0.44383544921875, + "step": 16125 + }, + { + "epoch": 0.13947134049856896, + "grad_norm": 35.98635037602293, + "learning_rate": 5.928292725455689e-06, + "loss": 0.29320068359375, + "step": 16130 + }, + { + "epoch": 0.13951457402011225, + "grad_norm": 9.980770880534669, + "learning_rate": 5.928248440813203e-06, + "loss": 0.09216670989990235, + "step": 16135 + }, + { + "epoch": 0.1395578075416555, + "grad_norm": 26.37696067033681, + "learning_rate": 5.928204142665894e-06, + "loss": 0.2286865234375, + "step": 16140 + }, + { + "epoch": 0.13960104106319876, + "grad_norm": 24.38724420141136, + "learning_rate": 5.928159831013968e-06, + "loss": 0.09842376708984375, + "step": 16145 + }, + { + "epoch": 0.13964427458474202, + "grad_norm": 7.1145864887395565, + "learning_rate": 5.928115505857627e-06, + "loss": 0.0645233154296875, + "step": 16150 + }, + { + "epoch": 0.1396875081062853, + "grad_norm": 6.688380432704362, + "learning_rate": 5.928071167197075e-06, + "loss": 0.09265594482421875, + "step": 16155 + }, + { + "epoch": 0.13973074162782856, + "grad_norm": 5.103470246365329, + "learning_rate": 5.928026815032519e-06, + "loss": 0.11761245727539063, + "step": 16160 + }, + { + "epoch": 0.13977397514937182, + "grad_norm": 6.23244818722278, + "learning_rate": 5.927982449364161e-06, + "loss": 0.06690902709960937, + "step": 16165 + }, + { + "epoch": 0.13981720867091507, + "grad_norm": 18.160380978726565, + "learning_rate": 5.927938070192206e-06, + "loss": 0.2678009033203125, + "step": 16170 + }, + { + "epoch": 0.13986044219245836, + "grad_norm": 4.730547155402601, + "learning_rate": 5.927893677516861e-06, + "loss": 0.17227783203125, + "step": 16175 + }, + { + "epoch": 0.1399036757140016, + "grad_norm": 18.372439154704644, + "learning_rate": 5.927849271338328e-06, + "loss": 0.7213926315307617, + "step": 16180 + }, + { + "epoch": 0.13994690923554487, + "grad_norm": 1.5283786854591095, + "learning_rate": 5.927804851656812e-06, + "loss": 0.022564697265625, + "step": 16185 + }, + { + "epoch": 0.13999014275708813, + "grad_norm": 4.75054862132701, + "learning_rate": 5.92776041847252e-06, + "loss": 0.04644927978515625, + "step": 16190 + }, + { + "epoch": 0.1400333762786314, + "grad_norm": 24.714281311770616, + "learning_rate": 5.9277159717856565e-06, + "loss": 0.24861373901367187, + "step": 16195 + }, + { + "epoch": 0.14007660980017467, + "grad_norm": 9.779516262463558, + "learning_rate": 5.9276715115964235e-06, + "loss": 0.112921142578125, + "step": 16200 + }, + { + "epoch": 0.14011984332171792, + "grad_norm": 1.0921109853193152, + "learning_rate": 5.92762703790503e-06, + "loss": 0.3003692626953125, + "step": 16205 + }, + { + "epoch": 0.14016307684326118, + "grad_norm": 3.939422238166449, + "learning_rate": 5.927582550711678e-06, + "loss": 0.11837005615234375, + "step": 16210 + }, + { + "epoch": 0.14020631036480447, + "grad_norm": 2.931149999512193, + "learning_rate": 5.927538050016575e-06, + "loss": 0.46580810546875, + "step": 16215 + }, + { + "epoch": 0.14024954388634772, + "grad_norm": 2.164329357710955, + "learning_rate": 5.927493535819924e-06, + "loss": 0.13133544921875, + "step": 16220 + }, + { + "epoch": 0.14029277740789098, + "grad_norm": 3.2528843733838566, + "learning_rate": 5.927449008121933e-06, + "loss": 0.08573455810546875, + "step": 16225 + }, + { + "epoch": 0.14033601092943424, + "grad_norm": 8.244124329015705, + "learning_rate": 5.927404466922805e-06, + "loss": 0.403759765625, + "step": 16230 + }, + { + "epoch": 0.14037924445097752, + "grad_norm": 11.520669263386461, + "learning_rate": 5.927359912222746e-06, + "loss": 0.10804901123046876, + "step": 16235 + }, + { + "epoch": 0.14042247797252078, + "grad_norm": 6.270246055810291, + "learning_rate": 5.9273153440219625e-06, + "loss": 0.3307586669921875, + "step": 16240 + }, + { + "epoch": 0.14046571149406403, + "grad_norm": 14.886285705754773, + "learning_rate": 5.927270762320659e-06, + "loss": 0.07600746154785157, + "step": 16245 + }, + { + "epoch": 0.1405089450156073, + "grad_norm": 45.29086373071676, + "learning_rate": 5.927226167119041e-06, + "loss": 0.45510406494140626, + "step": 16250 + }, + { + "epoch": 0.14055217853715057, + "grad_norm": 2.6149660108850363, + "learning_rate": 5.927181558417315e-06, + "loss": 0.28014068603515624, + "step": 16255 + }, + { + "epoch": 0.14059541205869383, + "grad_norm": 1.9009037682215122, + "learning_rate": 5.9271369362156865e-06, + "loss": 0.07784385681152343, + "step": 16260 + }, + { + "epoch": 0.1406386455802371, + "grad_norm": 5.7317912436736735, + "learning_rate": 5.927092300514361e-06, + "loss": 0.2625030517578125, + "step": 16265 + }, + { + "epoch": 0.14068187910178034, + "grad_norm": 17.255536214152624, + "learning_rate": 5.927047651313544e-06, + "loss": 0.4707855224609375, + "step": 16270 + }, + { + "epoch": 0.14072511262332363, + "grad_norm": 22.83957138991938, + "learning_rate": 5.927002988613442e-06, + "loss": 0.22706375122070313, + "step": 16275 + }, + { + "epoch": 0.14076834614486688, + "grad_norm": 4.857735682225901, + "learning_rate": 5.926958312414261e-06, + "loss": 0.31496429443359375, + "step": 16280 + }, + { + "epoch": 0.14081157966641014, + "grad_norm": 8.101233152778645, + "learning_rate": 5.926913622716207e-06, + "loss": 0.16481475830078124, + "step": 16285 + }, + { + "epoch": 0.1408548131879534, + "grad_norm": 9.237359849323925, + "learning_rate": 5.926868919519486e-06, + "loss": 0.08463287353515625, + "step": 16290 + }, + { + "epoch": 0.14089804670949668, + "grad_norm": 49.560708219689644, + "learning_rate": 5.926824202824304e-06, + "loss": 0.4633037567138672, + "step": 16295 + }, + { + "epoch": 0.14094128023103994, + "grad_norm": 10.909326389677352, + "learning_rate": 5.9267794726308666e-06, + "loss": 0.0648101806640625, + "step": 16300 + }, + { + "epoch": 0.1409845137525832, + "grad_norm": 55.895968932757704, + "learning_rate": 5.926734728939382e-06, + "loss": 0.37186279296875, + "step": 16305 + }, + { + "epoch": 0.14102774727412648, + "grad_norm": 11.632342764510902, + "learning_rate": 5.926689971750055e-06, + "loss": 0.12191314697265625, + "step": 16310 + }, + { + "epoch": 0.14107098079566974, + "grad_norm": 17.344955840994633, + "learning_rate": 5.926645201063093e-06, + "loss": 0.07750167846679687, + "step": 16315 + }, + { + "epoch": 0.141114214317213, + "grad_norm": 40.689001750186954, + "learning_rate": 5.926600416878701e-06, + "loss": 0.3976287841796875, + "step": 16320 + }, + { + "epoch": 0.14115744783875625, + "grad_norm": 28.484407016572014, + "learning_rate": 5.926555619197088e-06, + "loss": 0.6412124633789062, + "step": 16325 + }, + { + "epoch": 0.14120068136029953, + "grad_norm": 11.885413727738017, + "learning_rate": 5.926510808018457e-06, + "loss": 0.14867210388183594, + "step": 16330 + }, + { + "epoch": 0.1412439148818428, + "grad_norm": 48.147360459195895, + "learning_rate": 5.926465983343018e-06, + "loss": 0.216558837890625, + "step": 16335 + }, + { + "epoch": 0.14128714840338605, + "grad_norm": 8.042148633787018, + "learning_rate": 5.926421145170976e-06, + "loss": 0.227923583984375, + "step": 16340 + }, + { + "epoch": 0.1413303819249293, + "grad_norm": 6.433230386069336, + "learning_rate": 5.9263762935025396e-06, + "loss": 0.608807373046875, + "step": 16345 + }, + { + "epoch": 0.1413736154464726, + "grad_norm": 51.18809040321457, + "learning_rate": 5.926331428337912e-06, + "loss": 0.5113998413085937, + "step": 16350 + }, + { + "epoch": 0.14141684896801585, + "grad_norm": 5.297889012655832, + "learning_rate": 5.926286549677304e-06, + "loss": 0.05788726806640625, + "step": 16355 + }, + { + "epoch": 0.1414600824895591, + "grad_norm": 24.64332506628646, + "learning_rate": 5.926241657520921e-06, + "loss": 0.107257080078125, + "step": 16360 + }, + { + "epoch": 0.14150331601110236, + "grad_norm": 0.16351500056235488, + "learning_rate": 5.926196751868969e-06, + "loss": 0.44730148315429685, + "step": 16365 + }, + { + "epoch": 0.14154654953264564, + "grad_norm": 5.953252650989257, + "learning_rate": 5.926151832721657e-06, + "loss": 0.39794921875, + "step": 16370 + }, + { + "epoch": 0.1415897830541889, + "grad_norm": 28.379316911817867, + "learning_rate": 5.926106900079192e-06, + "loss": 0.2529022216796875, + "step": 16375 + }, + { + "epoch": 0.14163301657573216, + "grad_norm": 1.0836342011980145, + "learning_rate": 5.92606195394178e-06, + "loss": 0.1241241455078125, + "step": 16380 + }, + { + "epoch": 0.1416762500972754, + "grad_norm": 8.064061893717083, + "learning_rate": 5.926016994309628e-06, + "loss": 0.03536376953125, + "step": 16385 + }, + { + "epoch": 0.1417194836188187, + "grad_norm": 4.978307331625878, + "learning_rate": 5.9259720211829444e-06, + "loss": 0.07766494750976563, + "step": 16390 + }, + { + "epoch": 0.14176271714036195, + "grad_norm": 3.2086932971138213, + "learning_rate": 5.925927034561937e-06, + "loss": 0.1677734375, + "step": 16395 + }, + { + "epoch": 0.1418059506619052, + "grad_norm": 15.279037148675675, + "learning_rate": 5.925882034446812e-06, + "loss": 0.14821319580078124, + "step": 16400 + }, + { + "epoch": 0.14184918418344847, + "grad_norm": 44.00953421758605, + "learning_rate": 5.925837020837778e-06, + "loss": 0.277069091796875, + "step": 16405 + }, + { + "epoch": 0.14189241770499175, + "grad_norm": 6.682771575260814, + "learning_rate": 5.925791993735043e-06, + "loss": 0.155242919921875, + "step": 16410 + }, + { + "epoch": 0.141935651226535, + "grad_norm": 9.027546559398628, + "learning_rate": 5.925746953138813e-06, + "loss": 0.22848052978515626, + "step": 16415 + }, + { + "epoch": 0.14197888474807827, + "grad_norm": 0.9747324139289683, + "learning_rate": 5.9257018990492966e-06, + "loss": 0.08507080078125, + "step": 16420 + }, + { + "epoch": 0.14202211826962152, + "grad_norm": 6.998681356384003, + "learning_rate": 5.925656831466701e-06, + "loss": 0.20360641479492186, + "step": 16425 + }, + { + "epoch": 0.1420653517911648, + "grad_norm": 1.4685793151731426, + "learning_rate": 5.925611750391235e-06, + "loss": 0.160272216796875, + "step": 16430 + }, + { + "epoch": 0.14210858531270806, + "grad_norm": 4.699248528308727, + "learning_rate": 5.925566655823106e-06, + "loss": 0.338836669921875, + "step": 16435 + }, + { + "epoch": 0.14215181883425132, + "grad_norm": 1.7035074522493785, + "learning_rate": 5.925521547762522e-06, + "loss": 0.14292755126953124, + "step": 16440 + }, + { + "epoch": 0.14219505235579458, + "grad_norm": 12.049569021072879, + "learning_rate": 5.9254764262096916e-06, + "loss": 0.1182159423828125, + "step": 16445 + }, + { + "epoch": 0.14223828587733786, + "grad_norm": 64.76984691904356, + "learning_rate": 5.925431291164823e-06, + "loss": 0.42981796264648436, + "step": 16450 + }, + { + "epoch": 0.14228151939888112, + "grad_norm": 19.63635444265763, + "learning_rate": 5.925386142628122e-06, + "loss": 0.11247940063476562, + "step": 16455 + }, + { + "epoch": 0.14232475292042437, + "grad_norm": 25.387420862315064, + "learning_rate": 5.9253409805998e-06, + "loss": 0.29624786376953127, + "step": 16460 + }, + { + "epoch": 0.14236798644196763, + "grad_norm": 43.36643796903917, + "learning_rate": 5.925295805080064e-06, + "loss": 0.418145751953125, + "step": 16465 + }, + { + "epoch": 0.14241121996351092, + "grad_norm": 53.74726891371195, + "learning_rate": 5.9252506160691215e-06, + "loss": 0.2779361724853516, + "step": 16470 + }, + { + "epoch": 0.14245445348505417, + "grad_norm": 9.650172816547935, + "learning_rate": 5.925205413567181e-06, + "loss": 0.1570404052734375, + "step": 16475 + }, + { + "epoch": 0.14249768700659743, + "grad_norm": 0.4719856628218746, + "learning_rate": 5.925160197574453e-06, + "loss": 0.0851776123046875, + "step": 16480 + }, + { + "epoch": 0.14254092052814069, + "grad_norm": 9.929980784784279, + "learning_rate": 5.925114968091145e-06, + "loss": 0.2656707763671875, + "step": 16485 + }, + { + "epoch": 0.14258415404968397, + "grad_norm": 3.2977696152379026, + "learning_rate": 5.925069725117464e-06, + "loss": 0.08206787109375, + "step": 16490 + }, + { + "epoch": 0.14262738757122723, + "grad_norm": 6.019712050532792, + "learning_rate": 5.92502446865362e-06, + "loss": 0.08149032592773438, + "step": 16495 + }, + { + "epoch": 0.14267062109277048, + "grad_norm": 2.9987437467780316, + "learning_rate": 5.924979198699822e-06, + "loss": 0.17388916015625, + "step": 16500 + }, + { + "epoch": 0.14271385461431377, + "grad_norm": 13.802550874670333, + "learning_rate": 5.9249339152562776e-06, + "loss": 0.171343994140625, + "step": 16505 + }, + { + "epoch": 0.14275708813585702, + "grad_norm": 12.93965007972728, + "learning_rate": 5.924888618323197e-06, + "loss": 0.0737274169921875, + "step": 16510 + }, + { + "epoch": 0.14280032165740028, + "grad_norm": 1.7937662062590978, + "learning_rate": 5.924843307900789e-06, + "loss": 0.244720458984375, + "step": 16515 + }, + { + "epoch": 0.14284355517894354, + "grad_norm": 6.95679317089471, + "learning_rate": 5.924797983989262e-06, + "loss": 0.4449207305908203, + "step": 16520 + }, + { + "epoch": 0.14288678870048682, + "grad_norm": 31.943836099679853, + "learning_rate": 5.924752646588826e-06, + "loss": 0.410107421875, + "step": 16525 + }, + { + "epoch": 0.14293002222203008, + "grad_norm": 2.1887878280401005, + "learning_rate": 5.924707295699687e-06, + "loss": 0.1662872314453125, + "step": 16530 + }, + { + "epoch": 0.14297325574357334, + "grad_norm": 1.006993492928405, + "learning_rate": 5.9246619313220575e-06, + "loss": 0.06823806762695313, + "step": 16535 + }, + { + "epoch": 0.1430164892651166, + "grad_norm": 1.2973630585221883, + "learning_rate": 5.924616553456146e-06, + "loss": 0.3257293701171875, + "step": 16540 + }, + { + "epoch": 0.14305972278665988, + "grad_norm": 4.009486150416436, + "learning_rate": 5.924571162102161e-06, + "loss": 0.10010986328125, + "step": 16545 + }, + { + "epoch": 0.14310295630820313, + "grad_norm": 13.143705522939443, + "learning_rate": 5.924525757260313e-06, + "loss": 0.2120849609375, + "step": 16550 + }, + { + "epoch": 0.1431461898297464, + "grad_norm": 16.793918403566554, + "learning_rate": 5.924480338930808e-06, + "loss": 0.24268798828125, + "step": 16555 + }, + { + "epoch": 0.14318942335128965, + "grad_norm": 28.521204556578493, + "learning_rate": 5.924434907113862e-06, + "loss": 0.3591594696044922, + "step": 16560 + }, + { + "epoch": 0.14323265687283293, + "grad_norm": 13.280001485053758, + "learning_rate": 5.924389461809677e-06, + "loss": 0.17744369506835939, + "step": 16565 + }, + { + "epoch": 0.1432758903943762, + "grad_norm": 16.15916814114217, + "learning_rate": 5.9243440030184685e-06, + "loss": 0.450091552734375, + "step": 16570 + }, + { + "epoch": 0.14331912391591944, + "grad_norm": 16.202756794922657, + "learning_rate": 5.924298530740443e-06, + "loss": 0.0381011962890625, + "step": 16575 + }, + { + "epoch": 0.1433623574374627, + "grad_norm": 45.269768360252364, + "learning_rate": 5.924253044975811e-06, + "loss": 0.3175384521484375, + "step": 16580 + }, + { + "epoch": 0.14340559095900599, + "grad_norm": 13.722592557815569, + "learning_rate": 5.924207545724782e-06, + "loss": 0.156671142578125, + "step": 16585 + }, + { + "epoch": 0.14344882448054924, + "grad_norm": 8.403065314825366, + "learning_rate": 5.924162032987567e-06, + "loss": 0.22517852783203124, + "step": 16590 + }, + { + "epoch": 0.1434920580020925, + "grad_norm": 1.6881244711426495, + "learning_rate": 5.924116506764375e-06, + "loss": 0.09156265258789062, + "step": 16595 + }, + { + "epoch": 0.14353529152363576, + "grad_norm": 13.349474419399659, + "learning_rate": 5.9240709670554154e-06, + "loss": 0.14130859375, + "step": 16600 + }, + { + "epoch": 0.14357852504517904, + "grad_norm": 4.064264925424284, + "learning_rate": 5.9240254138609e-06, + "loss": 0.237982177734375, + "step": 16605 + }, + { + "epoch": 0.1436217585667223, + "grad_norm": 12.378501885736545, + "learning_rate": 5.923979847181036e-06, + "loss": 0.072479248046875, + "step": 16610 + }, + { + "epoch": 0.14366499208826555, + "grad_norm": 31.901114049011, + "learning_rate": 5.923934267016037e-06, + "loss": 0.4554145812988281, + "step": 16615 + }, + { + "epoch": 0.1437082256098088, + "grad_norm": 3.8545874734672334, + "learning_rate": 5.923888673366111e-06, + "loss": 0.1373992919921875, + "step": 16620 + }, + { + "epoch": 0.1437514591313521, + "grad_norm": 22.90930626907558, + "learning_rate": 5.923843066231468e-06, + "loss": 0.21756744384765625, + "step": 16625 + }, + { + "epoch": 0.14379469265289535, + "grad_norm": 7.359670268559375, + "learning_rate": 5.923797445612321e-06, + "loss": 0.12798194885253905, + "step": 16630 + }, + { + "epoch": 0.1438379261744386, + "grad_norm": 5.591680028199843, + "learning_rate": 5.923751811508877e-06, + "loss": 0.061602783203125, + "step": 16635 + }, + { + "epoch": 0.14388115969598186, + "grad_norm": 14.410684632245097, + "learning_rate": 5.9237061639213475e-06, + "loss": 0.3792137145996094, + "step": 16640 + }, + { + "epoch": 0.14392439321752515, + "grad_norm": 36.21403113719913, + "learning_rate": 5.923660502849946e-06, + "loss": 0.1643768310546875, + "step": 16645 + }, + { + "epoch": 0.1439676267390684, + "grad_norm": 42.315090416839915, + "learning_rate": 5.923614828294878e-06, + "loss": 0.453057861328125, + "step": 16650 + }, + { + "epoch": 0.14401086026061166, + "grad_norm": 17.10430298120411, + "learning_rate": 5.923569140256358e-06, + "loss": 0.5128440856933594, + "step": 16655 + }, + { + "epoch": 0.14405409378215492, + "grad_norm": 1.5439698748846504, + "learning_rate": 5.923523438734595e-06, + "loss": 0.11755218505859374, + "step": 16660 + }, + { + "epoch": 0.1440973273036982, + "grad_norm": 2.382444311030836, + "learning_rate": 5.923477723729801e-06, + "loss": 0.4487060546875, + "step": 16665 + }, + { + "epoch": 0.14414056082524146, + "grad_norm": 19.87386215734945, + "learning_rate": 5.9234319952421845e-06, + "loss": 0.3898750305175781, + "step": 16670 + }, + { + "epoch": 0.14418379434678472, + "grad_norm": 5.861559124846552, + "learning_rate": 5.923386253271959e-06, + "loss": 0.14653701782226564, + "step": 16675 + }, + { + "epoch": 0.144227027868328, + "grad_norm": 5.541595802246494, + "learning_rate": 5.9233404978193345e-06, + "loss": 0.4745849609375, + "step": 16680 + }, + { + "epoch": 0.14427026138987126, + "grad_norm": 2.654100539862431, + "learning_rate": 5.923294728884522e-06, + "loss": 0.051580810546875, + "step": 16685 + }, + { + "epoch": 0.1443134949114145, + "grad_norm": 18.412961578271037, + "learning_rate": 5.923248946467732e-06, + "loss": 0.21466331481933593, + "step": 16690 + }, + { + "epoch": 0.14435672843295777, + "grad_norm": 0.3032944434331412, + "learning_rate": 5.923203150569177e-06, + "loss": 0.12707901000976562, + "step": 16695 + }, + { + "epoch": 0.14439996195450105, + "grad_norm": 1.740344403749772, + "learning_rate": 5.923157341189066e-06, + "loss": 0.24943389892578124, + "step": 16700 + }, + { + "epoch": 0.1444431954760443, + "grad_norm": 5.40943402253499, + "learning_rate": 5.923111518327613e-06, + "loss": 0.17626953125, + "step": 16705 + }, + { + "epoch": 0.14448642899758757, + "grad_norm": 71.96055958966893, + "learning_rate": 5.923065681985028e-06, + "loss": 0.446240234375, + "step": 16710 + }, + { + "epoch": 0.14452966251913082, + "grad_norm": 39.647797743935584, + "learning_rate": 5.923019832161521e-06, + "loss": 0.27965545654296875, + "step": 16715 + }, + { + "epoch": 0.1445728960406741, + "grad_norm": 33.270359850980896, + "learning_rate": 5.922973968857306e-06, + "loss": 0.17944564819335937, + "step": 16720 + }, + { + "epoch": 0.14461612956221737, + "grad_norm": 4.10018289198918, + "learning_rate": 5.922928092072592e-06, + "loss": 0.0940155029296875, + "step": 16725 + }, + { + "epoch": 0.14465936308376062, + "grad_norm": 8.7447876000492, + "learning_rate": 5.922882201807593e-06, + "loss": 0.11206283569335937, + "step": 16730 + }, + { + "epoch": 0.14470259660530388, + "grad_norm": 11.569173673570216, + "learning_rate": 5.92283629806252e-06, + "loss": 0.5197113037109375, + "step": 16735 + }, + { + "epoch": 0.14474583012684716, + "grad_norm": 1.56966358429192, + "learning_rate": 5.922790380837583e-06, + "loss": 0.170147705078125, + "step": 16740 + }, + { + "epoch": 0.14478906364839042, + "grad_norm": 13.524739786494946, + "learning_rate": 5.922744450132996e-06, + "loss": 0.13305816650390626, + "step": 16745 + }, + { + "epoch": 0.14483229716993368, + "grad_norm": 1.0990016667171727, + "learning_rate": 5.92269850594897e-06, + "loss": 0.071307373046875, + "step": 16750 + }, + { + "epoch": 0.14487553069147693, + "grad_norm": 50.66039729762288, + "learning_rate": 5.922652548285716e-06, + "loss": 0.39757537841796875, + "step": 16755 + }, + { + "epoch": 0.14491876421302022, + "grad_norm": 30.04110680123458, + "learning_rate": 5.922606577143446e-06, + "loss": 0.26331787109375, + "step": 16760 + }, + { + "epoch": 0.14496199773456347, + "grad_norm": 10.463914354716202, + "learning_rate": 5.9225605925223746e-06, + "loss": 0.17069854736328124, + "step": 16765 + }, + { + "epoch": 0.14500523125610673, + "grad_norm": 4.376657844935754, + "learning_rate": 5.922514594422711e-06, + "loss": 0.3573211669921875, + "step": 16770 + }, + { + "epoch": 0.14504846477765, + "grad_norm": 20.82395046724013, + "learning_rate": 5.922468582844669e-06, + "loss": 0.3186737060546875, + "step": 16775 + }, + { + "epoch": 0.14509169829919327, + "grad_norm": 1.9077885598085254, + "learning_rate": 5.92242255778846e-06, + "loss": 0.4022064208984375, + "step": 16780 + }, + { + "epoch": 0.14513493182073653, + "grad_norm": 26.65978709142893, + "learning_rate": 5.922376519254295e-06, + "loss": 0.25615234375, + "step": 16785 + }, + { + "epoch": 0.14517816534227979, + "grad_norm": 2.875792208967807, + "learning_rate": 5.922330467242389e-06, + "loss": 0.170074462890625, + "step": 16790 + }, + { + "epoch": 0.14522139886382304, + "grad_norm": 3.515822765078381, + "learning_rate": 5.922284401752953e-06, + "loss": 0.17528533935546875, + "step": 16795 + }, + { + "epoch": 0.14526463238536633, + "grad_norm": 7.020196762114929, + "learning_rate": 5.922238322786199e-06, + "loss": 0.1989105224609375, + "step": 16800 + }, + { + "epoch": 0.14530786590690958, + "grad_norm": 4.1719156712897005, + "learning_rate": 5.92219223034234e-06, + "loss": 0.12209568023681641, + "step": 16805 + }, + { + "epoch": 0.14535109942845284, + "grad_norm": 3.5685953268608954, + "learning_rate": 5.922146124421589e-06, + "loss": 0.1543792724609375, + "step": 16810 + }, + { + "epoch": 0.1453943329499961, + "grad_norm": 29.292712721765906, + "learning_rate": 5.922100005024159e-06, + "loss": 0.115386962890625, + "step": 16815 + }, + { + "epoch": 0.14543756647153938, + "grad_norm": 9.442512126131168, + "learning_rate": 5.922053872150261e-06, + "loss": 0.2796875, + "step": 16820 + }, + { + "epoch": 0.14548079999308264, + "grad_norm": 4.881084534881247, + "learning_rate": 5.922007725800108e-06, + "loss": 0.4415016174316406, + "step": 16825 + }, + { + "epoch": 0.1455240335146259, + "grad_norm": 1.3544185875584343, + "learning_rate": 5.921961565973915e-06, + "loss": 0.12544097900390624, + "step": 16830 + }, + { + "epoch": 0.14556726703616915, + "grad_norm": 8.305896066843266, + "learning_rate": 5.9219153926718936e-06, + "loss": 0.07363662719726563, + "step": 16835 + }, + { + "epoch": 0.14561050055771244, + "grad_norm": 9.747427768903417, + "learning_rate": 5.921869205894256e-06, + "loss": 0.50460205078125, + "step": 16840 + }, + { + "epoch": 0.1456537340792557, + "grad_norm": 3.6188557204150826, + "learning_rate": 5.921823005641215e-06, + "loss": 0.20681915283203126, + "step": 16845 + }, + { + "epoch": 0.14569696760079895, + "grad_norm": 45.80875550383127, + "learning_rate": 5.921776791912986e-06, + "loss": 0.6342041015625, + "step": 16850 + }, + { + "epoch": 0.1457402011223422, + "grad_norm": 4.509888308599645, + "learning_rate": 5.92173056470978e-06, + "loss": 0.12196788787841797, + "step": 16855 + }, + { + "epoch": 0.1457834346438855, + "grad_norm": 0.4170709611051168, + "learning_rate": 5.92168432403181e-06, + "loss": 0.1864501953125, + "step": 16860 + }, + { + "epoch": 0.14582666816542875, + "grad_norm": 24.736768814655978, + "learning_rate": 5.9216380698792915e-06, + "loss": 0.10371894836425781, + "step": 16865 + }, + { + "epoch": 0.145869901686972, + "grad_norm": 19.03398639572747, + "learning_rate": 5.921591802252435e-06, + "loss": 0.26744384765625, + "step": 16870 + }, + { + "epoch": 0.1459131352085153, + "grad_norm": 5.098511139824401, + "learning_rate": 5.9215455211514565e-06, + "loss": 0.44227142333984376, + "step": 16875 + }, + { + "epoch": 0.14595636873005854, + "grad_norm": 51.748311069591985, + "learning_rate": 5.921499226576567e-06, + "loss": 0.46764984130859377, + "step": 16880 + }, + { + "epoch": 0.1459996022516018, + "grad_norm": 40.85348832810252, + "learning_rate": 5.921452918527982e-06, + "loss": 0.379290771484375, + "step": 16885 + }, + { + "epoch": 0.14604283577314506, + "grad_norm": 8.973679664892067, + "learning_rate": 5.921406597005915e-06, + "loss": 0.2733795166015625, + "step": 16890 + }, + { + "epoch": 0.14608606929468834, + "grad_norm": 14.091443490121645, + "learning_rate": 5.9213602620105776e-06, + "loss": 0.5196624755859375, + "step": 16895 + }, + { + "epoch": 0.1461293028162316, + "grad_norm": 6.060206633255842, + "learning_rate": 5.921313913542185e-06, + "loss": 0.1448944091796875, + "step": 16900 + }, + { + "epoch": 0.14617253633777486, + "grad_norm": 2.491701333368038, + "learning_rate": 5.921267551600951e-06, + "loss": 0.0379730224609375, + "step": 16905 + }, + { + "epoch": 0.1462157698593181, + "grad_norm": 0.5171816219877285, + "learning_rate": 5.921221176187089e-06, + "loss": 0.31669158935546876, + "step": 16910 + }, + { + "epoch": 0.1462590033808614, + "grad_norm": 30.39962331419116, + "learning_rate": 5.921174787300814e-06, + "loss": 0.23787345886230468, + "step": 16915 + }, + { + "epoch": 0.14630223690240465, + "grad_norm": 7.942438185914288, + "learning_rate": 5.921128384942338e-06, + "loss": 0.1429931640625, + "step": 16920 + }, + { + "epoch": 0.1463454704239479, + "grad_norm": 7.258887424651392, + "learning_rate": 5.921081969111876e-06, + "loss": 0.15953369140625, + "step": 16925 + }, + { + "epoch": 0.14638870394549117, + "grad_norm": 22.263853515718267, + "learning_rate": 5.921035539809642e-06, + "loss": 0.21367225646972657, + "step": 16930 + }, + { + "epoch": 0.14643193746703445, + "grad_norm": 4.366078502932118, + "learning_rate": 5.920989097035851e-06, + "loss": 0.133905029296875, + "step": 16935 + }, + { + "epoch": 0.1464751709885777, + "grad_norm": 2.366185512651357, + "learning_rate": 5.920942640790716e-06, + "loss": 0.3185302734375, + "step": 16940 + }, + { + "epoch": 0.14651840451012096, + "grad_norm": 11.074079066122156, + "learning_rate": 5.920896171074452e-06, + "loss": 0.1152099609375, + "step": 16945 + }, + { + "epoch": 0.14656163803166422, + "grad_norm": 16.381809990755922, + "learning_rate": 5.9208496878872726e-06, + "loss": 0.3563323974609375, + "step": 16950 + }, + { + "epoch": 0.1466048715532075, + "grad_norm": 9.969004987861036, + "learning_rate": 5.920803191229391e-06, + "loss": 0.0697021484375, + "step": 16955 + }, + { + "epoch": 0.14664810507475076, + "grad_norm": 1.586418496500373, + "learning_rate": 5.920756681101025e-06, + "loss": 0.2564208984375, + "step": 16960 + }, + { + "epoch": 0.14669133859629402, + "grad_norm": 9.523007224011078, + "learning_rate": 5.920710157502387e-06, + "loss": 0.32929840087890627, + "step": 16965 + }, + { + "epoch": 0.14673457211783728, + "grad_norm": 47.13795127816318, + "learning_rate": 5.9206636204336915e-06, + "loss": 0.5809394836425781, + "step": 16970 + }, + { + "epoch": 0.14677780563938056, + "grad_norm": 34.30030506358018, + "learning_rate": 5.920617069895154e-06, + "loss": 0.3154266357421875, + "step": 16975 + }, + { + "epoch": 0.14682103916092382, + "grad_norm": 45.28661524906409, + "learning_rate": 5.920570505886988e-06, + "loss": 0.389385986328125, + "step": 16980 + }, + { + "epoch": 0.14686427268246707, + "grad_norm": 2.0318928998050763, + "learning_rate": 5.92052392840941e-06, + "loss": 0.04930267333984375, + "step": 16985 + }, + { + "epoch": 0.14690750620401033, + "grad_norm": 48.67908176821227, + "learning_rate": 5.920477337462632e-06, + "loss": 0.5508247375488281, + "step": 16990 + }, + { + "epoch": 0.14695073972555361, + "grad_norm": 25.52647515864899, + "learning_rate": 5.920430733046872e-06, + "loss": 0.37969970703125, + "step": 16995 + }, + { + "epoch": 0.14699397324709687, + "grad_norm": 2.2042889754050607, + "learning_rate": 5.9203841151623425e-06, + "loss": 0.05496368408203125, + "step": 17000 + }, + { + "epoch": 0.14703720676864013, + "grad_norm": 51.981631058280044, + "learning_rate": 5.92033748380926e-06, + "loss": 0.45701904296875, + "step": 17005 + }, + { + "epoch": 0.14708044029018338, + "grad_norm": 1.3408298344835785, + "learning_rate": 5.920290838987839e-06, + "loss": 0.175885009765625, + "step": 17010 + }, + { + "epoch": 0.14712367381172667, + "grad_norm": 3.0660675874536416, + "learning_rate": 5.920244180698295e-06, + "loss": 0.106048583984375, + "step": 17015 + }, + { + "epoch": 0.14716690733326993, + "grad_norm": 8.426955640343978, + "learning_rate": 5.920197508940843e-06, + "loss": 0.21785736083984375, + "step": 17020 + }, + { + "epoch": 0.14721014085481318, + "grad_norm": 1.809079775250747, + "learning_rate": 5.920150823715698e-06, + "loss": 0.507940673828125, + "step": 17025 + }, + { + "epoch": 0.14725337437635644, + "grad_norm": 21.008928308345247, + "learning_rate": 5.920104125023075e-06, + "loss": 0.21885261535644532, + "step": 17030 + }, + { + "epoch": 0.14729660789789972, + "grad_norm": 8.298132283807483, + "learning_rate": 5.92005741286319e-06, + "loss": 0.301837158203125, + "step": 17035 + }, + { + "epoch": 0.14733984141944298, + "grad_norm": 64.57594540330764, + "learning_rate": 5.920010687236258e-06, + "loss": 0.1878082275390625, + "step": 17040 + }, + { + "epoch": 0.14738307494098624, + "grad_norm": 1.9972055542426925, + "learning_rate": 5.919963948142495e-06, + "loss": 0.378070068359375, + "step": 17045 + }, + { + "epoch": 0.14742630846252952, + "grad_norm": 11.967559831622772, + "learning_rate": 5.919917195582116e-06, + "loss": 0.2517723083496094, + "step": 17050 + }, + { + "epoch": 0.14746954198407278, + "grad_norm": 4.6279887700664375, + "learning_rate": 5.919870429555337e-06, + "loss": 0.05991973876953125, + "step": 17055 + }, + { + "epoch": 0.14751277550561603, + "grad_norm": 2.892437404934506, + "learning_rate": 5.9198236500623735e-06, + "loss": 0.31417388916015626, + "step": 17060 + }, + { + "epoch": 0.1475560090271593, + "grad_norm": 31.152516162175992, + "learning_rate": 5.919776857103442e-06, + "loss": 0.199951171875, + "step": 17065 + }, + { + "epoch": 0.14759924254870257, + "grad_norm": 52.0126680308671, + "learning_rate": 5.919730050678757e-06, + "loss": 0.2948211669921875, + "step": 17070 + }, + { + "epoch": 0.14764247607024583, + "grad_norm": 140.2597623526963, + "learning_rate": 5.919683230788536e-06, + "loss": 0.217724609375, + "step": 17075 + }, + { + "epoch": 0.1476857095917891, + "grad_norm": 19.605529394950942, + "learning_rate": 5.919636397432993e-06, + "loss": 0.1300750732421875, + "step": 17080 + }, + { + "epoch": 0.14772894311333234, + "grad_norm": 6.326897170718189, + "learning_rate": 5.919589550612344e-06, + "loss": 0.0532867431640625, + "step": 17085 + }, + { + "epoch": 0.14777217663487563, + "grad_norm": 21.03759562066031, + "learning_rate": 5.919542690326808e-06, + "loss": 0.1983551025390625, + "step": 17090 + }, + { + "epoch": 0.14781541015641889, + "grad_norm": 4.43485928316658, + "learning_rate": 5.919495816576597e-06, + "loss": 0.119110107421875, + "step": 17095 + }, + { + "epoch": 0.14785864367796214, + "grad_norm": 36.4920326717131, + "learning_rate": 5.919448929361931e-06, + "loss": 0.376519775390625, + "step": 17100 + }, + { + "epoch": 0.1479018771995054, + "grad_norm": 4.044645757956681, + "learning_rate": 5.919402028683023e-06, + "loss": 0.22481231689453124, + "step": 17105 + }, + { + "epoch": 0.14794511072104868, + "grad_norm": 43.865736177355544, + "learning_rate": 5.919355114540092e-06, + "loss": 0.16710205078125, + "step": 17110 + }, + { + "epoch": 0.14798834424259194, + "grad_norm": 10.626421829239828, + "learning_rate": 5.919308186933353e-06, + "loss": 0.1844451904296875, + "step": 17115 + }, + { + "epoch": 0.1480315777641352, + "grad_norm": 1.481765280947935, + "learning_rate": 5.9192612458630225e-06, + "loss": 0.07192459106445312, + "step": 17120 + }, + { + "epoch": 0.14807481128567845, + "grad_norm": 17.07941528897959, + "learning_rate": 5.919214291329317e-06, + "loss": 0.254876708984375, + "step": 17125 + }, + { + "epoch": 0.14811804480722174, + "grad_norm": 0.8548762554172888, + "learning_rate": 5.919167323332453e-06, + "loss": 0.3041046142578125, + "step": 17130 + }, + { + "epoch": 0.148161278328765, + "grad_norm": 5.581401605875958, + "learning_rate": 5.919120341872647e-06, + "loss": 0.13402099609375, + "step": 17135 + }, + { + "epoch": 0.14820451185030825, + "grad_norm": 26.607914422771728, + "learning_rate": 5.919073346950116e-06, + "loss": 0.22738037109375, + "step": 17140 + }, + { + "epoch": 0.1482477453718515, + "grad_norm": 2.414536836123267, + "learning_rate": 5.919026338565078e-06, + "loss": 0.29275054931640626, + "step": 17145 + }, + { + "epoch": 0.1482909788933948, + "grad_norm": 20.340474028605854, + "learning_rate": 5.9189793167177464e-06, + "loss": 0.33428955078125, + "step": 17150 + }, + { + "epoch": 0.14833421241493805, + "grad_norm": 12.02934901020012, + "learning_rate": 5.918932281408342e-06, + "loss": 0.3375984191894531, + "step": 17155 + }, + { + "epoch": 0.1483774459364813, + "grad_norm": 2.6026350118475197, + "learning_rate": 5.918885232637079e-06, + "loss": 0.5422332763671875, + "step": 17160 + }, + { + "epoch": 0.14842067945802456, + "grad_norm": 11.377419648446539, + "learning_rate": 5.918838170404175e-06, + "loss": 0.349169921875, + "step": 17165 + }, + { + "epoch": 0.14846391297956785, + "grad_norm": 31.734068310033095, + "learning_rate": 5.918791094709847e-06, + "loss": 0.392279052734375, + "step": 17170 + }, + { + "epoch": 0.1485071465011111, + "grad_norm": 0.688021193785736, + "learning_rate": 5.918744005554313e-06, + "loss": 0.2605438232421875, + "step": 17175 + }, + { + "epoch": 0.14855038002265436, + "grad_norm": 0.7982493905501168, + "learning_rate": 5.91869690293779e-06, + "loss": 0.12491912841796875, + "step": 17180 + }, + { + "epoch": 0.14859361354419762, + "grad_norm": 4.830024827201549, + "learning_rate": 5.9186497868604935e-06, + "loss": 0.1886913299560547, + "step": 17185 + }, + { + "epoch": 0.1486368470657409, + "grad_norm": 28.17164843149409, + "learning_rate": 5.918602657322643e-06, + "loss": 0.24816741943359374, + "step": 17190 + }, + { + "epoch": 0.14868008058728416, + "grad_norm": 11.17326214987487, + "learning_rate": 5.918555514324455e-06, + "loss": 0.1255462646484375, + "step": 17195 + }, + { + "epoch": 0.14872331410882741, + "grad_norm": 16.134970530134375, + "learning_rate": 5.918508357866146e-06, + "loss": 0.15464324951171876, + "step": 17200 + }, + { + "epoch": 0.14876654763037067, + "grad_norm": 0.5317856019357258, + "learning_rate": 5.918461187947935e-06, + "loss": 0.1845233917236328, + "step": 17205 + }, + { + "epoch": 0.14880978115191396, + "grad_norm": 16.95133367821003, + "learning_rate": 5.918414004570038e-06, + "loss": 0.30350341796875, + "step": 17210 + }, + { + "epoch": 0.1488530146734572, + "grad_norm": 6.83304953926701, + "learning_rate": 5.9183668077326745e-06, + "loss": 0.1255279541015625, + "step": 17215 + }, + { + "epoch": 0.14889624819500047, + "grad_norm": 14.190874053081227, + "learning_rate": 5.9183195974360614e-06, + "loss": 0.08680572509765624, + "step": 17220 + }, + { + "epoch": 0.14893948171654373, + "grad_norm": 1.070221256999885, + "learning_rate": 5.918272373680415e-06, + "loss": 0.09957351684570312, + "step": 17225 + }, + { + "epoch": 0.148982715238087, + "grad_norm": 10.846547452542639, + "learning_rate": 5.9182251364659554e-06, + "loss": 0.1812286376953125, + "step": 17230 + }, + { + "epoch": 0.14902594875963027, + "grad_norm": 10.09667394836873, + "learning_rate": 5.918177885792899e-06, + "loss": 0.34249420166015626, + "step": 17235 + }, + { + "epoch": 0.14906918228117352, + "grad_norm": 27.054076090571552, + "learning_rate": 5.9181306216614634e-06, + "loss": 0.26817169189453127, + "step": 17240 + }, + { + "epoch": 0.1491124158027168, + "grad_norm": 16.168224345981386, + "learning_rate": 5.918083344071868e-06, + "loss": 0.4316837310791016, + "step": 17245 + }, + { + "epoch": 0.14915564932426006, + "grad_norm": 33.587236151241804, + "learning_rate": 5.9180360530243295e-06, + "loss": 0.2535713195800781, + "step": 17250 + }, + { + "epoch": 0.14919888284580332, + "grad_norm": 15.850748216827908, + "learning_rate": 5.917988748519068e-06, + "loss": 0.11064071655273437, + "step": 17255 + }, + { + "epoch": 0.14924211636734658, + "grad_norm": 5.4035893400327595, + "learning_rate": 5.9179414305562985e-06, + "loss": 0.2724884033203125, + "step": 17260 + }, + { + "epoch": 0.14928534988888986, + "grad_norm": 1.3544957279929584, + "learning_rate": 5.9178940991362414e-06, + "loss": 0.35576934814453126, + "step": 17265 + }, + { + "epoch": 0.14932858341043312, + "grad_norm": 13.646954351210347, + "learning_rate": 5.9178467542591145e-06, + "loss": 0.0678070068359375, + "step": 17270 + }, + { + "epoch": 0.14937181693197638, + "grad_norm": 0.4087748037423289, + "learning_rate": 5.917799395925136e-06, + "loss": 0.3601409912109375, + "step": 17275 + }, + { + "epoch": 0.14941505045351963, + "grad_norm": 9.199921899796307, + "learning_rate": 5.917752024134526e-06, + "loss": 0.3286022186279297, + "step": 17280 + }, + { + "epoch": 0.14945828397506292, + "grad_norm": 19.371889863110493, + "learning_rate": 5.9177046388875e-06, + "loss": 0.16883506774902343, + "step": 17285 + }, + { + "epoch": 0.14950151749660617, + "grad_norm": 6.631424371335406, + "learning_rate": 5.917657240184278e-06, + "loss": 0.07372779846191406, + "step": 17290 + }, + { + "epoch": 0.14954475101814943, + "grad_norm": 42.454160552092624, + "learning_rate": 5.917609828025079e-06, + "loss": 0.3509765625, + "step": 17295 + }, + { + "epoch": 0.1495879845396927, + "grad_norm": 5.267960406239073, + "learning_rate": 5.917562402410122e-06, + "loss": 0.7882293701171875, + "step": 17300 + }, + { + "epoch": 0.14963121806123597, + "grad_norm": 3.490067245807428, + "learning_rate": 5.917514963339624e-06, + "loss": 0.1997760772705078, + "step": 17305 + }, + { + "epoch": 0.14967445158277923, + "grad_norm": 28.643874120101977, + "learning_rate": 5.9174675108138045e-06, + "loss": 0.31212158203125, + "step": 17310 + }, + { + "epoch": 0.14971768510432248, + "grad_norm": 37.70404231459035, + "learning_rate": 5.917420044832884e-06, + "loss": 0.17230072021484374, + "step": 17315 + }, + { + "epoch": 0.14976091862586574, + "grad_norm": 6.041842309559596, + "learning_rate": 5.9173725653970795e-06, + "loss": 0.35547332763671874, + "step": 17320 + }, + { + "epoch": 0.14980415214740903, + "grad_norm": 0.9912355962888119, + "learning_rate": 5.91732507250661e-06, + "loss": 0.156884765625, + "step": 17325 + }, + { + "epoch": 0.14984738566895228, + "grad_norm": 17.29004171283406, + "learning_rate": 5.917277566161695e-06, + "loss": 0.184478759765625, + "step": 17330 + }, + { + "epoch": 0.14989061919049554, + "grad_norm": 4.74814063136041, + "learning_rate": 5.917230046362554e-06, + "loss": 0.275457763671875, + "step": 17335 + }, + { + "epoch": 0.1499338527120388, + "grad_norm": 0.8517089552519516, + "learning_rate": 5.917182513109406e-06, + "loss": 0.19664382934570312, + "step": 17340 + }, + { + "epoch": 0.14997708623358208, + "grad_norm": 1.3502692995630092, + "learning_rate": 5.917134966402469e-06, + "loss": 0.1412017822265625, + "step": 17345 + }, + { + "epoch": 0.15002031975512534, + "grad_norm": 3.776128885492998, + "learning_rate": 5.917087406241964e-06, + "loss": 0.427587890625, + "step": 17350 + }, + { + "epoch": 0.1500635532766686, + "grad_norm": 48.59528179845327, + "learning_rate": 5.91703983262811e-06, + "loss": 0.3123809814453125, + "step": 17355 + }, + { + "epoch": 0.15010678679821185, + "grad_norm": 5.442471765586202, + "learning_rate": 5.916992245561126e-06, + "loss": 0.0695709228515625, + "step": 17360 + }, + { + "epoch": 0.15015002031975513, + "grad_norm": 22.531697576805207, + "learning_rate": 5.916944645041231e-06, + "loss": 0.5043830871582031, + "step": 17365 + }, + { + "epoch": 0.1501932538412984, + "grad_norm": 5.085726937240026, + "learning_rate": 5.916897031068645e-06, + "loss": 0.1828094482421875, + "step": 17370 + }, + { + "epoch": 0.15023648736284165, + "grad_norm": 16.241728104569813, + "learning_rate": 5.916849403643588e-06, + "loss": 0.12763404846191406, + "step": 17375 + }, + { + "epoch": 0.1502797208843849, + "grad_norm": 4.073728416997315, + "learning_rate": 5.9168017627662794e-06, + "loss": 0.04463119506835937, + "step": 17380 + }, + { + "epoch": 0.1503229544059282, + "grad_norm": 0.4450762773647758, + "learning_rate": 5.916754108436939e-06, + "loss": 0.1469024658203125, + "step": 17385 + }, + { + "epoch": 0.15036618792747145, + "grad_norm": 3.446210397059912, + "learning_rate": 5.916706440655786e-06, + "loss": 0.2352874755859375, + "step": 17390 + }, + { + "epoch": 0.1504094214490147, + "grad_norm": 1.7114781601048563, + "learning_rate": 5.916658759423041e-06, + "loss": 0.0442535400390625, + "step": 17395 + }, + { + "epoch": 0.15045265497055796, + "grad_norm": 37.52797773116981, + "learning_rate": 5.9166110647389225e-06, + "loss": 0.643414306640625, + "step": 17400 + }, + { + "epoch": 0.15049588849210124, + "grad_norm": 11.715189078201805, + "learning_rate": 5.916563356603652e-06, + "loss": 0.130255126953125, + "step": 17405 + }, + { + "epoch": 0.1505391220136445, + "grad_norm": 9.147760307253268, + "learning_rate": 5.91651563501745e-06, + "loss": 0.106146240234375, + "step": 17410 + }, + { + "epoch": 0.15058235553518776, + "grad_norm": 13.035403816134396, + "learning_rate": 5.916467899980534e-06, + "loss": 0.1509063720703125, + "step": 17415 + }, + { + "epoch": 0.15062558905673104, + "grad_norm": 6.746576389176456, + "learning_rate": 5.916420151493127e-06, + "loss": 0.119219970703125, + "step": 17420 + }, + { + "epoch": 0.1506688225782743, + "grad_norm": 0.4605658266291703, + "learning_rate": 5.916372389555447e-06, + "loss": 0.2097332000732422, + "step": 17425 + }, + { + "epoch": 0.15071205609981755, + "grad_norm": 4.351396564059247, + "learning_rate": 5.916324614167715e-06, + "loss": 0.279296875, + "step": 17430 + }, + { + "epoch": 0.1507552896213608, + "grad_norm": 1.118851879521594, + "learning_rate": 5.916276825330154e-06, + "loss": 0.2145233154296875, + "step": 17435 + }, + { + "epoch": 0.1507985231429041, + "grad_norm": 4.801501082952275, + "learning_rate": 5.9162290230429794e-06, + "loss": 0.20077667236328126, + "step": 17440 + }, + { + "epoch": 0.15084175666444735, + "grad_norm": 9.619764223078127, + "learning_rate": 5.9161812073064145e-06, + "loss": 0.0739227294921875, + "step": 17445 + }, + { + "epoch": 0.1508849901859906, + "grad_norm": 18.003720051317806, + "learning_rate": 5.91613337812068e-06, + "loss": 0.4474945068359375, + "step": 17450 + }, + { + "epoch": 0.15092822370753387, + "grad_norm": 24.727849770332547, + "learning_rate": 5.916085535485997e-06, + "loss": 0.22910690307617188, + "step": 17455 + }, + { + "epoch": 0.15097145722907715, + "grad_norm": 10.101907600077025, + "learning_rate": 5.916037679402584e-06, + "loss": 0.1132720947265625, + "step": 17460 + }, + { + "epoch": 0.1510146907506204, + "grad_norm": 17.748069745384626, + "learning_rate": 5.915989809870663e-06, + "loss": 0.3099395751953125, + "step": 17465 + }, + { + "epoch": 0.15105792427216366, + "grad_norm": 2.8439554604260002, + "learning_rate": 5.915941926890455e-06, + "loss": 0.234637451171875, + "step": 17470 + }, + { + "epoch": 0.15110115779370692, + "grad_norm": 39.0307259169612, + "learning_rate": 5.91589403046218e-06, + "loss": 0.40147705078125, + "step": 17475 + }, + { + "epoch": 0.1511443913152502, + "grad_norm": 3.826584031903583, + "learning_rate": 5.915846120586059e-06, + "loss": 0.13590240478515625, + "step": 17480 + }, + { + "epoch": 0.15118762483679346, + "grad_norm": 3.2609927656949864, + "learning_rate": 5.915798197262315e-06, + "loss": 0.15787811279296876, + "step": 17485 + }, + { + "epoch": 0.15123085835833672, + "grad_norm": 57.97844375963201, + "learning_rate": 5.915750260491167e-06, + "loss": 0.16172637939453124, + "step": 17490 + }, + { + "epoch": 0.15127409187987997, + "grad_norm": 8.734702693616953, + "learning_rate": 5.915702310272835e-06, + "loss": 0.13070068359375, + "step": 17495 + }, + { + "epoch": 0.15131732540142326, + "grad_norm": 2.031802107878597, + "learning_rate": 5.915654346607543e-06, + "loss": 0.5065338134765625, + "step": 17500 + }, + { + "epoch": 0.15136055892296651, + "grad_norm": 17.912579702348634, + "learning_rate": 5.9156063694955095e-06, + "loss": 0.1161712646484375, + "step": 17505 + }, + { + "epoch": 0.15140379244450977, + "grad_norm": 4.8566100245678525, + "learning_rate": 5.915558378936958e-06, + "loss": 0.37690277099609376, + "step": 17510 + }, + { + "epoch": 0.15144702596605303, + "grad_norm": 5.983020312511538, + "learning_rate": 5.915510374932109e-06, + "loss": 0.206646728515625, + "step": 17515 + }, + { + "epoch": 0.1514902594875963, + "grad_norm": 10.485207912066842, + "learning_rate": 5.915462357481183e-06, + "loss": 0.09623565673828124, + "step": 17520 + }, + { + "epoch": 0.15153349300913957, + "grad_norm": 31.26967148267222, + "learning_rate": 5.9154143265844026e-06, + "loss": 0.2505767822265625, + "step": 17525 + }, + { + "epoch": 0.15157672653068283, + "grad_norm": 5.836152251394665, + "learning_rate": 5.915366282241989e-06, + "loss": 0.24151763916015626, + "step": 17530 + }, + { + "epoch": 0.15161996005222608, + "grad_norm": 1.3260927944498615, + "learning_rate": 5.915318224454164e-06, + "loss": 0.19646453857421875, + "step": 17535 + }, + { + "epoch": 0.15166319357376937, + "grad_norm": 3.7780731182158305, + "learning_rate": 5.915270153221149e-06, + "loss": 0.40715847015380857, + "step": 17540 + }, + { + "epoch": 0.15170642709531262, + "grad_norm": 7.762336337233774, + "learning_rate": 5.915222068543165e-06, + "loss": 0.18404541015625, + "step": 17545 + }, + { + "epoch": 0.15174966061685588, + "grad_norm": 20.299295409468186, + "learning_rate": 5.915173970420435e-06, + "loss": 0.4330657958984375, + "step": 17550 + }, + { + "epoch": 0.15179289413839914, + "grad_norm": 10.42853819747841, + "learning_rate": 5.91512585885318e-06, + "loss": 0.0807403564453125, + "step": 17555 + }, + { + "epoch": 0.15183612765994242, + "grad_norm": 17.17849348920206, + "learning_rate": 5.915077733841622e-06, + "loss": 0.2677520751953125, + "step": 17560 + }, + { + "epoch": 0.15187936118148568, + "grad_norm": 52.00559468042826, + "learning_rate": 5.915029595385984e-06, + "loss": 0.5133895874023438, + "step": 17565 + }, + { + "epoch": 0.15192259470302893, + "grad_norm": 14.946859334847241, + "learning_rate": 5.914981443486485e-06, + "loss": 0.19756317138671875, + "step": 17570 + }, + { + "epoch": 0.1519658282245722, + "grad_norm": 24.473683968217262, + "learning_rate": 5.9149332781433516e-06, + "loss": 0.2105194091796875, + "step": 17575 + }, + { + "epoch": 0.15200906174611548, + "grad_norm": 1.0386392729301583, + "learning_rate": 5.914885099356802e-06, + "loss": 0.10679817199707031, + "step": 17580 + }, + { + "epoch": 0.15205229526765873, + "grad_norm": 9.578636006057016, + "learning_rate": 5.91483690712706e-06, + "loss": 0.09444198608398438, + "step": 17585 + }, + { + "epoch": 0.152095528789202, + "grad_norm": 5.654004663784634, + "learning_rate": 5.914788701454348e-06, + "loss": 0.2273590087890625, + "step": 17590 + }, + { + "epoch": 0.15213876231074525, + "grad_norm": 29.41410410761266, + "learning_rate": 5.914740482338887e-06, + "loss": 0.2767578125, + "step": 17595 + }, + { + "epoch": 0.15218199583228853, + "grad_norm": 5.7840907616852935, + "learning_rate": 5.914692249780901e-06, + "loss": 0.2689910888671875, + "step": 17600 + }, + { + "epoch": 0.1522252293538318, + "grad_norm": 5.468148512903627, + "learning_rate": 5.914644003780613e-06, + "loss": 0.12615966796875, + "step": 17605 + }, + { + "epoch": 0.15226846287537504, + "grad_norm": 3.334983074210034, + "learning_rate": 5.914595744338244e-06, + "loss": 0.038474655151367186, + "step": 17610 + }, + { + "epoch": 0.15231169639691833, + "grad_norm": 7.879888633038141, + "learning_rate": 5.914547471454016e-06, + "loss": 0.2686920166015625, + "step": 17615 + }, + { + "epoch": 0.15235492991846158, + "grad_norm": 24.5451666866875, + "learning_rate": 5.914499185128153e-06, + "loss": 0.18434906005859375, + "step": 17620 + }, + { + "epoch": 0.15239816344000484, + "grad_norm": 2.5946390575480023, + "learning_rate": 5.914450885360879e-06, + "loss": 0.4181034088134766, + "step": 17625 + }, + { + "epoch": 0.1524413969615481, + "grad_norm": 20.151404495874868, + "learning_rate": 5.914402572152413e-06, + "loss": 0.58438720703125, + "step": 17630 + }, + { + "epoch": 0.15248463048309138, + "grad_norm": 8.685333584000878, + "learning_rate": 5.9143542455029805e-06, + "loss": 0.1179595947265625, + "step": 17635 + }, + { + "epoch": 0.15252786400463464, + "grad_norm": 39.315076022012455, + "learning_rate": 5.914305905412804e-06, + "loss": 0.272111701965332, + "step": 17640 + }, + { + "epoch": 0.1525710975261779, + "grad_norm": 13.700370987845991, + "learning_rate": 5.914257551882105e-06, + "loss": 0.3627685546875, + "step": 17645 + }, + { + "epoch": 0.15261433104772115, + "grad_norm": 17.55912017209921, + "learning_rate": 5.91420918491111e-06, + "loss": 0.14647216796875, + "step": 17650 + }, + { + "epoch": 0.15265756456926444, + "grad_norm": 3.1860030206408765, + "learning_rate": 5.914160804500037e-06, + "loss": 0.197357177734375, + "step": 17655 + }, + { + "epoch": 0.1527007980908077, + "grad_norm": 7.2270919904926325, + "learning_rate": 5.914112410649113e-06, + "loss": 0.6417724609375, + "step": 17660 + }, + { + "epoch": 0.15274403161235095, + "grad_norm": 7.051789827900744, + "learning_rate": 5.914064003358561e-06, + "loss": 0.302593994140625, + "step": 17665 + }, + { + "epoch": 0.1527872651338942, + "grad_norm": 15.976367928970166, + "learning_rate": 5.914015582628602e-06, + "loss": 0.3654571533203125, + "step": 17670 + }, + { + "epoch": 0.1528304986554375, + "grad_norm": 3.207221467551434, + "learning_rate": 5.9139671484594615e-06, + "loss": 0.49833984375, + "step": 17675 + }, + { + "epoch": 0.15287373217698075, + "grad_norm": 9.460337121133295, + "learning_rate": 5.913918700851362e-06, + "loss": 0.14486846923828126, + "step": 17680 + }, + { + "epoch": 0.152916965698524, + "grad_norm": 23.77016408955745, + "learning_rate": 5.913870239804527e-06, + "loss": 0.20213470458984376, + "step": 17685 + }, + { + "epoch": 0.15296019922006726, + "grad_norm": 49.0688235801874, + "learning_rate": 5.913821765319178e-06, + "loss": 0.2737274169921875, + "step": 17690 + }, + { + "epoch": 0.15300343274161055, + "grad_norm": 6.762853874244158, + "learning_rate": 5.913773277395543e-06, + "loss": 0.119281005859375, + "step": 17695 + }, + { + "epoch": 0.1530466662631538, + "grad_norm": 3.3408117781531086, + "learning_rate": 5.913724776033841e-06, + "loss": 0.1657470703125, + "step": 17700 + }, + { + "epoch": 0.15308989978469706, + "grad_norm": 4.071535120917019, + "learning_rate": 5.9136762612342995e-06, + "loss": 0.10311508178710938, + "step": 17705 + }, + { + "epoch": 0.15313313330624032, + "grad_norm": 10.620341491955505, + "learning_rate": 5.91362773299714e-06, + "loss": 0.3180717468261719, + "step": 17710 + }, + { + "epoch": 0.1531763668277836, + "grad_norm": 4.917724500249976, + "learning_rate": 5.913579191322587e-06, + "loss": 0.0626678466796875, + "step": 17715 + }, + { + "epoch": 0.15321960034932686, + "grad_norm": 19.21920603661667, + "learning_rate": 5.913530636210864e-06, + "loss": 0.102984619140625, + "step": 17720 + }, + { + "epoch": 0.1532628338708701, + "grad_norm": 11.77549373451749, + "learning_rate": 5.913482067662195e-06, + "loss": 0.28370361328125, + "step": 17725 + }, + { + "epoch": 0.15330606739241337, + "grad_norm": 40.34425839127403, + "learning_rate": 5.913433485676804e-06, + "loss": 0.23847808837890624, + "step": 17730 + }, + { + "epoch": 0.15334930091395665, + "grad_norm": 5.133945422027836, + "learning_rate": 5.9133848902549154e-06, + "loss": 0.2002685546875, + "step": 17735 + }, + { + "epoch": 0.1533925344354999, + "grad_norm": 12.190378983097908, + "learning_rate": 5.913336281396753e-06, + "loss": 0.10392074584960938, + "step": 17740 + }, + { + "epoch": 0.15343576795704317, + "grad_norm": 16.65192980947679, + "learning_rate": 5.913287659102541e-06, + "loss": 0.12486495971679687, + "step": 17745 + }, + { + "epoch": 0.15347900147858642, + "grad_norm": 14.660882335963109, + "learning_rate": 5.913239023372504e-06, + "loss": 0.12020721435546874, + "step": 17750 + }, + { + "epoch": 0.1535222350001297, + "grad_norm": 17.055122191428637, + "learning_rate": 5.913190374206866e-06, + "loss": 0.45455398559570315, + "step": 17755 + }, + { + "epoch": 0.15356546852167297, + "grad_norm": 7.294895657774037, + "learning_rate": 5.913141711605852e-06, + "loss": 0.2209991455078125, + "step": 17760 + }, + { + "epoch": 0.15360870204321622, + "grad_norm": 9.131221433219302, + "learning_rate": 5.913093035569685e-06, + "loss": 0.181640625, + "step": 17765 + }, + { + "epoch": 0.15365193556475948, + "grad_norm": 9.748692595739325, + "learning_rate": 5.913044346098591e-06, + "loss": 0.118035888671875, + "step": 17770 + }, + { + "epoch": 0.15369516908630276, + "grad_norm": 7.354579920822497, + "learning_rate": 5.9129956431927935e-06, + "loss": 0.24148941040039062, + "step": 17775 + }, + { + "epoch": 0.15373840260784602, + "grad_norm": 2.4610109124605755, + "learning_rate": 5.9129469268525166e-06, + "loss": 0.363580322265625, + "step": 17780 + }, + { + "epoch": 0.15378163612938928, + "grad_norm": 30.761330701759512, + "learning_rate": 5.912898197077988e-06, + "loss": 0.304949951171875, + "step": 17785 + }, + { + "epoch": 0.15382486965093256, + "grad_norm": 15.00248472464913, + "learning_rate": 5.912849453869429e-06, + "loss": 0.6134628295898438, + "step": 17790 + }, + { + "epoch": 0.15386810317247582, + "grad_norm": 13.20090470697383, + "learning_rate": 5.912800697227066e-06, + "loss": 0.1615264892578125, + "step": 17795 + }, + { + "epoch": 0.15391133669401907, + "grad_norm": 0.15195482502602728, + "learning_rate": 5.912751927151123e-06, + "loss": 0.15856170654296875, + "step": 17800 + }, + { + "epoch": 0.15395457021556233, + "grad_norm": 27.915789043628937, + "learning_rate": 5.912703143641825e-06, + "loss": 0.1978179931640625, + "step": 17805 + }, + { + "epoch": 0.15399780373710562, + "grad_norm": 3.082504022815407, + "learning_rate": 5.912654346699399e-06, + "loss": 0.152191162109375, + "step": 17810 + }, + { + "epoch": 0.15404103725864887, + "grad_norm": 22.500870526604736, + "learning_rate": 5.912605536324068e-06, + "loss": 0.32352294921875, + "step": 17815 + }, + { + "epoch": 0.15408427078019213, + "grad_norm": 2.0460779990293108, + "learning_rate": 5.912556712516057e-06, + "loss": 0.45067138671875, + "step": 17820 + }, + { + "epoch": 0.15412750430173539, + "grad_norm": 1.030566625258253, + "learning_rate": 5.912507875275593e-06, + "loss": 0.0358642578125, + "step": 17825 + }, + { + "epoch": 0.15417073782327867, + "grad_norm": 4.842850673420539, + "learning_rate": 5.912459024602899e-06, + "loss": 0.298016357421875, + "step": 17830 + }, + { + "epoch": 0.15421397134482193, + "grad_norm": 0.27670201585428605, + "learning_rate": 5.912410160498201e-06, + "loss": 0.39081592559814454, + "step": 17835 + }, + { + "epoch": 0.15425720486636518, + "grad_norm": 3.642121177845874, + "learning_rate": 5.912361282961725e-06, + "loss": 0.15823974609375, + "step": 17840 + }, + { + "epoch": 0.15430043838790844, + "grad_norm": 7.851105112373311, + "learning_rate": 5.912312391993697e-06, + "loss": 0.39248046875, + "step": 17845 + }, + { + "epoch": 0.15434367190945172, + "grad_norm": 19.983635921064245, + "learning_rate": 5.9122634875943405e-06, + "loss": 0.07797813415527344, + "step": 17850 + }, + { + "epoch": 0.15438690543099498, + "grad_norm": 9.136475483757428, + "learning_rate": 5.912214569763883e-06, + "loss": 0.27996673583984377, + "step": 17855 + }, + { + "epoch": 0.15443013895253824, + "grad_norm": 2.230208503403616, + "learning_rate": 5.912165638502549e-06, + "loss": 0.14171142578125, + "step": 17860 + }, + { + "epoch": 0.1544733724740815, + "grad_norm": 20.148552488784787, + "learning_rate": 5.912116693810565e-06, + "loss": 0.14994049072265625, + "step": 17865 + }, + { + "epoch": 0.15451660599562478, + "grad_norm": 5.066039451750499, + "learning_rate": 5.912067735688154e-06, + "loss": 0.0949676513671875, + "step": 17870 + }, + { + "epoch": 0.15455983951716804, + "grad_norm": 25.44560001472181, + "learning_rate": 5.912018764135546e-06, + "loss": 0.10883369445800781, + "step": 17875 + }, + { + "epoch": 0.1546030730387113, + "grad_norm": 2.2873258487218155, + "learning_rate": 5.911969779152964e-06, + "loss": 0.3085418701171875, + "step": 17880 + }, + { + "epoch": 0.15464630656025455, + "grad_norm": 38.41485646332964, + "learning_rate": 5.911920780740635e-06, + "loss": 0.34178466796875, + "step": 17885 + }, + { + "epoch": 0.15468954008179783, + "grad_norm": 11.558852384055232, + "learning_rate": 5.9118717688987834e-06, + "loss": 0.09441070556640625, + "step": 17890 + }, + { + "epoch": 0.1547327736033411, + "grad_norm": 38.4910171878339, + "learning_rate": 5.911822743627638e-06, + "loss": 0.5243408203125, + "step": 17895 + }, + { + "epoch": 0.15477600712488435, + "grad_norm": 39.27598947561461, + "learning_rate": 5.9117737049274225e-06, + "loss": 0.365618896484375, + "step": 17900 + }, + { + "epoch": 0.1548192406464276, + "grad_norm": 3.140919204479213, + "learning_rate": 5.911724652798365e-06, + "loss": 0.46981658935546877, + "step": 17905 + }, + { + "epoch": 0.1548624741679709, + "grad_norm": 27.496768649756497, + "learning_rate": 5.91167558724069e-06, + "loss": 0.3941375732421875, + "step": 17910 + }, + { + "epoch": 0.15490570768951414, + "grad_norm": 1.4977071492900047, + "learning_rate": 5.911626508254624e-06, + "loss": 0.11214599609375, + "step": 17915 + }, + { + "epoch": 0.1549489412110574, + "grad_norm": 11.63656195311001, + "learning_rate": 5.911577415840394e-06, + "loss": 0.06707763671875, + "step": 17920 + }, + { + "epoch": 0.15499217473260066, + "grad_norm": 14.358437082683645, + "learning_rate": 5.911528309998227e-06, + "loss": 0.1941965103149414, + "step": 17925 + }, + { + "epoch": 0.15503540825414394, + "grad_norm": 24.02999158449203, + "learning_rate": 5.911479190728348e-06, + "loss": 0.23683624267578124, + "step": 17930 + }, + { + "epoch": 0.1550786417756872, + "grad_norm": 0.6763764880797801, + "learning_rate": 5.911430058030985e-06, + "loss": 0.0544189453125, + "step": 17935 + }, + { + "epoch": 0.15512187529723045, + "grad_norm": 24.49119345919311, + "learning_rate": 5.911380911906363e-06, + "loss": 0.169427490234375, + "step": 17940 + }, + { + "epoch": 0.1551651088187737, + "grad_norm": 10.204802722243112, + "learning_rate": 5.911331752354709e-06, + "loss": 0.22715530395507813, + "step": 17945 + }, + { + "epoch": 0.155208342340317, + "grad_norm": 21.12424759974152, + "learning_rate": 5.911282579376252e-06, + "loss": 0.28841552734375, + "step": 17950 + }, + { + "epoch": 0.15525157586186025, + "grad_norm": 7.845543681951318, + "learning_rate": 5.911233392971216e-06, + "loss": 0.151495361328125, + "step": 17955 + }, + { + "epoch": 0.1552948093834035, + "grad_norm": 0.9445241367814319, + "learning_rate": 5.911184193139828e-06, + "loss": 0.08222198486328125, + "step": 17960 + }, + { + "epoch": 0.15533804290494677, + "grad_norm": 8.252293583873042, + "learning_rate": 5.911134979882317e-06, + "loss": 0.13711090087890626, + "step": 17965 + }, + { + "epoch": 0.15538127642649005, + "grad_norm": 63.004811348629566, + "learning_rate": 5.911085753198908e-06, + "loss": 0.4750823974609375, + "step": 17970 + }, + { + "epoch": 0.1554245099480333, + "grad_norm": 2.830298316821077, + "learning_rate": 5.91103651308983e-06, + "loss": 0.044793701171875, + "step": 17975 + }, + { + "epoch": 0.15546774346957656, + "grad_norm": 5.105306988422574, + "learning_rate": 5.9109872595553075e-06, + "loss": 0.259942626953125, + "step": 17980 + }, + { + "epoch": 0.15551097699111985, + "grad_norm": 0.5889234156592711, + "learning_rate": 5.910937992595569e-06, + "loss": 0.4613185882568359, + "step": 17985 + }, + { + "epoch": 0.1555542105126631, + "grad_norm": 10.676225049197871, + "learning_rate": 5.910888712210842e-06, + "loss": 0.07996940612792969, + "step": 17990 + }, + { + "epoch": 0.15559744403420636, + "grad_norm": 13.885284601992536, + "learning_rate": 5.910839418401354e-06, + "loss": 0.13533477783203124, + "step": 17995 + }, + { + "epoch": 0.15564067755574962, + "grad_norm": 0.3475970489748084, + "learning_rate": 5.910790111167331e-06, + "loss": 0.21352996826171874, + "step": 18000 + }, + { + "epoch": 0.1556839110772929, + "grad_norm": 3.0090948836126112, + "learning_rate": 5.9107407905090015e-06, + "loss": 0.150982666015625, + "step": 18005 + }, + { + "epoch": 0.15572714459883616, + "grad_norm": 43.20451052666899, + "learning_rate": 5.910691456426592e-06, + "loss": 0.34289398193359377, + "step": 18010 + }, + { + "epoch": 0.15577037812037942, + "grad_norm": 10.24821363581399, + "learning_rate": 5.910642108920332e-06, + "loss": 0.4156890869140625, + "step": 18015 + }, + { + "epoch": 0.15581361164192267, + "grad_norm": 6.9243218106990305, + "learning_rate": 5.910592747990447e-06, + "loss": 0.091552734375, + "step": 18020 + }, + { + "epoch": 0.15585684516346596, + "grad_norm": 21.72766818739777, + "learning_rate": 5.910543373637166e-06, + "loss": 0.32521209716796873, + "step": 18025 + }, + { + "epoch": 0.1559000786850092, + "grad_norm": 0.966243664403487, + "learning_rate": 5.910493985860715e-06, + "loss": 0.2213592529296875, + "step": 18030 + }, + { + "epoch": 0.15594331220655247, + "grad_norm": 1.8539302084580591, + "learning_rate": 5.910444584661324e-06, + "loss": 0.1370452880859375, + "step": 18035 + }, + { + "epoch": 0.15598654572809573, + "grad_norm": 38.61875819584219, + "learning_rate": 5.9103951700392196e-06, + "loss": 0.42632369995117186, + "step": 18040 + }, + { + "epoch": 0.156029779249639, + "grad_norm": 13.699012367746155, + "learning_rate": 5.910345741994629e-06, + "loss": 0.1186126708984375, + "step": 18045 + }, + { + "epoch": 0.15607301277118227, + "grad_norm": 6.939461257918195, + "learning_rate": 5.9102963005277825e-06, + "loss": 0.06872406005859374, + "step": 18050 + }, + { + "epoch": 0.15611624629272552, + "grad_norm": 12.224491402404116, + "learning_rate": 5.910246845638904e-06, + "loss": 0.26560211181640625, + "step": 18055 + }, + { + "epoch": 0.15615947981426878, + "grad_norm": 11.915608550822082, + "learning_rate": 5.910197377328227e-06, + "loss": 0.29444580078125, + "step": 18060 + }, + { + "epoch": 0.15620271333581207, + "grad_norm": 12.427708224969303, + "learning_rate": 5.910147895595976e-06, + "loss": 0.24402809143066406, + "step": 18065 + }, + { + "epoch": 0.15624594685735532, + "grad_norm": 17.321523445073833, + "learning_rate": 5.9100984004423796e-06, + "loss": 0.1284088134765625, + "step": 18070 + }, + { + "epoch": 0.15628918037889858, + "grad_norm": 3.1499104235863085, + "learning_rate": 5.910048891867667e-06, + "loss": 0.1389627456665039, + "step": 18075 + }, + { + "epoch": 0.15633241390044184, + "grad_norm": 5.5586235292610375, + "learning_rate": 5.909999369872065e-06, + "loss": 0.203009033203125, + "step": 18080 + }, + { + "epoch": 0.15637564742198512, + "grad_norm": 26.59771972851976, + "learning_rate": 5.909949834455805e-06, + "loss": 0.38847503662109373, + "step": 18085 + }, + { + "epoch": 0.15641888094352838, + "grad_norm": 1.2875137177536398, + "learning_rate": 5.909900285619112e-06, + "loss": 0.0847259521484375, + "step": 18090 + }, + { + "epoch": 0.15646211446507163, + "grad_norm": 7.270440613056111, + "learning_rate": 5.909850723362217e-06, + "loss": 0.06399993896484375, + "step": 18095 + }, + { + "epoch": 0.1565053479866149, + "grad_norm": 8.927239908951295, + "learning_rate": 5.909801147685346e-06, + "loss": 0.3251220703125, + "step": 18100 + }, + { + "epoch": 0.15654858150815817, + "grad_norm": 17.830372988767163, + "learning_rate": 5.909751558588731e-06, + "loss": 0.28905029296875, + "step": 18105 + }, + { + "epoch": 0.15659181502970143, + "grad_norm": 2.684231537207428, + "learning_rate": 5.909701956072598e-06, + "loss": 0.170428466796875, + "step": 18110 + }, + { + "epoch": 0.1566350485512447, + "grad_norm": 0.4484241132206628, + "learning_rate": 5.909652340137176e-06, + "loss": 0.08510818481445312, + "step": 18115 + }, + { + "epoch": 0.15667828207278794, + "grad_norm": 53.518768471529256, + "learning_rate": 5.909602710782695e-06, + "loss": 0.594384765625, + "step": 18120 + }, + { + "epoch": 0.15672151559433123, + "grad_norm": 16.733551469389976, + "learning_rate": 5.909553068009384e-06, + "loss": 0.37866897583007814, + "step": 18125 + }, + { + "epoch": 0.15676474911587449, + "grad_norm": 33.0478220277303, + "learning_rate": 5.90950341181747e-06, + "loss": 0.41790084838867186, + "step": 18130 + }, + { + "epoch": 0.15680798263741774, + "grad_norm": 0.1849344662255298, + "learning_rate": 5.909453742207185e-06, + "loss": 0.17212677001953125, + "step": 18135 + }, + { + "epoch": 0.156851216158961, + "grad_norm": 12.139231531113714, + "learning_rate": 5.909404059178756e-06, + "loss": 0.0958770751953125, + "step": 18140 + }, + { + "epoch": 0.15689444968050428, + "grad_norm": 0.8932578945353582, + "learning_rate": 5.9093543627324116e-06, + "loss": 0.23886337280273437, + "step": 18145 + }, + { + "epoch": 0.15693768320204754, + "grad_norm": 15.250735451224392, + "learning_rate": 5.909304652868383e-06, + "loss": 0.1515167236328125, + "step": 18150 + }, + { + "epoch": 0.1569809167235908, + "grad_norm": 3.691111182303715, + "learning_rate": 5.909254929586899e-06, + "loss": 0.19677352905273438, + "step": 18155 + }, + { + "epoch": 0.15702415024513408, + "grad_norm": 42.25654138914492, + "learning_rate": 5.909205192888187e-06, + "loss": 0.2311767578125, + "step": 18160 + }, + { + "epoch": 0.15706738376667734, + "grad_norm": 5.435100046018372, + "learning_rate": 5.909155442772478e-06, + "loss": 0.0362701416015625, + "step": 18165 + }, + { + "epoch": 0.1571106172882206, + "grad_norm": 2.4758059445503617, + "learning_rate": 5.909105679240001e-06, + "loss": 0.05197982788085938, + "step": 18170 + }, + { + "epoch": 0.15715385080976385, + "grad_norm": 8.907660785058168, + "learning_rate": 5.9090559022909865e-06, + "loss": 0.0840911865234375, + "step": 18175 + }, + { + "epoch": 0.15719708433130714, + "grad_norm": 5.760390175812317, + "learning_rate": 5.9090061119256625e-06, + "loss": 0.297857666015625, + "step": 18180 + }, + { + "epoch": 0.1572403178528504, + "grad_norm": 14.66724024829053, + "learning_rate": 5.90895630814426e-06, + "loss": 0.0735595703125, + "step": 18185 + }, + { + "epoch": 0.15728355137439365, + "grad_norm": 6.9409265324291765, + "learning_rate": 5.9089064909470065e-06, + "loss": 0.2002431869506836, + "step": 18190 + }, + { + "epoch": 0.1573267848959369, + "grad_norm": 47.531522475943866, + "learning_rate": 5.908856660334134e-06, + "loss": 0.147564697265625, + "step": 18195 + }, + { + "epoch": 0.1573700184174802, + "grad_norm": 52.36771113836715, + "learning_rate": 5.908806816305872e-06, + "loss": 0.29318695068359374, + "step": 18200 + }, + { + "epoch": 0.15741325193902345, + "grad_norm": 5.746056005851249, + "learning_rate": 5.908756958862451e-06, + "loss": 0.033498382568359374, + "step": 18205 + }, + { + "epoch": 0.1574564854605667, + "grad_norm": 21.905598908354, + "learning_rate": 5.908707088004099e-06, + "loss": 0.09055023193359375, + "step": 18210 + }, + { + "epoch": 0.15749971898210996, + "grad_norm": 32.092810761207545, + "learning_rate": 5.908657203731046e-06, + "loss": 0.220513916015625, + "step": 18215 + }, + { + "epoch": 0.15754295250365324, + "grad_norm": 1.2235526784062605, + "learning_rate": 5.9086073060435245e-06, + "loss": 0.11640167236328125, + "step": 18220 + }, + { + "epoch": 0.1575861860251965, + "grad_norm": 8.595153037644286, + "learning_rate": 5.9085573949417625e-06, + "loss": 0.0941497802734375, + "step": 18225 + }, + { + "epoch": 0.15762941954673976, + "grad_norm": 49.841551719975975, + "learning_rate": 5.90850747042599e-06, + "loss": 0.38638916015625, + "step": 18230 + }, + { + "epoch": 0.15767265306828301, + "grad_norm": 3.006645573335205, + "learning_rate": 5.908457532496439e-06, + "loss": 0.1818859100341797, + "step": 18235 + }, + { + "epoch": 0.1577158865898263, + "grad_norm": 16.46661476247405, + "learning_rate": 5.908407581153339e-06, + "loss": 0.17902145385742188, + "step": 18240 + }, + { + "epoch": 0.15775912011136956, + "grad_norm": 3.642663911786817, + "learning_rate": 5.908357616396919e-06, + "loss": 0.07077484130859375, + "step": 18245 + }, + { + "epoch": 0.1578023536329128, + "grad_norm": 0.127603130191714, + "learning_rate": 5.908307638227412e-06, + "loss": 0.24315948486328126, + "step": 18250 + }, + { + "epoch": 0.15784558715445607, + "grad_norm": 15.224917618349817, + "learning_rate": 5.908257646645047e-06, + "loss": 0.2107940673828125, + "step": 18255 + }, + { + "epoch": 0.15788882067599935, + "grad_norm": 24.73333621249261, + "learning_rate": 5.9082076416500534e-06, + "loss": 0.17890090942382814, + "step": 18260 + }, + { + "epoch": 0.1579320541975426, + "grad_norm": 7.3715256575006505, + "learning_rate": 5.9081576232426645e-06, + "loss": 0.26587371826171874, + "step": 18265 + }, + { + "epoch": 0.15797528771908587, + "grad_norm": 71.93074420592264, + "learning_rate": 5.908107591423109e-06, + "loss": 0.220599365234375, + "step": 18270 + }, + { + "epoch": 0.15801852124062912, + "grad_norm": 40.85702452087819, + "learning_rate": 5.9080575461916185e-06, + "loss": 0.402044677734375, + "step": 18275 + }, + { + "epoch": 0.1580617547621724, + "grad_norm": 51.45791809753238, + "learning_rate": 5.908007487548424e-06, + "loss": 0.22661247253417968, + "step": 18280 + }, + { + "epoch": 0.15810498828371566, + "grad_norm": 38.20666616399058, + "learning_rate": 5.907957415493756e-06, + "loss": 0.382086181640625, + "step": 18285 + }, + { + "epoch": 0.15814822180525892, + "grad_norm": 2.6575862212388914, + "learning_rate": 5.907907330027844e-06, + "loss": 0.1100433349609375, + "step": 18290 + }, + { + "epoch": 0.15819145532680218, + "grad_norm": 2.9959338384633796, + "learning_rate": 5.907857231150921e-06, + "loss": 0.325616455078125, + "step": 18295 + }, + { + "epoch": 0.15823468884834546, + "grad_norm": 5.431978640608665, + "learning_rate": 5.907807118863218e-06, + "loss": 0.17629661560058593, + "step": 18300 + }, + { + "epoch": 0.15827792236988872, + "grad_norm": 7.983493978411822, + "learning_rate": 5.907756993164965e-06, + "loss": 0.23128662109375, + "step": 18305 + }, + { + "epoch": 0.15832115589143197, + "grad_norm": 33.878062001349065, + "learning_rate": 5.907706854056394e-06, + "loss": 0.3039398193359375, + "step": 18310 + }, + { + "epoch": 0.15836438941297523, + "grad_norm": 3.025521179785065, + "learning_rate": 5.907656701537735e-06, + "loss": 0.08740234375, + "step": 18315 + }, + { + "epoch": 0.15840762293451852, + "grad_norm": 3.7196276598911755, + "learning_rate": 5.907606535609221e-06, + "loss": 0.135504150390625, + "step": 18320 + }, + { + "epoch": 0.15845085645606177, + "grad_norm": 1.3669027556428532, + "learning_rate": 5.907556356271082e-06, + "loss": 0.3016326904296875, + "step": 18325 + }, + { + "epoch": 0.15849408997760503, + "grad_norm": 47.386549267006416, + "learning_rate": 5.9075061635235504e-06, + "loss": 0.2443206787109375, + "step": 18330 + }, + { + "epoch": 0.15853732349914829, + "grad_norm": 3.06616352147364, + "learning_rate": 5.907455957366858e-06, + "loss": 0.2264984130859375, + "step": 18335 + }, + { + "epoch": 0.15858055702069157, + "grad_norm": 42.849744886790944, + "learning_rate": 5.907405737801235e-06, + "loss": 0.5413482666015625, + "step": 18340 + }, + { + "epoch": 0.15862379054223483, + "grad_norm": 2.610391158515186, + "learning_rate": 5.907355504826913e-06, + "loss": 0.2864055633544922, + "step": 18345 + }, + { + "epoch": 0.15866702406377808, + "grad_norm": 34.33776090499562, + "learning_rate": 5.907305258444125e-06, + "loss": 0.17433204650878906, + "step": 18350 + }, + { + "epoch": 0.15871025758532137, + "grad_norm": 35.89563115346041, + "learning_rate": 5.907254998653102e-06, + "loss": 0.3238189697265625, + "step": 18355 + }, + { + "epoch": 0.15875349110686462, + "grad_norm": 19.86031537011966, + "learning_rate": 5.907204725454076e-06, + "loss": 0.202874755859375, + "step": 18360 + }, + { + "epoch": 0.15879672462840788, + "grad_norm": 3.059220415222128, + "learning_rate": 5.907154438847279e-06, + "loss": 0.0577545166015625, + "step": 18365 + }, + { + "epoch": 0.15883995814995114, + "grad_norm": 10.192114655859239, + "learning_rate": 5.907104138832942e-06, + "loss": 0.382049560546875, + "step": 18370 + }, + { + "epoch": 0.15888319167149442, + "grad_norm": 50.03337471197756, + "learning_rate": 5.9070538254112974e-06, + "loss": 0.48524169921875, + "step": 18375 + }, + { + "epoch": 0.15892642519303768, + "grad_norm": 11.974006374794591, + "learning_rate": 5.9070034985825775e-06, + "loss": 0.2621368408203125, + "step": 18380 + }, + { + "epoch": 0.15896965871458094, + "grad_norm": 8.070009773212892, + "learning_rate": 5.906953158347015e-06, + "loss": 0.28013153076171876, + "step": 18385 + }, + { + "epoch": 0.1590128922361242, + "grad_norm": 4.167296898265123, + "learning_rate": 5.90690280470484e-06, + "loss": 0.0852142333984375, + "step": 18390 + }, + { + "epoch": 0.15905612575766748, + "grad_norm": 18.37036771499242, + "learning_rate": 5.906852437656287e-06, + "loss": 0.492999267578125, + "step": 18395 + }, + { + "epoch": 0.15909935927921073, + "grad_norm": 26.26948299321978, + "learning_rate": 5.906802057201587e-06, + "loss": 0.3626434326171875, + "step": 18400 + }, + { + "epoch": 0.159142592800754, + "grad_norm": 15.603359003395182, + "learning_rate": 5.9067516633409736e-06, + "loss": 0.1215911865234375, + "step": 18405 + }, + { + "epoch": 0.15918582632229725, + "grad_norm": 25.791572545518125, + "learning_rate": 5.906701256074678e-06, + "loss": 0.24772796630859376, + "step": 18410 + }, + { + "epoch": 0.15922905984384053, + "grad_norm": 13.24801117750097, + "learning_rate": 5.906650835402934e-06, + "loss": 0.3462982177734375, + "step": 18415 + }, + { + "epoch": 0.1592722933653838, + "grad_norm": 11.153034592841696, + "learning_rate": 5.9066004013259726e-06, + "loss": 0.13020553588867187, + "step": 18420 + }, + { + "epoch": 0.15931552688692704, + "grad_norm": 27.744201915454003, + "learning_rate": 5.9065499538440255e-06, + "loss": 0.2150177001953125, + "step": 18425 + }, + { + "epoch": 0.1593587604084703, + "grad_norm": 5.490386650096594, + "learning_rate": 5.906499492957329e-06, + "loss": 0.142694091796875, + "step": 18430 + }, + { + "epoch": 0.15940199393001359, + "grad_norm": 5.18675212303607, + "learning_rate": 5.906449018666111e-06, + "loss": 0.0669830322265625, + "step": 18435 + }, + { + "epoch": 0.15944522745155684, + "grad_norm": 6.009046866163282, + "learning_rate": 5.906398530970609e-06, + "loss": 0.059637451171875, + "step": 18440 + }, + { + "epoch": 0.1594884609731001, + "grad_norm": 1.4871732606086512, + "learning_rate": 5.906348029871053e-06, + "loss": 0.256927490234375, + "step": 18445 + }, + { + "epoch": 0.15953169449464336, + "grad_norm": 7.079104477100563, + "learning_rate": 5.906297515367678e-06, + "loss": 0.3817287445068359, + "step": 18450 + }, + { + "epoch": 0.15957492801618664, + "grad_norm": 6.457448822371054, + "learning_rate": 5.9062469874607145e-06, + "loss": 0.20388107299804686, + "step": 18455 + }, + { + "epoch": 0.1596181615377299, + "grad_norm": 1.7239034790992684, + "learning_rate": 5.906196446150397e-06, + "loss": 0.22093124389648439, + "step": 18460 + }, + { + "epoch": 0.15966139505927315, + "grad_norm": 3.7323529681765097, + "learning_rate": 5.906145891436958e-06, + "loss": 0.379119873046875, + "step": 18465 + }, + { + "epoch": 0.1597046285808164, + "grad_norm": 70.91808769022407, + "learning_rate": 5.906095323320632e-06, + "loss": 0.11746978759765625, + "step": 18470 + }, + { + "epoch": 0.1597478621023597, + "grad_norm": 23.66446896870885, + "learning_rate": 5.906044741801648e-06, + "loss": 0.038852691650390625, + "step": 18475 + }, + { + "epoch": 0.15979109562390295, + "grad_norm": 18.417206149211335, + "learning_rate": 5.9059941468802456e-06, + "loss": 0.273016357421875, + "step": 18480 + }, + { + "epoch": 0.1598343291454462, + "grad_norm": 15.467637631792268, + "learning_rate": 5.905943538556655e-06, + "loss": 0.0690032958984375, + "step": 18485 + }, + { + "epoch": 0.15987756266698946, + "grad_norm": 41.94888041792517, + "learning_rate": 5.905892916831108e-06, + "loss": 0.5196060180664063, + "step": 18490 + }, + { + "epoch": 0.15992079618853275, + "grad_norm": 22.1639168719606, + "learning_rate": 5.905842281703841e-06, + "loss": 0.50350341796875, + "step": 18495 + }, + { + "epoch": 0.159964029710076, + "grad_norm": 2.9062748793979982, + "learning_rate": 5.905791633175084e-06, + "loss": 0.2065093994140625, + "step": 18500 + }, + { + "epoch": 0.16000726323161926, + "grad_norm": 9.367058468039266, + "learning_rate": 5.905740971245075e-06, + "loss": 0.19373626708984376, + "step": 18505 + }, + { + "epoch": 0.16005049675316252, + "grad_norm": 16.14325326633839, + "learning_rate": 5.905690295914044e-06, + "loss": 0.1026641845703125, + "step": 18510 + }, + { + "epoch": 0.1600937302747058, + "grad_norm": 3.199561986535286, + "learning_rate": 5.905639607182227e-06, + "loss": 0.062274169921875, + "step": 18515 + }, + { + "epoch": 0.16013696379624906, + "grad_norm": 4.9971245771283925, + "learning_rate": 5.905588905049857e-06, + "loss": 0.1724700927734375, + "step": 18520 + }, + { + "epoch": 0.16018019731779232, + "grad_norm": 18.094022621613504, + "learning_rate": 5.905538189517167e-06, + "loss": 0.13302841186523437, + "step": 18525 + }, + { + "epoch": 0.1602234308393356, + "grad_norm": 13.643499446503814, + "learning_rate": 5.905487460584392e-06, + "loss": 0.438714599609375, + "step": 18530 + }, + { + "epoch": 0.16026666436087886, + "grad_norm": 9.916207115524795, + "learning_rate": 5.905436718251766e-06, + "loss": 0.32674484252929686, + "step": 18535 + }, + { + "epoch": 0.16030989788242211, + "grad_norm": 17.101619602863906, + "learning_rate": 5.905385962519522e-06, + "loss": 0.133355712890625, + "step": 18540 + }, + { + "epoch": 0.16035313140396537, + "grad_norm": 3.1415618838855, + "learning_rate": 5.905335193387894e-06, + "loss": 0.12156982421875, + "step": 18545 + }, + { + "epoch": 0.16039636492550866, + "grad_norm": 2.8627480777000867, + "learning_rate": 5.905284410857118e-06, + "loss": 0.11914520263671875, + "step": 18550 + }, + { + "epoch": 0.1604395984470519, + "grad_norm": 20.06025444707851, + "learning_rate": 5.9052336149274265e-06, + "loss": 0.18319244384765626, + "step": 18555 + }, + { + "epoch": 0.16048283196859517, + "grad_norm": 13.297208272538287, + "learning_rate": 5.905182805599054e-06, + "loss": 0.2075408935546875, + "step": 18560 + }, + { + "epoch": 0.16052606549013843, + "grad_norm": 14.587101235213932, + "learning_rate": 5.905131982872235e-06, + "loss": 0.4159442901611328, + "step": 18565 + }, + { + "epoch": 0.1605692990116817, + "grad_norm": 5.719064948417053, + "learning_rate": 5.905081146747205e-06, + "loss": 0.212615966796875, + "step": 18570 + }, + { + "epoch": 0.16061253253322497, + "grad_norm": 3.0733185348462686, + "learning_rate": 5.905030297224196e-06, + "loss": 0.117144775390625, + "step": 18575 + }, + { + "epoch": 0.16065576605476822, + "grad_norm": 13.17419223464278, + "learning_rate": 5.904979434303445e-06, + "loss": 0.21723480224609376, + "step": 18580 + }, + { + "epoch": 0.16069899957631148, + "grad_norm": 26.686251313699074, + "learning_rate": 5.904928557985186e-06, + "loss": 0.2667236328125, + "step": 18585 + }, + { + "epoch": 0.16074223309785476, + "grad_norm": 26.785939239128922, + "learning_rate": 5.904877668269652e-06, + "loss": 0.483746337890625, + "step": 18590 + }, + { + "epoch": 0.16078546661939802, + "grad_norm": 0.3650046743912247, + "learning_rate": 5.90482676515708e-06, + "loss": 0.0714691162109375, + "step": 18595 + }, + { + "epoch": 0.16082870014094128, + "grad_norm": 23.587319617807196, + "learning_rate": 5.904775848647703e-06, + "loss": 0.13724021911621093, + "step": 18600 + }, + { + "epoch": 0.16087193366248453, + "grad_norm": 1.9590037868716783, + "learning_rate": 5.904724918741756e-06, + "loss": 0.07200927734375, + "step": 18605 + }, + { + "epoch": 0.16091516718402782, + "grad_norm": 2.5000462802736174, + "learning_rate": 5.904673975439475e-06, + "loss": 0.4175140380859375, + "step": 18610 + }, + { + "epoch": 0.16095840070557108, + "grad_norm": 6.2185234927631505, + "learning_rate": 5.904623018741094e-06, + "loss": 0.28305892944335936, + "step": 18615 + }, + { + "epoch": 0.16100163422711433, + "grad_norm": 1.0534397716965818, + "learning_rate": 5.904572048646849e-06, + "loss": 0.310772705078125, + "step": 18620 + }, + { + "epoch": 0.1610448677486576, + "grad_norm": 1.678044778015022, + "learning_rate": 5.9045210651569735e-06, + "loss": 0.07275390625, + "step": 18625 + }, + { + "epoch": 0.16108810127020087, + "grad_norm": 10.245220194701366, + "learning_rate": 5.904470068271704e-06, + "loss": 0.23126220703125, + "step": 18630 + }, + { + "epoch": 0.16113133479174413, + "grad_norm": 13.894916003197833, + "learning_rate": 5.904419057991275e-06, + "loss": 0.1725006103515625, + "step": 18635 + }, + { + "epoch": 0.1611745683132874, + "grad_norm": 6.430157248324974, + "learning_rate": 5.904368034315922e-06, + "loss": 0.45040283203125, + "step": 18640 + }, + { + "epoch": 0.16121780183483064, + "grad_norm": 5.328479438243145, + "learning_rate": 5.904316997245879e-06, + "loss": 0.09196319580078124, + "step": 18645 + }, + { + "epoch": 0.16126103535637393, + "grad_norm": 8.179127227902773, + "learning_rate": 5.904265946781385e-06, + "loss": 0.520001220703125, + "step": 18650 + }, + { + "epoch": 0.16130426887791718, + "grad_norm": 26.281562611971324, + "learning_rate": 5.904214882922672e-06, + "loss": 0.270147705078125, + "step": 18655 + }, + { + "epoch": 0.16134750239946044, + "grad_norm": 43.232538743312304, + "learning_rate": 5.904163805669976e-06, + "loss": 0.4746734619140625, + "step": 18660 + }, + { + "epoch": 0.1613907359210037, + "grad_norm": 2.7702722165259175, + "learning_rate": 5.9041127150235335e-06, + "loss": 0.11904220581054688, + "step": 18665 + }, + { + "epoch": 0.16143396944254698, + "grad_norm": 4.562017344582694, + "learning_rate": 5.90406161098358e-06, + "loss": 0.16640625, + "step": 18670 + }, + { + "epoch": 0.16147720296409024, + "grad_norm": 23.59503925032054, + "learning_rate": 5.90401049355035e-06, + "loss": 0.10806427001953126, + "step": 18675 + }, + { + "epoch": 0.1615204364856335, + "grad_norm": 3.0322323048376996, + "learning_rate": 5.903959362724081e-06, + "loss": 0.2360980987548828, + "step": 18680 + }, + { + "epoch": 0.16156367000717675, + "grad_norm": 3.3516180507446167, + "learning_rate": 5.903908218505009e-06, + "loss": 0.039910888671875, + "step": 18685 + }, + { + "epoch": 0.16160690352872004, + "grad_norm": 3.6181948537493978, + "learning_rate": 5.903857060893367e-06, + "loss": 0.168316650390625, + "step": 18690 + }, + { + "epoch": 0.1616501370502633, + "grad_norm": 12.754178809350599, + "learning_rate": 5.9038058898893945e-06, + "loss": 0.4773719787597656, + "step": 18695 + }, + { + "epoch": 0.16169337057180655, + "grad_norm": 26.830126854998312, + "learning_rate": 5.903754705493325e-06, + "loss": 0.5480777740478515, + "step": 18700 + }, + { + "epoch": 0.1617366040933498, + "grad_norm": 24.938053720601953, + "learning_rate": 5.903703507705395e-06, + "loss": 0.38968505859375, + "step": 18705 + }, + { + "epoch": 0.1617798376148931, + "grad_norm": 5.314347874457476, + "learning_rate": 5.903652296525842e-06, + "loss": 0.05373382568359375, + "step": 18710 + }, + { + "epoch": 0.16182307113643635, + "grad_norm": 32.2212790348636, + "learning_rate": 5.9036010719549e-06, + "loss": 0.34593048095703127, + "step": 18715 + }, + { + "epoch": 0.1618663046579796, + "grad_norm": 9.405365531685318, + "learning_rate": 5.9035498339928075e-06, + "loss": 0.06060104370117188, + "step": 18720 + }, + { + "epoch": 0.1619095381795229, + "grad_norm": 7.1009601327563345, + "learning_rate": 5.903498582639799e-06, + "loss": 0.051263427734375, + "step": 18725 + }, + { + "epoch": 0.16195277170106614, + "grad_norm": 2.599471886802718, + "learning_rate": 5.903447317896112e-06, + "loss": 0.147869873046875, + "step": 18730 + }, + { + "epoch": 0.1619960052226094, + "grad_norm": 40.4378846752519, + "learning_rate": 5.903396039761983e-06, + "loss": 0.14150772094726563, + "step": 18735 + }, + { + "epoch": 0.16203923874415266, + "grad_norm": 7.301575386153703, + "learning_rate": 5.903344748237647e-06, + "loss": 0.31187744140625, + "step": 18740 + }, + { + "epoch": 0.16208247226569594, + "grad_norm": 3.702838876575185, + "learning_rate": 5.903293443323342e-06, + "loss": 0.18156280517578124, + "step": 18745 + }, + { + "epoch": 0.1621257057872392, + "grad_norm": 49.25266626811795, + "learning_rate": 5.903242125019304e-06, + "loss": 0.3693023681640625, + "step": 18750 + }, + { + "epoch": 0.16216893930878246, + "grad_norm": 45.270990418068386, + "learning_rate": 5.90319079332577e-06, + "loss": 0.9804832458496093, + "step": 18755 + }, + { + "epoch": 0.1622121728303257, + "grad_norm": 15.045462688451064, + "learning_rate": 5.903139448242977e-06, + "loss": 0.1084747314453125, + "step": 18760 + }, + { + "epoch": 0.162255406351869, + "grad_norm": 35.63607590722256, + "learning_rate": 5.9030880897711606e-06, + "loss": 0.19584236145019532, + "step": 18765 + }, + { + "epoch": 0.16229863987341225, + "grad_norm": 35.043527331418545, + "learning_rate": 5.903036717910559e-06, + "loss": 0.139263916015625, + "step": 18770 + }, + { + "epoch": 0.1623418733949555, + "grad_norm": 16.855923428804143, + "learning_rate": 5.902985332661409e-06, + "loss": 0.277874755859375, + "step": 18775 + }, + { + "epoch": 0.16238510691649877, + "grad_norm": 37.71551690484471, + "learning_rate": 5.902933934023946e-06, + "loss": 0.21221542358398438, + "step": 18780 + }, + { + "epoch": 0.16242834043804205, + "grad_norm": 4.177262000047362, + "learning_rate": 5.902882521998408e-06, + "loss": 0.07234344482421876, + "step": 18785 + }, + { + "epoch": 0.1624715739595853, + "grad_norm": 1.6747110051525584, + "learning_rate": 5.9028310965850336e-06, + "loss": 0.1101226806640625, + "step": 18790 + }, + { + "epoch": 0.16251480748112856, + "grad_norm": 73.8027155649233, + "learning_rate": 5.902779657784058e-06, + "loss": 0.223590087890625, + "step": 18795 + }, + { + "epoch": 0.16255804100267182, + "grad_norm": 23.00251460182629, + "learning_rate": 5.90272820559572e-06, + "loss": 0.0826507568359375, + "step": 18800 + }, + { + "epoch": 0.1626012745242151, + "grad_norm": 8.010275398279676, + "learning_rate": 5.902676740020254e-06, + "loss": 0.0927734375, + "step": 18805 + }, + { + "epoch": 0.16264450804575836, + "grad_norm": 5.889790039075263, + "learning_rate": 5.9026252610579016e-06, + "loss": 0.09942474365234374, + "step": 18810 + }, + { + "epoch": 0.16268774156730162, + "grad_norm": 8.243072658233476, + "learning_rate": 5.902573768708897e-06, + "loss": 0.33037109375, + "step": 18815 + }, + { + "epoch": 0.16273097508884488, + "grad_norm": 35.46648751627148, + "learning_rate": 5.902522262973479e-06, + "loss": 0.8901134490966797, + "step": 18820 + }, + { + "epoch": 0.16277420861038816, + "grad_norm": 3.86676322113661, + "learning_rate": 5.902470743851884e-06, + "loss": 0.04681396484375, + "step": 18825 + }, + { + "epoch": 0.16281744213193142, + "grad_norm": 18.33559002039148, + "learning_rate": 5.902419211344351e-06, + "loss": 0.242047119140625, + "step": 18830 + }, + { + "epoch": 0.16286067565347467, + "grad_norm": 23.18166844751268, + "learning_rate": 5.9023676654511166e-06, + "loss": 0.16912841796875, + "step": 18835 + }, + { + "epoch": 0.16290390917501793, + "grad_norm": 6.82568972240715, + "learning_rate": 5.9023161061724195e-06, + "loss": 0.2438812255859375, + "step": 18840 + }, + { + "epoch": 0.16294714269656121, + "grad_norm": 3.485931058712672, + "learning_rate": 5.902264533508497e-06, + "loss": 0.52874755859375, + "step": 18845 + }, + { + "epoch": 0.16299037621810447, + "grad_norm": 18.44537001220139, + "learning_rate": 5.902212947459586e-06, + "loss": 0.269268798828125, + "step": 18850 + }, + { + "epoch": 0.16303360973964773, + "grad_norm": 2.761223491380943, + "learning_rate": 5.902161348025927e-06, + "loss": 0.28809814453125, + "step": 18855 + }, + { + "epoch": 0.16307684326119098, + "grad_norm": 6.503025162539405, + "learning_rate": 5.902109735207755e-06, + "loss": 0.541363525390625, + "step": 18860 + }, + { + "epoch": 0.16312007678273427, + "grad_norm": 1.053819660562763, + "learning_rate": 5.902058109005309e-06, + "loss": 0.2604248046875, + "step": 18865 + }, + { + "epoch": 0.16316331030427753, + "grad_norm": 11.597741941873934, + "learning_rate": 5.902006469418829e-06, + "loss": 0.20680084228515624, + "step": 18870 + }, + { + "epoch": 0.16320654382582078, + "grad_norm": 2.6281014709608383, + "learning_rate": 5.9019548164485505e-06, + "loss": 0.05943756103515625, + "step": 18875 + }, + { + "epoch": 0.16324977734736404, + "grad_norm": 3.349779465068383, + "learning_rate": 5.901903150094713e-06, + "loss": 0.4902801513671875, + "step": 18880 + }, + { + "epoch": 0.16329301086890732, + "grad_norm": 6.097170309413962, + "learning_rate": 5.901851470357554e-06, + "loss": 0.17288360595703126, + "step": 18885 + }, + { + "epoch": 0.16333624439045058, + "grad_norm": 4.410046299761464, + "learning_rate": 5.9017997772373136e-06, + "loss": 0.66436767578125, + "step": 18890 + }, + { + "epoch": 0.16337947791199384, + "grad_norm": 5.687114033343565, + "learning_rate": 5.901748070734228e-06, + "loss": 0.15806427001953124, + "step": 18895 + }, + { + "epoch": 0.16342271143353712, + "grad_norm": 8.849739017595393, + "learning_rate": 5.9016963508485376e-06, + "loss": 0.1301544189453125, + "step": 18900 + }, + { + "epoch": 0.16346594495508038, + "grad_norm": 34.99809264458316, + "learning_rate": 5.901644617580479e-06, + "loss": 0.448779296875, + "step": 18905 + }, + { + "epoch": 0.16350917847662363, + "grad_norm": 7.385394979237893, + "learning_rate": 5.901592870930292e-06, + "loss": 0.1525390625, + "step": 18910 + }, + { + "epoch": 0.1635524119981669, + "grad_norm": 2.8184237318927776, + "learning_rate": 5.901541110898216e-06, + "loss": 0.1502300262451172, + "step": 18915 + }, + { + "epoch": 0.16359564551971018, + "grad_norm": 15.45230227061373, + "learning_rate": 5.901489337484488e-06, + "loss": 0.21883373260498046, + "step": 18920 + }, + { + "epoch": 0.16363887904125343, + "grad_norm": 25.996770363224833, + "learning_rate": 5.9014375506893484e-06, + "loss": 0.19451522827148438, + "step": 18925 + }, + { + "epoch": 0.1636821125627967, + "grad_norm": 5.270733200779605, + "learning_rate": 5.901385750513034e-06, + "loss": 0.41553955078125, + "step": 18930 + }, + { + "epoch": 0.16372534608433995, + "grad_norm": 10.837592632813255, + "learning_rate": 5.901333936955786e-06, + "loss": 0.0558349609375, + "step": 18935 + }, + { + "epoch": 0.16376857960588323, + "grad_norm": 10.238064007994176, + "learning_rate": 5.901282110017841e-06, + "loss": 0.3014404296875, + "step": 18940 + }, + { + "epoch": 0.1638118131274265, + "grad_norm": 17.32012172131748, + "learning_rate": 5.901230269699441e-06, + "loss": 0.40297927856445315, + "step": 18945 + }, + { + "epoch": 0.16385504664896974, + "grad_norm": 8.150987264580678, + "learning_rate": 5.901178416000822e-06, + "loss": 0.18154296875, + "step": 18950 + }, + { + "epoch": 0.163898280170513, + "grad_norm": 2.579732144783236, + "learning_rate": 5.901126548922224e-06, + "loss": 0.0950408935546875, + "step": 18955 + }, + { + "epoch": 0.16394151369205628, + "grad_norm": 11.741748190890556, + "learning_rate": 5.901074668463888e-06, + "loss": 0.149285888671875, + "step": 18960 + }, + { + "epoch": 0.16398474721359954, + "grad_norm": 51.3689311975751, + "learning_rate": 5.901022774626052e-06, + "loss": 0.2657806396484375, + "step": 18965 + }, + { + "epoch": 0.1640279807351428, + "grad_norm": 3.650246819697637, + "learning_rate": 5.9009708674089545e-06, + "loss": 0.1423614501953125, + "step": 18970 + }, + { + "epoch": 0.16407121425668605, + "grad_norm": 4.415258118830975, + "learning_rate": 5.900918946812836e-06, + "loss": 0.0397979736328125, + "step": 18975 + }, + { + "epoch": 0.16411444777822934, + "grad_norm": 11.508197761964885, + "learning_rate": 5.900867012837936e-06, + "loss": 0.07921142578125, + "step": 18980 + }, + { + "epoch": 0.1641576812997726, + "grad_norm": 35.82977200591129, + "learning_rate": 5.900815065484493e-06, + "loss": 0.1475128173828125, + "step": 18985 + }, + { + "epoch": 0.16420091482131585, + "grad_norm": 1.6487634098463153, + "learning_rate": 5.9007631047527475e-06, + "loss": 0.1587066650390625, + "step": 18990 + }, + { + "epoch": 0.1642441483428591, + "grad_norm": 0.7965982790180239, + "learning_rate": 5.900711130642939e-06, + "loss": 0.08021697998046876, + "step": 18995 + }, + { + "epoch": 0.1642873818644024, + "grad_norm": 2.9022229581132457, + "learning_rate": 5.900659143155307e-06, + "loss": 0.1721466064453125, + "step": 19000 + }, + { + "epoch": 0.16433061538594565, + "grad_norm": 38.90086058747275, + "learning_rate": 5.900607142290092e-06, + "loss": 0.3516845703125, + "step": 19005 + }, + { + "epoch": 0.1643738489074889, + "grad_norm": 12.032579403938835, + "learning_rate": 5.900555128047532e-06, + "loss": 0.2647796630859375, + "step": 19010 + }, + { + "epoch": 0.16441708242903216, + "grad_norm": 4.581623221668638, + "learning_rate": 5.900503100427869e-06, + "loss": 0.41943359375, + "step": 19015 + }, + { + "epoch": 0.16446031595057545, + "grad_norm": 14.135488704317552, + "learning_rate": 5.900451059431341e-06, + "loss": 0.57740478515625, + "step": 19020 + }, + { + "epoch": 0.1645035494721187, + "grad_norm": 6.349813895721785, + "learning_rate": 5.9003990050581894e-06, + "loss": 0.0848876953125, + "step": 19025 + }, + { + "epoch": 0.16454678299366196, + "grad_norm": 13.328793385521738, + "learning_rate": 5.900346937308654e-06, + "loss": 0.11121826171875, + "step": 19030 + }, + { + "epoch": 0.16459001651520522, + "grad_norm": 22.399481894238782, + "learning_rate": 5.900294856182975e-06, + "loss": 0.17057037353515625, + "step": 19035 + }, + { + "epoch": 0.1646332500367485, + "grad_norm": 26.744323394475792, + "learning_rate": 5.9002427616813925e-06, + "loss": 0.1090057373046875, + "step": 19040 + }, + { + "epoch": 0.16467648355829176, + "grad_norm": 39.85748205329972, + "learning_rate": 5.900190653804146e-06, + "loss": 0.26198348999023435, + "step": 19045 + }, + { + "epoch": 0.16471971707983502, + "grad_norm": 43.39851582184366, + "learning_rate": 5.900138532551478e-06, + "loss": 0.3467041015625, + "step": 19050 + }, + { + "epoch": 0.16476295060137827, + "grad_norm": 2.3366463077426913, + "learning_rate": 5.900086397923625e-06, + "loss": 0.07329254150390625, + "step": 19055 + }, + { + "epoch": 0.16480618412292156, + "grad_norm": 43.48863028251151, + "learning_rate": 5.900034249920831e-06, + "loss": 0.37965087890625, + "step": 19060 + }, + { + "epoch": 0.1648494176444648, + "grad_norm": 89.09609833540499, + "learning_rate": 5.899982088543336e-06, + "loss": 0.17330322265625, + "step": 19065 + }, + { + "epoch": 0.16489265116600807, + "grad_norm": 1.2973468459321391, + "learning_rate": 5.899929913791378e-06, + "loss": 0.08460693359375, + "step": 19070 + }, + { + "epoch": 0.16493588468755133, + "grad_norm": 2.328918533748567, + "learning_rate": 5.899877725665201e-06, + "loss": 0.10787353515625, + "step": 19075 + }, + { + "epoch": 0.1649791182090946, + "grad_norm": 19.08958032587743, + "learning_rate": 5.899825524165044e-06, + "loss": 0.23640060424804688, + "step": 19080 + }, + { + "epoch": 0.16502235173063787, + "grad_norm": 0.29323538322245407, + "learning_rate": 5.899773309291148e-06, + "loss": 0.30889739990234377, + "step": 19085 + }, + { + "epoch": 0.16506558525218112, + "grad_norm": 3.0776732837818224, + "learning_rate": 5.899721081043753e-06, + "loss": 0.22236099243164062, + "step": 19090 + }, + { + "epoch": 0.1651088187737244, + "grad_norm": 6.648730652016332, + "learning_rate": 5.899668839423101e-06, + "loss": 0.18482666015625, + "step": 19095 + }, + { + "epoch": 0.16515205229526767, + "grad_norm": 11.880603372899087, + "learning_rate": 5.899616584429433e-06, + "loss": 0.09363250732421875, + "step": 19100 + }, + { + "epoch": 0.16519528581681092, + "grad_norm": 43.99133638877014, + "learning_rate": 5.899564316062989e-06, + "loss": 0.5501708984375, + "step": 19105 + }, + { + "epoch": 0.16523851933835418, + "grad_norm": 9.584376831289598, + "learning_rate": 5.899512034324011e-06, + "loss": 0.142510986328125, + "step": 19110 + }, + { + "epoch": 0.16528175285989746, + "grad_norm": 28.38013884271316, + "learning_rate": 5.899459739212739e-06, + "loss": 0.43656005859375, + "step": 19115 + }, + { + "epoch": 0.16532498638144072, + "grad_norm": 7.895424538755779, + "learning_rate": 5.899407430729415e-06, + "loss": 0.04764251708984375, + "step": 19120 + }, + { + "epoch": 0.16536821990298398, + "grad_norm": 21.911195778674944, + "learning_rate": 5.8993551088742795e-06, + "loss": 0.181610107421875, + "step": 19125 + }, + { + "epoch": 0.16541145342452723, + "grad_norm": 8.351882723120482, + "learning_rate": 5.899302773647576e-06, + "loss": 0.417449951171875, + "step": 19130 + }, + { + "epoch": 0.16545468694607052, + "grad_norm": 60.156748486612095, + "learning_rate": 5.8992504250495434e-06, + "loss": 0.35106964111328126, + "step": 19135 + }, + { + "epoch": 0.16549792046761377, + "grad_norm": 5.675530528428177, + "learning_rate": 5.8991980630804246e-06, + "loss": 0.09716873168945313, + "step": 19140 + }, + { + "epoch": 0.16554115398915703, + "grad_norm": 5.937177796903188, + "learning_rate": 5.89914568774046e-06, + "loss": 0.19553375244140625, + "step": 19145 + }, + { + "epoch": 0.1655843875107003, + "grad_norm": 5.007907287941078, + "learning_rate": 5.899093299029892e-06, + "loss": 0.35116424560546877, + "step": 19150 + }, + { + "epoch": 0.16562762103224357, + "grad_norm": 14.02408927878511, + "learning_rate": 5.899040896948961e-06, + "loss": 0.3251708984375, + "step": 19155 + }, + { + "epoch": 0.16567085455378683, + "grad_norm": 3.4224752911417577, + "learning_rate": 5.898988481497911e-06, + "loss": 0.15628509521484374, + "step": 19160 + }, + { + "epoch": 0.16571408807533008, + "grad_norm": 1.972912634453489, + "learning_rate": 5.898936052676981e-06, + "loss": 0.19596176147460936, + "step": 19165 + }, + { + "epoch": 0.16575732159687334, + "grad_norm": 1.0856920449037861, + "learning_rate": 5.898883610486415e-06, + "loss": 0.2515899658203125, + "step": 19170 + }, + { + "epoch": 0.16580055511841663, + "grad_norm": 17.54157651927055, + "learning_rate": 5.898831154926454e-06, + "loss": 0.0561248779296875, + "step": 19175 + }, + { + "epoch": 0.16584378863995988, + "grad_norm": 4.485752430118755, + "learning_rate": 5.898778685997339e-06, + "loss": 0.12508544921875, + "step": 19180 + }, + { + "epoch": 0.16588702216150314, + "grad_norm": 18.49547971179302, + "learning_rate": 5.898726203699313e-06, + "loss": 0.15803356170654298, + "step": 19185 + }, + { + "epoch": 0.1659302556830464, + "grad_norm": 51.56842048995225, + "learning_rate": 5.898673708032618e-06, + "loss": 0.2503684997558594, + "step": 19190 + }, + { + "epoch": 0.16597348920458968, + "grad_norm": 7.2064383748400465, + "learning_rate": 5.898621198997496e-06, + "loss": 0.2399749755859375, + "step": 19195 + }, + { + "epoch": 0.16601672272613294, + "grad_norm": 24.424258461205977, + "learning_rate": 5.8985686765941886e-06, + "loss": 0.4180816650390625, + "step": 19200 + }, + { + "epoch": 0.1660599562476762, + "grad_norm": 7.753371923384376, + "learning_rate": 5.898516140822939e-06, + "loss": 0.18180007934570314, + "step": 19205 + }, + { + "epoch": 0.16610318976921945, + "grad_norm": 68.12848050999968, + "learning_rate": 5.89846359168399e-06, + "loss": 0.20602378845214844, + "step": 19210 + }, + { + "epoch": 0.16614642329076273, + "grad_norm": 9.61246548798304, + "learning_rate": 5.898411029177582e-06, + "loss": 0.2705356597900391, + "step": 19215 + }, + { + "epoch": 0.166189656812306, + "grad_norm": 5.1454396953332, + "learning_rate": 5.8983584533039596e-06, + "loss": 0.19522705078125, + "step": 19220 + }, + { + "epoch": 0.16623289033384925, + "grad_norm": 10.005631467636992, + "learning_rate": 5.898305864063363e-06, + "loss": 0.20526123046875, + "step": 19225 + }, + { + "epoch": 0.1662761238553925, + "grad_norm": 32.66042207487282, + "learning_rate": 5.8982532614560354e-06, + "loss": 0.2162464141845703, + "step": 19230 + }, + { + "epoch": 0.1663193573769358, + "grad_norm": 36.58755081961314, + "learning_rate": 5.898200645482221e-06, + "loss": 0.36578521728515623, + "step": 19235 + }, + { + "epoch": 0.16636259089847905, + "grad_norm": 13.665461897340736, + "learning_rate": 5.898148016142161e-06, + "loss": 0.2876708984375, + "step": 19240 + }, + { + "epoch": 0.1664058244200223, + "grad_norm": 36.658857857452176, + "learning_rate": 5.898095373436099e-06, + "loss": 0.35628585815429686, + "step": 19245 + }, + { + "epoch": 0.16644905794156556, + "grad_norm": 0.7518856187667499, + "learning_rate": 5.898042717364276e-06, + "loss": 0.08272361755371094, + "step": 19250 + }, + { + "epoch": 0.16649229146310884, + "grad_norm": 13.879178788229945, + "learning_rate": 5.897990047926936e-06, + "loss": 0.427880859375, + "step": 19255 + }, + { + "epoch": 0.1665355249846521, + "grad_norm": 20.818970114040496, + "learning_rate": 5.897937365124323e-06, + "loss": 0.08686447143554688, + "step": 19260 + }, + { + "epoch": 0.16657875850619536, + "grad_norm": 2.884344785490106, + "learning_rate": 5.897884668956679e-06, + "loss": 0.13834152221679688, + "step": 19265 + }, + { + "epoch": 0.16662199202773864, + "grad_norm": 8.468525048494707, + "learning_rate": 5.897831959424247e-06, + "loss": 0.0865509033203125, + "step": 19270 + }, + { + "epoch": 0.1666652255492819, + "grad_norm": 1.1657653531874754, + "learning_rate": 5.897779236527269e-06, + "loss": 0.6663490295410156, + "step": 19275 + }, + { + "epoch": 0.16670845907082515, + "grad_norm": 1.307782461695027, + "learning_rate": 5.89772650026599e-06, + "loss": 0.1338409423828125, + "step": 19280 + }, + { + "epoch": 0.1667516925923684, + "grad_norm": 188.57982853956892, + "learning_rate": 5.897673750640651e-06, + "loss": 0.4425384521484375, + "step": 19285 + }, + { + "epoch": 0.1667949261139117, + "grad_norm": 3.763323276385562, + "learning_rate": 5.897620987651499e-06, + "loss": 0.2185791015625, + "step": 19290 + }, + { + "epoch": 0.16683815963545495, + "grad_norm": 20.286638742462266, + "learning_rate": 5.897568211298773e-06, + "loss": 0.3656787872314453, + "step": 19295 + }, + { + "epoch": 0.1668813931569982, + "grad_norm": 5.594589155966525, + "learning_rate": 5.8975154215827195e-06, + "loss": 0.10850982666015625, + "step": 19300 + }, + { + "epoch": 0.16692462667854147, + "grad_norm": 29.36288765939617, + "learning_rate": 5.897462618503581e-06, + "loss": 0.10019073486328126, + "step": 19305 + }, + { + "epoch": 0.16696786020008475, + "grad_norm": 11.86653531378857, + "learning_rate": 5.897409802061601e-06, + "loss": 0.08846282958984375, + "step": 19310 + }, + { + "epoch": 0.167011093721628, + "grad_norm": 32.352584790923345, + "learning_rate": 5.897356972257023e-06, + "loss": 0.9787303924560546, + "step": 19315 + }, + { + "epoch": 0.16705432724317126, + "grad_norm": 1.7616526537750532, + "learning_rate": 5.89730412909009e-06, + "loss": 0.12905807495117189, + "step": 19320 + }, + { + "epoch": 0.16709756076471452, + "grad_norm": 19.469739548963027, + "learning_rate": 5.897251272561046e-06, + "loss": 0.2224395751953125, + "step": 19325 + }, + { + "epoch": 0.1671407942862578, + "grad_norm": 11.150543240209192, + "learning_rate": 5.897198402670136e-06, + "loss": 0.23929901123046876, + "step": 19330 + }, + { + "epoch": 0.16718402780780106, + "grad_norm": 9.38757754695777, + "learning_rate": 5.897145519417604e-06, + "loss": 0.32874755859375, + "step": 19335 + }, + { + "epoch": 0.16722726132934432, + "grad_norm": 9.066576732989397, + "learning_rate": 5.897092622803691e-06, + "loss": 0.2438812255859375, + "step": 19340 + }, + { + "epoch": 0.16727049485088757, + "grad_norm": 7.881524238290179, + "learning_rate": 5.8970397128286435e-06, + "loss": 0.37751922607421873, + "step": 19345 + }, + { + "epoch": 0.16731372837243086, + "grad_norm": 34.00039971125765, + "learning_rate": 5.8969867894927045e-06, + "loss": 0.15836639404296876, + "step": 19350 + }, + { + "epoch": 0.16735696189397412, + "grad_norm": 22.06226156144979, + "learning_rate": 5.896933852796119e-06, + "loss": 0.3908203125, + "step": 19355 + }, + { + "epoch": 0.16740019541551737, + "grad_norm": 25.834488941222887, + "learning_rate": 5.89688090273913e-06, + "loss": 0.09736785888671876, + "step": 19360 + }, + { + "epoch": 0.16744342893706063, + "grad_norm": 12.796412988457094, + "learning_rate": 5.8968279393219825e-06, + "loss": 0.15099334716796875, + "step": 19365 + }, + { + "epoch": 0.1674866624586039, + "grad_norm": 13.955381531821482, + "learning_rate": 5.896774962544921e-06, + "loss": 0.0803375244140625, + "step": 19370 + }, + { + "epoch": 0.16752989598014717, + "grad_norm": 22.631998606954923, + "learning_rate": 5.896721972408189e-06, + "loss": 0.1865264892578125, + "step": 19375 + }, + { + "epoch": 0.16757312950169043, + "grad_norm": 13.456621063870001, + "learning_rate": 5.896668968912031e-06, + "loss": 0.31639862060546875, + "step": 19380 + }, + { + "epoch": 0.16761636302323368, + "grad_norm": 5.220213134554717, + "learning_rate": 5.896615952056692e-06, + "loss": 0.22945556640625, + "step": 19385 + }, + { + "epoch": 0.16765959654477697, + "grad_norm": 44.0793474963272, + "learning_rate": 5.896562921842415e-06, + "loss": 0.2299633026123047, + "step": 19390 + }, + { + "epoch": 0.16770283006632022, + "grad_norm": 5.674454174797702, + "learning_rate": 5.896509878269447e-06, + "loss": 0.1108123779296875, + "step": 19395 + }, + { + "epoch": 0.16774606358786348, + "grad_norm": 3.9467394282909285, + "learning_rate": 5.8964568213380315e-06, + "loss": 0.34472808837890623, + "step": 19400 + }, + { + "epoch": 0.16778929710940674, + "grad_norm": 1.0518586899863718, + "learning_rate": 5.896403751048413e-06, + "loss": 0.31759033203125, + "step": 19405 + }, + { + "epoch": 0.16783253063095002, + "grad_norm": 14.19740863214449, + "learning_rate": 5.896350667400836e-06, + "loss": 0.4885162353515625, + "step": 19410 + }, + { + "epoch": 0.16787576415249328, + "grad_norm": 2.681120440204562, + "learning_rate": 5.896297570395545e-06, + "loss": 0.35957984924316405, + "step": 19415 + }, + { + "epoch": 0.16791899767403654, + "grad_norm": 5.206996727451824, + "learning_rate": 5.896244460032787e-06, + "loss": 0.0910919189453125, + "step": 19420 + }, + { + "epoch": 0.1679622311955798, + "grad_norm": 29.961466731957955, + "learning_rate": 5.896191336312804e-06, + "loss": 0.299029541015625, + "step": 19425 + }, + { + "epoch": 0.16800546471712308, + "grad_norm": 3.862629379530116, + "learning_rate": 5.896138199235843e-06, + "loss": 0.07808151245117187, + "step": 19430 + }, + { + "epoch": 0.16804869823866633, + "grad_norm": 10.74587296110412, + "learning_rate": 5.896085048802149e-06, + "loss": 0.5592864990234375, + "step": 19435 + }, + { + "epoch": 0.1680919317602096, + "grad_norm": 22.568986527445183, + "learning_rate": 5.896031885011966e-06, + "loss": 0.306707763671875, + "step": 19440 + }, + { + "epoch": 0.16813516528175285, + "grad_norm": 15.694651873033127, + "learning_rate": 5.89597870786554e-06, + "loss": 0.2331939697265625, + "step": 19445 + }, + { + "epoch": 0.16817839880329613, + "grad_norm": 19.841620591408994, + "learning_rate": 5.895925517363117e-06, + "loss": 0.1643829345703125, + "step": 19450 + }, + { + "epoch": 0.1682216323248394, + "grad_norm": 25.838247583049817, + "learning_rate": 5.8958723135049405e-06, + "loss": 0.193280029296875, + "step": 19455 + }, + { + "epoch": 0.16826486584638264, + "grad_norm": 38.401769461460596, + "learning_rate": 5.895819096291257e-06, + "loss": 0.3315765380859375, + "step": 19460 + }, + { + "epoch": 0.16830809936792593, + "grad_norm": 3.1398576315881765, + "learning_rate": 5.895765865722311e-06, + "loss": 0.19241943359375, + "step": 19465 + }, + { + "epoch": 0.16835133288946919, + "grad_norm": 15.366863174204907, + "learning_rate": 5.89571262179835e-06, + "loss": 0.31494140625, + "step": 19470 + }, + { + "epoch": 0.16839456641101244, + "grad_norm": 20.90745854197201, + "learning_rate": 5.895659364519617e-06, + "loss": 0.23389434814453125, + "step": 19475 + }, + { + "epoch": 0.1684377999325557, + "grad_norm": 17.276786804918306, + "learning_rate": 5.895606093886359e-06, + "loss": 0.23475418090820313, + "step": 19480 + }, + { + "epoch": 0.16848103345409898, + "grad_norm": 7.734017482620451, + "learning_rate": 5.895552809898822e-06, + "loss": 0.1226165771484375, + "step": 19485 + }, + { + "epoch": 0.16852426697564224, + "grad_norm": 7.144563792981767, + "learning_rate": 5.895499512557252e-06, + "loss": 0.2722076416015625, + "step": 19490 + }, + { + "epoch": 0.1685675004971855, + "grad_norm": 17.000854769731745, + "learning_rate": 5.895446201861893e-06, + "loss": 0.5630828857421875, + "step": 19495 + }, + { + "epoch": 0.16861073401872875, + "grad_norm": 38.804084750774635, + "learning_rate": 5.895392877812993e-06, + "loss": 0.31044921875, + "step": 19500 + }, + { + "epoch": 0.16865396754027204, + "grad_norm": 4.881009141512086, + "learning_rate": 5.895339540410796e-06, + "loss": 0.178778076171875, + "step": 19505 + }, + { + "epoch": 0.1686972010618153, + "grad_norm": 3.1467456135799554, + "learning_rate": 5.895286189655549e-06, + "loss": 0.093377685546875, + "step": 19510 + }, + { + "epoch": 0.16874043458335855, + "grad_norm": 7.95814649381515, + "learning_rate": 5.895232825547498e-06, + "loss": 0.1938262939453125, + "step": 19515 + }, + { + "epoch": 0.1687836681049018, + "grad_norm": 0.7293668603923557, + "learning_rate": 5.895179448086889e-06, + "loss": 0.0698760986328125, + "step": 19520 + }, + { + "epoch": 0.1688269016264451, + "grad_norm": 6.7929026696332615, + "learning_rate": 5.895126057273968e-06, + "loss": 0.15612945556640626, + "step": 19525 + }, + { + "epoch": 0.16887013514798835, + "grad_norm": 34.39649602917991, + "learning_rate": 5.895072653108982e-06, + "loss": 0.42281494140625, + "step": 19530 + }, + { + "epoch": 0.1689133686695316, + "grad_norm": 9.441877794401243, + "learning_rate": 5.895019235592177e-06, + "loss": 0.12692413330078126, + "step": 19535 + }, + { + "epoch": 0.16895660219107486, + "grad_norm": 27.98456415645856, + "learning_rate": 5.8949658047237985e-06, + "loss": 0.17752227783203126, + "step": 19540 + }, + { + "epoch": 0.16899983571261815, + "grad_norm": 1.277368733792755, + "learning_rate": 5.894912360504093e-06, + "loss": 0.26366310119628905, + "step": 19545 + }, + { + "epoch": 0.1690430692341614, + "grad_norm": 7.585655076845845, + "learning_rate": 5.894858902933308e-06, + "loss": 0.25234375, + "step": 19550 + }, + { + "epoch": 0.16908630275570466, + "grad_norm": 2.3222638196133896, + "learning_rate": 5.89480543201169e-06, + "loss": 0.1207061767578125, + "step": 19555 + }, + { + "epoch": 0.16912953627724792, + "grad_norm": 25.409015337983117, + "learning_rate": 5.894751947739485e-06, + "loss": 0.48464412689208985, + "step": 19560 + }, + { + "epoch": 0.1691727697987912, + "grad_norm": 3.119354691247033, + "learning_rate": 5.894698450116939e-06, + "loss": 0.0377777099609375, + "step": 19565 + }, + { + "epoch": 0.16921600332033446, + "grad_norm": 10.634263221827384, + "learning_rate": 5.8946449391443005e-06, + "loss": 0.194415283203125, + "step": 19570 + }, + { + "epoch": 0.1692592368418777, + "grad_norm": 6.7560315099458235, + "learning_rate": 5.894591414821815e-06, + "loss": 0.29404296875, + "step": 19575 + }, + { + "epoch": 0.16930247036342097, + "grad_norm": 1.6030012683939332, + "learning_rate": 5.894537877149729e-06, + "loss": 0.2408935546875, + "step": 19580 + }, + { + "epoch": 0.16934570388496425, + "grad_norm": 16.783915267978557, + "learning_rate": 5.894484326128291e-06, + "loss": 0.12579116821289063, + "step": 19585 + }, + { + "epoch": 0.1693889374065075, + "grad_norm": 6.777361501753731, + "learning_rate": 5.894430761757747e-06, + "loss": 0.0624420166015625, + "step": 19590 + }, + { + "epoch": 0.16943217092805077, + "grad_norm": 9.980643410876233, + "learning_rate": 5.8943771840383436e-06, + "loss": 0.1752777099609375, + "step": 19595 + }, + { + "epoch": 0.16947540444959402, + "grad_norm": 3.667160699256644, + "learning_rate": 5.894323592970328e-06, + "loss": 0.193487548828125, + "step": 19600 + }, + { + "epoch": 0.1695186379711373, + "grad_norm": 2.822206477408497, + "learning_rate": 5.894269988553949e-06, + "loss": 0.33677978515625, + "step": 19605 + }, + { + "epoch": 0.16956187149268057, + "grad_norm": 4.727399682126478, + "learning_rate": 5.894216370789452e-06, + "loss": 0.35816650390625, + "step": 19610 + }, + { + "epoch": 0.16960510501422382, + "grad_norm": 5.8082122673089405, + "learning_rate": 5.894162739677084e-06, + "loss": 0.30703582763671877, + "step": 19615 + }, + { + "epoch": 0.16964833853576708, + "grad_norm": 41.452169934326214, + "learning_rate": 5.894109095217094e-06, + "loss": 0.10981216430664062, + "step": 19620 + }, + { + "epoch": 0.16969157205731036, + "grad_norm": 4.334936111757978, + "learning_rate": 5.894055437409728e-06, + "loss": 0.115374755859375, + "step": 19625 + }, + { + "epoch": 0.16973480557885362, + "grad_norm": 37.499307684367096, + "learning_rate": 5.894001766255235e-06, + "loss": 0.31040191650390625, + "step": 19630 + }, + { + "epoch": 0.16977803910039688, + "grad_norm": 34.56155642574477, + "learning_rate": 5.893948081753861e-06, + "loss": 0.33275299072265624, + "step": 19635 + }, + { + "epoch": 0.16982127262194016, + "grad_norm": 7.626581048464319, + "learning_rate": 5.893894383905854e-06, + "loss": 0.14148788452148436, + "step": 19640 + }, + { + "epoch": 0.16986450614348342, + "grad_norm": 17.52302682545812, + "learning_rate": 5.893840672711462e-06, + "loss": 0.329119873046875, + "step": 19645 + }, + { + "epoch": 0.16990773966502667, + "grad_norm": 2.018023418263198, + "learning_rate": 5.893786948170933e-06, + "loss": 0.14331207275390626, + "step": 19650 + }, + { + "epoch": 0.16995097318656993, + "grad_norm": 0.6472362984200181, + "learning_rate": 5.893733210284513e-06, + "loss": 0.09834842681884766, + "step": 19655 + }, + { + "epoch": 0.16999420670811322, + "grad_norm": 1.9177685673727909, + "learning_rate": 5.893679459052453e-06, + "loss": 0.175738525390625, + "step": 19660 + }, + { + "epoch": 0.17003744022965647, + "grad_norm": 22.068593947585867, + "learning_rate": 5.893625694474997e-06, + "loss": 0.3316490173339844, + "step": 19665 + }, + { + "epoch": 0.17008067375119973, + "grad_norm": 53.60348482178433, + "learning_rate": 5.893571916552397e-06, + "loss": 0.5905845642089844, + "step": 19670 + }, + { + "epoch": 0.17012390727274299, + "grad_norm": 10.33974168504898, + "learning_rate": 5.893518125284898e-06, + "loss": 0.2245635986328125, + "step": 19675 + }, + { + "epoch": 0.17016714079428627, + "grad_norm": 1.9375732044400178, + "learning_rate": 5.893464320672748e-06, + "loss": 0.29077606201171874, + "step": 19680 + }, + { + "epoch": 0.17021037431582953, + "grad_norm": 0.5158510194262176, + "learning_rate": 5.893410502716198e-06, + "loss": 0.11367301940917969, + "step": 19685 + }, + { + "epoch": 0.17025360783737278, + "grad_norm": 37.84547111469372, + "learning_rate": 5.893356671415493e-06, + "loss": 0.503070068359375, + "step": 19690 + }, + { + "epoch": 0.17029684135891604, + "grad_norm": 18.134543076893326, + "learning_rate": 5.893302826770884e-06, + "loss": 0.15812530517578124, + "step": 19695 + }, + { + "epoch": 0.17034007488045932, + "grad_norm": 10.730128817688128, + "learning_rate": 5.8932489687826164e-06, + "loss": 0.066119384765625, + "step": 19700 + }, + { + "epoch": 0.17038330840200258, + "grad_norm": 1.6199095981442384, + "learning_rate": 5.893195097450942e-06, + "loss": 0.1257537841796875, + "step": 19705 + }, + { + "epoch": 0.17042654192354584, + "grad_norm": 6.88871246769352, + "learning_rate": 5.893141212776106e-06, + "loss": 0.12350502014160156, + "step": 19710 + }, + { + "epoch": 0.1704697754450891, + "grad_norm": 39.722256971073534, + "learning_rate": 5.8930873147583595e-06, + "loss": 0.1284820556640625, + "step": 19715 + }, + { + "epoch": 0.17051300896663238, + "grad_norm": 2.6884570711671225, + "learning_rate": 5.89303340339795e-06, + "loss": 0.39290847778320315, + "step": 19720 + }, + { + "epoch": 0.17055624248817564, + "grad_norm": 6.729292962245727, + "learning_rate": 5.892979478695125e-06, + "loss": 0.21494140625, + "step": 19725 + }, + { + "epoch": 0.1705994760097189, + "grad_norm": 54.990602624943044, + "learning_rate": 5.892925540650135e-06, + "loss": 0.471185302734375, + "step": 19730 + }, + { + "epoch": 0.17064270953126215, + "grad_norm": 5.388858010687356, + "learning_rate": 5.892871589263228e-06, + "loss": 0.12701644897460937, + "step": 19735 + }, + { + "epoch": 0.17068594305280543, + "grad_norm": 18.24116082775071, + "learning_rate": 5.892817624534652e-06, + "loss": 0.17941741943359374, + "step": 19740 + }, + { + "epoch": 0.1707291765743487, + "grad_norm": 26.687708721466983, + "learning_rate": 5.892763646464658e-06, + "loss": 0.33797683715820315, + "step": 19745 + }, + { + "epoch": 0.17077241009589195, + "grad_norm": 16.75600607143391, + "learning_rate": 5.892709655053493e-06, + "loss": 0.12020263671875, + "step": 19750 + }, + { + "epoch": 0.1708156436174352, + "grad_norm": 2.223969628546006, + "learning_rate": 5.892655650301407e-06, + "loss": 0.36429595947265625, + "step": 19755 + }, + { + "epoch": 0.1708588771389785, + "grad_norm": 4.947046158404125, + "learning_rate": 5.892601632208648e-06, + "loss": 0.131854248046875, + "step": 19760 + }, + { + "epoch": 0.17090211066052174, + "grad_norm": 0.7569345982648311, + "learning_rate": 5.892547600775467e-06, + "loss": 0.07259902954101563, + "step": 19765 + }, + { + "epoch": 0.170945344182065, + "grad_norm": 32.509552223092236, + "learning_rate": 5.892493556002111e-06, + "loss": 0.05241069793701172, + "step": 19770 + }, + { + "epoch": 0.17098857770360826, + "grad_norm": 11.317499201746461, + "learning_rate": 5.892439497888831e-06, + "loss": 0.088726806640625, + "step": 19775 + }, + { + "epoch": 0.17103181122515154, + "grad_norm": 1.245135674306318, + "learning_rate": 5.8923854264358755e-06, + "loss": 0.41992530822753904, + "step": 19780 + }, + { + "epoch": 0.1710750447466948, + "grad_norm": 10.152492617247505, + "learning_rate": 5.892331341643493e-06, + "loss": 0.18502197265625, + "step": 19785 + }, + { + "epoch": 0.17111827826823806, + "grad_norm": 17.166490181857455, + "learning_rate": 5.892277243511936e-06, + "loss": 0.721923828125, + "step": 19790 + }, + { + "epoch": 0.1711615117897813, + "grad_norm": 33.65392987160457, + "learning_rate": 5.89222313204145e-06, + "loss": 0.2087371826171875, + "step": 19795 + }, + { + "epoch": 0.1712047453113246, + "grad_norm": 1.4709005742623655, + "learning_rate": 5.892169007232287e-06, + "loss": 0.6455474853515625, + "step": 19800 + }, + { + "epoch": 0.17124797883286785, + "grad_norm": 1.1994359371799097, + "learning_rate": 5.892114869084696e-06, + "loss": 0.374932861328125, + "step": 19805 + }, + { + "epoch": 0.1712912123544111, + "grad_norm": 15.590505242736528, + "learning_rate": 5.892060717598927e-06, + "loss": 0.31952667236328125, + "step": 19810 + }, + { + "epoch": 0.17133444587595437, + "grad_norm": 17.08576517471819, + "learning_rate": 5.8920065527752305e-06, + "loss": 0.2010986328125, + "step": 19815 + }, + { + "epoch": 0.17137767939749765, + "grad_norm": 15.068118586961848, + "learning_rate": 5.891952374613854e-06, + "loss": 0.3114288330078125, + "step": 19820 + }, + { + "epoch": 0.1714209129190409, + "grad_norm": 36.38481905519011, + "learning_rate": 5.891898183115049e-06, + "loss": 0.43634033203125, + "step": 19825 + }, + { + "epoch": 0.17146414644058416, + "grad_norm": 21.88866172805354, + "learning_rate": 5.891843978279065e-06, + "loss": 0.227972412109375, + "step": 19830 + }, + { + "epoch": 0.17150737996212745, + "grad_norm": 0.509476208070799, + "learning_rate": 5.891789760106153e-06, + "loss": 0.10867156982421874, + "step": 19835 + }, + { + "epoch": 0.1715506134836707, + "grad_norm": 5.241900419967077, + "learning_rate": 5.891735528596561e-06, + "loss": 0.558184814453125, + "step": 19840 + }, + { + "epoch": 0.17159384700521396, + "grad_norm": 3.0419650043970314, + "learning_rate": 5.8916812837505405e-06, + "loss": 0.1669147491455078, + "step": 19845 + }, + { + "epoch": 0.17163708052675722, + "grad_norm": 3.5639413834174762, + "learning_rate": 5.891627025568342e-06, + "loss": 0.4244651794433594, + "step": 19850 + }, + { + "epoch": 0.1716803140483005, + "grad_norm": 21.323280927755317, + "learning_rate": 5.891572754050214e-06, + "loss": 0.32811279296875, + "step": 19855 + }, + { + "epoch": 0.17172354756984376, + "grad_norm": 4.014723927783626, + "learning_rate": 5.891518469196409e-06, + "loss": 0.40241241455078125, + "step": 19860 + }, + { + "epoch": 0.17176678109138702, + "grad_norm": 49.98758215382415, + "learning_rate": 5.891464171007176e-06, + "loss": 0.4045654296875, + "step": 19865 + }, + { + "epoch": 0.17181001461293027, + "grad_norm": 12.1350247339622, + "learning_rate": 5.891409859482766e-06, + "loss": 0.24642333984375, + "step": 19870 + }, + { + "epoch": 0.17185324813447356, + "grad_norm": 28.830938415197377, + "learning_rate": 5.891355534623429e-06, + "loss": 0.4011405944824219, + "step": 19875 + }, + { + "epoch": 0.17189648165601681, + "grad_norm": 1.5783449910137946, + "learning_rate": 5.8913011964294156e-06, + "loss": 0.16882524490356446, + "step": 19880 + }, + { + "epoch": 0.17193971517756007, + "grad_norm": 7.10809147548617, + "learning_rate": 5.891246844900977e-06, + "loss": 0.12192230224609375, + "step": 19885 + }, + { + "epoch": 0.17198294869910333, + "grad_norm": 13.055582618762902, + "learning_rate": 5.891192480038362e-06, + "loss": 0.43612060546875, + "step": 19890 + }, + { + "epoch": 0.1720261822206466, + "grad_norm": 1.9428733433579024, + "learning_rate": 5.891138101841825e-06, + "loss": 0.120965576171875, + "step": 19895 + }, + { + "epoch": 0.17206941574218987, + "grad_norm": 3.1644723674752435, + "learning_rate": 5.891083710311613e-06, + "loss": 0.17362060546875, + "step": 19900 + }, + { + "epoch": 0.17211264926373313, + "grad_norm": 21.04963103635032, + "learning_rate": 5.8910293054479795e-06, + "loss": 0.06699371337890625, + "step": 19905 + }, + { + "epoch": 0.17215588278527638, + "grad_norm": 1.33571832924388, + "learning_rate": 5.890974887251173e-06, + "loss": 0.2344146728515625, + "step": 19910 + }, + { + "epoch": 0.17219911630681967, + "grad_norm": 0.8149991680367795, + "learning_rate": 5.890920455721447e-06, + "loss": 0.070843505859375, + "step": 19915 + }, + { + "epoch": 0.17224234982836292, + "grad_norm": 5.272031069297755, + "learning_rate": 5.890866010859051e-06, + "loss": 0.19485092163085938, + "step": 19920 + }, + { + "epoch": 0.17228558334990618, + "grad_norm": 1.2217554345582111, + "learning_rate": 5.890811552664236e-06, + "loss": 0.19954833984375, + "step": 19925 + }, + { + "epoch": 0.17232881687144944, + "grad_norm": 19.52368419539954, + "learning_rate": 5.890757081137253e-06, + "loss": 0.18093490600585938, + "step": 19930 + }, + { + "epoch": 0.17237205039299272, + "grad_norm": 0.452558655446878, + "learning_rate": 5.890702596278354e-06, + "loss": 0.1798919677734375, + "step": 19935 + }, + { + "epoch": 0.17241528391453598, + "grad_norm": 15.63525849227562, + "learning_rate": 5.890648098087791e-06, + "loss": 0.093365478515625, + "step": 19940 + }, + { + "epoch": 0.17245851743607923, + "grad_norm": 3.3204368774762125, + "learning_rate": 5.890593586565814e-06, + "loss": 0.41485061645507815, + "step": 19945 + }, + { + "epoch": 0.1725017509576225, + "grad_norm": 1.8747687158440811, + "learning_rate": 5.890539061712675e-06, + "loss": 0.12036590576171875, + "step": 19950 + }, + { + "epoch": 0.17254498447916577, + "grad_norm": 21.697045610468113, + "learning_rate": 5.890484523528624e-06, + "loss": 0.389013671875, + "step": 19955 + }, + { + "epoch": 0.17258821800070903, + "grad_norm": 15.849203411336694, + "learning_rate": 5.890429972013915e-06, + "loss": 0.1169189453125, + "step": 19960 + }, + { + "epoch": 0.1726314515222523, + "grad_norm": 31.975418483459112, + "learning_rate": 5.890375407168798e-06, + "loss": 0.113433837890625, + "step": 19965 + }, + { + "epoch": 0.17267468504379554, + "grad_norm": 14.84997959983496, + "learning_rate": 5.8903208289935255e-06, + "loss": 0.1435546875, + "step": 19970 + }, + { + "epoch": 0.17271791856533883, + "grad_norm": 0.9311583561834127, + "learning_rate": 5.890266237488349e-06, + "loss": 0.0192047119140625, + "step": 19975 + }, + { + "epoch": 0.17276115208688209, + "grad_norm": 12.15697159274025, + "learning_rate": 5.890211632653519e-06, + "loss": 0.13046875, + "step": 19980 + }, + { + "epoch": 0.17280438560842534, + "grad_norm": 56.652290146766816, + "learning_rate": 5.890157014489288e-06, + "loss": 0.29560089111328125, + "step": 19985 + }, + { + "epoch": 0.1728476191299686, + "grad_norm": 31.355198983369124, + "learning_rate": 5.890102382995909e-06, + "loss": 0.293096923828125, + "step": 19990 + }, + { + "epoch": 0.17289085265151188, + "grad_norm": 10.65941400410722, + "learning_rate": 5.890047738173633e-06, + "loss": 0.2545166015625, + "step": 19995 + }, + { + "epoch": 0.17293408617305514, + "grad_norm": 10.334147063157067, + "learning_rate": 5.889993080022713e-06, + "loss": 0.26416168212890623, + "step": 20000 + }, + { + "epoch": 0.1729773196945984, + "grad_norm": 29.01672391664138, + "learning_rate": 5.889938408543399e-06, + "loss": 0.23399887084960938, + "step": 20005 + }, + { + "epoch": 0.17302055321614165, + "grad_norm": 8.775337955433363, + "learning_rate": 5.889883723735945e-06, + "loss": 0.1027557373046875, + "step": 20010 + }, + { + "epoch": 0.17306378673768494, + "grad_norm": 1.5186144490296956, + "learning_rate": 5.889829025600603e-06, + "loss": 0.07191162109375, + "step": 20015 + }, + { + "epoch": 0.1731070202592282, + "grad_norm": 19.5261009279317, + "learning_rate": 5.889774314137625e-06, + "loss": 0.5007118225097656, + "step": 20020 + }, + { + "epoch": 0.17315025378077145, + "grad_norm": 7.337581616748138, + "learning_rate": 5.889719589347262e-06, + "loss": 0.136065673828125, + "step": 20025 + }, + { + "epoch": 0.17319348730231474, + "grad_norm": 39.54910890164721, + "learning_rate": 5.889664851229768e-06, + "loss": 0.18707122802734374, + "step": 20030 + }, + { + "epoch": 0.173236720823858, + "grad_norm": 7.074381944973889, + "learning_rate": 5.889610099785396e-06, + "loss": 0.077862548828125, + "step": 20035 + }, + { + "epoch": 0.17327995434540125, + "grad_norm": 0.35659424310120846, + "learning_rate": 5.889555335014397e-06, + "loss": 0.19270782470703124, + "step": 20040 + }, + { + "epoch": 0.1733231878669445, + "grad_norm": 28.360372767704575, + "learning_rate": 5.889500556917023e-06, + "loss": 0.2214569091796875, + "step": 20045 + }, + { + "epoch": 0.1733664213884878, + "grad_norm": 2.1369492797739023, + "learning_rate": 5.8894457654935295e-06, + "loss": 0.25305023193359377, + "step": 20050 + }, + { + "epoch": 0.17340965491003105, + "grad_norm": 7.0292632035311895, + "learning_rate": 5.889390960744167e-06, + "loss": 0.25223770141601565, + "step": 20055 + }, + { + "epoch": 0.1734528884315743, + "grad_norm": 9.39818158503394, + "learning_rate": 5.889336142669188e-06, + "loss": 0.278515625, + "step": 20060 + }, + { + "epoch": 0.17349612195311756, + "grad_norm": 0.6856247115702555, + "learning_rate": 5.889281311268847e-06, + "loss": 0.11894302368164063, + "step": 20065 + }, + { + "epoch": 0.17353935547466084, + "grad_norm": 9.570738018788802, + "learning_rate": 5.889226466543395e-06, + "loss": 0.130108642578125, + "step": 20070 + }, + { + "epoch": 0.1735825889962041, + "grad_norm": 0.773335936211954, + "learning_rate": 5.889171608493086e-06, + "loss": 0.0936431884765625, + "step": 20075 + }, + { + "epoch": 0.17362582251774736, + "grad_norm": 10.290049937025012, + "learning_rate": 5.889116737118172e-06, + "loss": 0.5163856506347656, + "step": 20080 + }, + { + "epoch": 0.17366905603929061, + "grad_norm": 21.422497194373125, + "learning_rate": 5.889061852418908e-06, + "loss": 0.4447998046875, + "step": 20085 + }, + { + "epoch": 0.1737122895608339, + "grad_norm": 8.070151030474099, + "learning_rate": 5.889006954395546e-06, + "loss": 0.17216911315917968, + "step": 20090 + }, + { + "epoch": 0.17375552308237716, + "grad_norm": 22.371805810199472, + "learning_rate": 5.88895204304834e-06, + "loss": 0.09418182373046875, + "step": 20095 + }, + { + "epoch": 0.1737987566039204, + "grad_norm": 1.4800653652455713, + "learning_rate": 5.888897118377541e-06, + "loss": 0.1064697265625, + "step": 20100 + }, + { + "epoch": 0.17384199012546367, + "grad_norm": 5.349290099030137, + "learning_rate": 5.888842180383404e-06, + "loss": 0.08854446411132813, + "step": 20105 + }, + { + "epoch": 0.17388522364700695, + "grad_norm": 10.441594033352658, + "learning_rate": 5.8887872290661825e-06, + "loss": 0.23276786804199218, + "step": 20110 + }, + { + "epoch": 0.1739284571685502, + "grad_norm": 1.5502378081080586, + "learning_rate": 5.888732264426129e-06, + "loss": 0.18853073120117186, + "step": 20115 + }, + { + "epoch": 0.17397169069009347, + "grad_norm": 1.748199671805245, + "learning_rate": 5.888677286463499e-06, + "loss": 0.15020523071289063, + "step": 20120 + }, + { + "epoch": 0.17401492421163672, + "grad_norm": 9.550518400153434, + "learning_rate": 5.8886222951785435e-06, + "loss": 0.16634521484375, + "step": 20125 + }, + { + "epoch": 0.17405815773318, + "grad_norm": 13.114797102224593, + "learning_rate": 5.888567290571517e-06, + "loss": 0.13013114929199218, + "step": 20130 + }, + { + "epoch": 0.17410139125472326, + "grad_norm": 13.853162988674166, + "learning_rate": 5.888512272642674e-06, + "loss": 0.08296966552734375, + "step": 20135 + }, + { + "epoch": 0.17414462477626652, + "grad_norm": 18.323477358833745, + "learning_rate": 5.8884572413922676e-06, + "loss": 0.43302001953125, + "step": 20140 + }, + { + "epoch": 0.17418785829780978, + "grad_norm": 4.723464163099276, + "learning_rate": 5.8884021968205515e-06, + "loss": 0.23311920166015626, + "step": 20145 + }, + { + "epoch": 0.17423109181935306, + "grad_norm": 5.170403171375898, + "learning_rate": 5.888347138927779e-06, + "loss": 0.15469894409179688, + "step": 20150 + }, + { + "epoch": 0.17427432534089632, + "grad_norm": 2.437604107909649, + "learning_rate": 5.888292067714206e-06, + "loss": 0.3313568115234375, + "step": 20155 + }, + { + "epoch": 0.17431755886243958, + "grad_norm": 20.522153339024406, + "learning_rate": 5.888236983180084e-06, + "loss": 0.25267333984375, + "step": 20160 + }, + { + "epoch": 0.17436079238398283, + "grad_norm": 3.9832007182066533, + "learning_rate": 5.888181885325669e-06, + "loss": 0.35603790283203124, + "step": 20165 + }, + { + "epoch": 0.17440402590552612, + "grad_norm": 12.530099239387809, + "learning_rate": 5.8881267741512135e-06, + "loss": 0.3307697296142578, + "step": 20170 + }, + { + "epoch": 0.17444725942706937, + "grad_norm": 7.900619155497907, + "learning_rate": 5.8880716496569735e-06, + "loss": 0.131463623046875, + "step": 20175 + }, + { + "epoch": 0.17449049294861263, + "grad_norm": 3.3467645710291793, + "learning_rate": 5.8880165118432015e-06, + "loss": 0.18648681640625, + "step": 20180 + }, + { + "epoch": 0.1745337264701559, + "grad_norm": 31.289121511762882, + "learning_rate": 5.887961360710153e-06, + "loss": 0.18477554321289064, + "step": 20185 + }, + { + "epoch": 0.17457695999169917, + "grad_norm": 23.79635892527612, + "learning_rate": 5.887906196258082e-06, + "loss": 0.142242431640625, + "step": 20190 + }, + { + "epoch": 0.17462019351324243, + "grad_norm": 87.19330239222712, + "learning_rate": 5.887851018487242e-06, + "loss": 0.3265209197998047, + "step": 20195 + }, + { + "epoch": 0.17466342703478568, + "grad_norm": 0.8475927011641179, + "learning_rate": 5.8877958273978895e-06, + "loss": 0.0481353759765625, + "step": 20200 + }, + { + "epoch": 0.17470666055632897, + "grad_norm": 33.02644753829308, + "learning_rate": 5.887740622990277e-06, + "loss": 0.1539520263671875, + "step": 20205 + }, + { + "epoch": 0.17474989407787223, + "grad_norm": 19.322314914195033, + "learning_rate": 5.8876854052646595e-06, + "loss": 0.2625732421875, + "step": 20210 + }, + { + "epoch": 0.17479312759941548, + "grad_norm": 31.272075038299498, + "learning_rate": 5.887630174221293e-06, + "loss": 0.2024810791015625, + "step": 20215 + }, + { + "epoch": 0.17483636112095874, + "grad_norm": 27.11246282669758, + "learning_rate": 5.887574929860431e-06, + "loss": 0.6522071838378907, + "step": 20220 + }, + { + "epoch": 0.17487959464250202, + "grad_norm": 15.82838936161555, + "learning_rate": 5.887519672182328e-06, + "loss": 0.16704788208007812, + "step": 20225 + }, + { + "epoch": 0.17492282816404528, + "grad_norm": 10.580325440456788, + "learning_rate": 5.887464401187241e-06, + "loss": 0.0887054443359375, + "step": 20230 + }, + { + "epoch": 0.17496606168558854, + "grad_norm": 6.526301390105611, + "learning_rate": 5.8874091168754215e-06, + "loss": 0.19377288818359376, + "step": 20235 + }, + { + "epoch": 0.1750092952071318, + "grad_norm": 32.876182667705706, + "learning_rate": 5.887353819247128e-06, + "loss": 0.29461669921875, + "step": 20240 + }, + { + "epoch": 0.17505252872867508, + "grad_norm": 3.2867926738834163, + "learning_rate": 5.887298508302612e-06, + "loss": 0.32408447265625, + "step": 20245 + }, + { + "epoch": 0.17509576225021833, + "grad_norm": 17.186228629161093, + "learning_rate": 5.8872431840421315e-06, + "loss": 0.6325103759765625, + "step": 20250 + }, + { + "epoch": 0.1751389957717616, + "grad_norm": 2.8546983050705736, + "learning_rate": 5.8871878464659404e-06, + "loss": 0.117413330078125, + "step": 20255 + }, + { + "epoch": 0.17518222929330485, + "grad_norm": 0.6373767981038495, + "learning_rate": 5.887132495574294e-06, + "loss": 0.02772674560546875, + "step": 20260 + }, + { + "epoch": 0.17522546281484813, + "grad_norm": 80.16533600210786, + "learning_rate": 5.887077131367447e-06, + "loss": 0.4083892822265625, + "step": 20265 + }, + { + "epoch": 0.1752686963363914, + "grad_norm": 6.831102822662649, + "learning_rate": 5.887021753845656e-06, + "loss": 0.15771484375, + "step": 20270 + }, + { + "epoch": 0.17531192985793465, + "grad_norm": 4.965520672821397, + "learning_rate": 5.886966363009176e-06, + "loss": 0.10999755859375, + "step": 20275 + }, + { + "epoch": 0.1753551633794779, + "grad_norm": 43.9180565218922, + "learning_rate": 5.8869109588582626e-06, + "loss": 0.19210662841796874, + "step": 20280 + }, + { + "epoch": 0.1753983969010212, + "grad_norm": 19.498577513451423, + "learning_rate": 5.8868555413931706e-06, + "loss": 0.174554443359375, + "step": 20285 + }, + { + "epoch": 0.17544163042256444, + "grad_norm": 25.896381673133558, + "learning_rate": 5.8868001106141555e-06, + "loss": 0.30128021240234376, + "step": 20290 + }, + { + "epoch": 0.1754848639441077, + "grad_norm": 6.461584655543459, + "learning_rate": 5.886744666521473e-06, + "loss": 0.099310302734375, + "step": 20295 + }, + { + "epoch": 0.17552809746565096, + "grad_norm": 1.6940691498560332, + "learning_rate": 5.88668920911538e-06, + "loss": 0.16500701904296874, + "step": 20300 + }, + { + "epoch": 0.17557133098719424, + "grad_norm": 5.7675457824732375, + "learning_rate": 5.886633738396131e-06, + "loss": 0.09557952880859374, + "step": 20305 + }, + { + "epoch": 0.1756145645087375, + "grad_norm": 9.951625036569705, + "learning_rate": 5.8865782543639825e-06, + "loss": 0.13250732421875, + "step": 20310 + }, + { + "epoch": 0.17565779803028075, + "grad_norm": 4.05670708232669, + "learning_rate": 5.88652275701919e-06, + "loss": 0.16605758666992188, + "step": 20315 + }, + { + "epoch": 0.175701031551824, + "grad_norm": 2.163912012313471, + "learning_rate": 5.88646724636201e-06, + "loss": 0.1489501953125, + "step": 20320 + }, + { + "epoch": 0.1757442650733673, + "grad_norm": 1.3790907613190602, + "learning_rate": 5.886411722392698e-06, + "loss": 0.0791046142578125, + "step": 20325 + }, + { + "epoch": 0.17578749859491055, + "grad_norm": 27.3710083061107, + "learning_rate": 5.88635618511151e-06, + "loss": 0.19654693603515624, + "step": 20330 + }, + { + "epoch": 0.1758307321164538, + "grad_norm": 6.353349843637689, + "learning_rate": 5.886300634518701e-06, + "loss": 0.15853118896484375, + "step": 20335 + }, + { + "epoch": 0.17587396563799707, + "grad_norm": 6.01687601535223, + "learning_rate": 5.886245070614531e-06, + "loss": 0.047613525390625, + "step": 20340 + }, + { + "epoch": 0.17591719915954035, + "grad_norm": 0.8766272555953475, + "learning_rate": 5.886189493399253e-06, + "loss": 0.05611419677734375, + "step": 20345 + }, + { + "epoch": 0.1759604326810836, + "grad_norm": 1.1545607983383712, + "learning_rate": 5.886133902873124e-06, + "loss": 0.29300365447998045, + "step": 20350 + }, + { + "epoch": 0.17600366620262686, + "grad_norm": 12.664555267656386, + "learning_rate": 5.886078299036399e-06, + "loss": 0.07386322021484375, + "step": 20355 + }, + { + "epoch": 0.17604689972417012, + "grad_norm": 14.377997806313154, + "learning_rate": 5.886022681889338e-06, + "loss": 0.05372467041015625, + "step": 20360 + }, + { + "epoch": 0.1760901332457134, + "grad_norm": 37.40320271666378, + "learning_rate": 5.8859670514321936e-06, + "loss": 0.23583984375, + "step": 20365 + }, + { + "epoch": 0.17613336676725666, + "grad_norm": 4.262133313394518, + "learning_rate": 5.885911407665225e-06, + "loss": 0.23051605224609376, + "step": 20370 + }, + { + "epoch": 0.17617660028879992, + "grad_norm": 36.28018965933038, + "learning_rate": 5.885855750588688e-06, + "loss": 0.44923095703125, + "step": 20375 + }, + { + "epoch": 0.17621983381034317, + "grad_norm": 9.278750354989834, + "learning_rate": 5.88580008020284e-06, + "loss": 0.2690948486328125, + "step": 20380 + }, + { + "epoch": 0.17626306733188646, + "grad_norm": 2.085435310834243, + "learning_rate": 5.885744396507935e-06, + "loss": 0.084326171875, + "step": 20385 + }, + { + "epoch": 0.17630630085342971, + "grad_norm": 0.9681402064102658, + "learning_rate": 5.8856886995042345e-06, + "loss": 0.1080810546875, + "step": 20390 + }, + { + "epoch": 0.17634953437497297, + "grad_norm": 4.221710273742367, + "learning_rate": 5.885632989191991e-06, + "loss": 0.36837158203125, + "step": 20395 + }, + { + "epoch": 0.17639276789651626, + "grad_norm": 20.61061689445707, + "learning_rate": 5.885577265571463e-06, + "loss": 0.182305908203125, + "step": 20400 + }, + { + "epoch": 0.1764360014180595, + "grad_norm": 1.1431655229078754, + "learning_rate": 5.885521528642908e-06, + "loss": 0.1372039794921875, + "step": 20405 + }, + { + "epoch": 0.17647923493960277, + "grad_norm": 16.43895033686461, + "learning_rate": 5.885465778406583e-06, + "loss": 0.451043701171875, + "step": 20410 + }, + { + "epoch": 0.17652246846114603, + "grad_norm": 0.13763876253051271, + "learning_rate": 5.885410014862744e-06, + "loss": 0.791741943359375, + "step": 20415 + }, + { + "epoch": 0.1765657019826893, + "grad_norm": 3.561079523195175, + "learning_rate": 5.88535423801165e-06, + "loss": 0.12928848266601561, + "step": 20420 + }, + { + "epoch": 0.17660893550423257, + "grad_norm": 4.07838142913592, + "learning_rate": 5.885298447853557e-06, + "loss": 0.12570343017578126, + "step": 20425 + }, + { + "epoch": 0.17665216902577582, + "grad_norm": 4.841962529032994, + "learning_rate": 5.885242644388722e-06, + "loss": 0.1229736328125, + "step": 20430 + }, + { + "epoch": 0.17669540254731908, + "grad_norm": 18.028570884559418, + "learning_rate": 5.885186827617403e-06, + "loss": 0.22572021484375, + "step": 20435 + }, + { + "epoch": 0.17673863606886236, + "grad_norm": 9.589380895858849, + "learning_rate": 5.885130997539858e-06, + "loss": 0.07212066650390625, + "step": 20440 + }, + { + "epoch": 0.17678186959040562, + "grad_norm": 14.112530255401255, + "learning_rate": 5.885075154156343e-06, + "loss": 0.2938411712646484, + "step": 20445 + }, + { + "epoch": 0.17682510311194888, + "grad_norm": 2.5036953322739155, + "learning_rate": 5.885019297467116e-06, + "loss": 0.44122161865234377, + "step": 20450 + }, + { + "epoch": 0.17686833663349213, + "grad_norm": 5.78451292852447, + "learning_rate": 5.884963427472436e-06, + "loss": 0.5156623840332031, + "step": 20455 + }, + { + "epoch": 0.17691157015503542, + "grad_norm": 32.31280010558112, + "learning_rate": 5.884907544172559e-06, + "loss": 0.317822265625, + "step": 20460 + }, + { + "epoch": 0.17695480367657868, + "grad_norm": 1.2803834531794915, + "learning_rate": 5.884851647567743e-06, + "loss": 0.267462158203125, + "step": 20465 + }, + { + "epoch": 0.17699803719812193, + "grad_norm": 43.061380284725345, + "learning_rate": 5.884795737658246e-06, + "loss": 0.475115966796875, + "step": 20470 + }, + { + "epoch": 0.1770412707196652, + "grad_norm": 3.6033226752669725, + "learning_rate": 5.884739814444327e-06, + "loss": 0.07650909423828126, + "step": 20475 + }, + { + "epoch": 0.17708450424120847, + "grad_norm": 7.487374084161253, + "learning_rate": 5.884683877926242e-06, + "loss": 0.125006103515625, + "step": 20480 + }, + { + "epoch": 0.17712773776275173, + "grad_norm": 9.476990238837116, + "learning_rate": 5.88462792810425e-06, + "loss": 0.154925537109375, + "step": 20485 + }, + { + "epoch": 0.177170971284295, + "grad_norm": 11.203974681442224, + "learning_rate": 5.88457196497861e-06, + "loss": 0.2774862289428711, + "step": 20490 + }, + { + "epoch": 0.17721420480583824, + "grad_norm": 28.292350758487917, + "learning_rate": 5.884515988549578e-06, + "loss": 0.2847900390625, + "step": 20495 + }, + { + "epoch": 0.17725743832738153, + "grad_norm": 14.007751641215286, + "learning_rate": 5.884459998817412e-06, + "loss": 0.17320556640625, + "step": 20500 + }, + { + "epoch": 0.17730067184892478, + "grad_norm": 13.541582211779385, + "learning_rate": 5.884403995782372e-06, + "loss": 0.60523681640625, + "step": 20505 + }, + { + "epoch": 0.17734390537046804, + "grad_norm": 1.2881012901260187, + "learning_rate": 5.884347979444717e-06, + "loss": 0.1625396728515625, + "step": 20510 + }, + { + "epoch": 0.1773871388920113, + "grad_norm": 2.3837356335420767, + "learning_rate": 5.884291949804703e-06, + "loss": 0.0842254638671875, + "step": 20515 + }, + { + "epoch": 0.17743037241355458, + "grad_norm": 0.1477455655700565, + "learning_rate": 5.8842359068625895e-06, + "loss": 0.13549728393554689, + "step": 20520 + }, + { + "epoch": 0.17747360593509784, + "grad_norm": 28.77200832883564, + "learning_rate": 5.884179850618635e-06, + "loss": 0.30901031494140624, + "step": 20525 + }, + { + "epoch": 0.1775168394566411, + "grad_norm": 25.497010012366744, + "learning_rate": 5.884123781073098e-06, + "loss": 0.0937286376953125, + "step": 20530 + }, + { + "epoch": 0.17756007297818435, + "grad_norm": 6.844864231488217, + "learning_rate": 5.884067698226236e-06, + "loss": 0.19227066040039062, + "step": 20535 + }, + { + "epoch": 0.17760330649972764, + "grad_norm": 3.9844869412927677, + "learning_rate": 5.884011602078309e-06, + "loss": 0.050518798828125, + "step": 20540 + }, + { + "epoch": 0.1776465400212709, + "grad_norm": 6.577545426953781, + "learning_rate": 5.8839554926295765e-06, + "loss": 0.1564117431640625, + "step": 20545 + }, + { + "epoch": 0.17768977354281415, + "grad_norm": 36.72476897319931, + "learning_rate": 5.883899369880295e-06, + "loss": 0.23967475891113282, + "step": 20550 + }, + { + "epoch": 0.1777330070643574, + "grad_norm": 49.02517966328327, + "learning_rate": 5.883843233830725e-06, + "loss": 0.4148681640625, + "step": 20555 + }, + { + "epoch": 0.1777762405859007, + "grad_norm": 18.129558464928962, + "learning_rate": 5.8837870844811245e-06, + "loss": 0.14476318359375, + "step": 20560 + }, + { + "epoch": 0.17781947410744395, + "grad_norm": 24.93513354939421, + "learning_rate": 5.883730921831752e-06, + "loss": 0.1555999755859375, + "step": 20565 + }, + { + "epoch": 0.1778627076289872, + "grad_norm": 23.998687826877422, + "learning_rate": 5.883674745882869e-06, + "loss": 0.2106658935546875, + "step": 20570 + }, + { + "epoch": 0.1779059411505305, + "grad_norm": 0.5807235458425836, + "learning_rate": 5.883618556634732e-06, + "loss": 0.062054443359375, + "step": 20575 + }, + { + "epoch": 0.17794917467207375, + "grad_norm": 5.537833669468553, + "learning_rate": 5.883562354087601e-06, + "loss": 0.1599039077758789, + "step": 20580 + }, + { + "epoch": 0.177992408193617, + "grad_norm": 22.445717680367792, + "learning_rate": 5.883506138241735e-06, + "loss": 0.14809112548828124, + "step": 20585 + }, + { + "epoch": 0.17803564171516026, + "grad_norm": 8.891505589111395, + "learning_rate": 5.8834499090973935e-06, + "loss": 0.41282958984375, + "step": 20590 + }, + { + "epoch": 0.17807887523670354, + "grad_norm": 39.616681199287754, + "learning_rate": 5.883393666654837e-06, + "loss": 0.30255889892578125, + "step": 20595 + }, + { + "epoch": 0.1781221087582468, + "grad_norm": 7.309898098030012, + "learning_rate": 5.883337410914322e-06, + "loss": 0.08968429565429688, + "step": 20600 + }, + { + "epoch": 0.17816534227979006, + "grad_norm": 3.9795196545948364, + "learning_rate": 5.88328114187611e-06, + "loss": 0.0928506851196289, + "step": 20605 + }, + { + "epoch": 0.1782085758013333, + "grad_norm": 0.594662624439801, + "learning_rate": 5.883224859540461e-06, + "loss": 0.09317474365234375, + "step": 20610 + }, + { + "epoch": 0.1782518093228766, + "grad_norm": 8.980033188265502, + "learning_rate": 5.883168563907633e-06, + "loss": 0.07813720703125, + "step": 20615 + }, + { + "epoch": 0.17829504284441985, + "grad_norm": 1.2577412285075202, + "learning_rate": 5.883112254977886e-06, + "loss": 0.43222293853759763, + "step": 20620 + }, + { + "epoch": 0.1783382763659631, + "grad_norm": 21.83471520282473, + "learning_rate": 5.883055932751481e-06, + "loss": 0.10965728759765625, + "step": 20625 + }, + { + "epoch": 0.17838150988750637, + "grad_norm": 44.99042018443765, + "learning_rate": 5.882999597228676e-06, + "loss": 0.2867088317871094, + "step": 20630 + }, + { + "epoch": 0.17842474340904965, + "grad_norm": 23.627022084562867, + "learning_rate": 5.882943248409733e-06, + "loss": 0.411651611328125, + "step": 20635 + }, + { + "epoch": 0.1784679769305929, + "grad_norm": 15.790804605762913, + "learning_rate": 5.882886886294908e-06, + "loss": 0.3511634826660156, + "step": 20640 + }, + { + "epoch": 0.17851121045213617, + "grad_norm": 15.039308170276525, + "learning_rate": 5.882830510884466e-06, + "loss": 0.13143692016601563, + "step": 20645 + }, + { + "epoch": 0.17855444397367942, + "grad_norm": 7.493657564916958, + "learning_rate": 5.8827741221786636e-06, + "loss": 0.068939208984375, + "step": 20650 + }, + { + "epoch": 0.1785976774952227, + "grad_norm": 17.203772916646944, + "learning_rate": 5.88271772017776e-06, + "loss": 0.352703857421875, + "step": 20655 + }, + { + "epoch": 0.17864091101676596, + "grad_norm": 22.04382555151646, + "learning_rate": 5.882661304882019e-06, + "loss": 0.4058197021484375, + "step": 20660 + }, + { + "epoch": 0.17868414453830922, + "grad_norm": 10.865889642206167, + "learning_rate": 5.882604876291698e-06, + "loss": 0.1023712158203125, + "step": 20665 + }, + { + "epoch": 0.17872737805985248, + "grad_norm": 5.436886754690457, + "learning_rate": 5.882548434407058e-06, + "loss": 0.17402801513671876, + "step": 20670 + }, + { + "epoch": 0.17877061158139576, + "grad_norm": 1.1284877581048236, + "learning_rate": 5.88249197922836e-06, + "loss": 0.11140518188476563, + "step": 20675 + }, + { + "epoch": 0.17881384510293902, + "grad_norm": 53.485370242152015, + "learning_rate": 5.882435510755863e-06, + "loss": 0.314642333984375, + "step": 20680 + }, + { + "epoch": 0.17885707862448227, + "grad_norm": 0.9208477179103253, + "learning_rate": 5.882379028989828e-06, + "loss": 0.315863037109375, + "step": 20685 + }, + { + "epoch": 0.17890031214602553, + "grad_norm": 8.205719635554196, + "learning_rate": 5.882322533930516e-06, + "loss": 0.1715911865234375, + "step": 20690 + }, + { + "epoch": 0.17894354566756882, + "grad_norm": 12.848970111707956, + "learning_rate": 5.8822660255781875e-06, + "loss": 0.20301132202148436, + "step": 20695 + }, + { + "epoch": 0.17898677918911207, + "grad_norm": 12.85992116949283, + "learning_rate": 5.882209503933101e-06, + "loss": 0.425555419921875, + "step": 20700 + }, + { + "epoch": 0.17903001271065533, + "grad_norm": 1.1044131712531984, + "learning_rate": 5.882152968995521e-06, + "loss": 0.42220611572265626, + "step": 20705 + }, + { + "epoch": 0.17907324623219859, + "grad_norm": 18.066264440847796, + "learning_rate": 5.882096420765705e-06, + "loss": 0.12890625, + "step": 20710 + }, + { + "epoch": 0.17911647975374187, + "grad_norm": 12.842121775717015, + "learning_rate": 5.882039859243916e-06, + "loss": 0.2138671875, + "step": 20715 + }, + { + "epoch": 0.17915971327528513, + "grad_norm": 18.728648842513323, + "learning_rate": 5.881983284430413e-06, + "loss": 0.189105224609375, + "step": 20720 + }, + { + "epoch": 0.17920294679682838, + "grad_norm": 18.13009042887377, + "learning_rate": 5.881926696325458e-06, + "loss": 0.23519744873046874, + "step": 20725 + }, + { + "epoch": 0.17924618031837164, + "grad_norm": 4.359516168499685, + "learning_rate": 5.881870094929312e-06, + "loss": 0.3602642059326172, + "step": 20730 + }, + { + "epoch": 0.17928941383991492, + "grad_norm": 12.079221640703832, + "learning_rate": 5.881813480242235e-06, + "loss": 0.5368011474609375, + "step": 20735 + }, + { + "epoch": 0.17933264736145818, + "grad_norm": 9.513110166062662, + "learning_rate": 5.88175685226449e-06, + "loss": 0.06318359375, + "step": 20740 + }, + { + "epoch": 0.17937588088300144, + "grad_norm": 51.3958745131504, + "learning_rate": 5.881700210996336e-06, + "loss": 0.354443359375, + "step": 20745 + }, + { + "epoch": 0.1794191144045447, + "grad_norm": 4.211502374099748, + "learning_rate": 5.881643556438035e-06, + "loss": 0.32146224975585935, + "step": 20750 + }, + { + "epoch": 0.17946234792608798, + "grad_norm": 6.366084277237497, + "learning_rate": 5.88158688858985e-06, + "loss": 0.2407379150390625, + "step": 20755 + }, + { + "epoch": 0.17950558144763124, + "grad_norm": 3.555517013448199, + "learning_rate": 5.88153020745204e-06, + "loss": 0.2313751220703125, + "step": 20760 + }, + { + "epoch": 0.1795488149691745, + "grad_norm": 21.39427366710603, + "learning_rate": 5.8814735130248675e-06, + "loss": 0.20904388427734374, + "step": 20765 + }, + { + "epoch": 0.17959204849071778, + "grad_norm": 1.7648801121834317, + "learning_rate": 5.881416805308594e-06, + "loss": 0.19589691162109374, + "step": 20770 + }, + { + "epoch": 0.17963528201226103, + "grad_norm": 12.982857143942665, + "learning_rate": 5.88136008430348e-06, + "loss": 0.21308155059814454, + "step": 20775 + }, + { + "epoch": 0.1796785155338043, + "grad_norm": 0.866435593811762, + "learning_rate": 5.881303350009788e-06, + "loss": 0.3035003662109375, + "step": 20780 + }, + { + "epoch": 0.17972174905534755, + "grad_norm": 25.942586681266416, + "learning_rate": 5.88124660242778e-06, + "loss": 0.20755615234375, + "step": 20785 + }, + { + "epoch": 0.17976498257689083, + "grad_norm": 1.3022322959788701, + "learning_rate": 5.881189841557717e-06, + "loss": 0.13088455200195312, + "step": 20790 + }, + { + "epoch": 0.1798082160984341, + "grad_norm": 2.1331129930868977, + "learning_rate": 5.8811330673998615e-06, + "loss": 0.14638137817382812, + "step": 20795 + }, + { + "epoch": 0.17985144961997734, + "grad_norm": 7.551203942644651, + "learning_rate": 5.881076279954474e-06, + "loss": 0.24563140869140626, + "step": 20800 + }, + { + "epoch": 0.1798946831415206, + "grad_norm": 35.136051903140824, + "learning_rate": 5.881019479221818e-06, + "loss": 0.21072998046875, + "step": 20805 + }, + { + "epoch": 0.17993791666306388, + "grad_norm": 1.247241860538009, + "learning_rate": 5.880962665202154e-06, + "loss": 0.141363525390625, + "step": 20810 + }, + { + "epoch": 0.17998115018460714, + "grad_norm": 19.72273152203791, + "learning_rate": 5.8809058378957464e-06, + "loss": 0.1467742919921875, + "step": 20815 + }, + { + "epoch": 0.1800243837061504, + "grad_norm": 1.6745359938614177, + "learning_rate": 5.8808489973028535e-06, + "loss": 0.2194446563720703, + "step": 20820 + }, + { + "epoch": 0.18006761722769365, + "grad_norm": 7.477188667907504, + "learning_rate": 5.8807921434237414e-06, + "loss": 0.08094558715820313, + "step": 20825 + }, + { + "epoch": 0.18011085074923694, + "grad_norm": 18.232975105822238, + "learning_rate": 5.880735276258669e-06, + "loss": 0.42159500122070315, + "step": 20830 + }, + { + "epoch": 0.1801540842707802, + "grad_norm": 2.7239804483862295, + "learning_rate": 5.880678395807902e-06, + "loss": 0.046068572998046876, + "step": 20835 + }, + { + "epoch": 0.18019731779232345, + "grad_norm": 0.554118054557515, + "learning_rate": 5.880621502071699e-06, + "loss": 0.2348297119140625, + "step": 20840 + }, + { + "epoch": 0.1802405513138667, + "grad_norm": 29.483893169424913, + "learning_rate": 5.880564595050324e-06, + "loss": 0.22118377685546875, + "step": 20845 + }, + { + "epoch": 0.18028378483541, + "grad_norm": 15.429098095468609, + "learning_rate": 5.880507674744041e-06, + "loss": 0.33136444091796874, + "step": 20850 + }, + { + "epoch": 0.18032701835695325, + "grad_norm": 29.545478432349775, + "learning_rate": 5.8804507411531095e-06, + "loss": 0.479620361328125, + "step": 20855 + }, + { + "epoch": 0.1803702518784965, + "grad_norm": 6.2193435428771595, + "learning_rate": 5.880393794277796e-06, + "loss": 0.2135009765625, + "step": 20860 + }, + { + "epoch": 0.18041348540003976, + "grad_norm": 8.032294356397468, + "learning_rate": 5.880336834118359e-06, + "loss": 0.03021697998046875, + "step": 20865 + }, + { + "epoch": 0.18045671892158305, + "grad_norm": 6.512332157454313, + "learning_rate": 5.880279860675064e-06, + "loss": 0.23496551513671876, + "step": 20870 + }, + { + "epoch": 0.1804999524431263, + "grad_norm": 0.13177645151304226, + "learning_rate": 5.8802228739481715e-06, + "loss": 0.3158843994140625, + "step": 20875 + }, + { + "epoch": 0.18054318596466956, + "grad_norm": 26.320801893296117, + "learning_rate": 5.880165873937946e-06, + "loss": 0.3304046630859375, + "step": 20880 + }, + { + "epoch": 0.18058641948621282, + "grad_norm": 0.29980919292690933, + "learning_rate": 5.8801088606446516e-06, + "loss": 0.07335472106933594, + "step": 20885 + }, + { + "epoch": 0.1806296530077561, + "grad_norm": 36.562100116091216, + "learning_rate": 5.880051834068548e-06, + "loss": 0.271905517578125, + "step": 20890 + }, + { + "epoch": 0.18067288652929936, + "grad_norm": 1.0525130139831689, + "learning_rate": 5.879994794209901e-06, + "loss": 0.2504249572753906, + "step": 20895 + }, + { + "epoch": 0.18071612005084262, + "grad_norm": 21.627698195154746, + "learning_rate": 5.879937741068971e-06, + "loss": 0.16885986328125, + "step": 20900 + }, + { + "epoch": 0.18075935357238587, + "grad_norm": 38.240012084566686, + "learning_rate": 5.879880674646024e-06, + "loss": 0.31091461181640623, + "step": 20905 + }, + { + "epoch": 0.18080258709392916, + "grad_norm": 22.13424590035388, + "learning_rate": 5.879823594941321e-06, + "loss": 0.1558258056640625, + "step": 20910 + }, + { + "epoch": 0.1808458206154724, + "grad_norm": 16.364173780396086, + "learning_rate": 5.879766501955126e-06, + "loss": 0.1675201416015625, + "step": 20915 + }, + { + "epoch": 0.18088905413701567, + "grad_norm": 1.8733267599561787, + "learning_rate": 5.879709395687702e-06, + "loss": 0.05356922149658203, + "step": 20920 + }, + { + "epoch": 0.18093228765855893, + "grad_norm": 13.472050122350364, + "learning_rate": 5.879652276139313e-06, + "loss": 0.053631591796875, + "step": 20925 + }, + { + "epoch": 0.1809755211801022, + "grad_norm": 31.43756517664071, + "learning_rate": 5.879595143310223e-06, + "loss": 0.278369140625, + "step": 20930 + }, + { + "epoch": 0.18101875470164547, + "grad_norm": 0.30403267921633925, + "learning_rate": 5.879537997200694e-06, + "loss": 0.09922027587890625, + "step": 20935 + }, + { + "epoch": 0.18106198822318872, + "grad_norm": 2.3853425100080576, + "learning_rate": 5.879480837810991e-06, + "loss": 0.14736328125, + "step": 20940 + }, + { + "epoch": 0.181105221744732, + "grad_norm": 2.231161249919396, + "learning_rate": 5.8794236651413755e-06, + "loss": 0.536474609375, + "step": 20945 + }, + { + "epoch": 0.18114845526627527, + "grad_norm": 27.428531306023675, + "learning_rate": 5.879366479192113e-06, + "loss": 0.16770477294921876, + "step": 20950 + }, + { + "epoch": 0.18119168878781852, + "grad_norm": 18.187155598455636, + "learning_rate": 5.879309279963466e-06, + "loss": 0.32613382339477537, + "step": 20955 + }, + { + "epoch": 0.18123492230936178, + "grad_norm": 6.558554362579812, + "learning_rate": 5.879252067455701e-06, + "loss": 0.3829132080078125, + "step": 20960 + }, + { + "epoch": 0.18127815583090506, + "grad_norm": 61.219404988134016, + "learning_rate": 5.879194841669079e-06, + "loss": 0.4983528137207031, + "step": 20965 + }, + { + "epoch": 0.18132138935244832, + "grad_norm": 4.284280697190966, + "learning_rate": 5.879137602603863e-06, + "loss": 0.22584228515625, + "step": 20970 + }, + { + "epoch": 0.18136462287399158, + "grad_norm": 1.0210915524706183, + "learning_rate": 5.8790803502603214e-06, + "loss": 0.0519073486328125, + "step": 20975 + }, + { + "epoch": 0.18140785639553483, + "grad_norm": 4.362580131497282, + "learning_rate": 5.879023084638714e-06, + "loss": 0.08219070434570312, + "step": 20980 + }, + { + "epoch": 0.18145108991707812, + "grad_norm": 30.787101521829467, + "learning_rate": 5.878965805739308e-06, + "loss": 0.5358110427856445, + "step": 20985 + }, + { + "epoch": 0.18149432343862137, + "grad_norm": 8.280686726469577, + "learning_rate": 5.878908513562364e-06, + "loss": 0.6025543212890625, + "step": 20990 + }, + { + "epoch": 0.18153755696016463, + "grad_norm": 30.73583760045838, + "learning_rate": 5.87885120810815e-06, + "loss": 0.1431884765625, + "step": 20995 + }, + { + "epoch": 0.1815807904817079, + "grad_norm": 13.244354721627046, + "learning_rate": 5.878793889376928e-06, + "loss": 0.34077911376953124, + "step": 21000 + }, + { + "epoch": 0.18162402400325117, + "grad_norm": 6.277995921614821, + "learning_rate": 5.878736557368963e-06, + "loss": 0.07233505249023438, + "step": 21005 + }, + { + "epoch": 0.18166725752479443, + "grad_norm": 48.698555017138276, + "learning_rate": 5.878679212084519e-06, + "loss": 0.31813507080078124, + "step": 21010 + }, + { + "epoch": 0.18171049104633769, + "grad_norm": 6.083726195581889, + "learning_rate": 5.878621853523861e-06, + "loss": 0.0984588623046875, + "step": 21015 + }, + { + "epoch": 0.18175372456788094, + "grad_norm": 5.249951022214553, + "learning_rate": 5.878564481687254e-06, + "loss": 0.30972900390625, + "step": 21020 + }, + { + "epoch": 0.18179695808942423, + "grad_norm": 11.384809749363841, + "learning_rate": 5.878507096574962e-06, + "loss": 0.36969528198242185, + "step": 21025 + }, + { + "epoch": 0.18184019161096748, + "grad_norm": 1.6279715992921422, + "learning_rate": 5.8784496981872485e-06, + "loss": 0.0824005126953125, + "step": 21030 + }, + { + "epoch": 0.18188342513251074, + "grad_norm": 21.86563789430987, + "learning_rate": 5.878392286524381e-06, + "loss": 0.23720932006835938, + "step": 21035 + }, + { + "epoch": 0.181926658654054, + "grad_norm": 9.753212280949754, + "learning_rate": 5.878334861586621e-06, + "loss": 0.1508209228515625, + "step": 21040 + }, + { + "epoch": 0.18196989217559728, + "grad_norm": 6.06890951170675, + "learning_rate": 5.878277423374235e-06, + "loss": 0.160791015625, + "step": 21045 + }, + { + "epoch": 0.18201312569714054, + "grad_norm": 7.291526531872958, + "learning_rate": 5.8782199718874885e-06, + "loss": 0.11744422912597656, + "step": 21050 + }, + { + "epoch": 0.1820563592186838, + "grad_norm": 23.813064536841996, + "learning_rate": 5.878162507126646e-06, + "loss": 0.601922607421875, + "step": 21055 + }, + { + "epoch": 0.18209959274022705, + "grad_norm": 16.174267504398674, + "learning_rate": 5.878105029091972e-06, + "loss": 0.14524459838867188, + "step": 21060 + }, + { + "epoch": 0.18214282626177034, + "grad_norm": 17.457890374159536, + "learning_rate": 5.878047537783733e-06, + "loss": 0.14921875, + "step": 21065 + }, + { + "epoch": 0.1821860597833136, + "grad_norm": 3.920433252207952, + "learning_rate": 5.877990033202191e-06, + "loss": 0.0923828125, + "step": 21070 + }, + { + "epoch": 0.18222929330485685, + "grad_norm": 65.54444518425429, + "learning_rate": 5.877932515347614e-06, + "loss": 0.705291748046875, + "step": 21075 + }, + { + "epoch": 0.1822725268264001, + "grad_norm": 22.386017245310065, + "learning_rate": 5.8778749842202675e-06, + "loss": 0.14692840576171876, + "step": 21080 + }, + { + "epoch": 0.1823157603479434, + "grad_norm": 14.602680538644861, + "learning_rate": 5.877817439820415e-06, + "loss": 0.2941253662109375, + "step": 21085 + }, + { + "epoch": 0.18235899386948665, + "grad_norm": 2.2267634894131856, + "learning_rate": 5.877759882148323e-06, + "loss": 0.1958629608154297, + "step": 21090 + }, + { + "epoch": 0.1824022273910299, + "grad_norm": 0.7775641637030071, + "learning_rate": 5.877702311204257e-06, + "loss": 0.189984130859375, + "step": 21095 + }, + { + "epoch": 0.18244546091257316, + "grad_norm": 2.5569677491755725, + "learning_rate": 5.877644726988482e-06, + "loss": 0.07589111328125, + "step": 21100 + }, + { + "epoch": 0.18248869443411644, + "grad_norm": 13.813201944574153, + "learning_rate": 5.877587129501263e-06, + "loss": 0.237530517578125, + "step": 21105 + }, + { + "epoch": 0.1825319279556597, + "grad_norm": 22.459587501543655, + "learning_rate": 5.877529518742867e-06, + "loss": 0.07019271850585937, + "step": 21110 + }, + { + "epoch": 0.18257516147720296, + "grad_norm": 15.701103706896822, + "learning_rate": 5.877471894713559e-06, + "loss": 0.1875152587890625, + "step": 21115 + }, + { + "epoch": 0.18261839499874621, + "grad_norm": 16.550806912641512, + "learning_rate": 5.877414257413606e-06, + "loss": 0.1688446044921875, + "step": 21120 + }, + { + "epoch": 0.1826616285202895, + "grad_norm": 1.0721492343123409, + "learning_rate": 5.8773566068432715e-06, + "loss": 0.27310791015625, + "step": 21125 + }, + { + "epoch": 0.18270486204183276, + "grad_norm": 18.05818361754846, + "learning_rate": 5.877298943002823e-06, + "loss": 0.176275634765625, + "step": 21130 + }, + { + "epoch": 0.182748095563376, + "grad_norm": 0.13049958943141146, + "learning_rate": 5.877241265892526e-06, + "loss": 0.055767822265625, + "step": 21135 + }, + { + "epoch": 0.1827913290849193, + "grad_norm": 10.886094180023246, + "learning_rate": 5.877183575512646e-06, + "loss": 0.291583251953125, + "step": 21140 + }, + { + "epoch": 0.18283456260646255, + "grad_norm": 28.351845848279506, + "learning_rate": 5.87712587186345e-06, + "loss": 0.19253921508789062, + "step": 21145 + }, + { + "epoch": 0.1828777961280058, + "grad_norm": 15.992279433038828, + "learning_rate": 5.8770681549452036e-06, + "loss": 0.152740478515625, + "step": 21150 + }, + { + "epoch": 0.18292102964954907, + "grad_norm": 10.169650396404853, + "learning_rate": 5.877010424758174e-06, + "loss": 0.06372947692871093, + "step": 21155 + }, + { + "epoch": 0.18296426317109235, + "grad_norm": 3.6745826217007136, + "learning_rate": 5.876952681302625e-06, + "loss": 0.2916778564453125, + "step": 21160 + }, + { + "epoch": 0.1830074966926356, + "grad_norm": 6.0057692076353675, + "learning_rate": 5.876894924578826e-06, + "loss": 0.13312225341796874, + "step": 21165 + }, + { + "epoch": 0.18305073021417886, + "grad_norm": 0.664268744503775, + "learning_rate": 5.8768371545870405e-06, + "loss": 0.0862274169921875, + "step": 21170 + }, + { + "epoch": 0.18309396373572212, + "grad_norm": 10.37537356544415, + "learning_rate": 5.876779371327537e-06, + "loss": 0.276275634765625, + "step": 21175 + }, + { + "epoch": 0.1831371972572654, + "grad_norm": 3.9150673827787696, + "learning_rate": 5.8767215748005805e-06, + "loss": 0.09868316650390625, + "step": 21180 + }, + { + "epoch": 0.18318043077880866, + "grad_norm": 7.143939309701952, + "learning_rate": 5.87666376500644e-06, + "loss": 0.0967742919921875, + "step": 21185 + }, + { + "epoch": 0.18322366430035192, + "grad_norm": 10.981774573676566, + "learning_rate": 5.876605941945379e-06, + "loss": 0.3997314453125, + "step": 21190 + }, + { + "epoch": 0.18326689782189517, + "grad_norm": 6.487943629832818, + "learning_rate": 5.876548105617665e-06, + "loss": 0.227001953125, + "step": 21195 + }, + { + "epoch": 0.18331013134343846, + "grad_norm": 15.546214220123174, + "learning_rate": 5.876490256023566e-06, + "loss": 0.20160903930664062, + "step": 21200 + }, + { + "epoch": 0.18335336486498172, + "grad_norm": 1.244799274626489, + "learning_rate": 5.876432393163348e-06, + "loss": 0.051025390625, + "step": 21205 + }, + { + "epoch": 0.18339659838652497, + "grad_norm": 19.815803463194268, + "learning_rate": 5.876374517037279e-06, + "loss": 0.2514007568359375, + "step": 21210 + }, + { + "epoch": 0.18343983190806823, + "grad_norm": 41.75651650722558, + "learning_rate": 5.876316627645623e-06, + "loss": 0.240789794921875, + "step": 21215 + }, + { + "epoch": 0.1834830654296115, + "grad_norm": 17.868700336586837, + "learning_rate": 5.876258724988651e-06, + "loss": 0.4523681640625, + "step": 21220 + }, + { + "epoch": 0.18352629895115477, + "grad_norm": 2.0784360981956658, + "learning_rate": 5.876200809066626e-06, + "loss": 0.18774337768554689, + "step": 21225 + }, + { + "epoch": 0.18356953247269803, + "grad_norm": 17.753048443045174, + "learning_rate": 5.876142879879818e-06, + "loss": 0.260711669921875, + "step": 21230 + }, + { + "epoch": 0.18361276599424128, + "grad_norm": 1.3996887569757976, + "learning_rate": 5.876084937428493e-06, + "loss": 0.2605194091796875, + "step": 21235 + }, + { + "epoch": 0.18365599951578457, + "grad_norm": 6.5738761757290725, + "learning_rate": 5.8760269817129185e-06, + "loss": 0.22082157135009767, + "step": 21240 + }, + { + "epoch": 0.18369923303732782, + "grad_norm": 3.6138791383869076, + "learning_rate": 5.875969012733361e-06, + "loss": 0.26627578735351565, + "step": 21245 + }, + { + "epoch": 0.18374246655887108, + "grad_norm": 2.4714305246446893, + "learning_rate": 5.875911030490089e-06, + "loss": 0.05511627197265625, + "step": 21250 + }, + { + "epoch": 0.18378570008041434, + "grad_norm": 1.8058937757969107, + "learning_rate": 5.87585303498337e-06, + "loss": 0.146563720703125, + "step": 21255 + }, + { + "epoch": 0.18382893360195762, + "grad_norm": 43.28403912134169, + "learning_rate": 5.875795026213471e-06, + "loss": 0.2014089584350586, + "step": 21260 + }, + { + "epoch": 0.18387216712350088, + "grad_norm": 27.659147936883695, + "learning_rate": 5.875737004180658e-06, + "loss": 0.33968505859375, + "step": 21265 + }, + { + "epoch": 0.18391540064504414, + "grad_norm": 10.526844065055275, + "learning_rate": 5.875678968885201e-06, + "loss": 0.24710006713867189, + "step": 21270 + }, + { + "epoch": 0.1839586341665874, + "grad_norm": 6.613396645274641, + "learning_rate": 5.875620920327367e-06, + "loss": 0.2482513427734375, + "step": 21275 + }, + { + "epoch": 0.18400186768813068, + "grad_norm": 7.863069089721263, + "learning_rate": 5.875562858507423e-06, + "loss": 0.196893310546875, + "step": 21280 + }, + { + "epoch": 0.18404510120967393, + "grad_norm": 11.255384125447607, + "learning_rate": 5.875504783425638e-06, + "loss": 0.3334564208984375, + "step": 21285 + }, + { + "epoch": 0.1840883347312172, + "grad_norm": 4.989245509584092, + "learning_rate": 5.8754466950822784e-06, + "loss": 0.1465505599975586, + "step": 21290 + }, + { + "epoch": 0.18413156825276045, + "grad_norm": 19.794868149201026, + "learning_rate": 5.875388593477612e-06, + "loss": 0.1364715576171875, + "step": 21295 + }, + { + "epoch": 0.18417480177430373, + "grad_norm": 25.237381654760647, + "learning_rate": 5.875330478611909e-06, + "loss": 0.1450775146484375, + "step": 21300 + }, + { + "epoch": 0.184218035295847, + "grad_norm": 3.6107883206372655, + "learning_rate": 5.875272350485434e-06, + "loss": 0.04890632629394531, + "step": 21305 + }, + { + "epoch": 0.18426126881739024, + "grad_norm": 15.963136442739394, + "learning_rate": 5.8752142090984585e-06, + "loss": 0.044756317138671876, + "step": 21310 + }, + { + "epoch": 0.18430450233893353, + "grad_norm": 7.688525475920161, + "learning_rate": 5.8751560544512485e-06, + "loss": 0.18595733642578124, + "step": 21315 + }, + { + "epoch": 0.18434773586047679, + "grad_norm": 6.152638145011942, + "learning_rate": 5.875097886544073e-06, + "loss": 0.34970703125, + "step": 21320 + }, + { + "epoch": 0.18439096938202004, + "grad_norm": 2.6175878431185136, + "learning_rate": 5.8750397053772e-06, + "loss": 0.0343109130859375, + "step": 21325 + }, + { + "epoch": 0.1844342029035633, + "grad_norm": 25.042501900441, + "learning_rate": 5.874981510950898e-06, + "loss": 0.19412841796875, + "step": 21330 + }, + { + "epoch": 0.18447743642510658, + "grad_norm": 8.740471222308202, + "learning_rate": 5.8749233032654345e-06, + "loss": 0.09729480743408203, + "step": 21335 + }, + { + "epoch": 0.18452066994664984, + "grad_norm": 1.8194068868900393, + "learning_rate": 5.874865082321079e-06, + "loss": 0.2840904235839844, + "step": 21340 + }, + { + "epoch": 0.1845639034681931, + "grad_norm": 3.7285291931559827, + "learning_rate": 5.874806848118101e-06, + "loss": 0.1801910400390625, + "step": 21345 + }, + { + "epoch": 0.18460713698973635, + "grad_norm": 7.854263115076854, + "learning_rate": 5.874748600656767e-06, + "loss": 0.38015098571777345, + "step": 21350 + }, + { + "epoch": 0.18465037051127964, + "grad_norm": 1.7216268162593646, + "learning_rate": 5.8746903399373475e-06, + "loss": 0.1073455810546875, + "step": 21355 + }, + { + "epoch": 0.1846936040328229, + "grad_norm": 18.081612098285202, + "learning_rate": 5.8746320659601095e-06, + "loss": 0.3122261047363281, + "step": 21360 + }, + { + "epoch": 0.18473683755436615, + "grad_norm": 41.29263539262714, + "learning_rate": 5.874573778725321e-06, + "loss": 0.160260009765625, + "step": 21365 + }, + { + "epoch": 0.1847800710759094, + "grad_norm": 13.544073203996906, + "learning_rate": 5.874515478233254e-06, + "loss": 0.62144775390625, + "step": 21370 + }, + { + "epoch": 0.1848233045974527, + "grad_norm": 14.046156626735309, + "learning_rate": 5.8744571644841745e-06, + "loss": 0.06530990600585937, + "step": 21375 + }, + { + "epoch": 0.18486653811899595, + "grad_norm": 7.510675271533575, + "learning_rate": 5.874398837478353e-06, + "loss": 0.11317520141601563, + "step": 21380 + }, + { + "epoch": 0.1849097716405392, + "grad_norm": 14.474104851288516, + "learning_rate": 5.874340497216058e-06, + "loss": 0.09023056030273438, + "step": 21385 + }, + { + "epoch": 0.18495300516208246, + "grad_norm": 7.349382372304474, + "learning_rate": 5.8742821436975595e-06, + "loss": 0.19891128540039063, + "step": 21390 + }, + { + "epoch": 0.18499623868362575, + "grad_norm": 21.829158476593125, + "learning_rate": 5.874223776923125e-06, + "loss": 0.125872802734375, + "step": 21395 + }, + { + "epoch": 0.185039472205169, + "grad_norm": 19.625509388223715, + "learning_rate": 5.874165396893024e-06, + "loss": 0.27794342041015624, + "step": 21400 + }, + { + "epoch": 0.18508270572671226, + "grad_norm": 42.825008885617734, + "learning_rate": 5.874107003607526e-06, + "loss": 0.39981765747070314, + "step": 21405 + }, + { + "epoch": 0.18512593924825552, + "grad_norm": 4.864061522705223, + "learning_rate": 5.874048597066902e-06, + "loss": 0.38833770751953123, + "step": 21410 + }, + { + "epoch": 0.1851691727697988, + "grad_norm": 27.806103160734658, + "learning_rate": 5.873990177271418e-06, + "loss": 0.17121124267578125, + "step": 21415 + }, + { + "epoch": 0.18521240629134206, + "grad_norm": 76.83814351659662, + "learning_rate": 5.873931744221346e-06, + "loss": 0.360968017578125, + "step": 21420 + }, + { + "epoch": 0.18525563981288531, + "grad_norm": 0.322720021614988, + "learning_rate": 5.873873297916955e-06, + "loss": 0.193212890625, + "step": 21425 + }, + { + "epoch": 0.18529887333442857, + "grad_norm": 0.3356735181195824, + "learning_rate": 5.8738148383585146e-06, + "loss": 0.15351715087890624, + "step": 21430 + }, + { + "epoch": 0.18534210685597186, + "grad_norm": 2.931592341144086, + "learning_rate": 5.8737563655462935e-06, + "loss": 0.13985595703125, + "step": 21435 + }, + { + "epoch": 0.1853853403775151, + "grad_norm": 4.823450114319843, + "learning_rate": 5.873697879480562e-06, + "loss": 0.483734130859375, + "step": 21440 + }, + { + "epoch": 0.18542857389905837, + "grad_norm": 9.270185041964124, + "learning_rate": 5.873639380161589e-06, + "loss": 0.18223495483398439, + "step": 21445 + }, + { + "epoch": 0.18547180742060163, + "grad_norm": 0.6316892189199569, + "learning_rate": 5.873580867589647e-06, + "loss": 0.22694320678710939, + "step": 21450 + }, + { + "epoch": 0.1855150409421449, + "grad_norm": 9.003634167058046, + "learning_rate": 5.873522341765003e-06, + "loss": 0.1073974609375, + "step": 21455 + }, + { + "epoch": 0.18555827446368817, + "grad_norm": 14.43590956438359, + "learning_rate": 5.8734638026879275e-06, + "loss": 0.0869354248046875, + "step": 21460 + }, + { + "epoch": 0.18560150798523142, + "grad_norm": 60.86973526681963, + "learning_rate": 5.873405250358691e-06, + "loss": 0.588525390625, + "step": 21465 + }, + { + "epoch": 0.18564474150677468, + "grad_norm": 29.51865976020598, + "learning_rate": 5.873346684777564e-06, + "loss": 0.38738555908203126, + "step": 21470 + }, + { + "epoch": 0.18568797502831796, + "grad_norm": 4.493537687959559, + "learning_rate": 5.873288105944815e-06, + "loss": 0.472650146484375, + "step": 21475 + }, + { + "epoch": 0.18573120854986122, + "grad_norm": 5.115071190057457, + "learning_rate": 5.8732295138607165e-06, + "loss": 0.3153656005859375, + "step": 21480 + }, + { + "epoch": 0.18577444207140448, + "grad_norm": 0.19389489773583451, + "learning_rate": 5.8731709085255355e-06, + "loss": 0.16612930297851564, + "step": 21485 + }, + { + "epoch": 0.18581767559294773, + "grad_norm": 40.743315746390635, + "learning_rate": 5.873112289939546e-06, + "loss": 0.3138313293457031, + "step": 21490 + }, + { + "epoch": 0.18586090911449102, + "grad_norm": 3.3352864514295066, + "learning_rate": 5.873053658103015e-06, + "loss": 0.08569259643554687, + "step": 21495 + }, + { + "epoch": 0.18590414263603428, + "grad_norm": 2.444473683745108, + "learning_rate": 5.872995013016215e-06, + "loss": 0.161688232421875, + "step": 21500 + }, + { + "epoch": 0.18594737615757753, + "grad_norm": 0.2989995230680304, + "learning_rate": 5.872936354679417e-06, + "loss": 0.03878173828125, + "step": 21505 + }, + { + "epoch": 0.18599060967912082, + "grad_norm": 2.2778929580661558, + "learning_rate": 5.872877683092889e-06, + "loss": 0.388970947265625, + "step": 21510 + }, + { + "epoch": 0.18603384320066407, + "grad_norm": 20.42550569948896, + "learning_rate": 5.872818998256902e-06, + "loss": 0.28830108642578123, + "step": 21515 + }, + { + "epoch": 0.18607707672220733, + "grad_norm": 3.7028909999039845, + "learning_rate": 5.8727603001717295e-06, + "loss": 0.0566558837890625, + "step": 21520 + }, + { + "epoch": 0.1861203102437506, + "grad_norm": 6.102710664751748, + "learning_rate": 5.872701588837639e-06, + "loss": 0.07188034057617188, + "step": 21525 + }, + { + "epoch": 0.18616354376529387, + "grad_norm": 70.93970298441732, + "learning_rate": 5.8726428642549035e-06, + "loss": 0.34330215454101565, + "step": 21530 + }, + { + "epoch": 0.18620677728683713, + "grad_norm": 7.405039959288593, + "learning_rate": 5.872584126423793e-06, + "loss": 0.2294342041015625, + "step": 21535 + }, + { + "epoch": 0.18625001080838038, + "grad_norm": 31.506341492516, + "learning_rate": 5.8725253753445784e-06, + "loss": 0.3039115905761719, + "step": 21540 + }, + { + "epoch": 0.18629324432992364, + "grad_norm": 2.866934924957292, + "learning_rate": 5.87246661101753e-06, + "loss": 0.1198486328125, + "step": 21545 + }, + { + "epoch": 0.18633647785146693, + "grad_norm": 2.070772459379238, + "learning_rate": 5.87240783344292e-06, + "loss": 0.34414825439453123, + "step": 21550 + }, + { + "epoch": 0.18637971137301018, + "grad_norm": 23.083584248277614, + "learning_rate": 5.872349042621019e-06, + "loss": 0.15300369262695312, + "step": 21555 + }, + { + "epoch": 0.18642294489455344, + "grad_norm": 1.5673723616251711, + "learning_rate": 5.872290238552097e-06, + "loss": 0.073834228515625, + "step": 21560 + }, + { + "epoch": 0.1864661784160967, + "grad_norm": 4.193672728865252, + "learning_rate": 5.872231421236427e-06, + "loss": 0.2086181640625, + "step": 21565 + }, + { + "epoch": 0.18650941193763998, + "grad_norm": 7.968397348939357, + "learning_rate": 5.87217259067428e-06, + "loss": 0.1777679443359375, + "step": 21570 + }, + { + "epoch": 0.18655264545918324, + "grad_norm": 2.7655815900702443, + "learning_rate": 5.872113746865926e-06, + "loss": 0.26619300842285154, + "step": 21575 + }, + { + "epoch": 0.1865958789807265, + "grad_norm": 12.667968918968167, + "learning_rate": 5.8720548898116365e-06, + "loss": 0.1449554443359375, + "step": 21580 + }, + { + "epoch": 0.18663911250226975, + "grad_norm": 0.10157506885393351, + "learning_rate": 5.871996019511685e-06, + "loss": 0.27976455688476565, + "step": 21585 + }, + { + "epoch": 0.18668234602381303, + "grad_norm": 17.12562109305868, + "learning_rate": 5.871937135966341e-06, + "loss": 0.12190093994140624, + "step": 21590 + }, + { + "epoch": 0.1867255795453563, + "grad_norm": 35.28049600861382, + "learning_rate": 5.871878239175876e-06, + "loss": 0.2783447265625, + "step": 21595 + }, + { + "epoch": 0.18676881306689955, + "grad_norm": 6.957375651871472, + "learning_rate": 5.871819329140564e-06, + "loss": 0.364288330078125, + "step": 21600 + }, + { + "epoch": 0.1868120465884428, + "grad_norm": 6.873190287294949, + "learning_rate": 5.871760405860674e-06, + "loss": 0.0915283203125, + "step": 21605 + }, + { + "epoch": 0.1868552801099861, + "grad_norm": 4.3578548767678225, + "learning_rate": 5.871701469336478e-06, + "loss": 0.1156036376953125, + "step": 21610 + }, + { + "epoch": 0.18689851363152934, + "grad_norm": 16.89158287544615, + "learning_rate": 5.87164251956825e-06, + "loss": 0.18517036437988282, + "step": 21615 + }, + { + "epoch": 0.1869417471530726, + "grad_norm": 62.57817340065569, + "learning_rate": 5.87158355655626e-06, + "loss": 0.267523193359375, + "step": 21620 + }, + { + "epoch": 0.18698498067461586, + "grad_norm": 36.0682800007237, + "learning_rate": 5.87152458030078e-06, + "loss": 0.555828857421875, + "step": 21625 + }, + { + "epoch": 0.18702821419615914, + "grad_norm": 31.86724291760991, + "learning_rate": 5.871465590802083e-06, + "loss": 0.15604667663574218, + "step": 21630 + }, + { + "epoch": 0.1870714477177024, + "grad_norm": 11.010820285330896, + "learning_rate": 5.871406588060439e-06, + "loss": 0.28895263671875, + "step": 21635 + }, + { + "epoch": 0.18711468123924566, + "grad_norm": 0.5484750954171665, + "learning_rate": 5.871347572076123e-06, + "loss": 0.07703514099121093, + "step": 21640 + }, + { + "epoch": 0.1871579147607889, + "grad_norm": 3.936667291275616, + "learning_rate": 5.871288542849406e-06, + "loss": 0.16003570556640626, + "step": 21645 + }, + { + "epoch": 0.1872011482823322, + "grad_norm": 48.14679630876382, + "learning_rate": 5.871229500380558e-06, + "loss": 0.26051025390625, + "step": 21650 + }, + { + "epoch": 0.18724438180387545, + "grad_norm": 7.573381727094162, + "learning_rate": 5.871170444669855e-06, + "loss": 0.40577144622802735, + "step": 21655 + }, + { + "epoch": 0.1872876153254187, + "grad_norm": 22.166773761647374, + "learning_rate": 5.871111375717567e-06, + "loss": 0.20772476196289064, + "step": 21660 + }, + { + "epoch": 0.18733084884696197, + "grad_norm": 40.02180656352921, + "learning_rate": 5.871052293523967e-06, + "loss": 0.5137252807617188, + "step": 21665 + }, + { + "epoch": 0.18737408236850525, + "grad_norm": 2.9582207826704563, + "learning_rate": 5.8709931980893276e-06, + "loss": 0.22626876831054688, + "step": 21670 + }, + { + "epoch": 0.1874173158900485, + "grad_norm": 58.756983291058845, + "learning_rate": 5.8709340894139216e-06, + "loss": 0.335467529296875, + "step": 21675 + }, + { + "epoch": 0.18746054941159176, + "grad_norm": 2.510193395412185, + "learning_rate": 5.870874967498021e-06, + "loss": 0.126580810546875, + "step": 21680 + }, + { + "epoch": 0.18750378293313505, + "grad_norm": 2.855788492392981, + "learning_rate": 5.8708158323418985e-06, + "loss": 0.09204559326171875, + "step": 21685 + }, + { + "epoch": 0.1875470164546783, + "grad_norm": 33.528877600230445, + "learning_rate": 5.870756683945827e-06, + "loss": 0.0904571533203125, + "step": 21690 + }, + { + "epoch": 0.18759024997622156, + "grad_norm": 1.2169061359827762, + "learning_rate": 5.87069752231008e-06, + "loss": 0.05661849975585938, + "step": 21695 + }, + { + "epoch": 0.18763348349776482, + "grad_norm": 12.249559139335368, + "learning_rate": 5.870638347434928e-06, + "loss": 0.22839202880859374, + "step": 21700 + }, + { + "epoch": 0.1876767170193081, + "grad_norm": 4.620040852990479, + "learning_rate": 5.870579159320648e-06, + "loss": 0.0919891357421875, + "step": 21705 + }, + { + "epoch": 0.18771995054085136, + "grad_norm": 4.479639675291097, + "learning_rate": 5.87051995796751e-06, + "loss": 0.13033065795898438, + "step": 21710 + }, + { + "epoch": 0.18776318406239462, + "grad_norm": 10.502424376660041, + "learning_rate": 5.8704607433757865e-06, + "loss": 0.0541168212890625, + "step": 21715 + }, + { + "epoch": 0.18780641758393787, + "grad_norm": 33.350536966962174, + "learning_rate": 5.870401515545752e-06, + "loss": 0.4574249267578125, + "step": 21720 + }, + { + "epoch": 0.18784965110548116, + "grad_norm": 0.16145544874980508, + "learning_rate": 5.870342274477679e-06, + "loss": 0.15100326538085937, + "step": 21725 + }, + { + "epoch": 0.18789288462702441, + "grad_norm": 77.26649436136748, + "learning_rate": 5.870283020171842e-06, + "loss": 0.8060028076171875, + "step": 21730 + }, + { + "epoch": 0.18793611814856767, + "grad_norm": 6.712572145738963, + "learning_rate": 5.870223752628514e-06, + "loss": 0.0880340576171875, + "step": 21735 + }, + { + "epoch": 0.18797935167011093, + "grad_norm": 31.318941807288574, + "learning_rate": 5.870164471847965e-06, + "loss": 0.4479278564453125, + "step": 21740 + }, + { + "epoch": 0.1880225851916542, + "grad_norm": 1.1667908692548457, + "learning_rate": 5.870105177830473e-06, + "loss": 0.45706787109375, + "step": 21745 + }, + { + "epoch": 0.18806581871319747, + "grad_norm": 1.5096651592106136, + "learning_rate": 5.870045870576309e-06, + "loss": 0.41570587158203126, + "step": 21750 + }, + { + "epoch": 0.18810905223474073, + "grad_norm": 4.138212971496417, + "learning_rate": 5.869986550085747e-06, + "loss": 0.44744873046875, + "step": 21755 + }, + { + "epoch": 0.18815228575628398, + "grad_norm": 17.614690606463345, + "learning_rate": 5.8699272163590615e-06, + "loss": 0.14906005859375, + "step": 21760 + }, + { + "epoch": 0.18819551927782727, + "grad_norm": 14.916010253649244, + "learning_rate": 5.869867869396525e-06, + "loss": 0.27501983642578126, + "step": 21765 + }, + { + "epoch": 0.18823875279937052, + "grad_norm": 6.832659263896433, + "learning_rate": 5.869808509198411e-06, + "loss": 0.3213714599609375, + "step": 21770 + }, + { + "epoch": 0.18828198632091378, + "grad_norm": 9.645305820827316, + "learning_rate": 5.8697491357649935e-06, + "loss": 0.22667083740234376, + "step": 21775 + }, + { + "epoch": 0.18832521984245704, + "grad_norm": 20.089844508288518, + "learning_rate": 5.869689749096546e-06, + "loss": 0.158880615234375, + "step": 21780 + }, + { + "epoch": 0.18836845336400032, + "grad_norm": 1.7011072631245103, + "learning_rate": 5.869630349193345e-06, + "loss": 0.5528228759765625, + "step": 21785 + }, + { + "epoch": 0.18841168688554358, + "grad_norm": 4.54667794659182, + "learning_rate": 5.86957093605566e-06, + "loss": 0.0335601806640625, + "step": 21790 + }, + { + "epoch": 0.18845492040708683, + "grad_norm": 42.642287141230454, + "learning_rate": 5.8695115096837695e-06, + "loss": 0.20259323120117187, + "step": 21795 + }, + { + "epoch": 0.1884981539286301, + "grad_norm": 20.67540679321583, + "learning_rate": 5.869452070077944e-06, + "loss": 0.1060882568359375, + "step": 21800 + }, + { + "epoch": 0.18854138745017338, + "grad_norm": 0.5372988133988945, + "learning_rate": 5.869392617238461e-06, + "loss": 0.22964630126953126, + "step": 21805 + }, + { + "epoch": 0.18858462097171663, + "grad_norm": 6.1218673705833435, + "learning_rate": 5.86933315116559e-06, + "loss": 0.3286285400390625, + "step": 21810 + }, + { + "epoch": 0.1886278544932599, + "grad_norm": 16.147121333313123, + "learning_rate": 5.86927367185961e-06, + "loss": 0.3448028564453125, + "step": 21815 + }, + { + "epoch": 0.18867108801480315, + "grad_norm": 3.8933845674628174, + "learning_rate": 5.869214179320794e-06, + "loss": 0.13477783203125, + "step": 21820 + }, + { + "epoch": 0.18871432153634643, + "grad_norm": 1.2096166029460884, + "learning_rate": 5.869154673549413e-06, + "loss": 0.19449996948242188, + "step": 21825 + }, + { + "epoch": 0.1887575550578897, + "grad_norm": 34.29348467853847, + "learning_rate": 5.869095154545747e-06, + "loss": 0.2534149169921875, + "step": 21830 + }, + { + "epoch": 0.18880078857943294, + "grad_norm": 25.921399924430933, + "learning_rate": 5.8690356223100665e-06, + "loss": 0.12001419067382812, + "step": 21835 + }, + { + "epoch": 0.1888440221009762, + "grad_norm": 35.31289789716918, + "learning_rate": 5.868976076842647e-06, + "loss": 0.1888946533203125, + "step": 21840 + }, + { + "epoch": 0.18888725562251948, + "grad_norm": 4.332640648422631, + "learning_rate": 5.868916518143763e-06, + "loss": 0.21834335327148438, + "step": 21845 + }, + { + "epoch": 0.18893048914406274, + "grad_norm": 38.81491649951832, + "learning_rate": 5.8688569462136905e-06, + "loss": 0.430487060546875, + "step": 21850 + }, + { + "epoch": 0.188973722665606, + "grad_norm": 2.561906926556733, + "learning_rate": 5.868797361052703e-06, + "loss": 0.2346466064453125, + "step": 21855 + }, + { + "epoch": 0.18901695618714925, + "grad_norm": 30.109301601466655, + "learning_rate": 5.868737762661075e-06, + "loss": 0.33575439453125, + "step": 21860 + }, + { + "epoch": 0.18906018970869254, + "grad_norm": 14.18933047268346, + "learning_rate": 5.868678151039082e-06, + "loss": 0.252716064453125, + "step": 21865 + }, + { + "epoch": 0.1891034232302358, + "grad_norm": 6.346301796620062, + "learning_rate": 5.868618526187e-06, + "loss": 0.14566650390625, + "step": 21870 + }, + { + "epoch": 0.18914665675177905, + "grad_norm": 0.14336176889029456, + "learning_rate": 5.8685588881051015e-06, + "loss": 0.127020263671875, + "step": 21875 + }, + { + "epoch": 0.18918989027332234, + "grad_norm": 27.88099257338855, + "learning_rate": 5.868499236793664e-06, + "loss": 0.21579742431640625, + "step": 21880 + }, + { + "epoch": 0.1892331237948656, + "grad_norm": 4.52706128284278, + "learning_rate": 5.868439572252961e-06, + "loss": 0.46593399047851564, + "step": 21885 + }, + { + "epoch": 0.18927635731640885, + "grad_norm": 7.846608124189647, + "learning_rate": 5.8683798944832676e-06, + "loss": 0.2235321044921875, + "step": 21890 + }, + { + "epoch": 0.1893195908379521, + "grad_norm": 4.962388208262404, + "learning_rate": 5.868320203484859e-06, + "loss": 0.4901702880859375, + "step": 21895 + }, + { + "epoch": 0.1893628243594954, + "grad_norm": 16.097427602599144, + "learning_rate": 5.868260499258012e-06, + "loss": 0.2104217529296875, + "step": 21900 + }, + { + "epoch": 0.18940605788103865, + "grad_norm": 25.894960262365462, + "learning_rate": 5.868200781803002e-06, + "loss": 0.53699951171875, + "step": 21905 + }, + { + "epoch": 0.1894492914025819, + "grad_norm": 6.399108753970528, + "learning_rate": 5.868141051120102e-06, + "loss": 0.0273529052734375, + "step": 21910 + }, + { + "epoch": 0.18949252492412516, + "grad_norm": 33.34811488510714, + "learning_rate": 5.86808130720959e-06, + "loss": 0.11507797241210938, + "step": 21915 + }, + { + "epoch": 0.18953575844566845, + "grad_norm": 53.83858406299912, + "learning_rate": 5.8680215500717395e-06, + "loss": 0.1959197998046875, + "step": 21920 + }, + { + "epoch": 0.1895789919672117, + "grad_norm": 9.488430142952334, + "learning_rate": 5.867961779706827e-06, + "loss": 0.0822052001953125, + "step": 21925 + }, + { + "epoch": 0.18962222548875496, + "grad_norm": 1.9753128346069275, + "learning_rate": 5.867901996115129e-06, + "loss": 0.3820281982421875, + "step": 21930 + }, + { + "epoch": 0.18966545901029822, + "grad_norm": 8.040290255892934, + "learning_rate": 5.867842199296919e-06, + "loss": 0.2437774658203125, + "step": 21935 + }, + { + "epoch": 0.1897086925318415, + "grad_norm": 14.105743236124624, + "learning_rate": 5.8677823892524755e-06, + "loss": 0.236614990234375, + "step": 21940 + }, + { + "epoch": 0.18975192605338476, + "grad_norm": 5.387871618453326, + "learning_rate": 5.867722565982073e-06, + "loss": 0.10797958374023438, + "step": 21945 + }, + { + "epoch": 0.189795159574928, + "grad_norm": 2.2063994493728467, + "learning_rate": 5.867662729485987e-06, + "loss": 0.699530029296875, + "step": 21950 + }, + { + "epoch": 0.18983839309647127, + "grad_norm": 17.304399289552087, + "learning_rate": 5.867602879764494e-06, + "loss": 0.329034423828125, + "step": 21955 + }, + { + "epoch": 0.18988162661801455, + "grad_norm": 2.014658601179748, + "learning_rate": 5.86754301681787e-06, + "loss": 0.043952560424804686, + "step": 21960 + }, + { + "epoch": 0.1899248601395578, + "grad_norm": 0.3142217848900013, + "learning_rate": 5.867483140646391e-06, + "loss": 0.06913909912109376, + "step": 21965 + }, + { + "epoch": 0.18996809366110107, + "grad_norm": 37.02688999371535, + "learning_rate": 5.8674232512503335e-06, + "loss": 0.3913749694824219, + "step": 21970 + }, + { + "epoch": 0.19001132718264432, + "grad_norm": 3.344264167176065, + "learning_rate": 5.867363348629973e-06, + "loss": 0.06228713989257813, + "step": 21975 + }, + { + "epoch": 0.1900545607041876, + "grad_norm": 21.14040580651527, + "learning_rate": 5.867303432785585e-06, + "loss": 0.3863525390625, + "step": 21980 + }, + { + "epoch": 0.19009779422573087, + "grad_norm": 2.6248645903546124, + "learning_rate": 5.867243503717448e-06, + "loss": 0.1903411865234375, + "step": 21985 + }, + { + "epoch": 0.19014102774727412, + "grad_norm": 5.496045530314243, + "learning_rate": 5.867183561425837e-06, + "loss": 0.06944046020507813, + "step": 21990 + }, + { + "epoch": 0.19018426126881738, + "grad_norm": 22.29417545714611, + "learning_rate": 5.86712360591103e-06, + "loss": 0.24349365234375, + "step": 21995 + }, + { + "epoch": 0.19022749479036066, + "grad_norm": 6.342552022538197, + "learning_rate": 5.867063637173301e-06, + "loss": 0.09793167114257813, + "step": 22000 + }, + { + "epoch": 0.19027072831190392, + "grad_norm": 12.275557042709963, + "learning_rate": 5.867003655212927e-06, + "loss": 0.234820556640625, + "step": 22005 + }, + { + "epoch": 0.19031396183344718, + "grad_norm": 9.106873907743884, + "learning_rate": 5.866943660030187e-06, + "loss": 0.23506088256835939, + "step": 22010 + }, + { + "epoch": 0.19035719535499043, + "grad_norm": 0.9080041961279115, + "learning_rate": 5.866883651625356e-06, + "loss": 0.19497909545898437, + "step": 22015 + }, + { + "epoch": 0.19040042887653372, + "grad_norm": 4.107271841295899, + "learning_rate": 5.86682362999871e-06, + "loss": 0.18384284973144532, + "step": 22020 + }, + { + "epoch": 0.19044366239807697, + "grad_norm": 3.190589758399019, + "learning_rate": 5.8667635951505276e-06, + "loss": 0.05800018310546875, + "step": 22025 + }, + { + "epoch": 0.19048689591962023, + "grad_norm": 0.7770540250484931, + "learning_rate": 5.866703547081084e-06, + "loss": 0.130230712890625, + "step": 22030 + }, + { + "epoch": 0.1905301294411635, + "grad_norm": 0.8398705138860193, + "learning_rate": 5.866643485790658e-06, + "loss": 0.16572723388671876, + "step": 22035 + }, + { + "epoch": 0.19057336296270677, + "grad_norm": 0.4814000414348885, + "learning_rate": 5.866583411279526e-06, + "loss": 0.0608428955078125, + "step": 22040 + }, + { + "epoch": 0.19061659648425003, + "grad_norm": 12.06408146466691, + "learning_rate": 5.866523323547963e-06, + "loss": 0.15632553100585939, + "step": 22045 + }, + { + "epoch": 0.19065983000579328, + "grad_norm": 0.5824036190477265, + "learning_rate": 5.866463222596249e-06, + "loss": 0.2516323089599609, + "step": 22050 + }, + { + "epoch": 0.19070306352733657, + "grad_norm": 5.775307841435832, + "learning_rate": 5.866403108424658e-06, + "loss": 0.23399810791015624, + "step": 22055 + }, + { + "epoch": 0.19074629704887983, + "grad_norm": 16.465589673148095, + "learning_rate": 5.8663429810334705e-06, + "loss": 0.18978729248046874, + "step": 22060 + }, + { + "epoch": 0.19078953057042308, + "grad_norm": 1.5909135390957687, + "learning_rate": 5.866282840422963e-06, + "loss": 0.3498382568359375, + "step": 22065 + }, + { + "epoch": 0.19083276409196634, + "grad_norm": 7.0158270915779655, + "learning_rate": 5.866222686593411e-06, + "loss": 0.08135986328125, + "step": 22070 + }, + { + "epoch": 0.19087599761350962, + "grad_norm": 2.0910475858365807, + "learning_rate": 5.8661625195450944e-06, + "loss": 0.2966796875, + "step": 22075 + }, + { + "epoch": 0.19091923113505288, + "grad_norm": 6.92177665827523, + "learning_rate": 5.866102339278289e-06, + "loss": 0.09849853515625, + "step": 22080 + }, + { + "epoch": 0.19096246465659614, + "grad_norm": 14.27225979147242, + "learning_rate": 5.866042145793273e-06, + "loss": 0.13708763122558593, + "step": 22085 + }, + { + "epoch": 0.1910056981781394, + "grad_norm": 6.63260182642812, + "learning_rate": 5.865981939090323e-06, + "loss": 0.10392074584960938, + "step": 22090 + }, + { + "epoch": 0.19104893169968268, + "grad_norm": 0.2599696038948343, + "learning_rate": 5.865921719169719e-06, + "loss": 0.41109466552734375, + "step": 22095 + }, + { + "epoch": 0.19109216522122593, + "grad_norm": 1.7566259899490742, + "learning_rate": 5.865861486031737e-06, + "loss": 0.049604034423828124, + "step": 22100 + }, + { + "epoch": 0.1911353987427692, + "grad_norm": 0.2610433028030771, + "learning_rate": 5.8658012396766546e-06, + "loss": 0.07852096557617187, + "step": 22105 + }, + { + "epoch": 0.19117863226431245, + "grad_norm": 23.607710482441977, + "learning_rate": 5.86574098010475e-06, + "loss": 0.09896163940429688, + "step": 22110 + }, + { + "epoch": 0.19122186578585573, + "grad_norm": 7.1834667463321225, + "learning_rate": 5.865680707316301e-06, + "loss": 0.4510498046875, + "step": 22115 + }, + { + "epoch": 0.191265099307399, + "grad_norm": 4.148543865554011, + "learning_rate": 5.865620421311586e-06, + "loss": 0.2307307243347168, + "step": 22120 + }, + { + "epoch": 0.19130833282894225, + "grad_norm": 1.5396230501550756, + "learning_rate": 5.865560122090883e-06, + "loss": 0.2881805419921875, + "step": 22125 + }, + { + "epoch": 0.1913515663504855, + "grad_norm": 1.2610373771952195, + "learning_rate": 5.86549980965447e-06, + "loss": 0.23908157348632814, + "step": 22130 + }, + { + "epoch": 0.1913947998720288, + "grad_norm": 17.510929657339375, + "learning_rate": 5.865439484002625e-06, + "loss": 0.113543701171875, + "step": 22135 + }, + { + "epoch": 0.19143803339357204, + "grad_norm": 27.605828828821615, + "learning_rate": 5.865379145135626e-06, + "loss": 0.5954818725585938, + "step": 22140 + }, + { + "epoch": 0.1914812669151153, + "grad_norm": 16.097907263328317, + "learning_rate": 5.865318793053752e-06, + "loss": 0.17833251953125, + "step": 22145 + }, + { + "epoch": 0.19152450043665856, + "grad_norm": 3.349144331921233, + "learning_rate": 5.865258427757281e-06, + "loss": 0.11475067138671875, + "step": 22150 + }, + { + "epoch": 0.19156773395820184, + "grad_norm": 5.8242682888344905, + "learning_rate": 5.865198049246491e-06, + "loss": 0.157861328125, + "step": 22155 + }, + { + "epoch": 0.1916109674797451, + "grad_norm": 28.331411115966194, + "learning_rate": 5.865137657521661e-06, + "loss": 0.1083221435546875, + "step": 22160 + }, + { + "epoch": 0.19165420100128835, + "grad_norm": 16.097047241835956, + "learning_rate": 5.865077252583069e-06, + "loss": 0.10525455474853515, + "step": 22165 + }, + { + "epoch": 0.1916974345228316, + "grad_norm": 12.446439989474738, + "learning_rate": 5.865016834430994e-06, + "loss": 0.3334617614746094, + "step": 22170 + }, + { + "epoch": 0.1917406680443749, + "grad_norm": 31.72551361431171, + "learning_rate": 5.864956403065715e-06, + "loss": 0.28712997436523435, + "step": 22175 + }, + { + "epoch": 0.19178390156591815, + "grad_norm": 25.46623629408629, + "learning_rate": 5.864895958487509e-06, + "loss": 0.29596748352050783, + "step": 22180 + }, + { + "epoch": 0.1918271350874614, + "grad_norm": 55.23930821575202, + "learning_rate": 5.864835500696656e-06, + "loss": 0.379205322265625, + "step": 22185 + }, + { + "epoch": 0.19187036860900467, + "grad_norm": 3.185288631617384, + "learning_rate": 5.864775029693436e-06, + "loss": 0.17557373046875, + "step": 22190 + }, + { + "epoch": 0.19191360213054795, + "grad_norm": 5.293045889050281, + "learning_rate": 5.864714545478127e-06, + "loss": 0.02821636199951172, + "step": 22195 + }, + { + "epoch": 0.1919568356520912, + "grad_norm": 0.9900347506965943, + "learning_rate": 5.864654048051007e-06, + "loss": 0.08417510986328125, + "step": 22200 + }, + { + "epoch": 0.19200006917363446, + "grad_norm": 23.919104989818358, + "learning_rate": 5.8645935374123554e-06, + "loss": 0.274017333984375, + "step": 22205 + }, + { + "epoch": 0.19204330269517772, + "grad_norm": 5.245887504558426, + "learning_rate": 5.864533013562452e-06, + "loss": 0.13103790283203126, + "step": 22210 + }, + { + "epoch": 0.192086536216721, + "grad_norm": 28.722278410719476, + "learning_rate": 5.864472476501574e-06, + "loss": 0.24517822265625, + "step": 22215 + }, + { + "epoch": 0.19212976973826426, + "grad_norm": 48.04565493576998, + "learning_rate": 5.864411926230004e-06, + "loss": 0.42476806640625, + "step": 22220 + }, + { + "epoch": 0.19217300325980752, + "grad_norm": 35.02020218852999, + "learning_rate": 5.864351362748018e-06, + "loss": 0.1750244140625, + "step": 22225 + }, + { + "epoch": 0.19221623678135077, + "grad_norm": 0.5404396572266504, + "learning_rate": 5.8642907860558976e-06, + "loss": 0.240240478515625, + "step": 22230 + }, + { + "epoch": 0.19225947030289406, + "grad_norm": 15.27480742592431, + "learning_rate": 5.864230196153921e-06, + "loss": 0.2543933868408203, + "step": 22235 + }, + { + "epoch": 0.19230270382443732, + "grad_norm": 7.264772085789342, + "learning_rate": 5.864169593042367e-06, + "loss": 0.0937042236328125, + "step": 22240 + }, + { + "epoch": 0.19234593734598057, + "grad_norm": 0.5683320074380758, + "learning_rate": 5.864108976721516e-06, + "loss": 0.11910400390625, + "step": 22245 + }, + { + "epoch": 0.19238917086752386, + "grad_norm": 2.2736044376755773, + "learning_rate": 5.864048347191648e-06, + "loss": 0.5635406494140625, + "step": 22250 + }, + { + "epoch": 0.1924324043890671, + "grad_norm": 20.810958470705486, + "learning_rate": 5.863987704453042e-06, + "loss": 0.114874267578125, + "step": 22255 + }, + { + "epoch": 0.19247563791061037, + "grad_norm": 2.134457362184891, + "learning_rate": 5.863927048505977e-06, + "loss": 0.18774452209472656, + "step": 22260 + }, + { + "epoch": 0.19251887143215363, + "grad_norm": 2.531022558258962, + "learning_rate": 5.863866379350735e-06, + "loss": 0.1555255889892578, + "step": 22265 + }, + { + "epoch": 0.1925621049536969, + "grad_norm": 7.4352415046215405, + "learning_rate": 5.863805696987594e-06, + "loss": 0.09129486083984376, + "step": 22270 + }, + { + "epoch": 0.19260533847524017, + "grad_norm": 8.812657233212507, + "learning_rate": 5.863745001416833e-06, + "loss": 0.1321990966796875, + "step": 22275 + }, + { + "epoch": 0.19264857199678342, + "grad_norm": 3.1631169179572813, + "learning_rate": 5.863684292638734e-06, + "loss": 0.04957313537597656, + "step": 22280 + }, + { + "epoch": 0.19269180551832668, + "grad_norm": 4.431789140704108, + "learning_rate": 5.863623570653576e-06, + "loss": 0.26738739013671875, + "step": 22285 + }, + { + "epoch": 0.19273503903986997, + "grad_norm": 23.711062389787635, + "learning_rate": 5.863562835461639e-06, + "loss": 0.24573974609375, + "step": 22290 + }, + { + "epoch": 0.19277827256141322, + "grad_norm": 4.584166736638355, + "learning_rate": 5.863502087063203e-06, + "loss": 0.1864837646484375, + "step": 22295 + }, + { + "epoch": 0.19282150608295648, + "grad_norm": 19.6933033562399, + "learning_rate": 5.863441325458549e-06, + "loss": 0.45753021240234376, + "step": 22300 + }, + { + "epoch": 0.19286473960449974, + "grad_norm": 13.536719870648914, + "learning_rate": 5.863380550647956e-06, + "loss": 0.30605545043945315, + "step": 22305 + }, + { + "epoch": 0.19290797312604302, + "grad_norm": 5.232690271931493, + "learning_rate": 5.863319762631706e-06, + "loss": 0.1760650634765625, + "step": 22310 + }, + { + "epoch": 0.19295120664758628, + "grad_norm": 11.487274562616234, + "learning_rate": 5.8632589614100765e-06, + "loss": 0.08151741027832031, + "step": 22315 + }, + { + "epoch": 0.19299444016912953, + "grad_norm": 44.56088104442664, + "learning_rate": 5.863198146983351e-06, + "loss": 0.20796356201171876, + "step": 22320 + }, + { + "epoch": 0.1930376736906728, + "grad_norm": 2.4054692760637373, + "learning_rate": 5.8631373193518086e-06, + "loss": 0.28399906158447263, + "step": 22325 + }, + { + "epoch": 0.19308090721221607, + "grad_norm": 22.30970374894719, + "learning_rate": 5.863076478515729e-06, + "loss": 0.0831268310546875, + "step": 22330 + }, + { + "epoch": 0.19312414073375933, + "grad_norm": 20.871242867924447, + "learning_rate": 5.863015624475394e-06, + "loss": 0.1897674560546875, + "step": 22335 + }, + { + "epoch": 0.1931673742553026, + "grad_norm": 28.06622946061526, + "learning_rate": 5.862954757231085e-06, + "loss": 0.23481292724609376, + "step": 22340 + }, + { + "epoch": 0.19321060777684584, + "grad_norm": 0.167511364402812, + "learning_rate": 5.862893876783081e-06, + "loss": 0.42522449493408204, + "step": 22345 + }, + { + "epoch": 0.19325384129838913, + "grad_norm": 2.242432811551426, + "learning_rate": 5.862832983131663e-06, + "loss": 0.1756622314453125, + "step": 22350 + }, + { + "epoch": 0.19329707481993239, + "grad_norm": 13.85076953971422, + "learning_rate": 5.862772076277112e-06, + "loss": 0.12917633056640626, + "step": 22355 + }, + { + "epoch": 0.19334030834147564, + "grad_norm": 7.9701330959987535, + "learning_rate": 5.862711156219709e-06, + "loss": 0.29564208984375, + "step": 22360 + }, + { + "epoch": 0.1933835418630189, + "grad_norm": 15.694487894078032, + "learning_rate": 5.8626502229597365e-06, + "loss": 0.2226654052734375, + "step": 22365 + }, + { + "epoch": 0.19342677538456218, + "grad_norm": 3.4803190585055233, + "learning_rate": 5.8625892764974735e-06, + "loss": 0.1490264892578125, + "step": 22370 + }, + { + "epoch": 0.19347000890610544, + "grad_norm": 21.788393641119722, + "learning_rate": 5.862528316833201e-06, + "loss": 0.2419219970703125, + "step": 22375 + }, + { + "epoch": 0.1935132424276487, + "grad_norm": 0.7000363538270554, + "learning_rate": 5.8624673439672015e-06, + "loss": 0.3144340515136719, + "step": 22380 + }, + { + "epoch": 0.19355647594919195, + "grad_norm": 6.321182345388734, + "learning_rate": 5.862406357899756e-06, + "loss": 0.11888313293457031, + "step": 22385 + }, + { + "epoch": 0.19359970947073524, + "grad_norm": 43.909196958214416, + "learning_rate": 5.862345358631145e-06, + "loss": 0.3396888732910156, + "step": 22390 + }, + { + "epoch": 0.1936429429922785, + "grad_norm": 18.37890050003508, + "learning_rate": 5.862284346161649e-06, + "loss": 0.1387847900390625, + "step": 22395 + }, + { + "epoch": 0.19368617651382175, + "grad_norm": 39.641979627685735, + "learning_rate": 5.862223320491551e-06, + "loss": 0.372930908203125, + "step": 22400 + }, + { + "epoch": 0.193729410035365, + "grad_norm": 9.786744133204346, + "learning_rate": 5.862162281621132e-06, + "loss": 0.15721893310546875, + "step": 22405 + }, + { + "epoch": 0.1937726435569083, + "grad_norm": 3.710236246690078, + "learning_rate": 5.862101229550675e-06, + "loss": 0.337164306640625, + "step": 22410 + }, + { + "epoch": 0.19381587707845155, + "grad_norm": 3.010931196279188, + "learning_rate": 5.862040164280458e-06, + "loss": 0.249188232421875, + "step": 22415 + }, + { + "epoch": 0.1938591105999948, + "grad_norm": 12.953945509708701, + "learning_rate": 5.861979085810766e-06, + "loss": 0.1020172119140625, + "step": 22420 + }, + { + "epoch": 0.1939023441215381, + "grad_norm": 16.63671107648014, + "learning_rate": 5.861917994141878e-06, + "loss": 0.15240936279296874, + "step": 22425 + }, + { + "epoch": 0.19394557764308135, + "grad_norm": 5.196422995878376, + "learning_rate": 5.861856889274077e-06, + "loss": 0.32598876953125, + "step": 22430 + }, + { + "epoch": 0.1939888111646246, + "grad_norm": 1.6892727199675222, + "learning_rate": 5.861795771207647e-06, + "loss": 0.08867340087890625, + "step": 22435 + }, + { + "epoch": 0.19403204468616786, + "grad_norm": 3.1903675804178895, + "learning_rate": 5.861734639942866e-06, + "loss": 0.0494415283203125, + "step": 22440 + }, + { + "epoch": 0.19407527820771114, + "grad_norm": 21.13102912226736, + "learning_rate": 5.861673495480019e-06, + "loss": 0.187725830078125, + "step": 22445 + }, + { + "epoch": 0.1941185117292544, + "grad_norm": 29.986267188609396, + "learning_rate": 5.861612337819386e-06, + "loss": 0.4754920959472656, + "step": 22450 + }, + { + "epoch": 0.19416174525079766, + "grad_norm": 1.5616541418153358, + "learning_rate": 5.8615511669612494e-06, + "loss": 0.0771575927734375, + "step": 22455 + }, + { + "epoch": 0.1942049787723409, + "grad_norm": 5.239297951257349, + "learning_rate": 5.8614899829058916e-06, + "loss": 0.14698486328125, + "step": 22460 + }, + { + "epoch": 0.1942482122938842, + "grad_norm": 11.87315089573837, + "learning_rate": 5.861428785653595e-06, + "loss": 0.1388916015625, + "step": 22465 + }, + { + "epoch": 0.19429144581542745, + "grad_norm": 20.155679982846465, + "learning_rate": 5.8613675752046424e-06, + "loss": 0.318011474609375, + "step": 22470 + }, + { + "epoch": 0.1943346793369707, + "grad_norm": 39.46451019144805, + "learning_rate": 5.8613063515593144e-06, + "loss": 0.54091796875, + "step": 22475 + }, + { + "epoch": 0.19437791285851397, + "grad_norm": 3.099596381043489, + "learning_rate": 5.861245114717896e-06, + "loss": 0.19754791259765625, + "step": 22480 + }, + { + "epoch": 0.19442114638005725, + "grad_norm": 0.884756555278389, + "learning_rate": 5.861183864680665e-06, + "loss": 0.10776557922363281, + "step": 22485 + }, + { + "epoch": 0.1944643799016005, + "grad_norm": 32.45199299903796, + "learning_rate": 5.861122601447909e-06, + "loss": 0.30749053955078126, + "step": 22490 + }, + { + "epoch": 0.19450761342314377, + "grad_norm": 2.0218760633601964, + "learning_rate": 5.8610613250199085e-06, + "loss": 0.1853729248046875, + "step": 22495 + }, + { + "epoch": 0.19455084694468702, + "grad_norm": 24.38114532227512, + "learning_rate": 5.861000035396946e-06, + "loss": 0.152923583984375, + "step": 22500 + }, + { + "epoch": 0.1945940804662303, + "grad_norm": 3.419490159368763, + "learning_rate": 5.860938732579302e-06, + "loss": 0.121185302734375, + "step": 22505 + }, + { + "epoch": 0.19463731398777356, + "grad_norm": 34.6207291801196, + "learning_rate": 5.860877416567264e-06, + "loss": 0.3470947265625, + "step": 22510 + }, + { + "epoch": 0.19468054750931682, + "grad_norm": 10.720418182761291, + "learning_rate": 5.860816087361111e-06, + "loss": 0.16121063232421876, + "step": 22515 + }, + { + "epoch": 0.19472378103086008, + "grad_norm": 25.692024863011724, + "learning_rate": 5.8607547449611275e-06, + "loss": 0.4937713623046875, + "step": 22520 + }, + { + "epoch": 0.19476701455240336, + "grad_norm": 7.293240400375852, + "learning_rate": 5.860693389367595e-06, + "loss": 0.327850341796875, + "step": 22525 + }, + { + "epoch": 0.19481024807394662, + "grad_norm": 6.851532609872418, + "learning_rate": 5.860632020580799e-06, + "loss": 0.11160888671875, + "step": 22530 + }, + { + "epoch": 0.19485348159548987, + "grad_norm": 0.5611751711253353, + "learning_rate": 5.860570638601019e-06, + "loss": 0.1134429931640625, + "step": 22535 + }, + { + "epoch": 0.19489671511703313, + "grad_norm": 15.202909509282147, + "learning_rate": 5.86050924342854e-06, + "loss": 0.37227630615234375, + "step": 22540 + }, + { + "epoch": 0.19493994863857642, + "grad_norm": 2.693503319483886, + "learning_rate": 5.860447835063647e-06, + "loss": 0.237567138671875, + "step": 22545 + }, + { + "epoch": 0.19498318216011967, + "grad_norm": 6.378334887160531, + "learning_rate": 5.86038641350662e-06, + "loss": 0.12769775390625, + "step": 22550 + }, + { + "epoch": 0.19502641568166293, + "grad_norm": 0.27820692795692203, + "learning_rate": 5.860324978757744e-06, + "loss": 0.0638397216796875, + "step": 22555 + }, + { + "epoch": 0.19506964920320619, + "grad_norm": 9.720376814310296, + "learning_rate": 5.860263530817301e-06, + "loss": 0.192083740234375, + "step": 22560 + }, + { + "epoch": 0.19511288272474947, + "grad_norm": 5.511240506772869, + "learning_rate": 5.860202069685576e-06, + "loss": 0.1613922119140625, + "step": 22565 + }, + { + "epoch": 0.19515611624629273, + "grad_norm": 1.512231595426176, + "learning_rate": 5.860140595362851e-06, + "loss": 0.15387725830078125, + "step": 22570 + }, + { + "epoch": 0.19519934976783598, + "grad_norm": 3.160470415420685, + "learning_rate": 5.860079107849412e-06, + "loss": 0.23897552490234375, + "step": 22575 + }, + { + "epoch": 0.19524258328937924, + "grad_norm": 24.053506169653282, + "learning_rate": 5.8600176071455396e-06, + "loss": 0.46474609375, + "step": 22580 + }, + { + "epoch": 0.19528581681092252, + "grad_norm": 17.32047097636393, + "learning_rate": 5.85995609325152e-06, + "loss": 0.264691162109375, + "step": 22585 + }, + { + "epoch": 0.19532905033246578, + "grad_norm": 9.408323138148452, + "learning_rate": 5.859894566167635e-06, + "loss": 0.132952880859375, + "step": 22590 + }, + { + "epoch": 0.19537228385400904, + "grad_norm": 16.595350512238188, + "learning_rate": 5.859833025894169e-06, + "loss": 0.105450439453125, + "step": 22595 + }, + { + "epoch": 0.1954155173755523, + "grad_norm": 7.697034346476442, + "learning_rate": 5.859771472431404e-06, + "loss": 0.1700664520263672, + "step": 22600 + }, + { + "epoch": 0.19545875089709558, + "grad_norm": 14.19911849725456, + "learning_rate": 5.8597099057796276e-06, + "loss": 0.1345947265625, + "step": 22605 + }, + { + "epoch": 0.19550198441863884, + "grad_norm": 3.083200444312876, + "learning_rate": 5.859648325939122e-06, + "loss": 0.10961532592773438, + "step": 22610 + }, + { + "epoch": 0.1955452179401821, + "grad_norm": 15.948955522197878, + "learning_rate": 5.85958673291017e-06, + "loss": 0.14294891357421874, + "step": 22615 + }, + { + "epoch": 0.19558845146172538, + "grad_norm": 0.3830197541496184, + "learning_rate": 5.859525126693057e-06, + "loss": 0.2228790283203125, + "step": 22620 + }, + { + "epoch": 0.19563168498326863, + "grad_norm": 0.5410022231483386, + "learning_rate": 5.859463507288067e-06, + "loss": 0.16266555786132814, + "step": 22625 + }, + { + "epoch": 0.1956749185048119, + "grad_norm": 40.19490185740864, + "learning_rate": 5.8594018746954835e-06, + "loss": 0.3453155517578125, + "step": 22630 + }, + { + "epoch": 0.19571815202635515, + "grad_norm": 21.22760377183231, + "learning_rate": 5.859340228915591e-06, + "loss": 0.114813232421875, + "step": 22635 + }, + { + "epoch": 0.19576138554789843, + "grad_norm": 2.8990383777805646, + "learning_rate": 5.859278569948675e-06, + "loss": 0.05804367065429687, + "step": 22640 + }, + { + "epoch": 0.1958046190694417, + "grad_norm": 9.067655364470452, + "learning_rate": 5.859216897795019e-06, + "loss": 0.2764739990234375, + "step": 22645 + }, + { + "epoch": 0.19584785259098494, + "grad_norm": 28.094898396644865, + "learning_rate": 5.859155212454905e-06, + "loss": 0.28585205078125, + "step": 22650 + }, + { + "epoch": 0.1958910861125282, + "grad_norm": 8.52854711823238, + "learning_rate": 5.859093513928623e-06, + "loss": 0.3207206726074219, + "step": 22655 + }, + { + "epoch": 0.19593431963407149, + "grad_norm": 26.039878573791174, + "learning_rate": 5.859031802216453e-06, + "loss": 0.11593475341796874, + "step": 22660 + }, + { + "epoch": 0.19597755315561474, + "grad_norm": 3.6154982553442148, + "learning_rate": 5.85897007731868e-06, + "loss": 0.1641204833984375, + "step": 22665 + }, + { + "epoch": 0.196020786677158, + "grad_norm": 25.951698699368276, + "learning_rate": 5.85890833923559e-06, + "loss": 0.15360794067382813, + "step": 22670 + }, + { + "epoch": 0.19606402019870126, + "grad_norm": 1.4662373245458127, + "learning_rate": 5.8588465879674686e-06, + "loss": 0.17361373901367189, + "step": 22675 + }, + { + "epoch": 0.19610725372024454, + "grad_norm": 12.380899380703669, + "learning_rate": 5.858784823514599e-06, + "loss": 0.2240966796875, + "step": 22680 + }, + { + "epoch": 0.1961504872417878, + "grad_norm": 17.603896717083142, + "learning_rate": 5.858723045877265e-06, + "loss": 0.12325592041015625, + "step": 22685 + }, + { + "epoch": 0.19619372076333105, + "grad_norm": 7.276013508850264, + "learning_rate": 5.858661255055754e-06, + "loss": 0.1530792236328125, + "step": 22690 + }, + { + "epoch": 0.1962369542848743, + "grad_norm": 2.360800242094855, + "learning_rate": 5.85859945105035e-06, + "loss": 0.4776954650878906, + "step": 22695 + }, + { + "epoch": 0.1962801878064176, + "grad_norm": 1.4401580456236316, + "learning_rate": 5.858537633861338e-06, + "loss": 0.20728302001953125, + "step": 22700 + }, + { + "epoch": 0.19632342132796085, + "grad_norm": 2.984597745169309, + "learning_rate": 5.858475803489003e-06, + "loss": 0.22579345703125, + "step": 22705 + }, + { + "epoch": 0.1963666548495041, + "grad_norm": 0.381443750527335, + "learning_rate": 5.8584139599336285e-06, + "loss": 0.10422744750976562, + "step": 22710 + }, + { + "epoch": 0.19640988837104736, + "grad_norm": 10.931450871652743, + "learning_rate": 5.858352103195504e-06, + "loss": 0.1031494140625, + "step": 22715 + }, + { + "epoch": 0.19645312189259065, + "grad_norm": 21.009280333822442, + "learning_rate": 5.85829023327491e-06, + "loss": 0.06288299560546876, + "step": 22720 + }, + { + "epoch": 0.1964963554141339, + "grad_norm": 1.1438317985585322, + "learning_rate": 5.858228350172135e-06, + "loss": 0.10816650390625, + "step": 22725 + }, + { + "epoch": 0.19653958893567716, + "grad_norm": 3.3491213010398257, + "learning_rate": 5.858166453887464e-06, + "loss": 0.025248146057128905, + "step": 22730 + }, + { + "epoch": 0.19658282245722042, + "grad_norm": 1.7255035641161052, + "learning_rate": 5.8581045444211815e-06, + "loss": 0.05365753173828125, + "step": 22735 + }, + { + "epoch": 0.1966260559787637, + "grad_norm": 1.5852352385663302, + "learning_rate": 5.858042621773573e-06, + "loss": 0.44144134521484374, + "step": 22740 + }, + { + "epoch": 0.19666928950030696, + "grad_norm": 18.58726377073656, + "learning_rate": 5.857980685944924e-06, + "loss": 0.4809783935546875, + "step": 22745 + }, + { + "epoch": 0.19671252302185022, + "grad_norm": 27.429878032110782, + "learning_rate": 5.857918736935521e-06, + "loss": 0.22566757202148438, + "step": 22750 + }, + { + "epoch": 0.19675575654339347, + "grad_norm": 6.648853885792301, + "learning_rate": 5.85785677474565e-06, + "loss": 0.10206680297851563, + "step": 22755 + }, + { + "epoch": 0.19679899006493676, + "grad_norm": 11.028127619492428, + "learning_rate": 5.857794799375595e-06, + "loss": 0.28160781860351564, + "step": 22760 + }, + { + "epoch": 0.19684222358648001, + "grad_norm": 14.60545475699284, + "learning_rate": 5.857732810825644e-06, + "loss": 0.20281982421875, + "step": 22765 + }, + { + "epoch": 0.19688545710802327, + "grad_norm": 55.85011341688203, + "learning_rate": 5.857670809096081e-06, + "loss": 0.3260467529296875, + "step": 22770 + }, + { + "epoch": 0.19692869062956653, + "grad_norm": 1.6558152760985143, + "learning_rate": 5.8576087941871926e-06, + "loss": 0.385675048828125, + "step": 22775 + }, + { + "epoch": 0.1969719241511098, + "grad_norm": 47.399853324394094, + "learning_rate": 5.857546766099265e-06, + "loss": 0.40704345703125, + "step": 22780 + }, + { + "epoch": 0.19701515767265307, + "grad_norm": 199.28137294474502, + "learning_rate": 5.857484724832585e-06, + "loss": 0.3189201354980469, + "step": 22785 + }, + { + "epoch": 0.19705839119419633, + "grad_norm": 1.0769887019066857, + "learning_rate": 5.857422670387437e-06, + "loss": 0.09769287109375, + "step": 22790 + }, + { + "epoch": 0.1971016247157396, + "grad_norm": 20.89660859801174, + "learning_rate": 5.857360602764109e-06, + "loss": 0.09589881896972656, + "step": 22795 + }, + { + "epoch": 0.19714485823728287, + "grad_norm": 27.142171277599445, + "learning_rate": 5.8572985219628866e-06, + "loss": 0.42091064453125, + "step": 22800 + }, + { + "epoch": 0.19718809175882612, + "grad_norm": 8.326076638300824, + "learning_rate": 5.8572364279840555e-06, + "loss": 0.162347412109375, + "step": 22805 + }, + { + "epoch": 0.19723132528036938, + "grad_norm": 44.40733287263182, + "learning_rate": 5.8571743208279015e-06, + "loss": 0.45850830078125, + "step": 22810 + }, + { + "epoch": 0.19727455880191266, + "grad_norm": 19.619113041697368, + "learning_rate": 5.857112200494713e-06, + "loss": 0.3905181884765625, + "step": 22815 + }, + { + "epoch": 0.19731779232345592, + "grad_norm": 0.6143723529596941, + "learning_rate": 5.857050066984775e-06, + "loss": 0.0573394775390625, + "step": 22820 + }, + { + "epoch": 0.19736102584499918, + "grad_norm": 1.9716987003829387, + "learning_rate": 5.856987920298375e-06, + "loss": 0.336431884765625, + "step": 22825 + }, + { + "epoch": 0.19740425936654243, + "grad_norm": 44.82996315630749, + "learning_rate": 5.856925760435799e-06, + "loss": 0.2411876678466797, + "step": 22830 + }, + { + "epoch": 0.19744749288808572, + "grad_norm": 16.39038858305754, + "learning_rate": 5.856863587397334e-06, + "loss": 0.342083740234375, + "step": 22835 + }, + { + "epoch": 0.19749072640962897, + "grad_norm": 8.26961382087487, + "learning_rate": 5.856801401183266e-06, + "loss": 0.10743331909179688, + "step": 22840 + }, + { + "epoch": 0.19753395993117223, + "grad_norm": 23.49061112408498, + "learning_rate": 5.856739201793882e-06, + "loss": 0.1868133544921875, + "step": 22845 + }, + { + "epoch": 0.1975771934527155, + "grad_norm": 39.106351685367905, + "learning_rate": 5.85667698922947e-06, + "loss": 0.1740346908569336, + "step": 22850 + }, + { + "epoch": 0.19762042697425877, + "grad_norm": 37.716937522779894, + "learning_rate": 5.856614763490317e-06, + "loss": 0.13524017333984376, + "step": 22855 + }, + { + "epoch": 0.19766366049580203, + "grad_norm": 5.8742611908544164, + "learning_rate": 5.856552524576708e-06, + "loss": 0.15961761474609376, + "step": 22860 + }, + { + "epoch": 0.19770689401734529, + "grad_norm": 0.8640039527745493, + "learning_rate": 5.856490272488931e-06, + "loss": 0.2708160400390625, + "step": 22865 + }, + { + "epoch": 0.19775012753888854, + "grad_norm": 6.380315710505167, + "learning_rate": 5.856428007227274e-06, + "loss": 0.0461029052734375, + "step": 22870 + }, + { + "epoch": 0.19779336106043183, + "grad_norm": 14.548699801624437, + "learning_rate": 5.856365728792023e-06, + "loss": 0.13182373046875, + "step": 22875 + }, + { + "epoch": 0.19783659458197508, + "grad_norm": 41.701114349233, + "learning_rate": 5.8563034371834655e-06, + "loss": 0.32164764404296875, + "step": 22880 + }, + { + "epoch": 0.19787982810351834, + "grad_norm": 2.90236352376096, + "learning_rate": 5.85624113240189e-06, + "loss": 0.04207534790039062, + "step": 22885 + }, + { + "epoch": 0.1979230616250616, + "grad_norm": 2.3094989502103935, + "learning_rate": 5.856178814447581e-06, + "loss": 0.3834197998046875, + "step": 22890 + }, + { + "epoch": 0.19796629514660488, + "grad_norm": 51.396259041536304, + "learning_rate": 5.856116483320829e-06, + "loss": 0.3884674072265625, + "step": 22895 + }, + { + "epoch": 0.19800952866814814, + "grad_norm": 27.995936948372655, + "learning_rate": 5.85605413902192e-06, + "loss": 0.1602020263671875, + "step": 22900 + }, + { + "epoch": 0.1980527621896914, + "grad_norm": 25.341871474230427, + "learning_rate": 5.8559917815511415e-06, + "loss": 0.21876373291015624, + "step": 22905 + }, + { + "epoch": 0.19809599571123465, + "grad_norm": 0.4153091444341537, + "learning_rate": 5.855929410908781e-06, + "loss": 0.19220733642578125, + "step": 22910 + }, + { + "epoch": 0.19813922923277794, + "grad_norm": 7.117906900665336, + "learning_rate": 5.855867027095126e-06, + "loss": 0.30882568359375, + "step": 22915 + }, + { + "epoch": 0.1981824627543212, + "grad_norm": 27.041375718452862, + "learning_rate": 5.855804630110466e-06, + "loss": 0.24464111328125, + "step": 22920 + }, + { + "epoch": 0.19822569627586445, + "grad_norm": 18.357372228162117, + "learning_rate": 5.855742219955086e-06, + "loss": 0.1920135498046875, + "step": 22925 + }, + { + "epoch": 0.1982689297974077, + "grad_norm": 11.447541353716987, + "learning_rate": 5.8556797966292765e-06, + "loss": 0.15574951171875, + "step": 22930 + }, + { + "epoch": 0.198312163318951, + "grad_norm": 18.256489411560548, + "learning_rate": 5.855617360133323e-06, + "loss": 0.254913330078125, + "step": 22935 + }, + { + "epoch": 0.19835539684049425, + "grad_norm": 11.926786663064721, + "learning_rate": 5.855554910467515e-06, + "loss": 0.112255859375, + "step": 22940 + }, + { + "epoch": 0.1983986303620375, + "grad_norm": 2.297840514894065, + "learning_rate": 5.85549244763214e-06, + "loss": 0.0889923095703125, + "step": 22945 + }, + { + "epoch": 0.19844186388358076, + "grad_norm": 26.728370115648673, + "learning_rate": 5.8554299716274865e-06, + "loss": 0.4066375732421875, + "step": 22950 + }, + { + "epoch": 0.19848509740512404, + "grad_norm": 5.000783055499771, + "learning_rate": 5.855367482453841e-06, + "loss": 0.118280029296875, + "step": 22955 + }, + { + "epoch": 0.1985283309266673, + "grad_norm": 8.887390305148182, + "learning_rate": 5.855304980111495e-06, + "loss": 0.20194473266601562, + "step": 22960 + }, + { + "epoch": 0.19857156444821056, + "grad_norm": 17.751125100360372, + "learning_rate": 5.855242464600734e-06, + "loss": 0.1510894775390625, + "step": 22965 + }, + { + "epoch": 0.19861479796975381, + "grad_norm": 37.38091200111339, + "learning_rate": 5.855179935921846e-06, + "loss": 0.43593597412109375, + "step": 22970 + }, + { + "epoch": 0.1986580314912971, + "grad_norm": 16.21870876090799, + "learning_rate": 5.855117394075122e-06, + "loss": 0.31238746643066406, + "step": 22975 + }, + { + "epoch": 0.19870126501284036, + "grad_norm": 0.919291585354582, + "learning_rate": 5.855054839060848e-06, + "loss": 0.196490478515625, + "step": 22980 + }, + { + "epoch": 0.1987444985343836, + "grad_norm": 25.485560726753466, + "learning_rate": 5.854992270879313e-06, + "loss": 0.1742389678955078, + "step": 22985 + }, + { + "epoch": 0.1987877320559269, + "grad_norm": 10.749369222489081, + "learning_rate": 5.854929689530807e-06, + "loss": 0.1591644287109375, + "step": 22990 + }, + { + "epoch": 0.19883096557747015, + "grad_norm": 10.459726488021868, + "learning_rate": 5.854867095015617e-06, + "loss": 0.125048828125, + "step": 22995 + }, + { + "epoch": 0.1988741990990134, + "grad_norm": 57.03043065050666, + "learning_rate": 5.854804487334031e-06, + "loss": 0.4274559020996094, + "step": 23000 + }, + { + "epoch": 0.19891743262055667, + "grad_norm": 10.740045425439277, + "learning_rate": 5.854741866486341e-06, + "loss": 0.2428558349609375, + "step": 23005 + }, + { + "epoch": 0.19896066614209995, + "grad_norm": 13.652435003291572, + "learning_rate": 5.8546792324728325e-06, + "loss": 0.16514892578125, + "step": 23010 + }, + { + "epoch": 0.1990038996636432, + "grad_norm": 21.826425727669196, + "learning_rate": 5.854616585293796e-06, + "loss": 0.230926513671875, + "step": 23015 + }, + { + "epoch": 0.19904713318518646, + "grad_norm": 6.132410826440032, + "learning_rate": 5.85455392494952e-06, + "loss": 0.3145263671875, + "step": 23020 + }, + { + "epoch": 0.19909036670672972, + "grad_norm": 9.65234489838843, + "learning_rate": 5.854491251440294e-06, + "loss": 0.38302001953125, + "step": 23025 + }, + { + "epoch": 0.199133600228273, + "grad_norm": 10.267906528254283, + "learning_rate": 5.854428564766406e-06, + "loss": 0.1773143768310547, + "step": 23030 + }, + { + "epoch": 0.19917683374981626, + "grad_norm": 11.940279244173887, + "learning_rate": 5.854365864928145e-06, + "loss": 0.147711181640625, + "step": 23035 + }, + { + "epoch": 0.19922006727135952, + "grad_norm": 17.992450086802176, + "learning_rate": 5.854303151925802e-06, + "loss": 0.1346527099609375, + "step": 23040 + }, + { + "epoch": 0.19926330079290278, + "grad_norm": 2.3290342831022612, + "learning_rate": 5.854240425759664e-06, + "loss": 0.03841743469238281, + "step": 23045 + }, + { + "epoch": 0.19930653431444606, + "grad_norm": 1.6791429093565597, + "learning_rate": 5.854177686430023e-06, + "loss": 0.14162101745605468, + "step": 23050 + }, + { + "epoch": 0.19934976783598932, + "grad_norm": 7.102102677968916, + "learning_rate": 5.854114933937165e-06, + "loss": 0.16911354064941406, + "step": 23055 + }, + { + "epoch": 0.19939300135753257, + "grad_norm": 20.696634234880822, + "learning_rate": 5.8540521682813815e-06, + "loss": 0.6058685302734375, + "step": 23060 + }, + { + "epoch": 0.19943623487907583, + "grad_norm": 4.378418790382426, + "learning_rate": 5.8539893894629625e-06, + "loss": 0.1071624755859375, + "step": 23065 + }, + { + "epoch": 0.19947946840061911, + "grad_norm": 33.90910116160249, + "learning_rate": 5.853926597482196e-06, + "loss": 0.19142303466796876, + "step": 23070 + }, + { + "epoch": 0.19952270192216237, + "grad_norm": 8.672505970898477, + "learning_rate": 5.853863792339372e-06, + "loss": 0.386309814453125, + "step": 23075 + }, + { + "epoch": 0.19956593544370563, + "grad_norm": 2.809347014898976, + "learning_rate": 5.853800974034781e-06, + "loss": 0.12197723388671874, + "step": 23080 + }, + { + "epoch": 0.19960916896524888, + "grad_norm": 19.730028072595122, + "learning_rate": 5.8537381425687115e-06, + "loss": 0.22581787109375, + "step": 23085 + }, + { + "epoch": 0.19965240248679217, + "grad_norm": 1.228915930962338, + "learning_rate": 5.853675297941454e-06, + "loss": 0.105615234375, + "step": 23090 + }, + { + "epoch": 0.19969563600833543, + "grad_norm": 0.3737294518652027, + "learning_rate": 5.853612440153298e-06, + "loss": 0.069683837890625, + "step": 23095 + }, + { + "epoch": 0.19973886952987868, + "grad_norm": 9.917201421348269, + "learning_rate": 5.853549569204534e-06, + "loss": 0.3986358642578125, + "step": 23100 + }, + { + "epoch": 0.19978210305142194, + "grad_norm": 0.19569074676258033, + "learning_rate": 5.853486685095451e-06, + "loss": 0.05915679931640625, + "step": 23105 + }, + { + "epoch": 0.19982533657296522, + "grad_norm": 8.019443991619546, + "learning_rate": 5.853423787826341e-06, + "loss": 0.17716217041015625, + "step": 23110 + }, + { + "epoch": 0.19986857009450848, + "grad_norm": 3.3741717961495272, + "learning_rate": 5.8533608773974905e-06, + "loss": 0.124908447265625, + "step": 23115 + }, + { + "epoch": 0.19991180361605174, + "grad_norm": 31.588412800239407, + "learning_rate": 5.8532979538091935e-06, + "loss": 0.55931396484375, + "step": 23120 + }, + { + "epoch": 0.199955037137595, + "grad_norm": 3.8371853583315043, + "learning_rate": 5.853235017061737e-06, + "loss": 0.19615631103515624, + "step": 23125 + }, + { + "epoch": 0.19999827065913828, + "grad_norm": 3.7365281640127663, + "learning_rate": 5.853172067155413e-06, + "loss": 0.292578125, + "step": 23130 + }, + { + "epoch": 0.20004150418068153, + "grad_norm": 3.814469819352131, + "learning_rate": 5.853109104090513e-06, + "loss": 0.1391448974609375, + "step": 23135 + }, + { + "epoch": 0.2000847377022248, + "grad_norm": 5.828703013492606, + "learning_rate": 5.853046127867325e-06, + "loss": 0.17613525390625, + "step": 23140 + }, + { + "epoch": 0.20012797122376805, + "grad_norm": 9.865944690704215, + "learning_rate": 5.85298313848614e-06, + "loss": 0.1801055908203125, + "step": 23145 + }, + { + "epoch": 0.20017120474531133, + "grad_norm": 28.551315134356337, + "learning_rate": 5.852920135947249e-06, + "loss": 0.104010009765625, + "step": 23150 + }, + { + "epoch": 0.2002144382668546, + "grad_norm": 28.006071779582424, + "learning_rate": 5.852857120250943e-06, + "loss": 0.4527411460876465, + "step": 23155 + }, + { + "epoch": 0.20025767178839785, + "grad_norm": 9.660375477564175, + "learning_rate": 5.852794091397512e-06, + "loss": 0.1137725830078125, + "step": 23160 + }, + { + "epoch": 0.20030090530994113, + "grad_norm": 8.864100269916854, + "learning_rate": 5.852731049387247e-06, + "loss": 0.09114990234375, + "step": 23165 + }, + { + "epoch": 0.2003441388314844, + "grad_norm": 4.997209822199721, + "learning_rate": 5.852667994220437e-06, + "loss": 0.102227783203125, + "step": 23170 + }, + { + "epoch": 0.20038737235302764, + "grad_norm": 0.542478627379296, + "learning_rate": 5.852604925897374e-06, + "loss": 0.17054615020751954, + "step": 23175 + }, + { + "epoch": 0.2004306058745709, + "grad_norm": 6.399078976395625, + "learning_rate": 5.852541844418351e-06, + "loss": 0.12496261596679688, + "step": 23180 + }, + { + "epoch": 0.20047383939611418, + "grad_norm": 29.45819887343614, + "learning_rate": 5.852478749783656e-06, + "loss": 0.5051174163818359, + "step": 23185 + }, + { + "epoch": 0.20051707291765744, + "grad_norm": 7.084547650892551, + "learning_rate": 5.852415641993582e-06, + "loss": 0.21468276977539064, + "step": 23190 + }, + { + "epoch": 0.2005603064392007, + "grad_norm": 42.01220222229918, + "learning_rate": 5.852352521048418e-06, + "loss": 0.32350502014160154, + "step": 23195 + }, + { + "epoch": 0.20060353996074395, + "grad_norm": 1.839888631729216, + "learning_rate": 5.852289386948457e-06, + "loss": 0.1018280029296875, + "step": 23200 + }, + { + "epoch": 0.20064677348228724, + "grad_norm": 5.376769238471683, + "learning_rate": 5.852226239693989e-06, + "loss": 0.2792144775390625, + "step": 23205 + }, + { + "epoch": 0.2006900070038305, + "grad_norm": 4.615576226790585, + "learning_rate": 5.852163079285305e-06, + "loss": 0.324493408203125, + "step": 23210 + }, + { + "epoch": 0.20073324052537375, + "grad_norm": 48.18230286293638, + "learning_rate": 5.852099905722698e-06, + "loss": 0.1758697509765625, + "step": 23215 + }, + { + "epoch": 0.200776474046917, + "grad_norm": 7.1237465503685815, + "learning_rate": 5.852036719006457e-06, + "loss": 0.08209228515625, + "step": 23220 + }, + { + "epoch": 0.2008197075684603, + "grad_norm": 5.157913724618283, + "learning_rate": 5.8519735191368765e-06, + "loss": 0.27132568359375, + "step": 23225 + }, + { + "epoch": 0.20086294109000355, + "grad_norm": 26.327699639109113, + "learning_rate": 5.851910306114244e-06, + "loss": 0.201953125, + "step": 23230 + }, + { + "epoch": 0.2009061746115468, + "grad_norm": 33.2201047851292, + "learning_rate": 5.8518470799388545e-06, + "loss": 0.3641143798828125, + "step": 23235 + }, + { + "epoch": 0.20094940813309006, + "grad_norm": 8.217853456630104, + "learning_rate": 5.851783840610999e-06, + "loss": 0.08883056640625, + "step": 23240 + }, + { + "epoch": 0.20099264165463335, + "grad_norm": 1.10419927919728, + "learning_rate": 5.8517205881309665e-06, + "loss": 0.09645004272460937, + "step": 23245 + }, + { + "epoch": 0.2010358751761766, + "grad_norm": 16.09355026227659, + "learning_rate": 5.851657322499052e-06, + "loss": 0.23592529296875, + "step": 23250 + }, + { + "epoch": 0.20107910869771986, + "grad_norm": 39.14153912231967, + "learning_rate": 5.851594043715545e-06, + "loss": 0.1023681640625, + "step": 23255 + }, + { + "epoch": 0.20112234221926312, + "grad_norm": 5.32952953557582, + "learning_rate": 5.851530751780739e-06, + "loss": 0.19551239013671876, + "step": 23260 + }, + { + "epoch": 0.2011655757408064, + "grad_norm": 26.01215471793653, + "learning_rate": 5.851467446694925e-06, + "loss": 0.176300048828125, + "step": 23265 + }, + { + "epoch": 0.20120880926234966, + "grad_norm": 7.048751162551444, + "learning_rate": 5.851404128458394e-06, + "loss": 0.152496337890625, + "step": 23270 + }, + { + "epoch": 0.20125204278389291, + "grad_norm": 36.89965563115395, + "learning_rate": 5.851340797071441e-06, + "loss": 0.14881057739257814, + "step": 23275 + }, + { + "epoch": 0.20129527630543617, + "grad_norm": 7.341675676405665, + "learning_rate": 5.851277452534354e-06, + "loss": 0.0695892333984375, + "step": 23280 + }, + { + "epoch": 0.20133850982697946, + "grad_norm": 1.529219992186839, + "learning_rate": 5.85121409484743e-06, + "loss": 0.1366363525390625, + "step": 23285 + }, + { + "epoch": 0.2013817433485227, + "grad_norm": 10.40485108216937, + "learning_rate": 5.851150724010957e-06, + "loss": 0.112213134765625, + "step": 23290 + }, + { + "epoch": 0.20142497687006597, + "grad_norm": 3.54422702082798, + "learning_rate": 5.851087340025229e-06, + "loss": 0.1562274932861328, + "step": 23295 + }, + { + "epoch": 0.20146821039160923, + "grad_norm": 0.8432760315600786, + "learning_rate": 5.851023942890538e-06, + "loss": 0.448193359375, + "step": 23300 + }, + { + "epoch": 0.2015114439131525, + "grad_norm": 28.417695817639515, + "learning_rate": 5.850960532607176e-06, + "loss": 0.35177459716796877, + "step": 23305 + }, + { + "epoch": 0.20155467743469577, + "grad_norm": 17.381448460510413, + "learning_rate": 5.850897109175436e-06, + "loss": 0.29749221801757814, + "step": 23310 + }, + { + "epoch": 0.20159791095623902, + "grad_norm": 33.18835935422533, + "learning_rate": 5.850833672595611e-06, + "loss": 0.189166259765625, + "step": 23315 + }, + { + "epoch": 0.20164114447778228, + "grad_norm": 5.526388318853202, + "learning_rate": 5.850770222867994e-06, + "loss": 0.0792633056640625, + "step": 23320 + }, + { + "epoch": 0.20168437799932556, + "grad_norm": 2.4878422976379584, + "learning_rate": 5.8507067599928745e-06, + "loss": 0.27763671875, + "step": 23325 + }, + { + "epoch": 0.20172761152086882, + "grad_norm": 11.749933554800922, + "learning_rate": 5.850643283970548e-06, + "loss": 0.12555694580078125, + "step": 23330 + }, + { + "epoch": 0.20177084504241208, + "grad_norm": 14.541853174676781, + "learning_rate": 5.850579794801308e-06, + "loss": 0.35316314697265627, + "step": 23335 + }, + { + "epoch": 0.20181407856395533, + "grad_norm": 13.843234653088818, + "learning_rate": 5.850516292485445e-06, + "loss": 0.131005859375, + "step": 23340 + }, + { + "epoch": 0.20185731208549862, + "grad_norm": 0.7666817953435704, + "learning_rate": 5.850452777023252e-06, + "loss": 0.08115119934082031, + "step": 23345 + }, + { + "epoch": 0.20190054560704188, + "grad_norm": 1.9328675340306365, + "learning_rate": 5.850389248415023e-06, + "loss": 0.05680084228515625, + "step": 23350 + }, + { + "epoch": 0.20194377912858513, + "grad_norm": 50.793574792396306, + "learning_rate": 5.85032570666105e-06, + "loss": 0.22396240234375, + "step": 23355 + }, + { + "epoch": 0.20198701265012842, + "grad_norm": 0.7725033232934887, + "learning_rate": 5.8502621517616285e-06, + "loss": 0.27558746337890627, + "step": 23360 + }, + { + "epoch": 0.20203024617167167, + "grad_norm": 4.1075083208578285, + "learning_rate": 5.850198583717048e-06, + "loss": 0.064324951171875, + "step": 23365 + }, + { + "epoch": 0.20207347969321493, + "grad_norm": 3.96569817207576, + "learning_rate": 5.850135002527605e-06, + "loss": 0.0862640380859375, + "step": 23370 + }, + { + "epoch": 0.2021167132147582, + "grad_norm": 11.805173510497005, + "learning_rate": 5.850071408193591e-06, + "loss": 0.6892578125, + "step": 23375 + }, + { + "epoch": 0.20215994673630147, + "grad_norm": 7.526350155830267, + "learning_rate": 5.850007800715298e-06, + "loss": 0.15779991149902345, + "step": 23380 + }, + { + "epoch": 0.20220318025784473, + "grad_norm": 6.551749030745714, + "learning_rate": 5.849944180093022e-06, + "loss": 0.13520965576171876, + "step": 23385 + }, + { + "epoch": 0.20224641377938798, + "grad_norm": 20.224674150866086, + "learning_rate": 5.8498805463270556e-06, + "loss": 0.251348876953125, + "step": 23390 + }, + { + "epoch": 0.20228964730093124, + "grad_norm": 24.901145758123903, + "learning_rate": 5.849816899417691e-06, + "loss": 0.2618011474609375, + "step": 23395 + }, + { + "epoch": 0.20233288082247453, + "grad_norm": 5.647657760541022, + "learning_rate": 5.8497532393652234e-06, + "loss": 0.5567192077636719, + "step": 23400 + }, + { + "epoch": 0.20237611434401778, + "grad_norm": 3.9797380439800745, + "learning_rate": 5.849689566169945e-06, + "loss": 0.27491111755371095, + "step": 23405 + }, + { + "epoch": 0.20241934786556104, + "grad_norm": 0.938579849503575, + "learning_rate": 5.849625879832152e-06, + "loss": 0.1077606201171875, + "step": 23410 + }, + { + "epoch": 0.2024625813871043, + "grad_norm": 1.6240677148113996, + "learning_rate": 5.849562180352134e-06, + "loss": 0.18213653564453125, + "step": 23415 + }, + { + "epoch": 0.20250581490864758, + "grad_norm": 2.7107320237418935, + "learning_rate": 5.849498467730187e-06, + "loss": 0.28782958984375, + "step": 23420 + }, + { + "epoch": 0.20254904843019084, + "grad_norm": 19.86466675688788, + "learning_rate": 5.849434741966606e-06, + "loss": 0.128387451171875, + "step": 23425 + }, + { + "epoch": 0.2025922819517341, + "grad_norm": 12.798065157228125, + "learning_rate": 5.849371003061684e-06, + "loss": 0.212261962890625, + "step": 23430 + }, + { + "epoch": 0.20263551547327735, + "grad_norm": 0.4801788706988941, + "learning_rate": 5.8493072510157135e-06, + "loss": 0.15225677490234374, + "step": 23435 + }, + { + "epoch": 0.20267874899482063, + "grad_norm": 30.082661354823234, + "learning_rate": 5.84924348582899e-06, + "loss": 0.4337882995605469, + "step": 23440 + }, + { + "epoch": 0.2027219825163639, + "grad_norm": 28.969662347413397, + "learning_rate": 5.849179707501809e-06, + "loss": 0.3457275390625, + "step": 23445 + }, + { + "epoch": 0.20276521603790715, + "grad_norm": 6.935291602046648, + "learning_rate": 5.849115916034461e-06, + "loss": 0.144622802734375, + "step": 23450 + }, + { + "epoch": 0.2028084495594504, + "grad_norm": 11.191324692029983, + "learning_rate": 5.849052111427242e-06, + "loss": 0.1594940185546875, + "step": 23455 + }, + { + "epoch": 0.2028516830809937, + "grad_norm": 9.057375001271708, + "learning_rate": 5.848988293680448e-06, + "loss": 0.301568603515625, + "step": 23460 + }, + { + "epoch": 0.20289491660253695, + "grad_norm": 10.153284363696768, + "learning_rate": 5.84892446279437e-06, + "loss": 0.33749542236328123, + "step": 23465 + }, + { + "epoch": 0.2029381501240802, + "grad_norm": 20.085047843674896, + "learning_rate": 5.848860618769306e-06, + "loss": 0.0859344482421875, + "step": 23470 + }, + { + "epoch": 0.20298138364562346, + "grad_norm": 0.18863542348989432, + "learning_rate": 5.848796761605547e-06, + "loss": 0.10720672607421874, + "step": 23475 + }, + { + "epoch": 0.20302461716716674, + "grad_norm": 36.1783520840859, + "learning_rate": 5.84873289130339e-06, + "loss": 0.2271453857421875, + "step": 23480 + }, + { + "epoch": 0.20306785068871, + "grad_norm": 12.605820195924737, + "learning_rate": 5.848669007863128e-06, + "loss": 0.318048095703125, + "step": 23485 + }, + { + "epoch": 0.20311108421025326, + "grad_norm": 1.5658222491737883, + "learning_rate": 5.848605111285056e-06, + "loss": 0.07747650146484375, + "step": 23490 + }, + { + "epoch": 0.2031543177317965, + "grad_norm": 16.484037748695734, + "learning_rate": 5.848541201569469e-06, + "loss": 0.18244705200195313, + "step": 23495 + }, + { + "epoch": 0.2031975512533398, + "grad_norm": 10.854194686757555, + "learning_rate": 5.848477278716663e-06, + "loss": 0.12711811065673828, + "step": 23500 + }, + { + "epoch": 0.20324078477488305, + "grad_norm": 18.63072757446888, + "learning_rate": 5.8484133427269306e-06, + "loss": 0.1793701171875, + "step": 23505 + }, + { + "epoch": 0.2032840182964263, + "grad_norm": 5.702118611039557, + "learning_rate": 5.848349393600567e-06, + "loss": 0.2735504150390625, + "step": 23510 + }, + { + "epoch": 0.20332725181796957, + "grad_norm": 14.945815599112985, + "learning_rate": 5.8482854313378685e-06, + "loss": 0.09361381530761718, + "step": 23515 + }, + { + "epoch": 0.20337048533951285, + "grad_norm": 53.60441335158495, + "learning_rate": 5.848221455939129e-06, + "loss": 0.34754486083984376, + "step": 23520 + }, + { + "epoch": 0.2034137188610561, + "grad_norm": 3.9308296448950415, + "learning_rate": 5.848157467404644e-06, + "loss": 0.290252685546875, + "step": 23525 + }, + { + "epoch": 0.20345695238259937, + "grad_norm": 52.95825394913106, + "learning_rate": 5.848093465734708e-06, + "loss": 0.5192501068115234, + "step": 23530 + }, + { + "epoch": 0.20350018590414265, + "grad_norm": 19.102160973869474, + "learning_rate": 5.848029450929617e-06, + "loss": 0.2825225830078125, + "step": 23535 + }, + { + "epoch": 0.2035434194256859, + "grad_norm": 19.111741133024662, + "learning_rate": 5.8479654229896656e-06, + "loss": 0.17345428466796875, + "step": 23540 + }, + { + "epoch": 0.20358665294722916, + "grad_norm": 6.237894563989984, + "learning_rate": 5.847901381915149e-06, + "loss": 0.224371337890625, + "step": 23545 + }, + { + "epoch": 0.20362988646877242, + "grad_norm": 6.271701735092187, + "learning_rate": 5.847837327706363e-06, + "loss": 0.0653717041015625, + "step": 23550 + }, + { + "epoch": 0.2036731199903157, + "grad_norm": 1.4142204518687993, + "learning_rate": 5.8477732603636034e-06, + "loss": 0.2074981689453125, + "step": 23555 + }, + { + "epoch": 0.20371635351185896, + "grad_norm": 27.814953036115096, + "learning_rate": 5.847709179887166e-06, + "loss": 0.3418701171875, + "step": 23560 + }, + { + "epoch": 0.20375958703340222, + "grad_norm": 34.10151436068776, + "learning_rate": 5.847645086277343e-06, + "loss": 0.1687744140625, + "step": 23565 + }, + { + "epoch": 0.20380282055494547, + "grad_norm": 2.503132901841628, + "learning_rate": 5.847580979534434e-06, + "loss": 0.05743408203125, + "step": 23570 + }, + { + "epoch": 0.20384605407648876, + "grad_norm": 10.327628200779744, + "learning_rate": 5.847516859658733e-06, + "loss": 0.26220855712890623, + "step": 23575 + }, + { + "epoch": 0.20388928759803202, + "grad_norm": 73.44312123683476, + "learning_rate": 5.847452726650536e-06, + "loss": 0.0985382080078125, + "step": 23580 + }, + { + "epoch": 0.20393252111957527, + "grad_norm": 7.8173670956857535, + "learning_rate": 5.847388580510139e-06, + "loss": 0.21907882690429686, + "step": 23585 + }, + { + "epoch": 0.20397575464111853, + "grad_norm": 1.4599111822794595, + "learning_rate": 5.847324421237836e-06, + "loss": 0.0724212646484375, + "step": 23590 + }, + { + "epoch": 0.2040189881626618, + "grad_norm": 1.9557922374228904, + "learning_rate": 5.847260248833926e-06, + "loss": 0.2076171875, + "step": 23595 + }, + { + "epoch": 0.20406222168420507, + "grad_norm": 7.425683961931681, + "learning_rate": 5.847196063298703e-06, + "loss": 0.48988037109375, + "step": 23600 + }, + { + "epoch": 0.20410545520574833, + "grad_norm": 8.572143998068345, + "learning_rate": 5.847131864632462e-06, + "loss": 0.141259765625, + "step": 23605 + }, + { + "epoch": 0.20414868872729158, + "grad_norm": 0.5024986512422259, + "learning_rate": 5.847067652835502e-06, + "loss": 0.11726913452148438, + "step": 23610 + }, + { + "epoch": 0.20419192224883487, + "grad_norm": 36.3059791681518, + "learning_rate": 5.847003427908117e-06, + "loss": 0.3355133056640625, + "step": 23615 + }, + { + "epoch": 0.20423515577037812, + "grad_norm": 1.5149563011366538, + "learning_rate": 5.846939189850603e-06, + "loss": 0.09973564147949218, + "step": 23620 + }, + { + "epoch": 0.20427838929192138, + "grad_norm": 20.75738918947986, + "learning_rate": 5.8468749386632575e-06, + "loss": 0.24176483154296874, + "step": 23625 + }, + { + "epoch": 0.20432162281346464, + "grad_norm": 3.1504418258909865, + "learning_rate": 5.846810674346377e-06, + "loss": 0.0948974609375, + "step": 23630 + }, + { + "epoch": 0.20436485633500792, + "grad_norm": 4.216732188204589, + "learning_rate": 5.846746396900257e-06, + "loss": 0.4363136291503906, + "step": 23635 + }, + { + "epoch": 0.20440808985655118, + "grad_norm": 41.18253924800018, + "learning_rate": 5.846682106325193e-06, + "loss": 0.10401687622070313, + "step": 23640 + }, + { + "epoch": 0.20445132337809444, + "grad_norm": 17.47499098326148, + "learning_rate": 5.846617802621484e-06, + "loss": 0.2333698272705078, + "step": 23645 + }, + { + "epoch": 0.2044945568996377, + "grad_norm": 1.3423538869477454, + "learning_rate": 5.846553485789424e-06, + "loss": 0.08290252685546876, + "step": 23650 + }, + { + "epoch": 0.20453779042118098, + "grad_norm": 37.25649191849691, + "learning_rate": 5.846489155829313e-06, + "loss": 0.582781982421875, + "step": 23655 + }, + { + "epoch": 0.20458102394272423, + "grad_norm": 1.2534322597888623, + "learning_rate": 5.846424812741444e-06, + "loss": 0.08099441528320313, + "step": 23660 + }, + { + "epoch": 0.2046242574642675, + "grad_norm": 4.5369293957980155, + "learning_rate": 5.846360456526115e-06, + "loss": 0.3647472381591797, + "step": 23665 + }, + { + "epoch": 0.20466749098581075, + "grad_norm": 6.876656982927747, + "learning_rate": 5.846296087183624e-06, + "loss": 0.19932861328125, + "step": 23670 + }, + { + "epoch": 0.20471072450735403, + "grad_norm": 53.43307043621085, + "learning_rate": 5.8462317047142655e-06, + "loss": 0.39546966552734375, + "step": 23675 + }, + { + "epoch": 0.2047539580288973, + "grad_norm": 46.238220853362016, + "learning_rate": 5.84616730911834e-06, + "loss": 0.21834716796875, + "step": 23680 + }, + { + "epoch": 0.20479719155044054, + "grad_norm": 10.17827469206076, + "learning_rate": 5.846102900396141e-06, + "loss": 0.0802734375, + "step": 23685 + }, + { + "epoch": 0.2048404250719838, + "grad_norm": 36.492487730920985, + "learning_rate": 5.846038478547967e-06, + "loss": 0.354156494140625, + "step": 23690 + }, + { + "epoch": 0.20488365859352708, + "grad_norm": 24.040395300594476, + "learning_rate": 5.845974043574115e-06, + "loss": 0.2539886474609375, + "step": 23695 + }, + { + "epoch": 0.20492689211507034, + "grad_norm": 4.85796557053671, + "learning_rate": 5.845909595474883e-06, + "loss": 0.4495887756347656, + "step": 23700 + }, + { + "epoch": 0.2049701256366136, + "grad_norm": 13.2034265975806, + "learning_rate": 5.845845134250566e-06, + "loss": 0.285760498046875, + "step": 23705 + }, + { + "epoch": 0.20501335915815685, + "grad_norm": 1.595899899542441, + "learning_rate": 5.8457806599014635e-06, + "loss": 0.1952362060546875, + "step": 23710 + }, + { + "epoch": 0.20505659267970014, + "grad_norm": 0.23064303975758907, + "learning_rate": 5.845716172427872e-06, + "loss": 0.21002044677734374, + "step": 23715 + }, + { + "epoch": 0.2050998262012434, + "grad_norm": 11.156319451761112, + "learning_rate": 5.845651671830089e-06, + "loss": 0.177044677734375, + "step": 23720 + }, + { + "epoch": 0.20514305972278665, + "grad_norm": 27.260191827030393, + "learning_rate": 5.845587158108412e-06, + "loss": 0.39937744140625, + "step": 23725 + }, + { + "epoch": 0.20518629324432994, + "grad_norm": 7.03114822653209, + "learning_rate": 5.845522631263139e-06, + "loss": 0.07777786254882812, + "step": 23730 + }, + { + "epoch": 0.2052295267658732, + "grad_norm": 11.321001971190851, + "learning_rate": 5.845458091294566e-06, + "loss": 0.23326377868652343, + "step": 23735 + }, + { + "epoch": 0.20527276028741645, + "grad_norm": 0.29062706262517934, + "learning_rate": 5.8453935382029915e-06, + "loss": 0.11329345703125, + "step": 23740 + }, + { + "epoch": 0.2053159938089597, + "grad_norm": 6.728448243396363, + "learning_rate": 5.845328971988714e-06, + "loss": 0.15838775634765626, + "step": 23745 + }, + { + "epoch": 0.205359227330503, + "grad_norm": 7.315921355299094, + "learning_rate": 5.845264392652032e-06, + "loss": 0.06777191162109375, + "step": 23750 + }, + { + "epoch": 0.20540246085204625, + "grad_norm": 12.275225862846867, + "learning_rate": 5.84519980019324e-06, + "loss": 0.11856956481933593, + "step": 23755 + }, + { + "epoch": 0.2054456943735895, + "grad_norm": 0.8614092530395497, + "learning_rate": 5.845135194612639e-06, + "loss": 0.16461257934570311, + "step": 23760 + }, + { + "epoch": 0.20548892789513276, + "grad_norm": 15.67324265942527, + "learning_rate": 5.845070575910525e-06, + "loss": 0.09024810791015625, + "step": 23765 + }, + { + "epoch": 0.20553216141667605, + "grad_norm": 0.7415924853158053, + "learning_rate": 5.845005944087198e-06, + "loss": 0.3058769226074219, + "step": 23770 + }, + { + "epoch": 0.2055753949382193, + "grad_norm": 5.1102854144522745, + "learning_rate": 5.844941299142954e-06, + "loss": 0.1118927001953125, + "step": 23775 + }, + { + "epoch": 0.20561862845976256, + "grad_norm": 0.672761729895257, + "learning_rate": 5.844876641078093e-06, + "loss": 0.0602935791015625, + "step": 23780 + }, + { + "epoch": 0.20566186198130582, + "grad_norm": 0.4118033105601813, + "learning_rate": 5.844811969892912e-06, + "loss": 0.1423828125, + "step": 23785 + }, + { + "epoch": 0.2057050955028491, + "grad_norm": 4.775373870756705, + "learning_rate": 5.844747285587709e-06, + "loss": 0.0668487548828125, + "step": 23790 + }, + { + "epoch": 0.20574832902439236, + "grad_norm": 54.52915356939206, + "learning_rate": 5.844682588162784e-06, + "loss": 0.4458953857421875, + "step": 23795 + }, + { + "epoch": 0.2057915625459356, + "grad_norm": 6.6942102824960275, + "learning_rate": 5.8446178776184334e-06, + "loss": 0.354852294921875, + "step": 23800 + }, + { + "epoch": 0.20583479606747887, + "grad_norm": 19.711115634659436, + "learning_rate": 5.8445531539549565e-06, + "loss": 0.183270263671875, + "step": 23805 + }, + { + "epoch": 0.20587802958902215, + "grad_norm": 0.5333728197506702, + "learning_rate": 5.844488417172653e-06, + "loss": 0.052130126953125, + "step": 23810 + }, + { + "epoch": 0.2059212631105654, + "grad_norm": 6.238103900916912, + "learning_rate": 5.844423667271819e-06, + "loss": 0.093524169921875, + "step": 23815 + }, + { + "epoch": 0.20596449663210867, + "grad_norm": 6.6328523442447676, + "learning_rate": 5.844358904252754e-06, + "loss": 0.39159698486328126, + "step": 23820 + }, + { + "epoch": 0.20600773015365192, + "grad_norm": 27.449738461304097, + "learning_rate": 5.844294128115758e-06, + "loss": 0.3302490234375, + "step": 23825 + }, + { + "epoch": 0.2060509636751952, + "grad_norm": 56.604740865504375, + "learning_rate": 5.844229338861129e-06, + "loss": 0.36706390380859377, + "step": 23830 + }, + { + "epoch": 0.20609419719673847, + "grad_norm": 2.1507029033073297, + "learning_rate": 5.844164536489165e-06, + "loss": 0.0655059814453125, + "step": 23835 + }, + { + "epoch": 0.20613743071828172, + "grad_norm": 2.861126663436868, + "learning_rate": 5.844099721000166e-06, + "loss": 0.12562484741210939, + "step": 23840 + }, + { + "epoch": 0.20618066423982498, + "grad_norm": 9.466590980461621, + "learning_rate": 5.844034892394429e-06, + "loss": 0.149835205078125, + "step": 23845 + }, + { + "epoch": 0.20622389776136826, + "grad_norm": 11.998529905282918, + "learning_rate": 5.843970050672255e-06, + "loss": 0.19864501953125, + "step": 23850 + }, + { + "epoch": 0.20626713128291152, + "grad_norm": 13.784489497076523, + "learning_rate": 5.843905195833944e-06, + "loss": 0.21590728759765626, + "step": 23855 + }, + { + "epoch": 0.20631036480445478, + "grad_norm": 14.439927046637385, + "learning_rate": 5.843840327879791e-06, + "loss": 0.37375717163085936, + "step": 23860 + }, + { + "epoch": 0.20635359832599803, + "grad_norm": 1.2436003348679567, + "learning_rate": 5.8437754468101e-06, + "loss": 0.08290481567382812, + "step": 23865 + }, + { + "epoch": 0.20639683184754132, + "grad_norm": 2.3896722285991725, + "learning_rate": 5.843710552625166e-06, + "loss": 0.102734375, + "step": 23870 + }, + { + "epoch": 0.20644006536908457, + "grad_norm": 11.61623882685201, + "learning_rate": 5.843645645325292e-06, + "loss": 0.16302337646484374, + "step": 23875 + }, + { + "epoch": 0.20648329889062783, + "grad_norm": 7.284711694961996, + "learning_rate": 5.843580724910775e-06, + "loss": 0.237774658203125, + "step": 23880 + }, + { + "epoch": 0.2065265324121711, + "grad_norm": 3.0873073060489578, + "learning_rate": 5.843515791381915e-06, + "loss": 0.150860595703125, + "step": 23885 + }, + { + "epoch": 0.20656976593371437, + "grad_norm": 40.36048216934424, + "learning_rate": 5.843450844739011e-06, + "loss": 0.3068977355957031, + "step": 23890 + }, + { + "epoch": 0.20661299945525763, + "grad_norm": 0.23642449899564924, + "learning_rate": 5.843385884982362e-06, + "loss": 0.34919586181640627, + "step": 23895 + }, + { + "epoch": 0.20665623297680089, + "grad_norm": 19.623801417860243, + "learning_rate": 5.843320912112271e-06, + "loss": 0.20147857666015626, + "step": 23900 + }, + { + "epoch": 0.20669946649834417, + "grad_norm": 0.2752105125710414, + "learning_rate": 5.843255926129034e-06, + "loss": 0.1526397705078125, + "step": 23905 + }, + { + "epoch": 0.20674270001988743, + "grad_norm": 0.99446654469723, + "learning_rate": 5.8431909270329505e-06, + "loss": 0.08778076171875, + "step": 23910 + }, + { + "epoch": 0.20678593354143068, + "grad_norm": 49.35031497347438, + "learning_rate": 5.843125914824323e-06, + "loss": 0.47239990234375, + "step": 23915 + }, + { + "epoch": 0.20682916706297394, + "grad_norm": 6.6076368368104195, + "learning_rate": 5.84306088950345e-06, + "loss": 0.169903564453125, + "step": 23920 + }, + { + "epoch": 0.20687240058451722, + "grad_norm": 31.82975853183062, + "learning_rate": 5.842995851070631e-06, + "loss": 0.11807098388671874, + "step": 23925 + }, + { + "epoch": 0.20691563410606048, + "grad_norm": 23.06537646052466, + "learning_rate": 5.842930799526167e-06, + "loss": 0.275439453125, + "step": 23930 + }, + { + "epoch": 0.20695886762760374, + "grad_norm": 19.782596591476512, + "learning_rate": 5.842865734870357e-06, + "loss": 0.2437744140625, + "step": 23935 + }, + { + "epoch": 0.207002101149147, + "grad_norm": 1.2401728591923677, + "learning_rate": 5.842800657103501e-06, + "loss": 0.15274200439453126, + "step": 23940 + }, + { + "epoch": 0.20704533467069028, + "grad_norm": 0.6996337095181298, + "learning_rate": 5.8427355662259e-06, + "loss": 0.1863311767578125, + "step": 23945 + }, + { + "epoch": 0.20708856819223354, + "grad_norm": 16.32102494468886, + "learning_rate": 5.842670462237854e-06, + "loss": 0.10366058349609375, + "step": 23950 + }, + { + "epoch": 0.2071318017137768, + "grad_norm": 1.5957935282953661, + "learning_rate": 5.842605345139663e-06, + "loss": 0.0848968505859375, + "step": 23955 + }, + { + "epoch": 0.20717503523532005, + "grad_norm": 37.54848125847217, + "learning_rate": 5.842540214931627e-06, + "loss": 0.3549468994140625, + "step": 23960 + }, + { + "epoch": 0.20721826875686333, + "grad_norm": 28.098685854371762, + "learning_rate": 5.842475071614047e-06, + "loss": 0.1900054931640625, + "step": 23965 + }, + { + "epoch": 0.2072615022784066, + "grad_norm": 9.355335604130007, + "learning_rate": 5.842409915187222e-06, + "loss": 0.16030349731445312, + "step": 23970 + }, + { + "epoch": 0.20730473579994985, + "grad_norm": 4.261462061592509, + "learning_rate": 5.842344745651455e-06, + "loss": 0.235150146484375, + "step": 23975 + }, + { + "epoch": 0.2073479693214931, + "grad_norm": 20.203145129711945, + "learning_rate": 5.842279563007044e-06, + "loss": 0.22150192260742188, + "step": 23980 + }, + { + "epoch": 0.2073912028430364, + "grad_norm": 2.8372940708702954, + "learning_rate": 5.842214367254292e-06, + "loss": 0.31943206787109374, + "step": 23985 + }, + { + "epoch": 0.20743443636457964, + "grad_norm": 2.4301314904453446, + "learning_rate": 5.842149158393498e-06, + "loss": 0.19644927978515625, + "step": 23990 + }, + { + "epoch": 0.2074776698861229, + "grad_norm": 28.718382882541796, + "learning_rate": 5.842083936424964e-06, + "loss": 0.1234130859375, + "step": 23995 + }, + { + "epoch": 0.20752090340766616, + "grad_norm": 3.817997085164104, + "learning_rate": 5.842018701348989e-06, + "loss": 0.15555877685546876, + "step": 24000 + }, + { + "epoch": 0.20756413692920944, + "grad_norm": 11.33451736401625, + "learning_rate": 5.841953453165874e-06, + "loss": 0.306597900390625, + "step": 24005 + }, + { + "epoch": 0.2076073704507527, + "grad_norm": 22.494445999900556, + "learning_rate": 5.841888191875923e-06, + "loss": 0.29722442626953127, + "step": 24010 + }, + { + "epoch": 0.20765060397229596, + "grad_norm": 15.08096053939879, + "learning_rate": 5.841822917479433e-06, + "loss": 0.26334228515625, + "step": 24015 + }, + { + "epoch": 0.2076938374938392, + "grad_norm": 9.777734604787, + "learning_rate": 5.841757629976708e-06, + "loss": 0.12915687561035155, + "step": 24020 + }, + { + "epoch": 0.2077370710153825, + "grad_norm": 17.482497009375646, + "learning_rate": 5.841692329368048e-06, + "loss": 0.17524032592773436, + "step": 24025 + }, + { + "epoch": 0.20778030453692575, + "grad_norm": 12.891351240093247, + "learning_rate": 5.841627015653752e-06, + "loss": 0.210888671875, + "step": 24030 + }, + { + "epoch": 0.207823538058469, + "grad_norm": 8.14573008009153, + "learning_rate": 5.841561688834125e-06, + "loss": 0.1129150390625, + "step": 24035 + }, + { + "epoch": 0.20786677158001227, + "grad_norm": 20.084670528834597, + "learning_rate": 5.841496348909467e-06, + "loss": 0.26249542236328127, + "step": 24040 + }, + { + "epoch": 0.20791000510155555, + "grad_norm": 14.298115144427028, + "learning_rate": 5.841430995880078e-06, + "loss": 0.0896697998046875, + "step": 24045 + }, + { + "epoch": 0.2079532386230988, + "grad_norm": 4.970174273269182, + "learning_rate": 5.841365629746261e-06, + "loss": 0.069659423828125, + "step": 24050 + }, + { + "epoch": 0.20799647214464206, + "grad_norm": 6.577518739761146, + "learning_rate": 5.841300250508316e-06, + "loss": 0.14285221099853515, + "step": 24055 + }, + { + "epoch": 0.20803970566618532, + "grad_norm": 36.902203237175335, + "learning_rate": 5.841234858166545e-06, + "loss": 0.4991756439208984, + "step": 24060 + }, + { + "epoch": 0.2080829391877286, + "grad_norm": 22.097944996704314, + "learning_rate": 5.841169452721251e-06, + "loss": 0.1393707275390625, + "step": 24065 + }, + { + "epoch": 0.20812617270927186, + "grad_norm": 33.508080158101166, + "learning_rate": 5.8411040341727345e-06, + "loss": 0.243267822265625, + "step": 24070 + }, + { + "epoch": 0.20816940623081512, + "grad_norm": 14.607609155666212, + "learning_rate": 5.841038602521297e-06, + "loss": 0.5013015747070313, + "step": 24075 + }, + { + "epoch": 0.20821263975235837, + "grad_norm": 0.6762044160485473, + "learning_rate": 5.8409731577672395e-06, + "loss": 0.34692840576171874, + "step": 24080 + }, + { + "epoch": 0.20825587327390166, + "grad_norm": 4.163268726403339, + "learning_rate": 5.840907699910866e-06, + "loss": 0.1858367919921875, + "step": 24085 + }, + { + "epoch": 0.20829910679544492, + "grad_norm": 3.744051952807961, + "learning_rate": 5.840842228952476e-06, + "loss": 0.12026710510253906, + "step": 24090 + }, + { + "epoch": 0.20834234031698817, + "grad_norm": 0.9567558330893091, + "learning_rate": 5.840776744892374e-06, + "loss": 0.30667724609375, + "step": 24095 + }, + { + "epoch": 0.20838557383853146, + "grad_norm": 8.307281136445312, + "learning_rate": 5.84071124773086e-06, + "loss": 0.08142852783203125, + "step": 24100 + }, + { + "epoch": 0.2084288073600747, + "grad_norm": 6.272330691547922, + "learning_rate": 5.840645737468237e-06, + "loss": 0.283978271484375, + "step": 24105 + }, + { + "epoch": 0.20847204088161797, + "grad_norm": 1.2540959358159158, + "learning_rate": 5.840580214104808e-06, + "loss": 0.10729942321777344, + "step": 24110 + }, + { + "epoch": 0.20851527440316123, + "grad_norm": 10.822726738931248, + "learning_rate": 5.840514677640872e-06, + "loss": 0.3088043212890625, + "step": 24115 + }, + { + "epoch": 0.2085585079247045, + "grad_norm": 0.41994179634806705, + "learning_rate": 5.840449128076734e-06, + "loss": 0.2817474365234375, + "step": 24120 + }, + { + "epoch": 0.20860174144624777, + "grad_norm": 32.31011503508629, + "learning_rate": 5.840383565412696e-06, + "loss": 0.20578765869140625, + "step": 24125 + }, + { + "epoch": 0.20864497496779102, + "grad_norm": 9.747809458375226, + "learning_rate": 5.84031798964906e-06, + "loss": 0.18236618041992186, + "step": 24130 + }, + { + "epoch": 0.20868820848933428, + "grad_norm": 8.647384426588669, + "learning_rate": 5.840252400786128e-06, + "loss": 0.19453125, + "step": 24135 + }, + { + "epoch": 0.20873144201087757, + "grad_norm": 14.98835868423251, + "learning_rate": 5.840186798824202e-06, + "loss": 0.0703033447265625, + "step": 24140 + }, + { + "epoch": 0.20877467553242082, + "grad_norm": 0.5646464464338269, + "learning_rate": 5.840121183763587e-06, + "loss": 0.21373748779296875, + "step": 24145 + }, + { + "epoch": 0.20881790905396408, + "grad_norm": 130.92990854627234, + "learning_rate": 5.840055555604584e-06, + "loss": 0.15191650390625, + "step": 24150 + }, + { + "epoch": 0.20886114257550734, + "grad_norm": 12.294213355069587, + "learning_rate": 5.839989914347495e-06, + "loss": 0.23843994140625, + "step": 24155 + }, + { + "epoch": 0.20890437609705062, + "grad_norm": 2.475279702819386, + "learning_rate": 5.839924259992624e-06, + "loss": 0.2295745849609375, + "step": 24160 + }, + { + "epoch": 0.20894760961859388, + "grad_norm": 1.1247242686121133, + "learning_rate": 5.839858592540273e-06, + "loss": 0.064178466796875, + "step": 24165 + }, + { + "epoch": 0.20899084314013713, + "grad_norm": 7.2838063153014385, + "learning_rate": 5.8397929119907446e-06, + "loss": 0.19809112548828126, + "step": 24170 + }, + { + "epoch": 0.2090340766616804, + "grad_norm": 4.725690065116859, + "learning_rate": 5.839727218344343e-06, + "loss": 0.110302734375, + "step": 24175 + }, + { + "epoch": 0.20907731018322367, + "grad_norm": 25.434457505741346, + "learning_rate": 5.8396615116013705e-06, + "loss": 0.052639007568359375, + "step": 24180 + }, + { + "epoch": 0.20912054370476693, + "grad_norm": 25.907694881079667, + "learning_rate": 5.839595791762129e-06, + "loss": 0.191680908203125, + "step": 24185 + }, + { + "epoch": 0.2091637772263102, + "grad_norm": 4.64041194643551, + "learning_rate": 5.839530058826924e-06, + "loss": 0.0572265625, + "step": 24190 + }, + { + "epoch": 0.20920701074785344, + "grad_norm": 0.34718755431026505, + "learning_rate": 5.839464312796056e-06, + "loss": 0.4582305908203125, + "step": 24195 + }, + { + "epoch": 0.20925024426939673, + "grad_norm": 8.891130685712048, + "learning_rate": 5.83939855366983e-06, + "loss": 0.3181640625, + "step": 24200 + }, + { + "epoch": 0.20929347779093999, + "grad_norm": 5.716430005144296, + "learning_rate": 5.839332781448549e-06, + "loss": 0.6643386840820312, + "step": 24205 + }, + { + "epoch": 0.20933671131248324, + "grad_norm": 18.077584089648294, + "learning_rate": 5.839266996132515e-06, + "loss": 0.154766845703125, + "step": 24210 + }, + { + "epoch": 0.2093799448340265, + "grad_norm": 2.332088282718102, + "learning_rate": 5.839201197722034e-06, + "loss": 0.198876953125, + "step": 24215 + }, + { + "epoch": 0.20942317835556978, + "grad_norm": 2.6617484995821563, + "learning_rate": 5.839135386217407e-06, + "loss": 0.1343780517578125, + "step": 24220 + }, + { + "epoch": 0.20946641187711304, + "grad_norm": 3.408988111466531, + "learning_rate": 5.8390695616189386e-06, + "loss": 0.4628868103027344, + "step": 24225 + }, + { + "epoch": 0.2095096453986563, + "grad_norm": 78.85787130353005, + "learning_rate": 5.839003723926933e-06, + "loss": 0.1095672607421875, + "step": 24230 + }, + { + "epoch": 0.20955287892019955, + "grad_norm": 4.7795003804588445, + "learning_rate": 5.838937873141692e-06, + "loss": 0.1750885009765625, + "step": 24235 + }, + { + "epoch": 0.20959611244174284, + "grad_norm": 1.4575510086277204, + "learning_rate": 5.838872009263521e-06, + "loss": 0.0886260986328125, + "step": 24240 + }, + { + "epoch": 0.2096393459632861, + "grad_norm": 0.6283467957329525, + "learning_rate": 5.838806132292723e-06, + "loss": 0.15692062377929689, + "step": 24245 + }, + { + "epoch": 0.20968257948482935, + "grad_norm": 5.026492775058404, + "learning_rate": 5.838740242229602e-06, + "loss": 0.09656982421875, + "step": 24250 + }, + { + "epoch": 0.2097258130063726, + "grad_norm": 2.2104029392932314, + "learning_rate": 5.8386743390744625e-06, + "loss": 0.2200103759765625, + "step": 24255 + }, + { + "epoch": 0.2097690465279159, + "grad_norm": 2.647275137806961, + "learning_rate": 5.838608422827607e-06, + "loss": 0.104736328125, + "step": 24260 + }, + { + "epoch": 0.20981228004945915, + "grad_norm": 8.531973588549144, + "learning_rate": 5.83854249348934e-06, + "loss": 0.41375732421875, + "step": 24265 + }, + { + "epoch": 0.2098555135710024, + "grad_norm": 9.32982865647474, + "learning_rate": 5.838476551059966e-06, + "loss": 0.18541259765625, + "step": 24270 + }, + { + "epoch": 0.2098987470925457, + "grad_norm": 2.3558816627593515, + "learning_rate": 5.838410595539789e-06, + "loss": 0.3568675994873047, + "step": 24275 + }, + { + "epoch": 0.20994198061408895, + "grad_norm": 0.9529438500567772, + "learning_rate": 5.838344626929113e-06, + "loss": 0.11717033386230469, + "step": 24280 + }, + { + "epoch": 0.2099852141356322, + "grad_norm": 28.665567061263356, + "learning_rate": 5.838278645228244e-06, + "loss": 0.1532806396484375, + "step": 24285 + }, + { + "epoch": 0.21002844765717546, + "grad_norm": 70.04025701509863, + "learning_rate": 5.8382126504374826e-06, + "loss": 0.38365478515625, + "step": 24290 + }, + { + "epoch": 0.21007168117871874, + "grad_norm": 23.18613795360198, + "learning_rate": 5.8381466425571356e-06, + "loss": 0.162744140625, + "step": 24295 + }, + { + "epoch": 0.210114914700262, + "grad_norm": 50.02750097336696, + "learning_rate": 5.838080621587508e-06, + "loss": 0.4549720764160156, + "step": 24300 + }, + { + "epoch": 0.21015814822180526, + "grad_norm": 9.327410399141538, + "learning_rate": 5.838014587528903e-06, + "loss": 0.3157985687255859, + "step": 24305 + }, + { + "epoch": 0.21020138174334851, + "grad_norm": 15.235280624600854, + "learning_rate": 5.837948540381625e-06, + "loss": 0.16957931518554686, + "step": 24310 + }, + { + "epoch": 0.2102446152648918, + "grad_norm": 3.139211421009012, + "learning_rate": 5.837882480145979e-06, + "loss": 0.1712890625, + "step": 24315 + }, + { + "epoch": 0.21028784878643506, + "grad_norm": 9.547998826855048, + "learning_rate": 5.837816406822271e-06, + "loss": 0.29730911254882814, + "step": 24320 + }, + { + "epoch": 0.2103310823079783, + "grad_norm": 3.0869093390797566, + "learning_rate": 5.837750320410804e-06, + "loss": 0.1256500244140625, + "step": 24325 + }, + { + "epoch": 0.21037431582952157, + "grad_norm": 2.7522831614670085, + "learning_rate": 5.837684220911883e-06, + "loss": 0.45749645233154296, + "step": 24330 + }, + { + "epoch": 0.21041754935106485, + "grad_norm": 3.3346897110743763, + "learning_rate": 5.837618108325813e-06, + "loss": 0.1552215576171875, + "step": 24335 + }, + { + "epoch": 0.2104607828726081, + "grad_norm": 0.28208551872830795, + "learning_rate": 5.837551982652898e-06, + "loss": 0.3822353363037109, + "step": 24340 + }, + { + "epoch": 0.21050401639415137, + "grad_norm": 20.600924497479262, + "learning_rate": 5.837485843893445e-06, + "loss": 0.1852996826171875, + "step": 24345 + }, + { + "epoch": 0.21054724991569462, + "grad_norm": 1.4654041583451658, + "learning_rate": 5.837419692047758e-06, + "loss": 0.12362136840820312, + "step": 24350 + }, + { + "epoch": 0.2105904834372379, + "grad_norm": 3.9595673093151253, + "learning_rate": 5.8373535271161425e-06, + "loss": 0.07061309814453125, + "step": 24355 + }, + { + "epoch": 0.21063371695878116, + "grad_norm": 2.6905635711684486, + "learning_rate": 5.837287349098903e-06, + "loss": 0.10611724853515625, + "step": 24360 + }, + { + "epoch": 0.21067695048032442, + "grad_norm": 3.028041298789882, + "learning_rate": 5.837221157996345e-06, + "loss": 0.021460914611816408, + "step": 24365 + }, + { + "epoch": 0.21072018400186768, + "grad_norm": 4.736204499669762, + "learning_rate": 5.8371549538087735e-06, + "loss": 0.2036881446838379, + "step": 24370 + }, + { + "epoch": 0.21076341752341096, + "grad_norm": 8.525182396395387, + "learning_rate": 5.837088736536494e-06, + "loss": 0.3072601318359375, + "step": 24375 + }, + { + "epoch": 0.21080665104495422, + "grad_norm": 1.0964790280070322, + "learning_rate": 5.837022506179813e-06, + "loss": 0.25088958740234374, + "step": 24380 + }, + { + "epoch": 0.21084988456649748, + "grad_norm": 0.014839114990732316, + "learning_rate": 5.836956262739033e-06, + "loss": 0.31881256103515626, + "step": 24385 + }, + { + "epoch": 0.21089311808804073, + "grad_norm": 1.336417786819306, + "learning_rate": 5.836890006214462e-06, + "loss": 0.171282958984375, + "step": 24390 + }, + { + "epoch": 0.21093635160958402, + "grad_norm": 0.680733202969233, + "learning_rate": 5.836823736606406e-06, + "loss": 0.3995964050292969, + "step": 24395 + }, + { + "epoch": 0.21097958513112727, + "grad_norm": 3.1385918951500043, + "learning_rate": 5.836757453915169e-06, + "loss": 0.24361572265625, + "step": 24400 + }, + { + "epoch": 0.21102281865267053, + "grad_norm": 52.00429057326359, + "learning_rate": 5.836691158141057e-06, + "loss": 0.3245025634765625, + "step": 24405 + }, + { + "epoch": 0.2110660521742138, + "grad_norm": 6.004454675828276, + "learning_rate": 5.836624849284376e-06, + "loss": 0.075164794921875, + "step": 24410 + }, + { + "epoch": 0.21110928569575707, + "grad_norm": 0.9082565665345396, + "learning_rate": 5.836558527345432e-06, + "loss": 0.16083984375, + "step": 24415 + }, + { + "epoch": 0.21115251921730033, + "grad_norm": 42.70355404901238, + "learning_rate": 5.836492192324532e-06, + "loss": 0.264892578125, + "step": 24420 + }, + { + "epoch": 0.21119575273884358, + "grad_norm": 6.408931707660811, + "learning_rate": 5.836425844221978e-06, + "loss": 0.6400054931640625, + "step": 24425 + }, + { + "epoch": 0.21123898626038684, + "grad_norm": 9.071754653885609, + "learning_rate": 5.83635948303808e-06, + "loss": 0.12167930603027344, + "step": 24430 + }, + { + "epoch": 0.21128221978193013, + "grad_norm": 18.018893410952614, + "learning_rate": 5.836293108773143e-06, + "loss": 0.2245452880859375, + "step": 24435 + }, + { + "epoch": 0.21132545330347338, + "grad_norm": 2.496069427730961, + "learning_rate": 5.836226721427473e-06, + "loss": 0.08822174072265625, + "step": 24440 + }, + { + "epoch": 0.21136868682501664, + "grad_norm": 24.88033520291254, + "learning_rate": 5.836160321001375e-06, + "loss": 0.23286895751953124, + "step": 24445 + }, + { + "epoch": 0.2114119203465599, + "grad_norm": 0.680206637830523, + "learning_rate": 5.836093907495157e-06, + "loss": 0.06450958251953125, + "step": 24450 + }, + { + "epoch": 0.21145515386810318, + "grad_norm": 7.991429740040007, + "learning_rate": 5.836027480909124e-06, + "loss": 0.3700439453125, + "step": 24455 + }, + { + "epoch": 0.21149838738964644, + "grad_norm": 46.48218676659349, + "learning_rate": 5.835961041243584e-06, + "loss": 0.46702880859375, + "step": 24460 + }, + { + "epoch": 0.2115416209111897, + "grad_norm": 0.6584448739901418, + "learning_rate": 5.835894588498841e-06, + "loss": 0.07021293640136719, + "step": 24465 + }, + { + "epoch": 0.21158485443273298, + "grad_norm": 4.309054003353484, + "learning_rate": 5.835828122675202e-06, + "loss": 0.07904052734375, + "step": 24470 + }, + { + "epoch": 0.21162808795427623, + "grad_norm": 4.93599341148508, + "learning_rate": 5.835761643772976e-06, + "loss": 0.15081787109375, + "step": 24475 + }, + { + "epoch": 0.2116713214758195, + "grad_norm": 11.343796235004927, + "learning_rate": 5.835695151792468e-06, + "loss": 0.10136566162109376, + "step": 24480 + }, + { + "epoch": 0.21171455499736275, + "grad_norm": 12.879507285220619, + "learning_rate": 5.835628646733983e-06, + "loss": 0.0791351318359375, + "step": 24485 + }, + { + "epoch": 0.21175778851890603, + "grad_norm": 2.2546869564783987, + "learning_rate": 5.83556212859783e-06, + "loss": 0.10748977661132812, + "step": 24490 + }, + { + "epoch": 0.2118010220404493, + "grad_norm": 5.374645449887589, + "learning_rate": 5.835495597384315e-06, + "loss": 0.3473823547363281, + "step": 24495 + }, + { + "epoch": 0.21184425556199254, + "grad_norm": 27.129858784035125, + "learning_rate": 5.835429053093745e-06, + "loss": 0.22806396484375, + "step": 24500 + }, + { + "epoch": 0.2118874890835358, + "grad_norm": 38.463262661700604, + "learning_rate": 5.8353624957264265e-06, + "loss": 0.6274261474609375, + "step": 24505 + }, + { + "epoch": 0.21193072260507909, + "grad_norm": 14.703228538491022, + "learning_rate": 5.835295925282668e-06, + "loss": 0.2890380859375, + "step": 24510 + }, + { + "epoch": 0.21197395612662234, + "grad_norm": 26.023853144529674, + "learning_rate": 5.8352293417627736e-06, + "loss": 0.331396484375, + "step": 24515 + }, + { + "epoch": 0.2120171896481656, + "grad_norm": 6.6486808382545135, + "learning_rate": 5.835162745167051e-06, + "loss": 0.2673492431640625, + "step": 24520 + }, + { + "epoch": 0.21206042316970886, + "grad_norm": 4.574108623519575, + "learning_rate": 5.835096135495811e-06, + "loss": 0.06342048645019531, + "step": 24525 + }, + { + "epoch": 0.21210365669125214, + "grad_norm": 3.043869242357519, + "learning_rate": 5.835029512749358e-06, + "loss": 0.2580860137939453, + "step": 24530 + }, + { + "epoch": 0.2121468902127954, + "grad_norm": 1.5080171947232488, + "learning_rate": 5.834962876927997e-06, + "loss": 0.16358642578125, + "step": 24535 + }, + { + "epoch": 0.21219012373433865, + "grad_norm": 13.802020859209513, + "learning_rate": 5.83489622803204e-06, + "loss": 0.1706207275390625, + "step": 24540 + }, + { + "epoch": 0.2122333572558819, + "grad_norm": 16.76772029763116, + "learning_rate": 5.8348295660617905e-06, + "loss": 0.1531829833984375, + "step": 24545 + }, + { + "epoch": 0.2122765907774252, + "grad_norm": 9.274045178634772, + "learning_rate": 5.834762891017558e-06, + "loss": 0.2355712890625, + "step": 24550 + }, + { + "epoch": 0.21231982429896845, + "grad_norm": 12.380280489558638, + "learning_rate": 5.8346962028996504e-06, + "loss": 0.11400775909423828, + "step": 24555 + }, + { + "epoch": 0.2123630578205117, + "grad_norm": 3.3387849499046713, + "learning_rate": 5.834629501708373e-06, + "loss": 0.40296249389648436, + "step": 24560 + }, + { + "epoch": 0.21240629134205496, + "grad_norm": 0.5149354991701118, + "learning_rate": 5.834562787444036e-06, + "loss": 0.3274444580078125, + "step": 24565 + }, + { + "epoch": 0.21244952486359825, + "grad_norm": 43.134032701979216, + "learning_rate": 5.834496060106945e-06, + "loss": 0.19080047607421874, + "step": 24570 + }, + { + "epoch": 0.2124927583851415, + "grad_norm": 52.924746307026, + "learning_rate": 5.834429319697409e-06, + "loss": 0.23846569061279296, + "step": 24575 + }, + { + "epoch": 0.21253599190668476, + "grad_norm": 1.503669205378823, + "learning_rate": 5.834362566215735e-06, + "loss": 0.691168212890625, + "step": 24580 + }, + { + "epoch": 0.21257922542822802, + "grad_norm": 13.502321351782035, + "learning_rate": 5.834295799662232e-06, + "loss": 0.18099365234375, + "step": 24585 + }, + { + "epoch": 0.2126224589497713, + "grad_norm": 9.093493818776604, + "learning_rate": 5.834229020037207e-06, + "loss": 0.20466079711914062, + "step": 24590 + }, + { + "epoch": 0.21266569247131456, + "grad_norm": 67.01210059412976, + "learning_rate": 5.834162227340968e-06, + "loss": 0.5118148803710938, + "step": 24595 + }, + { + "epoch": 0.21270892599285782, + "grad_norm": 45.97874462189334, + "learning_rate": 5.834095421573823e-06, + "loss": 0.666796875, + "step": 24600 + }, + { + "epoch": 0.21275215951440107, + "grad_norm": 9.70850743715071, + "learning_rate": 5.8340286027360815e-06, + "loss": 0.1073333740234375, + "step": 24605 + }, + { + "epoch": 0.21279539303594436, + "grad_norm": 3.274704228268605, + "learning_rate": 5.83396177082805e-06, + "loss": 0.12718963623046875, + "step": 24610 + }, + { + "epoch": 0.21283862655748761, + "grad_norm": 6.422214691199235, + "learning_rate": 5.833894925850036e-06, + "loss": 0.19236679077148439, + "step": 24615 + }, + { + "epoch": 0.21288186007903087, + "grad_norm": 52.313405321396715, + "learning_rate": 5.8338280678023505e-06, + "loss": 0.551300048828125, + "step": 24620 + }, + { + "epoch": 0.21292509360057413, + "grad_norm": 0.4762409843110066, + "learning_rate": 5.8337611966853e-06, + "loss": 0.3085186004638672, + "step": 24625 + }, + { + "epoch": 0.2129683271221174, + "grad_norm": 20.965154155278626, + "learning_rate": 5.833694312499193e-06, + "loss": 0.14770278930664063, + "step": 24630 + }, + { + "epoch": 0.21301156064366067, + "grad_norm": 1.2518534979043048, + "learning_rate": 5.833627415244339e-06, + "loss": 0.0675384521484375, + "step": 24635 + }, + { + "epoch": 0.21305479416520393, + "grad_norm": 8.023083369360666, + "learning_rate": 5.833560504921044e-06, + "loss": 0.1248931884765625, + "step": 24640 + }, + { + "epoch": 0.2130980276867472, + "grad_norm": 15.835348525095, + "learning_rate": 5.83349358152962e-06, + "loss": 0.20149078369140624, + "step": 24645 + }, + { + "epoch": 0.21314126120829047, + "grad_norm": 32.470600989825115, + "learning_rate": 5.833426645070372e-06, + "loss": 0.277203369140625, + "step": 24650 + }, + { + "epoch": 0.21318449472983372, + "grad_norm": 4.446134081486194, + "learning_rate": 5.833359695543613e-06, + "loss": 0.28062744140625, + "step": 24655 + }, + { + "epoch": 0.21322772825137698, + "grad_norm": 36.815584643617, + "learning_rate": 5.8332927329496485e-06, + "loss": 0.4231292724609375, + "step": 24660 + }, + { + "epoch": 0.21327096177292026, + "grad_norm": 1.0684446398903111, + "learning_rate": 5.833225757288789e-06, + "loss": 0.2783599853515625, + "step": 24665 + }, + { + "epoch": 0.21331419529446352, + "grad_norm": 0.4779574339710118, + "learning_rate": 5.8331587685613404e-06, + "loss": 0.081640625, + "step": 24670 + }, + { + "epoch": 0.21335742881600678, + "grad_norm": 11.984738890833862, + "learning_rate": 5.833091766767616e-06, + "loss": 0.3689605712890625, + "step": 24675 + }, + { + "epoch": 0.21340066233755003, + "grad_norm": 0.8572154785086713, + "learning_rate": 5.8330247519079215e-06, + "loss": 0.251556396484375, + "step": 24680 + }, + { + "epoch": 0.21344389585909332, + "grad_norm": 12.120376949672455, + "learning_rate": 5.832957723982568e-06, + "loss": 0.18963394165039063, + "step": 24685 + }, + { + "epoch": 0.21348712938063658, + "grad_norm": 0.17071943211734028, + "learning_rate": 5.832890682991863e-06, + "loss": 0.14523391723632811, + "step": 24690 + }, + { + "epoch": 0.21353036290217983, + "grad_norm": 2.5877786967486918, + "learning_rate": 5.832823628936118e-06, + "loss": 0.13945159912109376, + "step": 24695 + }, + { + "epoch": 0.2135735964237231, + "grad_norm": 0.9434782055008666, + "learning_rate": 5.83275656181564e-06, + "loss": 0.06766281127929688, + "step": 24700 + }, + { + "epoch": 0.21361682994526637, + "grad_norm": 3.423098215591583, + "learning_rate": 5.832689481630738e-06, + "loss": 0.11063232421875, + "step": 24705 + }, + { + "epoch": 0.21366006346680963, + "grad_norm": 0.9434410778301372, + "learning_rate": 5.832622388381724e-06, + "loss": 0.19172019958496095, + "step": 24710 + }, + { + "epoch": 0.2137032969883529, + "grad_norm": 3.3467044845577574, + "learning_rate": 5.8325552820689045e-06, + "loss": 0.17847900390625, + "step": 24715 + }, + { + "epoch": 0.21374653050989614, + "grad_norm": 9.763195209535287, + "learning_rate": 5.832488162692591e-06, + "loss": 0.0917724609375, + "step": 24720 + }, + { + "epoch": 0.21378976403143943, + "grad_norm": 4.490113138070229, + "learning_rate": 5.832421030253092e-06, + "loss": 0.0736724853515625, + "step": 24725 + }, + { + "epoch": 0.21383299755298268, + "grad_norm": 10.280622349824863, + "learning_rate": 5.832353884750718e-06, + "loss": 0.0363800048828125, + "step": 24730 + }, + { + "epoch": 0.21387623107452594, + "grad_norm": 2.165612911531282, + "learning_rate": 5.8322867261857784e-06, + "loss": 0.136297607421875, + "step": 24735 + }, + { + "epoch": 0.2139194645960692, + "grad_norm": 11.904616620139553, + "learning_rate": 5.832219554558582e-06, + "loss": 0.20522918701171874, + "step": 24740 + }, + { + "epoch": 0.21396269811761248, + "grad_norm": 25.00820381635767, + "learning_rate": 5.83215236986944e-06, + "loss": 0.15635528564453124, + "step": 24745 + }, + { + "epoch": 0.21400593163915574, + "grad_norm": 22.650785052257525, + "learning_rate": 5.8320851721186605e-06, + "loss": 0.130120849609375, + "step": 24750 + }, + { + "epoch": 0.214049165160699, + "grad_norm": 28.68453246883182, + "learning_rate": 5.832017961306555e-06, + "loss": 0.36043853759765626, + "step": 24755 + }, + { + "epoch": 0.21409239868224225, + "grad_norm": 3.741902027314926, + "learning_rate": 5.831950737433432e-06, + "loss": 0.1648101806640625, + "step": 24760 + }, + { + "epoch": 0.21413563220378554, + "grad_norm": 28.8925736241363, + "learning_rate": 5.8318835004996045e-06, + "loss": 0.26719322204589846, + "step": 24765 + }, + { + "epoch": 0.2141788657253288, + "grad_norm": 17.583159109376787, + "learning_rate": 5.831816250505379e-06, + "loss": 0.3044189453125, + "step": 24770 + }, + { + "epoch": 0.21422209924687205, + "grad_norm": 2.933918484111793, + "learning_rate": 5.831748987451067e-06, + "loss": 0.055279541015625, + "step": 24775 + }, + { + "epoch": 0.2142653327684153, + "grad_norm": 4.001996945176057, + "learning_rate": 5.83168171133698e-06, + "loss": 0.18806304931640624, + "step": 24780 + }, + { + "epoch": 0.2143085662899586, + "grad_norm": 9.854266371131011, + "learning_rate": 5.831614422163426e-06, + "loss": 0.1920928955078125, + "step": 24785 + }, + { + "epoch": 0.21435179981150185, + "grad_norm": 6.1256173416321245, + "learning_rate": 5.831547119930718e-06, + "loss": 0.3854393005371094, + "step": 24790 + }, + { + "epoch": 0.2143950333330451, + "grad_norm": 40.11801547230161, + "learning_rate": 5.8314798046391634e-06, + "loss": 0.6272796630859375, + "step": 24795 + }, + { + "epoch": 0.21443826685458836, + "grad_norm": 51.627214283791815, + "learning_rate": 5.831412476289074e-06, + "loss": 0.2686805725097656, + "step": 24800 + }, + { + "epoch": 0.21448150037613165, + "grad_norm": 2.8258334930969045, + "learning_rate": 5.831345134880762e-06, + "loss": 0.263055419921875, + "step": 24805 + }, + { + "epoch": 0.2145247338976749, + "grad_norm": 27.970734248395832, + "learning_rate": 5.831277780414535e-06, + "loss": 0.07011756896972657, + "step": 24810 + }, + { + "epoch": 0.21456796741921816, + "grad_norm": 3.5414849127392385, + "learning_rate": 5.831210412890705e-06, + "loss": 0.18631591796875, + "step": 24815 + }, + { + "epoch": 0.21461120094076142, + "grad_norm": 9.771218108830734, + "learning_rate": 5.831143032309585e-06, + "loss": 0.1605224609375, + "step": 24820 + }, + { + "epoch": 0.2146544344623047, + "grad_norm": 2.5608597386249734, + "learning_rate": 5.831075638671481e-06, + "loss": 0.0626983642578125, + "step": 24825 + }, + { + "epoch": 0.21469766798384796, + "grad_norm": 2.3751734679160927, + "learning_rate": 5.831008231976707e-06, + "loss": 0.2564697265625, + "step": 24830 + }, + { + "epoch": 0.2147409015053912, + "grad_norm": 8.52978392169137, + "learning_rate": 5.830940812225573e-06, + "loss": 0.16126480102539062, + "step": 24835 + }, + { + "epoch": 0.2147841350269345, + "grad_norm": 2.314143562307942, + "learning_rate": 5.830873379418391e-06, + "loss": 0.1691192626953125, + "step": 24840 + }, + { + "epoch": 0.21482736854847775, + "grad_norm": 37.172595296134546, + "learning_rate": 5.830805933555469e-06, + "loss": 0.2333740234375, + "step": 24845 + }, + { + "epoch": 0.214870602070021, + "grad_norm": 3.1422173113214114, + "learning_rate": 5.8307384746371225e-06, + "loss": 0.13458404541015626, + "step": 24850 + }, + { + "epoch": 0.21491383559156427, + "grad_norm": 18.625178447740115, + "learning_rate": 5.830671002663659e-06, + "loss": 0.2722503662109375, + "step": 24855 + }, + { + "epoch": 0.21495706911310755, + "grad_norm": 5.803993033649772, + "learning_rate": 5.830603517635392e-06, + "loss": 0.22046279907226562, + "step": 24860 + }, + { + "epoch": 0.2150003026346508, + "grad_norm": 0.35953068696715357, + "learning_rate": 5.830536019552631e-06, + "loss": 0.098553466796875, + "step": 24865 + }, + { + "epoch": 0.21504353615619407, + "grad_norm": 25.092870757167695, + "learning_rate": 5.830468508415688e-06, + "loss": 0.13040809631347655, + "step": 24870 + }, + { + "epoch": 0.21508676967773732, + "grad_norm": 4.395423266152057, + "learning_rate": 5.830400984224875e-06, + "loss": 0.1567169189453125, + "step": 24875 + }, + { + "epoch": 0.2151300031992806, + "grad_norm": 1.2518421250815275, + "learning_rate": 5.830333446980502e-06, + "loss": 0.09739990234375, + "step": 24880 + }, + { + "epoch": 0.21517323672082386, + "grad_norm": 2.9349402059012735, + "learning_rate": 5.830265896682881e-06, + "loss": 0.14611892700195311, + "step": 24885 + }, + { + "epoch": 0.21521647024236712, + "grad_norm": 9.824063993082271, + "learning_rate": 5.830198333332325e-06, + "loss": 0.20748214721679686, + "step": 24890 + }, + { + "epoch": 0.21525970376391038, + "grad_norm": 5.991278092980972, + "learning_rate": 5.830130756929143e-06, + "loss": 0.2737701416015625, + "step": 24895 + }, + { + "epoch": 0.21530293728545366, + "grad_norm": 14.120679305263321, + "learning_rate": 5.830063167473649e-06, + "loss": 0.19441070556640624, + "step": 24900 + }, + { + "epoch": 0.21534617080699692, + "grad_norm": 22.6942782707087, + "learning_rate": 5.829995564966153e-06, + "loss": 0.09117431640625, + "step": 24905 + }, + { + "epoch": 0.21538940432854017, + "grad_norm": 0.6333642181471859, + "learning_rate": 5.829927949406968e-06, + "loss": 0.04317207336425781, + "step": 24910 + }, + { + "epoch": 0.21543263785008343, + "grad_norm": 8.868041748039047, + "learning_rate": 5.829860320796406e-06, + "loss": 0.56805419921875, + "step": 24915 + }, + { + "epoch": 0.21547587137162671, + "grad_norm": 28.576832096473417, + "learning_rate": 5.829792679134777e-06, + "loss": 0.13499603271484376, + "step": 24920 + }, + { + "epoch": 0.21551910489316997, + "grad_norm": 12.767542033154454, + "learning_rate": 5.829725024422395e-06, + "loss": 0.0967987060546875, + "step": 24925 + }, + { + "epoch": 0.21556233841471323, + "grad_norm": 13.916997029978265, + "learning_rate": 5.829657356659571e-06, + "loss": 0.3310882568359375, + "step": 24930 + }, + { + "epoch": 0.21560557193625648, + "grad_norm": 30.62781281844369, + "learning_rate": 5.829589675846617e-06, + "loss": 0.17193603515625, + "step": 24935 + }, + { + "epoch": 0.21564880545779977, + "grad_norm": 11.58816500786239, + "learning_rate": 5.8295219819838456e-06, + "loss": 0.09269866943359376, + "step": 24940 + }, + { + "epoch": 0.21569203897934303, + "grad_norm": 37.43076157694929, + "learning_rate": 5.8294542750715684e-06, + "loss": 0.184307861328125, + "step": 24945 + }, + { + "epoch": 0.21573527250088628, + "grad_norm": 48.350521268260096, + "learning_rate": 5.829386555110099e-06, + "loss": 0.535205078125, + "step": 24950 + }, + { + "epoch": 0.21577850602242954, + "grad_norm": 0.5823480504581902, + "learning_rate": 5.829318822099748e-06, + "loss": 0.4045867919921875, + "step": 24955 + }, + { + "epoch": 0.21582173954397282, + "grad_norm": 32.72397266109491, + "learning_rate": 5.829251076040829e-06, + "loss": 0.2647430419921875, + "step": 24960 + }, + { + "epoch": 0.21586497306551608, + "grad_norm": 1.4322306431358696, + "learning_rate": 5.829183316933655e-06, + "loss": 0.22497911453247071, + "step": 24965 + }, + { + "epoch": 0.21590820658705934, + "grad_norm": 46.88891194221741, + "learning_rate": 5.829115544778536e-06, + "loss": 0.219903564453125, + "step": 24970 + }, + { + "epoch": 0.2159514401086026, + "grad_norm": 2.349830968711366, + "learning_rate": 5.8290477595757865e-06, + "loss": 0.20594940185546876, + "step": 24975 + }, + { + "epoch": 0.21599467363014588, + "grad_norm": 2.4504764627159865, + "learning_rate": 5.828979961325719e-06, + "loss": 0.1483827590942383, + "step": 24980 + }, + { + "epoch": 0.21603790715168913, + "grad_norm": 9.489294993711134, + "learning_rate": 5.828912150028645e-06, + "loss": 0.15023956298828126, + "step": 24985 + }, + { + "epoch": 0.2160811406732324, + "grad_norm": 1.1395228271390871, + "learning_rate": 5.828844325684879e-06, + "loss": 0.22479705810546874, + "step": 24990 + }, + { + "epoch": 0.21612437419477565, + "grad_norm": 14.283626485025914, + "learning_rate": 5.828776488294734e-06, + "loss": 0.3923053741455078, + "step": 24995 + }, + { + "epoch": 0.21616760771631893, + "grad_norm": 33.88506428830287, + "learning_rate": 5.82870863785852e-06, + "loss": 0.28084869384765626, + "step": 25000 + }, + { + "epoch": 0.2162108412378622, + "grad_norm": 2.692130664678852, + "learning_rate": 5.828640774376553e-06, + "loss": 0.1720703125, + "step": 25005 + }, + { + "epoch": 0.21625407475940545, + "grad_norm": 0.8449937181758832, + "learning_rate": 5.8285728978491436e-06, + "loss": 0.07324981689453125, + "step": 25010 + }, + { + "epoch": 0.21629730828094873, + "grad_norm": 5.370149690994604, + "learning_rate": 5.828505008276607e-06, + "loss": 0.024381637573242188, + "step": 25015 + }, + { + "epoch": 0.216340541802492, + "grad_norm": 28.146107957067663, + "learning_rate": 5.8284371056592536e-06, + "loss": 0.1865081787109375, + "step": 25020 + }, + { + "epoch": 0.21638377532403524, + "grad_norm": 22.685561149559913, + "learning_rate": 5.828369189997399e-06, + "loss": 0.13275413513183593, + "step": 25025 + }, + { + "epoch": 0.2164270088455785, + "grad_norm": 11.643585555087421, + "learning_rate": 5.8283012612913564e-06, + "loss": 0.15627059936523438, + "step": 25030 + }, + { + "epoch": 0.21647024236712178, + "grad_norm": 9.227004983839244, + "learning_rate": 5.828233319541437e-06, + "loss": 0.17672805786132811, + "step": 25035 + }, + { + "epoch": 0.21651347588866504, + "grad_norm": 36.349687118783656, + "learning_rate": 5.828165364747957e-06, + "loss": 0.2235870361328125, + "step": 25040 + }, + { + "epoch": 0.2165567094102083, + "grad_norm": 45.066933698206256, + "learning_rate": 5.8280973969112275e-06, + "loss": 0.21251373291015624, + "step": 25045 + }, + { + "epoch": 0.21659994293175155, + "grad_norm": 18.768394259902635, + "learning_rate": 5.828029416031562e-06, + "loss": 0.1733613967895508, + "step": 25050 + }, + { + "epoch": 0.21664317645329484, + "grad_norm": 22.948638212248984, + "learning_rate": 5.827961422109275e-06, + "loss": 0.14684600830078126, + "step": 25055 + }, + { + "epoch": 0.2166864099748381, + "grad_norm": 3.809192148867104, + "learning_rate": 5.82789341514468e-06, + "loss": 0.1682575225830078, + "step": 25060 + }, + { + "epoch": 0.21672964349638135, + "grad_norm": 8.81578199556255, + "learning_rate": 5.827825395138091e-06, + "loss": 0.06575927734375, + "step": 25065 + }, + { + "epoch": 0.2167728770179246, + "grad_norm": 21.587080359614596, + "learning_rate": 5.827757362089821e-06, + "loss": 0.181671142578125, + "step": 25070 + }, + { + "epoch": 0.2168161105394679, + "grad_norm": 20.781823469047417, + "learning_rate": 5.827689316000183e-06, + "loss": 0.11727294921875, + "step": 25075 + }, + { + "epoch": 0.21685934406101115, + "grad_norm": 11.73123555627425, + "learning_rate": 5.827621256869493e-06, + "loss": 0.06363897323608399, + "step": 25080 + }, + { + "epoch": 0.2169025775825544, + "grad_norm": 0.22914592310493523, + "learning_rate": 5.827553184698063e-06, + "loss": 0.1514068603515625, + "step": 25085 + }, + { + "epoch": 0.21694581110409766, + "grad_norm": 6.417247962013896, + "learning_rate": 5.827485099486207e-06, + "loss": 0.1585357666015625, + "step": 25090 + }, + { + "epoch": 0.21698904462564095, + "grad_norm": 6.263284315768331, + "learning_rate": 5.82741700123424e-06, + "loss": 0.09857444763183594, + "step": 25095 + }, + { + "epoch": 0.2170322781471842, + "grad_norm": 6.372925760320328, + "learning_rate": 5.827348889942476e-06, + "loss": 0.14729461669921876, + "step": 25100 + }, + { + "epoch": 0.21707551166872746, + "grad_norm": 5.482080713885376, + "learning_rate": 5.827280765611228e-06, + "loss": 0.2671028137207031, + "step": 25105 + }, + { + "epoch": 0.21711874519027072, + "grad_norm": 2.2072732080525093, + "learning_rate": 5.827212628240812e-06, + "loss": 0.3993961334228516, + "step": 25110 + }, + { + "epoch": 0.217161978711814, + "grad_norm": 30.222552437761404, + "learning_rate": 5.827144477831541e-06, + "loss": 0.3313079833984375, + "step": 25115 + }, + { + "epoch": 0.21720521223335726, + "grad_norm": 0.3283293003441428, + "learning_rate": 5.827076314383728e-06, + "loss": 0.34199676513671873, + "step": 25120 + }, + { + "epoch": 0.21724844575490052, + "grad_norm": 7.050488358071579, + "learning_rate": 5.827008137897689e-06, + "loss": 0.2580718994140625, + "step": 25125 + }, + { + "epoch": 0.21729167927644377, + "grad_norm": 0.810793223634119, + "learning_rate": 5.82693994837374e-06, + "loss": 0.0254791259765625, + "step": 25130 + }, + { + "epoch": 0.21733491279798706, + "grad_norm": 10.729898228360073, + "learning_rate": 5.826871745812193e-06, + "loss": 0.1959014892578125, + "step": 25135 + }, + { + "epoch": 0.2173781463195303, + "grad_norm": 13.01826494533821, + "learning_rate": 5.826803530213364e-06, + "loss": 0.19911041259765624, + "step": 25140 + }, + { + "epoch": 0.21742137984107357, + "grad_norm": 9.813727389855366, + "learning_rate": 5.826735301577565e-06, + "loss": 0.1381622314453125, + "step": 25145 + }, + { + "epoch": 0.21746461336261683, + "grad_norm": 7.661280689843665, + "learning_rate": 5.826667059905114e-06, + "loss": 0.3891937255859375, + "step": 25150 + }, + { + "epoch": 0.2175078468841601, + "grad_norm": 18.736548976950733, + "learning_rate": 5.826598805196324e-06, + "loss": 0.31654815673828124, + "step": 25155 + }, + { + "epoch": 0.21755108040570337, + "grad_norm": 12.120455475397879, + "learning_rate": 5.82653053745151e-06, + "loss": 0.3198760986328125, + "step": 25160 + }, + { + "epoch": 0.21759431392724662, + "grad_norm": 1.6209329323613053, + "learning_rate": 5.826462256670987e-06, + "loss": 0.068243408203125, + "step": 25165 + }, + { + "epoch": 0.21763754744878988, + "grad_norm": 14.606815405155956, + "learning_rate": 5.82639396285507e-06, + "loss": 0.14729766845703124, + "step": 25170 + }, + { + "epoch": 0.21768078097033317, + "grad_norm": 12.89748814175932, + "learning_rate": 5.826325656004073e-06, + "loss": 0.3329559326171875, + "step": 25175 + }, + { + "epoch": 0.21772401449187642, + "grad_norm": 13.099078272654188, + "learning_rate": 5.8262573361183126e-06, + "loss": 0.3671409606933594, + "step": 25180 + }, + { + "epoch": 0.21776724801341968, + "grad_norm": 17.95570861446287, + "learning_rate": 5.826189003198103e-06, + "loss": 0.07764339447021484, + "step": 25185 + }, + { + "epoch": 0.21781048153496294, + "grad_norm": 0.7186146966997031, + "learning_rate": 5.826120657243759e-06, + "loss": 0.198956298828125, + "step": 25190 + }, + { + "epoch": 0.21785371505650622, + "grad_norm": 42.81081624982518, + "learning_rate": 5.826052298255596e-06, + "loss": 0.24889984130859374, + "step": 25195 + }, + { + "epoch": 0.21789694857804948, + "grad_norm": 26.18172936604297, + "learning_rate": 5.82598392623393e-06, + "loss": 0.3292682647705078, + "step": 25200 + }, + { + "epoch": 0.21794018209959273, + "grad_norm": 27.96787428021308, + "learning_rate": 5.825915541179076e-06, + "loss": 0.325933837890625, + "step": 25205 + }, + { + "epoch": 0.21798341562113602, + "grad_norm": 0.5731574949422275, + "learning_rate": 5.82584714309135e-06, + "loss": 0.08006134033203124, + "step": 25210 + }, + { + "epoch": 0.21802664914267927, + "grad_norm": 13.834716413570192, + "learning_rate": 5.825778731971065e-06, + "loss": 0.08699951171875, + "step": 25215 + }, + { + "epoch": 0.21806988266422253, + "grad_norm": 19.796106033753016, + "learning_rate": 5.825710307818539e-06, + "loss": 0.157159423828125, + "step": 25220 + }, + { + "epoch": 0.2181131161857658, + "grad_norm": 18.16781303808143, + "learning_rate": 5.825641870634087e-06, + "loss": 0.3520111083984375, + "step": 25225 + }, + { + "epoch": 0.21815634970730907, + "grad_norm": 3.1572255371603246, + "learning_rate": 5.825573420418024e-06, + "loss": 0.1256134033203125, + "step": 25230 + }, + { + "epoch": 0.21819958322885233, + "grad_norm": 35.2695289136389, + "learning_rate": 5.825504957170666e-06, + "loss": 0.30977592468261717, + "step": 25235 + }, + { + "epoch": 0.21824281675039559, + "grad_norm": 8.227750871518586, + "learning_rate": 5.825436480892329e-06, + "loss": 0.1109893798828125, + "step": 25240 + }, + { + "epoch": 0.21828605027193884, + "grad_norm": 2.583190889102068, + "learning_rate": 5.8253679915833285e-06, + "loss": 0.03887863159179687, + "step": 25245 + }, + { + "epoch": 0.21832928379348213, + "grad_norm": 18.686748755464322, + "learning_rate": 5.82529948924398e-06, + "loss": 0.52557373046875, + "step": 25250 + }, + { + "epoch": 0.21837251731502538, + "grad_norm": 5.9824225779195395, + "learning_rate": 5.825230973874601e-06, + "loss": 0.11147613525390625, + "step": 25255 + }, + { + "epoch": 0.21841575083656864, + "grad_norm": 25.530780334426762, + "learning_rate": 5.825162445475506e-06, + "loss": 0.2074726104736328, + "step": 25260 + }, + { + "epoch": 0.2184589843581119, + "grad_norm": 0.6227010762853293, + "learning_rate": 5.8250939040470114e-06, + "loss": 0.06305313110351562, + "step": 25265 + }, + { + "epoch": 0.21850221787965518, + "grad_norm": 16.58799504097456, + "learning_rate": 5.825025349589432e-06, + "loss": 0.15829391479492189, + "step": 25270 + }, + { + "epoch": 0.21854545140119844, + "grad_norm": 9.628357042339518, + "learning_rate": 5.8249567821030876e-06, + "loss": 0.1517333984375, + "step": 25275 + }, + { + "epoch": 0.2185886849227417, + "grad_norm": 27.357298468924817, + "learning_rate": 5.824888201588291e-06, + "loss": 0.42571272850036623, + "step": 25280 + }, + { + "epoch": 0.21863191844428495, + "grad_norm": 2.9978558138571243, + "learning_rate": 5.824819608045359e-06, + "loss": 0.07730712890625, + "step": 25285 + }, + { + "epoch": 0.21867515196582824, + "grad_norm": 0.9126777678748857, + "learning_rate": 5.82475100147461e-06, + "loss": 0.20704498291015624, + "step": 25290 + }, + { + "epoch": 0.2187183854873715, + "grad_norm": 1.7325991631494666, + "learning_rate": 5.824682381876358e-06, + "loss": 0.08599777221679687, + "step": 25295 + }, + { + "epoch": 0.21876161900891475, + "grad_norm": 36.30716135735567, + "learning_rate": 5.824613749250921e-06, + "loss": 0.3130653381347656, + "step": 25300 + }, + { + "epoch": 0.218804852530458, + "grad_norm": 3.326978817832301, + "learning_rate": 5.824545103598614e-06, + "loss": 0.067803955078125, + "step": 25305 + }, + { + "epoch": 0.2188480860520013, + "grad_norm": 0.2575007633655405, + "learning_rate": 5.824476444919755e-06, + "loss": 0.28379058837890625, + "step": 25310 + }, + { + "epoch": 0.21889131957354455, + "grad_norm": 7.678708311196391, + "learning_rate": 5.824407773214661e-06, + "loss": 0.129400634765625, + "step": 25315 + }, + { + "epoch": 0.2189345530950878, + "grad_norm": 12.57166440104466, + "learning_rate": 5.824339088483647e-06, + "loss": 0.17884521484375, + "step": 25320 + }, + { + "epoch": 0.21897778661663106, + "grad_norm": 1.054771843768298, + "learning_rate": 5.824270390727031e-06, + "loss": 0.054308319091796876, + "step": 25325 + }, + { + "epoch": 0.21902102013817434, + "grad_norm": 6.549423997790667, + "learning_rate": 5.8242016799451294e-06, + "loss": 0.2103424072265625, + "step": 25330 + }, + { + "epoch": 0.2190642536597176, + "grad_norm": 5.954255588089411, + "learning_rate": 5.82413295613826e-06, + "loss": 0.16848182678222656, + "step": 25335 + }, + { + "epoch": 0.21910748718126086, + "grad_norm": 2.814033778260658, + "learning_rate": 5.824064219306738e-06, + "loss": 0.07877960205078124, + "step": 25340 + }, + { + "epoch": 0.2191507207028041, + "grad_norm": 14.058507938316207, + "learning_rate": 5.823995469450882e-06, + "loss": 0.10598983764648437, + "step": 25345 + }, + { + "epoch": 0.2191939542243474, + "grad_norm": 9.936572703130777, + "learning_rate": 5.823926706571007e-06, + "loss": 0.11707420349121093, + "step": 25350 + }, + { + "epoch": 0.21923718774589065, + "grad_norm": 9.250840374322788, + "learning_rate": 5.823857930667433e-06, + "loss": 0.0815826416015625, + "step": 25355 + }, + { + "epoch": 0.2192804212674339, + "grad_norm": 20.55219272623324, + "learning_rate": 5.823789141740476e-06, + "loss": 0.43187103271484373, + "step": 25360 + }, + { + "epoch": 0.21932365478897717, + "grad_norm": 12.600431981723105, + "learning_rate": 5.823720339790452e-06, + "loss": 0.16148147583007813, + "step": 25365 + }, + { + "epoch": 0.21936688831052045, + "grad_norm": 1.2552130799145245, + "learning_rate": 5.8236515248176805e-06, + "loss": 0.10971412658691407, + "step": 25370 + }, + { + "epoch": 0.2194101218320637, + "grad_norm": 10.123811622495978, + "learning_rate": 5.823582696822475e-06, + "loss": 0.29798431396484376, + "step": 25375 + }, + { + "epoch": 0.21945335535360697, + "grad_norm": 31.948602721018478, + "learning_rate": 5.823513855805158e-06, + "loss": 0.12159500122070313, + "step": 25380 + }, + { + "epoch": 0.21949658887515025, + "grad_norm": 14.021085137466027, + "learning_rate": 5.823445001766045e-06, + "loss": 0.48514556884765625, + "step": 25385 + }, + { + "epoch": 0.2195398223966935, + "grad_norm": 16.584652367239848, + "learning_rate": 5.8233761347054514e-06, + "loss": 0.338616943359375, + "step": 25390 + }, + { + "epoch": 0.21958305591823676, + "grad_norm": 41.90683588920464, + "learning_rate": 5.8233072546236975e-06, + "loss": 0.5764907836914063, + "step": 25395 + }, + { + "epoch": 0.21962628943978002, + "grad_norm": 13.245555392286569, + "learning_rate": 5.823238361521099e-06, + "loss": 0.10927581787109375, + "step": 25400 + }, + { + "epoch": 0.2196695229613233, + "grad_norm": 9.20641732562667, + "learning_rate": 5.823169455397976e-06, + "loss": 0.1553131103515625, + "step": 25405 + }, + { + "epoch": 0.21971275648286656, + "grad_norm": 14.456684291113334, + "learning_rate": 5.823100536254645e-06, + "loss": 0.0817047119140625, + "step": 25410 + }, + { + "epoch": 0.21975599000440982, + "grad_norm": 0.33902422255751835, + "learning_rate": 5.823031604091423e-06, + "loss": 0.09417572021484374, + "step": 25415 + }, + { + "epoch": 0.21979922352595307, + "grad_norm": 1.2614328308201912, + "learning_rate": 5.822962658908629e-06, + "loss": 0.06998443603515625, + "step": 25420 + }, + { + "epoch": 0.21984245704749636, + "grad_norm": 4.019828449479732, + "learning_rate": 5.82289370070658e-06, + "loss": 0.551904296875, + "step": 25425 + }, + { + "epoch": 0.21988569056903962, + "grad_norm": 23.100774950056685, + "learning_rate": 5.822824729485595e-06, + "loss": 0.1706268310546875, + "step": 25430 + }, + { + "epoch": 0.21992892409058287, + "grad_norm": 8.682483404525843, + "learning_rate": 5.822755745245992e-06, + "loss": 0.17250518798828124, + "step": 25435 + }, + { + "epoch": 0.21997215761212613, + "grad_norm": 4.831208450563552, + "learning_rate": 5.822686747988089e-06, + "loss": 0.1006927490234375, + "step": 25440 + }, + { + "epoch": 0.2200153911336694, + "grad_norm": 48.975669230193894, + "learning_rate": 5.822617737712204e-06, + "loss": 0.16958961486816407, + "step": 25445 + }, + { + "epoch": 0.22005862465521267, + "grad_norm": 1.9139387956143041, + "learning_rate": 5.822548714418655e-06, + "loss": 0.08058319091796876, + "step": 25450 + }, + { + "epoch": 0.22010185817675593, + "grad_norm": 2.4567392383683933, + "learning_rate": 5.822479678107762e-06, + "loss": 0.21885986328125, + "step": 25455 + }, + { + "epoch": 0.22014509169829918, + "grad_norm": 0.453033247766341, + "learning_rate": 5.822410628779842e-06, + "loss": 0.5675254821777344, + "step": 25460 + }, + { + "epoch": 0.22018832521984247, + "grad_norm": 5.02291350353667, + "learning_rate": 5.822341566435212e-06, + "loss": 0.1246917724609375, + "step": 25465 + }, + { + "epoch": 0.22023155874138572, + "grad_norm": 3.735089908028627, + "learning_rate": 5.822272491074193e-06, + "loss": 0.1099639892578125, + "step": 25470 + }, + { + "epoch": 0.22027479226292898, + "grad_norm": 123.95451850394318, + "learning_rate": 5.822203402697102e-06, + "loss": 0.2892333984375, + "step": 25475 + }, + { + "epoch": 0.22031802578447224, + "grad_norm": 2.3054014996813557, + "learning_rate": 5.82213430130426e-06, + "loss": 0.17143478393554687, + "step": 25480 + }, + { + "epoch": 0.22036125930601552, + "grad_norm": 15.442014790189921, + "learning_rate": 5.822065186895982e-06, + "loss": 0.09572906494140625, + "step": 25485 + }, + { + "epoch": 0.22040449282755878, + "grad_norm": 4.354305595914568, + "learning_rate": 5.82199605947259e-06, + "loss": 0.211761474609375, + "step": 25490 + }, + { + "epoch": 0.22044772634910204, + "grad_norm": 34.797183189902626, + "learning_rate": 5.821926919034401e-06, + "loss": 0.32127685546875, + "step": 25495 + }, + { + "epoch": 0.2204909598706453, + "grad_norm": 39.47222576418826, + "learning_rate": 5.821857765581735e-06, + "loss": 0.19878158569335938, + "step": 25500 + }, + { + "epoch": 0.22053419339218858, + "grad_norm": 2.6222425293244087, + "learning_rate": 5.821788599114909e-06, + "loss": 0.060845947265625, + "step": 25505 + }, + { + "epoch": 0.22057742691373183, + "grad_norm": 11.458178296730457, + "learning_rate": 5.821719419634245e-06, + "loss": 0.1412994384765625, + "step": 25510 + }, + { + "epoch": 0.2206206604352751, + "grad_norm": 33.02104573462116, + "learning_rate": 5.821650227140059e-06, + "loss": 0.16356353759765624, + "step": 25515 + }, + { + "epoch": 0.22066389395681835, + "grad_norm": 4.632432734569488, + "learning_rate": 5.821581021632672e-06, + "loss": 0.1551116943359375, + "step": 25520 + }, + { + "epoch": 0.22070712747836163, + "grad_norm": 3.12188533887498, + "learning_rate": 5.821511803112403e-06, + "loss": 0.45707550048828127, + "step": 25525 + }, + { + "epoch": 0.2207503609999049, + "grad_norm": 14.761570788372218, + "learning_rate": 5.82144257157957e-06, + "loss": 0.690728759765625, + "step": 25530 + }, + { + "epoch": 0.22079359452144814, + "grad_norm": 20.538891612095593, + "learning_rate": 5.821373327034494e-06, + "loss": 0.34072265625, + "step": 25535 + }, + { + "epoch": 0.2208368280429914, + "grad_norm": 22.735963991966308, + "learning_rate": 5.821304069477493e-06, + "loss": 0.15746307373046875, + "step": 25540 + }, + { + "epoch": 0.22088006156453469, + "grad_norm": 3.1994288018942223, + "learning_rate": 5.821234798908887e-06, + "loss": 0.3909393310546875, + "step": 25545 + }, + { + "epoch": 0.22092329508607794, + "grad_norm": 0.48763088736900884, + "learning_rate": 5.821165515328996e-06, + "loss": 0.12425308227539063, + "step": 25550 + }, + { + "epoch": 0.2209665286076212, + "grad_norm": 2.4288334131621974, + "learning_rate": 5.821096218738138e-06, + "loss": 0.09930763244628907, + "step": 25555 + }, + { + "epoch": 0.22100976212916446, + "grad_norm": 0.4562340042255274, + "learning_rate": 5.821026909136634e-06, + "loss": 0.23492355346679689, + "step": 25560 + }, + { + "epoch": 0.22105299565070774, + "grad_norm": 6.1040054946236575, + "learning_rate": 5.820957586524803e-06, + "loss": 0.4743988037109375, + "step": 25565 + }, + { + "epoch": 0.221096229172251, + "grad_norm": 1.1185637778821338, + "learning_rate": 5.820888250902965e-06, + "loss": 0.1766021728515625, + "step": 25570 + }, + { + "epoch": 0.22113946269379425, + "grad_norm": 2.775651837832494, + "learning_rate": 5.82081890227144e-06, + "loss": 0.06116218566894531, + "step": 25575 + }, + { + "epoch": 0.22118269621533754, + "grad_norm": 8.494950964471691, + "learning_rate": 5.820749540630546e-06, + "loss": 0.225128173828125, + "step": 25580 + }, + { + "epoch": 0.2212259297368808, + "grad_norm": 51.03417867861632, + "learning_rate": 5.820680165980607e-06, + "loss": 0.21938552856445312, + "step": 25585 + }, + { + "epoch": 0.22126916325842405, + "grad_norm": 2.680253317146913, + "learning_rate": 5.820610778321938e-06, + "loss": 0.06427383422851562, + "step": 25590 + }, + { + "epoch": 0.2213123967799673, + "grad_norm": 9.412458392435235, + "learning_rate": 5.820541377654862e-06, + "loss": 0.2517364501953125, + "step": 25595 + }, + { + "epoch": 0.2213556303015106, + "grad_norm": 3.9996449181631877, + "learning_rate": 5.820471963979698e-06, + "loss": 0.10043869018554688, + "step": 25600 + }, + { + "epoch": 0.22139886382305385, + "grad_norm": 1.096845134074595, + "learning_rate": 5.820402537296767e-06, + "loss": 0.028433990478515626, + "step": 25605 + }, + { + "epoch": 0.2214420973445971, + "grad_norm": 32.04012528428138, + "learning_rate": 5.820333097606389e-06, + "loss": 0.3325164794921875, + "step": 25610 + }, + { + "epoch": 0.22148533086614036, + "grad_norm": 4.167832775025494, + "learning_rate": 5.820263644908885e-06, + "loss": 0.11171321868896485, + "step": 25615 + }, + { + "epoch": 0.22152856438768365, + "grad_norm": 2.2778191629174835, + "learning_rate": 5.820194179204572e-06, + "loss": 0.08509674072265624, + "step": 25620 + }, + { + "epoch": 0.2215717979092269, + "grad_norm": 5.498013151228639, + "learning_rate": 5.820124700493773e-06, + "loss": 0.18227691650390626, + "step": 25625 + }, + { + "epoch": 0.22161503143077016, + "grad_norm": 30.094114625160266, + "learning_rate": 5.820055208776809e-06, + "loss": 0.4230255126953125, + "step": 25630 + }, + { + "epoch": 0.22165826495231342, + "grad_norm": 4.519768560246945, + "learning_rate": 5.819985704054e-06, + "loss": 0.110125732421875, + "step": 25635 + }, + { + "epoch": 0.2217014984738567, + "grad_norm": 12.492100940964262, + "learning_rate": 5.8199161863256656e-06, + "loss": 0.38677139282226564, + "step": 25640 + }, + { + "epoch": 0.22174473199539996, + "grad_norm": 4.290868925114096, + "learning_rate": 5.819846655592126e-06, + "loss": 0.1357707977294922, + "step": 25645 + }, + { + "epoch": 0.22178796551694321, + "grad_norm": 11.440172284960756, + "learning_rate": 5.819777111853704e-06, + "loss": 0.09881134033203125, + "step": 25650 + }, + { + "epoch": 0.22183119903848647, + "grad_norm": 7.448085615490204, + "learning_rate": 5.819707555110718e-06, + "loss": 0.06728668212890625, + "step": 25655 + }, + { + "epoch": 0.22187443256002976, + "grad_norm": 14.220096903943352, + "learning_rate": 5.819637985363491e-06, + "loss": 0.31525459289550783, + "step": 25660 + }, + { + "epoch": 0.221917666081573, + "grad_norm": 13.984014271234534, + "learning_rate": 5.819568402612342e-06, + "loss": 0.110235595703125, + "step": 25665 + }, + { + "epoch": 0.22196089960311627, + "grad_norm": 52.093265322452055, + "learning_rate": 5.819498806857593e-06, + "loss": 0.5665817260742188, + "step": 25670 + }, + { + "epoch": 0.22200413312465953, + "grad_norm": 12.777533169004027, + "learning_rate": 5.8194291980995644e-06, + "loss": 0.09911956787109374, + "step": 25675 + }, + { + "epoch": 0.2220473666462028, + "grad_norm": 1.6943189631426578, + "learning_rate": 5.819359576338578e-06, + "loss": 0.14105224609375, + "step": 25680 + }, + { + "epoch": 0.22209060016774607, + "grad_norm": 32.75117139487882, + "learning_rate": 5.819289941574954e-06, + "loss": 0.32725982666015624, + "step": 25685 + }, + { + "epoch": 0.22213383368928932, + "grad_norm": 55.84720975325476, + "learning_rate": 5.819220293809013e-06, + "loss": 0.23580780029296874, + "step": 25690 + }, + { + "epoch": 0.22217706721083258, + "grad_norm": 47.89613452703918, + "learning_rate": 5.819150633041079e-06, + "loss": 0.245965576171875, + "step": 25695 + }, + { + "epoch": 0.22222030073237586, + "grad_norm": 12.313983215518602, + "learning_rate": 5.81908095927147e-06, + "loss": 0.189239501953125, + "step": 25700 + }, + { + "epoch": 0.22226353425391912, + "grad_norm": 6.048909713568842, + "learning_rate": 5.819011272500509e-06, + "loss": 0.1217559814453125, + "step": 25705 + }, + { + "epoch": 0.22230676777546238, + "grad_norm": 1.9824932569593599, + "learning_rate": 5.818941572728518e-06, + "loss": 0.1638120651245117, + "step": 25710 + }, + { + "epoch": 0.22235000129700563, + "grad_norm": 2.2795767937064366, + "learning_rate": 5.8188718599558165e-06, + "loss": 0.2288970947265625, + "step": 25715 + }, + { + "epoch": 0.22239323481854892, + "grad_norm": 9.699295586726702, + "learning_rate": 5.818802134182727e-06, + "loss": 0.178985595703125, + "step": 25720 + }, + { + "epoch": 0.22243646834009217, + "grad_norm": 3.18579552971348, + "learning_rate": 5.8187323954095715e-06, + "loss": 0.11405715942382813, + "step": 25725 + }, + { + "epoch": 0.22247970186163543, + "grad_norm": 15.727331371379304, + "learning_rate": 5.8186626436366715e-06, + "loss": 0.11202392578125, + "step": 25730 + }, + { + "epoch": 0.2225229353831787, + "grad_norm": 33.094911980782896, + "learning_rate": 5.818592878864348e-06, + "loss": 0.290692138671875, + "step": 25735 + }, + { + "epoch": 0.22256616890472197, + "grad_norm": 8.132323468520164, + "learning_rate": 5.818523101092923e-06, + "loss": 0.14851226806640624, + "step": 25740 + }, + { + "epoch": 0.22260940242626523, + "grad_norm": 0.9102303757361114, + "learning_rate": 5.818453310322719e-06, + "loss": 0.0315093994140625, + "step": 25745 + }, + { + "epoch": 0.22265263594780849, + "grad_norm": 26.82093048972473, + "learning_rate": 5.818383506554058e-06, + "loss": 0.34085540771484374, + "step": 25750 + }, + { + "epoch": 0.22269586946935177, + "grad_norm": 0.6500002086242429, + "learning_rate": 5.81831368978726e-06, + "loss": 0.1109466552734375, + "step": 25755 + }, + { + "epoch": 0.22273910299089503, + "grad_norm": 3.4229962822319595, + "learning_rate": 5.818243860022649e-06, + "loss": 0.24886474609375, + "step": 25760 + }, + { + "epoch": 0.22278233651243828, + "grad_norm": 0.8914194305444733, + "learning_rate": 5.818174017260545e-06, + "loss": 0.04273300170898438, + "step": 25765 + }, + { + "epoch": 0.22282557003398154, + "grad_norm": 4.434460882258766, + "learning_rate": 5.818104161501274e-06, + "loss": 0.06710357666015625, + "step": 25770 + }, + { + "epoch": 0.22286880355552482, + "grad_norm": 1.1926066453801976, + "learning_rate": 5.818034292745154e-06, + "loss": 0.08885040283203124, + "step": 25775 + }, + { + "epoch": 0.22291203707706808, + "grad_norm": 4.4863562428338675, + "learning_rate": 5.817964410992509e-06, + "loss": 0.1502716064453125, + "step": 25780 + }, + { + "epoch": 0.22295527059861134, + "grad_norm": 11.30573938822315, + "learning_rate": 5.8178945162436616e-06, + "loss": 0.1083749771118164, + "step": 25785 + }, + { + "epoch": 0.2229985041201546, + "grad_norm": 6.163594681252614, + "learning_rate": 5.817824608498933e-06, + "loss": 0.06976184844970704, + "step": 25790 + }, + { + "epoch": 0.22304173764169788, + "grad_norm": 0.9257092856829112, + "learning_rate": 5.817754687758647e-06, + "loss": 0.11743392944335937, + "step": 25795 + }, + { + "epoch": 0.22308497116324114, + "grad_norm": 53.3888551693154, + "learning_rate": 5.817684754023124e-06, + "loss": 0.40719146728515626, + "step": 25800 + }, + { + "epoch": 0.2231282046847844, + "grad_norm": 27.41813958595524, + "learning_rate": 5.81761480729269e-06, + "loss": 0.31671142578125, + "step": 25805 + }, + { + "epoch": 0.22317143820632765, + "grad_norm": 7.905451746265885, + "learning_rate": 5.817544847567663e-06, + "loss": 0.09303131103515624, + "step": 25810 + }, + { + "epoch": 0.22321467172787093, + "grad_norm": 0.6329248839852332, + "learning_rate": 5.8174748748483695e-06, + "loss": 0.25113601684570314, + "step": 25815 + }, + { + "epoch": 0.2232579052494142, + "grad_norm": 6.9135440084949, + "learning_rate": 5.817404889135132e-06, + "loss": 0.34400634765625, + "step": 25820 + }, + { + "epoch": 0.22330113877095745, + "grad_norm": 23.83103129911678, + "learning_rate": 5.81733489042827e-06, + "loss": 0.4794708251953125, + "step": 25825 + }, + { + "epoch": 0.2233443722925007, + "grad_norm": 45.74691438137485, + "learning_rate": 5.8172648787281096e-06, + "loss": 0.22309112548828125, + "step": 25830 + }, + { + "epoch": 0.223387605814044, + "grad_norm": 2.515322409956157, + "learning_rate": 5.8171948540349724e-06, + "loss": 0.027776336669921874, + "step": 25835 + }, + { + "epoch": 0.22343083933558724, + "grad_norm": 0.4531160741866392, + "learning_rate": 5.817124816349181e-06, + "loss": 0.1198089599609375, + "step": 25840 + }, + { + "epoch": 0.2234740728571305, + "grad_norm": 7.063913950631645, + "learning_rate": 5.81705476567106e-06, + "loss": 0.05534820556640625, + "step": 25845 + }, + { + "epoch": 0.22351730637867376, + "grad_norm": 6.851977155172563, + "learning_rate": 5.81698470200093e-06, + "loss": 0.118353271484375, + "step": 25850 + }, + { + "epoch": 0.22356053990021704, + "grad_norm": 27.32928859131286, + "learning_rate": 5.816914625339116e-06, + "loss": 0.06677322387695313, + "step": 25855 + }, + { + "epoch": 0.2236037734217603, + "grad_norm": 21.24097738784261, + "learning_rate": 5.816844535685941e-06, + "loss": 0.240924072265625, + "step": 25860 + }, + { + "epoch": 0.22364700694330356, + "grad_norm": 5.031520500856941, + "learning_rate": 5.816774433041727e-06, + "loss": 0.21784210205078125, + "step": 25865 + }, + { + "epoch": 0.2236902404648468, + "grad_norm": 0.1792073524572208, + "learning_rate": 5.816704317406799e-06, + "loss": 0.1857757568359375, + "step": 25870 + }, + { + "epoch": 0.2237334739863901, + "grad_norm": 9.491121720377338, + "learning_rate": 5.816634188781479e-06, + "loss": 0.070050048828125, + "step": 25875 + }, + { + "epoch": 0.22377670750793335, + "grad_norm": 17.92423150315839, + "learning_rate": 5.816564047166091e-06, + "loss": 0.23959426879882811, + "step": 25880 + }, + { + "epoch": 0.2238199410294766, + "grad_norm": 11.18380127289862, + "learning_rate": 5.816493892560959e-06, + "loss": 0.08887958526611328, + "step": 25885 + }, + { + "epoch": 0.22386317455101987, + "grad_norm": 4.418972132000027, + "learning_rate": 5.816423724966406e-06, + "loss": 0.1209259033203125, + "step": 25890 + }, + { + "epoch": 0.22390640807256315, + "grad_norm": 21.006934199288615, + "learning_rate": 5.8163535443827555e-06, + "loss": 0.21713485717773437, + "step": 25895 + }, + { + "epoch": 0.2239496415941064, + "grad_norm": 3.2885295110145916, + "learning_rate": 5.8162833508103315e-06, + "loss": 0.2422210693359375, + "step": 25900 + }, + { + "epoch": 0.22399287511564966, + "grad_norm": 1.2282567287754893, + "learning_rate": 5.816213144249457e-06, + "loss": 0.07576980590820312, + "step": 25905 + }, + { + "epoch": 0.22403610863719292, + "grad_norm": 1.630802149298134, + "learning_rate": 5.816142924700457e-06, + "loss": 0.1951568603515625, + "step": 25910 + }, + { + "epoch": 0.2240793421587362, + "grad_norm": 4.501156632526596, + "learning_rate": 5.816072692163654e-06, + "loss": 0.15830574035644532, + "step": 25915 + }, + { + "epoch": 0.22412257568027946, + "grad_norm": 12.05749372677869, + "learning_rate": 5.816002446639373e-06, + "loss": 0.20311279296875, + "step": 25920 + }, + { + "epoch": 0.22416580920182272, + "grad_norm": 8.475660820228738, + "learning_rate": 5.8159321881279375e-06, + "loss": 0.22588653564453126, + "step": 25925 + }, + { + "epoch": 0.22420904272336598, + "grad_norm": 4.586026044099138, + "learning_rate": 5.815861916629672e-06, + "loss": 0.10192413330078125, + "step": 25930 + }, + { + "epoch": 0.22425227624490926, + "grad_norm": 3.0254529827261565, + "learning_rate": 5.8157916321449e-06, + "loss": 0.15615310668945312, + "step": 25935 + }, + { + "epoch": 0.22429550976645252, + "grad_norm": 20.823704316669662, + "learning_rate": 5.815721334673945e-06, + "loss": 0.10272789001464844, + "step": 25940 + }, + { + "epoch": 0.22433874328799577, + "grad_norm": 0.06684049238170213, + "learning_rate": 5.815651024217132e-06, + "loss": 0.10966873168945312, + "step": 25945 + }, + { + "epoch": 0.22438197680953906, + "grad_norm": 1.527936533033905, + "learning_rate": 5.815580700774786e-06, + "loss": 0.07327785491943359, + "step": 25950 + }, + { + "epoch": 0.22442521033108231, + "grad_norm": 11.001681873998857, + "learning_rate": 5.8155103643472305e-06, + "loss": 0.5255950927734375, + "step": 25955 + }, + { + "epoch": 0.22446844385262557, + "grad_norm": 17.42544961215378, + "learning_rate": 5.8154400149347906e-06, + "loss": 0.21298904418945314, + "step": 25960 + }, + { + "epoch": 0.22451167737416883, + "grad_norm": 74.65055408600425, + "learning_rate": 5.815369652537789e-06, + "loss": 0.6133228302001953, + "step": 25965 + }, + { + "epoch": 0.2245549108957121, + "grad_norm": 4.704759476283139, + "learning_rate": 5.815299277156551e-06, + "loss": 0.03263378143310547, + "step": 25970 + }, + { + "epoch": 0.22459814441725537, + "grad_norm": 4.013728445967779, + "learning_rate": 5.815228888791402e-06, + "loss": 0.0640228271484375, + "step": 25975 + }, + { + "epoch": 0.22464137793879863, + "grad_norm": 19.910411060820962, + "learning_rate": 5.815158487442667e-06, + "loss": 0.17318115234375, + "step": 25980 + }, + { + "epoch": 0.22468461146034188, + "grad_norm": 10.513496721878411, + "learning_rate": 5.815088073110669e-06, + "loss": 0.0929718017578125, + "step": 25985 + }, + { + "epoch": 0.22472784498188517, + "grad_norm": 0.2032204487625674, + "learning_rate": 5.815017645795733e-06, + "loss": 0.34031028747558595, + "step": 25990 + }, + { + "epoch": 0.22477107850342842, + "grad_norm": 10.694735670382666, + "learning_rate": 5.814947205498186e-06, + "loss": 0.16570968627929689, + "step": 25995 + }, + { + "epoch": 0.22481431202497168, + "grad_norm": 41.57473412486229, + "learning_rate": 5.814876752218349e-06, + "loss": 0.17954788208007813, + "step": 26000 + }, + { + "epoch": 0.22485754554651494, + "grad_norm": 35.55652833108414, + "learning_rate": 5.814806285956551e-06, + "loss": 0.5811607360839843, + "step": 26005 + }, + { + "epoch": 0.22490077906805822, + "grad_norm": 28.387766646471704, + "learning_rate": 5.8147358067131146e-06, + "loss": 0.371295166015625, + "step": 26010 + }, + { + "epoch": 0.22494401258960148, + "grad_norm": 2.1357570076899215, + "learning_rate": 5.814665314488365e-06, + "loss": 0.2024658203125, + "step": 26015 + }, + { + "epoch": 0.22498724611114473, + "grad_norm": 91.11224384360638, + "learning_rate": 5.814594809282629e-06, + "loss": 0.15553131103515624, + "step": 26020 + }, + { + "epoch": 0.225030479632688, + "grad_norm": 0.9715050859238215, + "learning_rate": 5.814524291096229e-06, + "loss": 0.2846771240234375, + "step": 26025 + }, + { + "epoch": 0.22507371315423128, + "grad_norm": 6.868017387719313, + "learning_rate": 5.814453759929492e-06, + "loss": 0.19093170166015624, + "step": 26030 + }, + { + "epoch": 0.22511694667577453, + "grad_norm": 8.824453719338047, + "learning_rate": 5.8143832157827435e-06, + "loss": 0.2754100799560547, + "step": 26035 + }, + { + "epoch": 0.2251601801973178, + "grad_norm": 5.448413562013261, + "learning_rate": 5.814312658656307e-06, + "loss": 0.3206596374511719, + "step": 26040 + }, + { + "epoch": 0.22520341371886105, + "grad_norm": 11.410767935291332, + "learning_rate": 5.814242088550511e-06, + "loss": 0.12416534423828125, + "step": 26045 + }, + { + "epoch": 0.22524664724040433, + "grad_norm": 7.629773395624566, + "learning_rate": 5.814171505465678e-06, + "loss": 0.08182449340820312, + "step": 26050 + }, + { + "epoch": 0.2252898807619476, + "grad_norm": 0.5033521550653207, + "learning_rate": 5.8141009094021346e-06, + "loss": 0.297601318359375, + "step": 26055 + }, + { + "epoch": 0.22533311428349084, + "grad_norm": 2.7061438989013187, + "learning_rate": 5.814030300360207e-06, + "loss": 0.22818603515625, + "step": 26060 + }, + { + "epoch": 0.2253763478050341, + "grad_norm": 0.397297649389728, + "learning_rate": 5.8139596783402195e-06, + "loss": 0.202301025390625, + "step": 26065 + }, + { + "epoch": 0.22541958132657738, + "grad_norm": 63.83386720367278, + "learning_rate": 5.813889043342498e-06, + "loss": 0.17428970336914062, + "step": 26070 + }, + { + "epoch": 0.22546281484812064, + "grad_norm": 24.303029366674707, + "learning_rate": 5.813818395367371e-06, + "loss": 0.3463165283203125, + "step": 26075 + }, + { + "epoch": 0.2255060483696639, + "grad_norm": 7.737536813876754, + "learning_rate": 5.813747734415161e-06, + "loss": 0.527728271484375, + "step": 26080 + }, + { + "epoch": 0.22554928189120715, + "grad_norm": 7.630592762489024, + "learning_rate": 5.813677060486195e-06, + "loss": 0.1810527801513672, + "step": 26085 + }, + { + "epoch": 0.22559251541275044, + "grad_norm": 7.5116787287842355, + "learning_rate": 5.8136063735808e-06, + "loss": 0.07487030029296875, + "step": 26090 + }, + { + "epoch": 0.2256357489342937, + "grad_norm": 26.008270244717995, + "learning_rate": 5.8135356736993e-06, + "loss": 0.14433364868164061, + "step": 26095 + }, + { + "epoch": 0.22567898245583695, + "grad_norm": 1.5190842910943119, + "learning_rate": 5.813464960842022e-06, + "loss": 0.1260528564453125, + "step": 26100 + }, + { + "epoch": 0.2257222159773802, + "grad_norm": 38.03894962113632, + "learning_rate": 5.813394235009293e-06, + "loss": 0.29230194091796874, + "step": 26105 + }, + { + "epoch": 0.2257654494989235, + "grad_norm": 2.3874022804801704, + "learning_rate": 5.8133234962014376e-06, + "loss": 0.06909942626953125, + "step": 26110 + }, + { + "epoch": 0.22580868302046675, + "grad_norm": 6.180371317209079, + "learning_rate": 5.813252744418784e-06, + "loss": 0.2550384521484375, + "step": 26115 + }, + { + "epoch": 0.22585191654201, + "grad_norm": 14.68436602297059, + "learning_rate": 5.813181979661657e-06, + "loss": 0.10644073486328125, + "step": 26120 + }, + { + "epoch": 0.2258951500635533, + "grad_norm": 11.946010402429554, + "learning_rate": 5.8131112019303824e-06, + "loss": 0.0659576416015625, + "step": 26125 + }, + { + "epoch": 0.22593838358509655, + "grad_norm": 0.7158439686276798, + "learning_rate": 5.813040411225287e-06, + "loss": 0.0959503173828125, + "step": 26130 + }, + { + "epoch": 0.2259816171066398, + "grad_norm": 11.177638246275684, + "learning_rate": 5.812969607546699e-06, + "loss": 0.11246261596679688, + "step": 26135 + }, + { + "epoch": 0.22602485062818306, + "grad_norm": 38.74231065946183, + "learning_rate": 5.8128987908949444e-06, + "loss": 0.30489501953125, + "step": 26140 + }, + { + "epoch": 0.22606808414972634, + "grad_norm": 1.9794234836730003, + "learning_rate": 5.812827961270348e-06, + "loss": 0.0990966796875, + "step": 26145 + }, + { + "epoch": 0.2261113176712696, + "grad_norm": 6.191424209746124, + "learning_rate": 5.812757118673237e-06, + "loss": 0.3074455261230469, + "step": 26150 + }, + { + "epoch": 0.22615455119281286, + "grad_norm": 18.81984094871375, + "learning_rate": 5.8126862631039395e-06, + "loss": 0.36279754638671874, + "step": 26155 + }, + { + "epoch": 0.22619778471435611, + "grad_norm": 11.880189460429438, + "learning_rate": 5.812615394562781e-06, + "loss": 0.1857208251953125, + "step": 26160 + }, + { + "epoch": 0.2262410182358994, + "grad_norm": 17.011446615073723, + "learning_rate": 5.81254451305009e-06, + "loss": 0.22405548095703126, + "step": 26165 + }, + { + "epoch": 0.22628425175744266, + "grad_norm": 38.908733990704235, + "learning_rate": 5.812473618566191e-06, + "loss": 0.19219026565551758, + "step": 26170 + }, + { + "epoch": 0.2263274852789859, + "grad_norm": 12.032591662025974, + "learning_rate": 5.8124027111114134e-06, + "loss": 0.37847900390625, + "step": 26175 + }, + { + "epoch": 0.22637071880052917, + "grad_norm": 11.012730005202968, + "learning_rate": 5.812331790686083e-06, + "loss": 0.178515625, + "step": 26180 + }, + { + "epoch": 0.22641395232207245, + "grad_norm": 16.18499112990027, + "learning_rate": 5.8122608572905255e-06, + "loss": 0.4275848388671875, + "step": 26185 + }, + { + "epoch": 0.2264571858436157, + "grad_norm": 16.20455867992363, + "learning_rate": 5.812189910925069e-06, + "loss": 0.11394805908203125, + "step": 26190 + }, + { + "epoch": 0.22650041936515897, + "grad_norm": 8.066351632390004, + "learning_rate": 5.812118951590043e-06, + "loss": 0.1696075439453125, + "step": 26195 + }, + { + "epoch": 0.22654365288670222, + "grad_norm": 23.12586257060927, + "learning_rate": 5.812047979285772e-06, + "loss": 0.129571533203125, + "step": 26200 + }, + { + "epoch": 0.2265868864082455, + "grad_norm": 17.911074125858445, + "learning_rate": 5.811976994012584e-06, + "loss": 0.10953369140625, + "step": 26205 + }, + { + "epoch": 0.22663011992978876, + "grad_norm": 17.57067748289049, + "learning_rate": 5.811905995770808e-06, + "loss": 0.20863800048828124, + "step": 26210 + }, + { + "epoch": 0.22667335345133202, + "grad_norm": 4.379329246191548, + "learning_rate": 5.811834984560768e-06, + "loss": 0.22965774536132813, + "step": 26215 + }, + { + "epoch": 0.22671658697287528, + "grad_norm": 11.307648570702892, + "learning_rate": 5.811763960382794e-06, + "loss": 0.21675262451171876, + "step": 26220 + }, + { + "epoch": 0.22675982049441856, + "grad_norm": 45.259250795274376, + "learning_rate": 5.811692923237214e-06, + "loss": 0.332373046875, + "step": 26225 + }, + { + "epoch": 0.22680305401596182, + "grad_norm": 4.9586596087419235, + "learning_rate": 5.811621873124354e-06, + "loss": 0.179193115234375, + "step": 26230 + }, + { + "epoch": 0.22684628753750508, + "grad_norm": 9.905924849518794, + "learning_rate": 5.8115508100445425e-06, + "loss": 0.27417755126953125, + "step": 26235 + }, + { + "epoch": 0.22688952105904833, + "grad_norm": 10.714909333522181, + "learning_rate": 5.8114797339981074e-06, + "loss": 0.176416015625, + "step": 26240 + }, + { + "epoch": 0.22693275458059162, + "grad_norm": 8.130227443848506, + "learning_rate": 5.811408644985376e-06, + "loss": 0.06959514617919922, + "step": 26245 + }, + { + "epoch": 0.22697598810213487, + "grad_norm": 0.7997662212402546, + "learning_rate": 5.8113375430066756e-06, + "loss": 0.5541046142578125, + "step": 26250 + }, + { + "epoch": 0.22701922162367813, + "grad_norm": 6.088861790047627, + "learning_rate": 5.811266428062336e-06, + "loss": 0.227337646484375, + "step": 26255 + }, + { + "epoch": 0.2270624551452214, + "grad_norm": 2.800480264090222, + "learning_rate": 5.811195300152683e-06, + "loss": 0.06504974365234376, + "step": 26260 + }, + { + "epoch": 0.22710568866676467, + "grad_norm": 6.165753725374706, + "learning_rate": 5.811124159278046e-06, + "loss": 0.09599075317382813, + "step": 26265 + }, + { + "epoch": 0.22714892218830793, + "grad_norm": 5.255681206523729, + "learning_rate": 5.8110530054387536e-06, + "loss": 0.11220703125, + "step": 26270 + }, + { + "epoch": 0.22719215570985118, + "grad_norm": 30.012539187609715, + "learning_rate": 5.810981838635133e-06, + "loss": 0.16846923828125, + "step": 26275 + }, + { + "epoch": 0.22723538923139444, + "grad_norm": 0.6731743159447247, + "learning_rate": 5.810910658867512e-06, + "loss": 0.03770904541015625, + "step": 26280 + }, + { + "epoch": 0.22727862275293773, + "grad_norm": 8.361909508709562, + "learning_rate": 5.810839466136219e-06, + "loss": 0.205035400390625, + "step": 26285 + }, + { + "epoch": 0.22732185627448098, + "grad_norm": 4.603702043898383, + "learning_rate": 5.810768260441584e-06, + "loss": 0.164599609375, + "step": 26290 + }, + { + "epoch": 0.22736508979602424, + "grad_norm": 10.706487486414492, + "learning_rate": 5.810697041783932e-06, + "loss": 0.137261962890625, + "step": 26295 + }, + { + "epoch": 0.2274083233175675, + "grad_norm": 22.526427464932492, + "learning_rate": 5.810625810163595e-06, + "loss": 0.240185546875, + "step": 26300 + }, + { + "epoch": 0.22745155683911078, + "grad_norm": 26.598663945472868, + "learning_rate": 5.8105545655809004e-06, + "loss": 0.18484420776367189, + "step": 26305 + }, + { + "epoch": 0.22749479036065404, + "grad_norm": 7.481282540540808, + "learning_rate": 5.8104833080361765e-06, + "loss": 0.34638671875, + "step": 26310 + }, + { + "epoch": 0.2275380238821973, + "grad_norm": 0.5856537363230181, + "learning_rate": 5.810412037529751e-06, + "loss": 0.20369949340820312, + "step": 26315 + }, + { + "epoch": 0.22758125740374058, + "grad_norm": 5.55529686614856, + "learning_rate": 5.810340754061955e-06, + "loss": 0.080560302734375, + "step": 26320 + }, + { + "epoch": 0.22762449092528383, + "grad_norm": 0.9456102885697579, + "learning_rate": 5.8102694576331145e-06, + "loss": 0.300506591796875, + "step": 26325 + }, + { + "epoch": 0.2276677244468271, + "grad_norm": 2.1054551640454244, + "learning_rate": 5.81019814824356e-06, + "loss": 0.403082275390625, + "step": 26330 + }, + { + "epoch": 0.22771095796837035, + "grad_norm": 4.590139307234638, + "learning_rate": 5.8101268258936205e-06, + "loss": 0.265673828125, + "step": 26335 + }, + { + "epoch": 0.22775419148991363, + "grad_norm": 41.01935125485195, + "learning_rate": 5.810055490583623e-06, + "loss": 0.37776947021484375, + "step": 26340 + }, + { + "epoch": 0.2277974250114569, + "grad_norm": 24.18011754227653, + "learning_rate": 5.809984142313899e-06, + "loss": 0.2127166748046875, + "step": 26345 + }, + { + "epoch": 0.22784065853300015, + "grad_norm": 3.058662070325427, + "learning_rate": 5.809912781084777e-06, + "loss": 0.14519195556640624, + "step": 26350 + }, + { + "epoch": 0.2278838920545434, + "grad_norm": 24.357487308379408, + "learning_rate": 5.809841406896583e-06, + "loss": 0.238201904296875, + "step": 26355 + }, + { + "epoch": 0.2279271255760867, + "grad_norm": 3.5434259549277276, + "learning_rate": 5.809770019749651e-06, + "loss": 0.095849609375, + "step": 26360 + }, + { + "epoch": 0.22797035909762994, + "grad_norm": 7.470391962143171, + "learning_rate": 5.809698619644307e-06, + "loss": 0.32177581787109377, + "step": 26365 + }, + { + "epoch": 0.2280135926191732, + "grad_norm": 1.4464626020268683, + "learning_rate": 5.809627206580882e-06, + "loss": 0.2712047576904297, + "step": 26370 + }, + { + "epoch": 0.22805682614071646, + "grad_norm": 3.8664506001674117, + "learning_rate": 5.809555780559704e-06, + "loss": 0.3270263671875, + "step": 26375 + }, + { + "epoch": 0.22810005966225974, + "grad_norm": 3.1684753728622197, + "learning_rate": 5.809484341581103e-06, + "loss": 0.14888916015625, + "step": 26380 + }, + { + "epoch": 0.228143293183803, + "grad_norm": 3.8131124731932973, + "learning_rate": 5.809412889645408e-06, + "loss": 0.04997100830078125, + "step": 26385 + }, + { + "epoch": 0.22818652670534625, + "grad_norm": 2.56960256131103, + "learning_rate": 5.80934142475295e-06, + "loss": 0.06124858856201172, + "step": 26390 + }, + { + "epoch": 0.2282297602268895, + "grad_norm": 9.79671578190334, + "learning_rate": 5.809269946904057e-06, + "loss": 0.411553955078125, + "step": 26395 + }, + { + "epoch": 0.2282729937484328, + "grad_norm": 25.11913851121071, + "learning_rate": 5.809198456099059e-06, + "loss": 0.46378936767578127, + "step": 26400 + }, + { + "epoch": 0.22831622726997605, + "grad_norm": 75.37326249689461, + "learning_rate": 5.809126952338287e-06, + "loss": 0.7716796875, + "step": 26405 + }, + { + "epoch": 0.2283594607915193, + "grad_norm": 45.15560395251805, + "learning_rate": 5.80905543562207e-06, + "loss": 0.37069091796875, + "step": 26410 + }, + { + "epoch": 0.22840269431306257, + "grad_norm": 6.266904493091642, + "learning_rate": 5.808983905950736e-06, + "loss": 0.1662994384765625, + "step": 26415 + }, + { + "epoch": 0.22844592783460585, + "grad_norm": 5.432171377291413, + "learning_rate": 5.8089123633246165e-06, + "loss": 0.06753273010253906, + "step": 26420 + }, + { + "epoch": 0.2284891613561491, + "grad_norm": 3.5711634920257946, + "learning_rate": 5.808840807744043e-06, + "loss": 0.3549102783203125, + "step": 26425 + }, + { + "epoch": 0.22853239487769236, + "grad_norm": 1.1968335292860006, + "learning_rate": 5.808769239209343e-06, + "loss": 0.345587158203125, + "step": 26430 + }, + { + "epoch": 0.22857562839923562, + "grad_norm": 6.2567617755936675, + "learning_rate": 5.808697657720846e-06, + "loss": 0.1304473876953125, + "step": 26435 + }, + { + "epoch": 0.2286188619207789, + "grad_norm": 29.952039291944374, + "learning_rate": 5.8086260632788856e-06, + "loss": 0.15886573791503905, + "step": 26440 + }, + { + "epoch": 0.22866209544232216, + "grad_norm": 6.312812688205817, + "learning_rate": 5.8085544558837885e-06, + "loss": 0.18624420166015626, + "step": 26445 + }, + { + "epoch": 0.22870532896386542, + "grad_norm": 25.13287278551407, + "learning_rate": 5.808482835535888e-06, + "loss": 0.20149993896484375, + "step": 26450 + }, + { + "epoch": 0.22874856248540867, + "grad_norm": 5.2751670521020015, + "learning_rate": 5.8084112022355115e-06, + "loss": 0.15145263671875, + "step": 26455 + }, + { + "epoch": 0.22879179600695196, + "grad_norm": 0.7095409165183224, + "learning_rate": 5.8083395559829914e-06, + "loss": 0.08066024780273437, + "step": 26460 + }, + { + "epoch": 0.22883502952849522, + "grad_norm": 15.844321781169272, + "learning_rate": 5.808267896778657e-06, + "loss": 0.5864494323730469, + "step": 26465 + }, + { + "epoch": 0.22887826305003847, + "grad_norm": 4.071040191858094, + "learning_rate": 5.80819622462284e-06, + "loss": 0.1004364013671875, + "step": 26470 + }, + { + "epoch": 0.22892149657158173, + "grad_norm": 2.534723686106914, + "learning_rate": 5.808124539515869e-06, + "loss": 0.3058921813964844, + "step": 26475 + }, + { + "epoch": 0.228964730093125, + "grad_norm": 18.04430723568588, + "learning_rate": 5.808052841458076e-06, + "loss": 0.2623046875, + "step": 26480 + }, + { + "epoch": 0.22900796361466827, + "grad_norm": 5.699098013428692, + "learning_rate": 5.8079811304497915e-06, + "loss": 0.10789108276367188, + "step": 26485 + }, + { + "epoch": 0.22905119713621153, + "grad_norm": 47.89140729013285, + "learning_rate": 5.807909406491346e-06, + "loss": 0.31783294677734375, + "step": 26490 + }, + { + "epoch": 0.2290944306577548, + "grad_norm": 0.5164301951054029, + "learning_rate": 5.807837669583071e-06, + "loss": 0.3527069091796875, + "step": 26495 + }, + { + "epoch": 0.22913766417929807, + "grad_norm": 1.6222695928405593, + "learning_rate": 5.807765919725297e-06, + "loss": 0.16116485595703126, + "step": 26500 + }, + { + "epoch": 0.22918089770084132, + "grad_norm": 13.87511699935762, + "learning_rate": 5.807694156918354e-06, + "loss": 0.06410369873046876, + "step": 26505 + }, + { + "epoch": 0.22922413122238458, + "grad_norm": 20.62983792457027, + "learning_rate": 5.807622381162574e-06, + "loss": 0.144134521484375, + "step": 26510 + }, + { + "epoch": 0.22926736474392787, + "grad_norm": 2.8273782776891268, + "learning_rate": 5.807550592458288e-06, + "loss": 0.1673675537109375, + "step": 26515 + }, + { + "epoch": 0.22931059826547112, + "grad_norm": 8.936026838400984, + "learning_rate": 5.807478790805826e-06, + "loss": 0.111163330078125, + "step": 26520 + }, + { + "epoch": 0.22935383178701438, + "grad_norm": 1.859386890402599, + "learning_rate": 5.80740697620552e-06, + "loss": 0.18479537963867188, + "step": 26525 + }, + { + "epoch": 0.22939706530855763, + "grad_norm": 5.236169392851561, + "learning_rate": 5.807335148657701e-06, + "loss": 0.3483551025390625, + "step": 26530 + }, + { + "epoch": 0.22944029883010092, + "grad_norm": 2.046640255889789, + "learning_rate": 5.8072633081627e-06, + "loss": 0.28349151611328127, + "step": 26535 + }, + { + "epoch": 0.22948353235164418, + "grad_norm": 8.163905175201558, + "learning_rate": 5.807191454720849e-06, + "loss": 0.1284637451171875, + "step": 26540 + }, + { + "epoch": 0.22952676587318743, + "grad_norm": 2.383519481717226, + "learning_rate": 5.80711958833248e-06, + "loss": 0.0484527587890625, + "step": 26545 + }, + { + "epoch": 0.2295699993947307, + "grad_norm": 36.14061023953904, + "learning_rate": 5.807047708997923e-06, + "loss": 0.279498291015625, + "step": 26550 + }, + { + "epoch": 0.22961323291627397, + "grad_norm": 8.99591068702847, + "learning_rate": 5.806975816717511e-06, + "loss": 0.2334930419921875, + "step": 26555 + }, + { + "epoch": 0.22965646643781723, + "grad_norm": 8.601202661891618, + "learning_rate": 5.806903911491573e-06, + "loss": 0.20201416015625, + "step": 26560 + }, + { + "epoch": 0.2296996999593605, + "grad_norm": 45.12500039190703, + "learning_rate": 5.806831993320441e-06, + "loss": 0.26514663696289065, + "step": 26565 + }, + { + "epoch": 0.22974293348090374, + "grad_norm": 15.907616079386392, + "learning_rate": 5.806760062204451e-06, + "loss": 0.06954307556152343, + "step": 26570 + }, + { + "epoch": 0.22978616700244703, + "grad_norm": 1.3159223396050792, + "learning_rate": 5.80668811814393e-06, + "loss": 0.08641700744628907, + "step": 26575 + }, + { + "epoch": 0.22982940052399028, + "grad_norm": 0.4815453368045794, + "learning_rate": 5.806616161139211e-06, + "loss": 0.35244140625, + "step": 26580 + }, + { + "epoch": 0.22987263404553354, + "grad_norm": 7.9220965463542425, + "learning_rate": 5.806544191190627e-06, + "loss": 0.17517776489257814, + "step": 26585 + }, + { + "epoch": 0.2299158675670768, + "grad_norm": 16.751599537746877, + "learning_rate": 5.806472208298509e-06, + "loss": 0.25699005126953123, + "step": 26590 + }, + { + "epoch": 0.22995910108862008, + "grad_norm": 47.08619237201414, + "learning_rate": 5.8064002124631885e-06, + "loss": 0.3747589111328125, + "step": 26595 + }, + { + "epoch": 0.23000233461016334, + "grad_norm": 26.723294667896262, + "learning_rate": 5.806328203684999e-06, + "loss": 0.327008056640625, + "step": 26600 + }, + { + "epoch": 0.2300455681317066, + "grad_norm": 0.5798465174325695, + "learning_rate": 5.806256181964271e-06, + "loss": 0.113250732421875, + "step": 26605 + }, + { + "epoch": 0.23008880165324985, + "grad_norm": 0.7042237713995043, + "learning_rate": 5.8061841473013385e-06, + "loss": 0.203167724609375, + "step": 26610 + }, + { + "epoch": 0.23013203517479314, + "grad_norm": 1.831778799065678, + "learning_rate": 5.806112099696532e-06, + "loss": 0.14128570556640624, + "step": 26615 + }, + { + "epoch": 0.2301752686963364, + "grad_norm": 3.860253179775625, + "learning_rate": 5.806040039150184e-06, + "loss": 0.150543212890625, + "step": 26620 + }, + { + "epoch": 0.23021850221787965, + "grad_norm": 5.30956479617345, + "learning_rate": 5.8059679656626285e-06, + "loss": 0.3985107421875, + "step": 26625 + }, + { + "epoch": 0.2302617357394229, + "grad_norm": 20.62795955624525, + "learning_rate": 5.805895879234196e-06, + "loss": 0.19256744384765626, + "step": 26630 + }, + { + "epoch": 0.2303049692609662, + "grad_norm": 4.714857008825083, + "learning_rate": 5.80582377986522e-06, + "loss": 0.10848426818847656, + "step": 26635 + }, + { + "epoch": 0.23034820278250945, + "grad_norm": 59.42565260493981, + "learning_rate": 5.805751667556032e-06, + "loss": 0.2794486999511719, + "step": 26640 + }, + { + "epoch": 0.2303914363040527, + "grad_norm": 52.004391512312615, + "learning_rate": 5.8056795423069654e-06, + "loss": 0.2824138641357422, + "step": 26645 + }, + { + "epoch": 0.23043466982559596, + "grad_norm": 0.24167551811679444, + "learning_rate": 5.8056074041183535e-06, + "loss": 0.16840858459472657, + "step": 26650 + }, + { + "epoch": 0.23047790334713925, + "grad_norm": 2.577354238852904, + "learning_rate": 5.805535252990527e-06, + "loss": 0.3075916290283203, + "step": 26655 + }, + { + "epoch": 0.2305211368686825, + "grad_norm": 1.39189716243132, + "learning_rate": 5.80546308892382e-06, + "loss": 0.22451934814453126, + "step": 26660 + }, + { + "epoch": 0.23056437039022576, + "grad_norm": 21.34309365465928, + "learning_rate": 5.805390911918566e-06, + "loss": 0.306298828125, + "step": 26665 + }, + { + "epoch": 0.23060760391176902, + "grad_norm": 35.13041182173666, + "learning_rate": 5.8053187219750965e-06, + "loss": 0.36636962890625, + "step": 26670 + }, + { + "epoch": 0.2306508374333123, + "grad_norm": 30.685577470237106, + "learning_rate": 5.805246519093744e-06, + "loss": 0.33953857421875, + "step": 26675 + }, + { + "epoch": 0.23069407095485556, + "grad_norm": 9.397143023528955, + "learning_rate": 5.805174303274844e-06, + "loss": 0.122442626953125, + "step": 26680 + }, + { + "epoch": 0.2307373044763988, + "grad_norm": 10.053209001061445, + "learning_rate": 5.8051020745187274e-06, + "loss": 0.262860107421875, + "step": 26685 + }, + { + "epoch": 0.2307805379979421, + "grad_norm": 2.52645307039583, + "learning_rate": 5.805029832825728e-06, + "loss": 0.0912109375, + "step": 26690 + }, + { + "epoch": 0.23082377151948535, + "grad_norm": 14.369387340498129, + "learning_rate": 5.804957578196178e-06, + "loss": 0.12191162109375, + "step": 26695 + }, + { + "epoch": 0.2308670050410286, + "grad_norm": 1.643834109105857, + "learning_rate": 5.804885310630412e-06, + "loss": 0.23928985595703126, + "step": 26700 + }, + { + "epoch": 0.23091023856257187, + "grad_norm": 17.484706754259076, + "learning_rate": 5.804813030128763e-06, + "loss": 0.19804763793945312, + "step": 26705 + }, + { + "epoch": 0.23095347208411515, + "grad_norm": 16.758767252621205, + "learning_rate": 5.804740736691565e-06, + "loss": 0.1201751708984375, + "step": 26710 + }, + { + "epoch": 0.2309967056056584, + "grad_norm": 10.835794307733147, + "learning_rate": 5.804668430319149e-06, + "loss": 0.3552459716796875, + "step": 26715 + }, + { + "epoch": 0.23103993912720167, + "grad_norm": 37.314027719828374, + "learning_rate": 5.804596111011851e-06, + "loss": 0.41512908935546877, + "step": 26720 + }, + { + "epoch": 0.23108317264874492, + "grad_norm": 1.549283371917004, + "learning_rate": 5.8045237787700035e-06, + "loss": 0.11293411254882812, + "step": 26725 + }, + { + "epoch": 0.2311264061702882, + "grad_norm": 18.035621576380752, + "learning_rate": 5.80445143359394e-06, + "loss": 0.35369415283203126, + "step": 26730 + }, + { + "epoch": 0.23116963969183146, + "grad_norm": 38.743687629958195, + "learning_rate": 5.804379075483994e-06, + "loss": 0.19017333984375, + "step": 26735 + }, + { + "epoch": 0.23121287321337472, + "grad_norm": 0.41395752738426034, + "learning_rate": 5.8043067044405e-06, + "loss": 0.01697959899902344, + "step": 26740 + }, + { + "epoch": 0.23125610673491798, + "grad_norm": 51.59791663315031, + "learning_rate": 5.80423432046379e-06, + "loss": 0.20947723388671874, + "step": 26745 + }, + { + "epoch": 0.23129934025646126, + "grad_norm": 6.399171463612919, + "learning_rate": 5.8041619235542e-06, + "loss": 0.07149734497070312, + "step": 26750 + }, + { + "epoch": 0.23134257377800452, + "grad_norm": 11.327484026589705, + "learning_rate": 5.804089513712063e-06, + "loss": 0.1744251251220703, + "step": 26755 + }, + { + "epoch": 0.23138580729954777, + "grad_norm": 9.644616328647995, + "learning_rate": 5.8040170909377135e-06, + "loss": 0.16832237243652343, + "step": 26760 + }, + { + "epoch": 0.23142904082109103, + "grad_norm": 23.795883167635772, + "learning_rate": 5.803944655231484e-06, + "loss": 0.29196624755859374, + "step": 26765 + }, + { + "epoch": 0.23147227434263432, + "grad_norm": 2.1375113332306444, + "learning_rate": 5.80387220659371e-06, + "loss": 0.083074951171875, + "step": 26770 + }, + { + "epoch": 0.23151550786417757, + "grad_norm": 16.579152248329166, + "learning_rate": 5.8037997450247245e-06, + "loss": 0.1514373779296875, + "step": 26775 + }, + { + "epoch": 0.23155874138572083, + "grad_norm": 14.742762699338446, + "learning_rate": 5.803727270524863e-06, + "loss": 0.09342212677001953, + "step": 26780 + }, + { + "epoch": 0.23160197490726409, + "grad_norm": 2.6246879509777723, + "learning_rate": 5.80365478309446e-06, + "loss": 0.176910400390625, + "step": 26785 + }, + { + "epoch": 0.23164520842880737, + "grad_norm": 0.17888032740258913, + "learning_rate": 5.803582282733848e-06, + "loss": 0.14323883056640624, + "step": 26790 + }, + { + "epoch": 0.23168844195035063, + "grad_norm": 1.4375934548661817, + "learning_rate": 5.803509769443361e-06, + "loss": 0.04434814453125, + "step": 26795 + }, + { + "epoch": 0.23173167547189388, + "grad_norm": 3.953786283931232, + "learning_rate": 5.803437243223336e-06, + "loss": 0.12030029296875, + "step": 26800 + }, + { + "epoch": 0.23177490899343714, + "grad_norm": 43.48176813082643, + "learning_rate": 5.803364704074106e-06, + "loss": 0.45283203125, + "step": 26805 + }, + { + "epoch": 0.23181814251498042, + "grad_norm": 15.137482198179628, + "learning_rate": 5.803292151996006e-06, + "loss": 0.19140625, + "step": 26810 + }, + { + "epoch": 0.23186137603652368, + "grad_norm": 7.965728795954377, + "learning_rate": 5.8032195869893695e-06, + "loss": 0.08154373168945313, + "step": 26815 + }, + { + "epoch": 0.23190460955806694, + "grad_norm": 0.14245797088043272, + "learning_rate": 5.803147009054533e-06, + "loss": 0.3901054382324219, + "step": 26820 + }, + { + "epoch": 0.2319478430796102, + "grad_norm": 0.8474690736398697, + "learning_rate": 5.803074418191829e-06, + "loss": 0.13083343505859374, + "step": 26825 + }, + { + "epoch": 0.23199107660115348, + "grad_norm": 0.8721878032379633, + "learning_rate": 5.803001814401594e-06, + "loss": 0.2969198226928711, + "step": 26830 + }, + { + "epoch": 0.23203431012269674, + "grad_norm": 6.984558245891264, + "learning_rate": 5.802929197684162e-06, + "loss": 0.1110565185546875, + "step": 26835 + }, + { + "epoch": 0.23207754364424, + "grad_norm": 2.053159401057871, + "learning_rate": 5.802856568039869e-06, + "loss": 0.339727783203125, + "step": 26840 + }, + { + "epoch": 0.23212077716578325, + "grad_norm": 15.8487647596724, + "learning_rate": 5.8027839254690485e-06, + "loss": 0.35166015625, + "step": 26845 + }, + { + "epoch": 0.23216401068732653, + "grad_norm": 2.159658499086385, + "learning_rate": 5.802711269972037e-06, + "loss": 0.3352481842041016, + "step": 26850 + }, + { + "epoch": 0.2322072442088698, + "grad_norm": 32.93522949838993, + "learning_rate": 5.802638601549168e-06, + "loss": 0.1185272216796875, + "step": 26855 + }, + { + "epoch": 0.23225047773041305, + "grad_norm": 14.577479626565324, + "learning_rate": 5.8025659202007775e-06, + "loss": 0.08087387084960937, + "step": 26860 + }, + { + "epoch": 0.2322937112519563, + "grad_norm": 17.46857282177834, + "learning_rate": 5.8024932259272e-06, + "loss": 0.1602569580078125, + "step": 26865 + }, + { + "epoch": 0.2323369447734996, + "grad_norm": 9.164720465337853, + "learning_rate": 5.8024205187287726e-06, + "loss": 0.051904296875, + "step": 26870 + }, + { + "epoch": 0.23238017829504284, + "grad_norm": 29.874141205927998, + "learning_rate": 5.802347798605829e-06, + "loss": 0.37047348022460935, + "step": 26875 + }, + { + "epoch": 0.2324234118165861, + "grad_norm": 40.22222694151068, + "learning_rate": 5.802275065558705e-06, + "loss": 0.4748291015625, + "step": 26880 + }, + { + "epoch": 0.23246664533812939, + "grad_norm": 1.5502529501225313, + "learning_rate": 5.8022023195877356e-06, + "loss": 0.07659912109375, + "step": 26885 + }, + { + "epoch": 0.23250987885967264, + "grad_norm": 37.24568290147914, + "learning_rate": 5.802129560693256e-06, + "loss": 0.45885009765625, + "step": 26890 + }, + { + "epoch": 0.2325531123812159, + "grad_norm": 0.11906831440834671, + "learning_rate": 5.802056788875604e-06, + "loss": 0.2506889343261719, + "step": 26895 + }, + { + "epoch": 0.23259634590275916, + "grad_norm": 3.666040632250846, + "learning_rate": 5.801984004135113e-06, + "loss": 0.17156524658203126, + "step": 26900 + }, + { + "epoch": 0.23263957942430244, + "grad_norm": 21.045144568430466, + "learning_rate": 5.80191120647212e-06, + "loss": 0.2689544677734375, + "step": 26905 + }, + { + "epoch": 0.2326828129458457, + "grad_norm": 3.165091837809561, + "learning_rate": 5.801838395886959e-06, + "loss": 0.088995361328125, + "step": 26910 + }, + { + "epoch": 0.23272604646738895, + "grad_norm": 25.035814898365537, + "learning_rate": 5.801765572379967e-06, + "loss": 0.2277801513671875, + "step": 26915 + }, + { + "epoch": 0.2327692799889322, + "grad_norm": 1.0366344790293252, + "learning_rate": 5.801692735951481e-06, + "loss": 0.1112091064453125, + "step": 26920 + }, + { + "epoch": 0.2328125135104755, + "grad_norm": 0.2235416567446518, + "learning_rate": 5.801619886601835e-06, + "loss": 0.13759765625, + "step": 26925 + }, + { + "epoch": 0.23285574703201875, + "grad_norm": 20.388690431560885, + "learning_rate": 5.801547024331365e-06, + "loss": 0.2793704986572266, + "step": 26930 + }, + { + "epoch": 0.232898980553562, + "grad_norm": 0.1735965549934319, + "learning_rate": 5.801474149140409e-06, + "loss": 0.10464630126953126, + "step": 26935 + }, + { + "epoch": 0.23294221407510526, + "grad_norm": 14.606830898414689, + "learning_rate": 5.8014012610293e-06, + "loss": 0.10087432861328124, + "step": 26940 + }, + { + "epoch": 0.23298544759664855, + "grad_norm": 9.730623123752661, + "learning_rate": 5.801328359998377e-06, + "loss": 0.09963607788085938, + "step": 26945 + }, + { + "epoch": 0.2330286811181918, + "grad_norm": 0.345010139038883, + "learning_rate": 5.801255446047975e-06, + "loss": 0.09613494873046875, + "step": 26950 + }, + { + "epoch": 0.23307191463973506, + "grad_norm": 5.245978987337721, + "learning_rate": 5.80118251917843e-06, + "loss": 0.06133842468261719, + "step": 26955 + }, + { + "epoch": 0.23311514816127832, + "grad_norm": 14.688187340461733, + "learning_rate": 5.80110957939008e-06, + "loss": 0.21287841796875, + "step": 26960 + }, + { + "epoch": 0.2331583816828216, + "grad_norm": 8.567826631759361, + "learning_rate": 5.801036626683259e-06, + "loss": 0.207470703125, + "step": 26965 + }, + { + "epoch": 0.23320161520436486, + "grad_norm": 9.834762050904587, + "learning_rate": 5.800963661058305e-06, + "loss": 0.136480712890625, + "step": 26970 + }, + { + "epoch": 0.23324484872590812, + "grad_norm": 3.4964435614976956, + "learning_rate": 5.800890682515553e-06, + "loss": 0.17018280029296876, + "step": 26975 + }, + { + "epoch": 0.23328808224745137, + "grad_norm": 2.863159594429064, + "learning_rate": 5.800817691055342e-06, + "loss": 0.54620361328125, + "step": 26980 + }, + { + "epoch": 0.23333131576899466, + "grad_norm": 2.2862243974764147, + "learning_rate": 5.800744686678007e-06, + "loss": 0.143359375, + "step": 26985 + }, + { + "epoch": 0.2333745492905379, + "grad_norm": 3.0817856893970177, + "learning_rate": 5.8006716693838845e-06, + "loss": 0.1521728515625, + "step": 26990 + }, + { + "epoch": 0.23341778281208117, + "grad_norm": 46.70984449295377, + "learning_rate": 5.800598639173312e-06, + "loss": 0.36827850341796875, + "step": 26995 + }, + { + "epoch": 0.23346101633362443, + "grad_norm": 15.108178345771003, + "learning_rate": 5.8005255960466265e-06, + "loss": 0.20019989013671874, + "step": 27000 + }, + { + "epoch": 0.2335042498551677, + "grad_norm": 0.5601701869521354, + "learning_rate": 5.800452540004164e-06, + "loss": 0.05831451416015625, + "step": 27005 + }, + { + "epoch": 0.23354748337671097, + "grad_norm": 12.430542966412908, + "learning_rate": 5.800379471046262e-06, + "loss": 0.1432220458984375, + "step": 27010 + }, + { + "epoch": 0.23359071689825422, + "grad_norm": 31.23311781501841, + "learning_rate": 5.800306389173258e-06, + "loss": 0.1742034912109375, + "step": 27015 + }, + { + "epoch": 0.23363395041979748, + "grad_norm": 38.48386602611384, + "learning_rate": 5.800233294385487e-06, + "loss": 0.40581512451171875, + "step": 27020 + }, + { + "epoch": 0.23367718394134077, + "grad_norm": 1.5347705866851735, + "learning_rate": 5.800160186683288e-06, + "loss": 0.23231201171875, + "step": 27025 + }, + { + "epoch": 0.23372041746288402, + "grad_norm": 26.80313834485471, + "learning_rate": 5.800087066066998e-06, + "loss": 0.2446044921875, + "step": 27030 + }, + { + "epoch": 0.23376365098442728, + "grad_norm": 3.2462910730003025, + "learning_rate": 5.800013932536953e-06, + "loss": 0.1378814697265625, + "step": 27035 + }, + { + "epoch": 0.23380688450597054, + "grad_norm": 7.698096522179099, + "learning_rate": 5.799940786093492e-06, + "loss": 0.19968719482421876, + "step": 27040 + }, + { + "epoch": 0.23385011802751382, + "grad_norm": 7.115103273777437, + "learning_rate": 5.799867626736951e-06, + "loss": 0.19820556640625, + "step": 27045 + }, + { + "epoch": 0.23389335154905708, + "grad_norm": 1.506125134535628, + "learning_rate": 5.799794454467668e-06, + "loss": 0.06421890258789062, + "step": 27050 + }, + { + "epoch": 0.23393658507060033, + "grad_norm": 6.943569912387333, + "learning_rate": 5.799721269285981e-06, + "loss": 0.27356948852539065, + "step": 27055 + }, + { + "epoch": 0.23397981859214362, + "grad_norm": 27.121574021360882, + "learning_rate": 5.799648071192226e-06, + "loss": 0.40732269287109374, + "step": 27060 + }, + { + "epoch": 0.23402305211368687, + "grad_norm": 1.3978882344938004, + "learning_rate": 5.799574860186742e-06, + "loss": 0.1954986572265625, + "step": 27065 + }, + { + "epoch": 0.23406628563523013, + "grad_norm": 1.8537855398776213, + "learning_rate": 5.799501636269866e-06, + "loss": 0.0878265380859375, + "step": 27070 + }, + { + "epoch": 0.2341095191567734, + "grad_norm": 0.9227655806305571, + "learning_rate": 5.799428399441936e-06, + "loss": 0.3048248291015625, + "step": 27075 + }, + { + "epoch": 0.23415275267831667, + "grad_norm": 0.8766077040085006, + "learning_rate": 5.799355149703289e-06, + "loss": 0.12191619873046874, + "step": 27080 + }, + { + "epoch": 0.23419598619985993, + "grad_norm": 33.51539208468516, + "learning_rate": 5.799281887054264e-06, + "loss": 0.18135337829589843, + "step": 27085 + }, + { + "epoch": 0.23423921972140319, + "grad_norm": 0.8733842893265069, + "learning_rate": 5.799208611495198e-06, + "loss": 0.2880523681640625, + "step": 27090 + }, + { + "epoch": 0.23428245324294644, + "grad_norm": 42.68099319140069, + "learning_rate": 5.7991353230264296e-06, + "loss": 0.58939208984375, + "step": 27095 + }, + { + "epoch": 0.23432568676448973, + "grad_norm": 13.231678278492488, + "learning_rate": 5.7990620216482964e-06, + "loss": 0.15171661376953124, + "step": 27100 + }, + { + "epoch": 0.23436892028603298, + "grad_norm": 7.496386755828232, + "learning_rate": 5.7989887073611356e-06, + "loss": 0.074493408203125, + "step": 27105 + }, + { + "epoch": 0.23441215380757624, + "grad_norm": 5.605083416465063, + "learning_rate": 5.798915380165287e-06, + "loss": 0.5461761474609375, + "step": 27110 + }, + { + "epoch": 0.2344553873291195, + "grad_norm": 1.0857148291847527, + "learning_rate": 5.798842040061088e-06, + "loss": 0.20149459838867187, + "step": 27115 + }, + { + "epoch": 0.23449862085066278, + "grad_norm": 3.583094496552174, + "learning_rate": 5.798768687048877e-06, + "loss": 0.04519729614257813, + "step": 27120 + }, + { + "epoch": 0.23454185437220604, + "grad_norm": 1.7700232944049414, + "learning_rate": 5.798695321128991e-06, + "loss": 0.19761810302734376, + "step": 27125 + }, + { + "epoch": 0.2345850878937493, + "grad_norm": 9.200266237849048, + "learning_rate": 5.798621942301771e-06, + "loss": 0.069598388671875, + "step": 27130 + }, + { + "epoch": 0.23462832141529255, + "grad_norm": 49.40459737362597, + "learning_rate": 5.7985485505675525e-06, + "loss": 0.3207275390625, + "step": 27135 + }, + { + "epoch": 0.23467155493683584, + "grad_norm": 1.0590757324778735, + "learning_rate": 5.7984751459266765e-06, + "loss": 0.06735916137695312, + "step": 27140 + }, + { + "epoch": 0.2347147884583791, + "grad_norm": 0.4667230765848944, + "learning_rate": 5.798401728379479e-06, + "loss": 0.1169189453125, + "step": 27145 + }, + { + "epoch": 0.23475802197992235, + "grad_norm": 39.70126551829187, + "learning_rate": 5.798328297926301e-06, + "loss": 0.38967132568359375, + "step": 27150 + }, + { + "epoch": 0.2348012555014656, + "grad_norm": 13.53411084103601, + "learning_rate": 5.79825485456748e-06, + "loss": 0.16085433959960938, + "step": 27155 + }, + { + "epoch": 0.2348444890230089, + "grad_norm": 41.98373137263648, + "learning_rate": 5.798181398303355e-06, + "loss": 0.335723876953125, + "step": 27160 + }, + { + "epoch": 0.23488772254455215, + "grad_norm": 3.3658943318540704, + "learning_rate": 5.798107929134265e-06, + "loss": 0.40557403564453126, + "step": 27165 + }, + { + "epoch": 0.2349309560660954, + "grad_norm": 8.271813737890074, + "learning_rate": 5.798034447060548e-06, + "loss": 0.050537109375, + "step": 27170 + }, + { + "epoch": 0.23497418958763866, + "grad_norm": 2.3705294002318418, + "learning_rate": 5.7979609520825425e-06, + "loss": 0.060181045532226564, + "step": 27175 + }, + { + "epoch": 0.23501742310918194, + "grad_norm": 49.08617887816338, + "learning_rate": 5.79788744420059e-06, + "loss": 0.20013427734375, + "step": 27180 + }, + { + "epoch": 0.2350606566307252, + "grad_norm": 17.59170976962075, + "learning_rate": 5.797813923415027e-06, + "loss": 0.39136962890625, + "step": 27185 + }, + { + "epoch": 0.23510389015226846, + "grad_norm": 8.556475416234807, + "learning_rate": 5.797740389726193e-06, + "loss": 0.150323486328125, + "step": 27190 + }, + { + "epoch": 0.23514712367381171, + "grad_norm": 10.307112087177016, + "learning_rate": 5.797666843134429e-06, + "loss": 0.139532470703125, + "step": 27195 + }, + { + "epoch": 0.235190357195355, + "grad_norm": 6.204055565486545, + "learning_rate": 5.797593283640072e-06, + "loss": 0.0810089111328125, + "step": 27200 + }, + { + "epoch": 0.23523359071689826, + "grad_norm": 51.33441436393407, + "learning_rate": 5.797519711243461e-06, + "loss": 0.1138916015625, + "step": 27205 + }, + { + "epoch": 0.2352768242384415, + "grad_norm": 18.413618157579307, + "learning_rate": 5.7974461259449365e-06, + "loss": 0.18305206298828125, + "step": 27210 + }, + { + "epoch": 0.23532005775998477, + "grad_norm": 1.4865028658115056, + "learning_rate": 5.797372527744838e-06, + "loss": 0.07625617980957031, + "step": 27215 + }, + { + "epoch": 0.23536329128152805, + "grad_norm": 22.257666840392222, + "learning_rate": 5.797298916643506e-06, + "loss": 0.12912979125976562, + "step": 27220 + }, + { + "epoch": 0.2354065248030713, + "grad_norm": 6.34394772115398, + "learning_rate": 5.797225292641277e-06, + "loss": 0.0852203369140625, + "step": 27225 + }, + { + "epoch": 0.23544975832461457, + "grad_norm": 23.999589968075732, + "learning_rate": 5.797151655738492e-06, + "loss": 0.50579833984375, + "step": 27230 + }, + { + "epoch": 0.23549299184615782, + "grad_norm": 6.917630969880278, + "learning_rate": 5.797078005935491e-06, + "loss": 0.04025344848632813, + "step": 27235 + }, + { + "epoch": 0.2355362253677011, + "grad_norm": 12.679218800070938, + "learning_rate": 5.797004343232614e-06, + "loss": 0.09723663330078125, + "step": 27240 + }, + { + "epoch": 0.23557945888924436, + "grad_norm": 4.449631959060471, + "learning_rate": 5.796930667630199e-06, + "loss": 0.377667236328125, + "step": 27245 + }, + { + "epoch": 0.23562269241078762, + "grad_norm": 16.743762663418106, + "learning_rate": 5.796856979128588e-06, + "loss": 0.097308349609375, + "step": 27250 + }, + { + "epoch": 0.2356659259323309, + "grad_norm": 2.442512383622368, + "learning_rate": 5.796783277728119e-06, + "loss": 0.12164878845214844, + "step": 27255 + }, + { + "epoch": 0.23570915945387416, + "grad_norm": 3.0430584665418854, + "learning_rate": 5.796709563429133e-06, + "loss": 0.1955474853515625, + "step": 27260 + }, + { + "epoch": 0.23575239297541742, + "grad_norm": 2.795824177036621, + "learning_rate": 5.79663583623197e-06, + "loss": 0.04157562255859375, + "step": 27265 + }, + { + "epoch": 0.23579562649696068, + "grad_norm": 2.0678099711175344, + "learning_rate": 5.796562096136969e-06, + "loss": 0.1579833984375, + "step": 27270 + }, + { + "epoch": 0.23583886001850396, + "grad_norm": 15.09105499852598, + "learning_rate": 5.79648834314447e-06, + "loss": 0.08837661743164063, + "step": 27275 + }, + { + "epoch": 0.23588209354004722, + "grad_norm": 3.9559627786875975, + "learning_rate": 5.796414577254815e-06, + "loss": 0.3535888671875, + "step": 27280 + }, + { + "epoch": 0.23592532706159047, + "grad_norm": 16.076496280743516, + "learning_rate": 5.796340798468343e-06, + "loss": 0.20304718017578124, + "step": 27285 + }, + { + "epoch": 0.23596856058313373, + "grad_norm": 2.2297192644809822, + "learning_rate": 5.796267006785395e-06, + "loss": 0.1465158462524414, + "step": 27290 + }, + { + "epoch": 0.23601179410467701, + "grad_norm": 36.306157380730056, + "learning_rate": 5.7961932022063095e-06, + "loss": 0.21427154541015625, + "step": 27295 + }, + { + "epoch": 0.23605502762622027, + "grad_norm": 44.37327522070572, + "learning_rate": 5.796119384731428e-06, + "loss": 0.413470458984375, + "step": 27300 + }, + { + "epoch": 0.23609826114776353, + "grad_norm": 1.4787495629733478, + "learning_rate": 5.796045554361091e-06, + "loss": 0.13981781005859376, + "step": 27305 + }, + { + "epoch": 0.23614149466930678, + "grad_norm": 2.94151314726803, + "learning_rate": 5.79597171109564e-06, + "loss": 0.160845947265625, + "step": 27310 + }, + { + "epoch": 0.23618472819085007, + "grad_norm": 16.803262581310165, + "learning_rate": 5.795897854935413e-06, + "loss": 0.35254974365234376, + "step": 27315 + }, + { + "epoch": 0.23622796171239333, + "grad_norm": 6.19081155979669, + "learning_rate": 5.795823985880754e-06, + "loss": 0.13428573608398436, + "step": 27320 + }, + { + "epoch": 0.23627119523393658, + "grad_norm": 32.799418828351534, + "learning_rate": 5.795750103932e-06, + "loss": 0.260528564453125, + "step": 27325 + }, + { + "epoch": 0.23631442875547984, + "grad_norm": 5.770328750089305, + "learning_rate": 5.795676209089494e-06, + "loss": 0.2587165832519531, + "step": 27330 + }, + { + "epoch": 0.23635766227702312, + "grad_norm": 38.58836581716408, + "learning_rate": 5.795602301353577e-06, + "loss": 0.63912353515625, + "step": 27335 + }, + { + "epoch": 0.23640089579856638, + "grad_norm": 20.130829243047287, + "learning_rate": 5.795528380724588e-06, + "loss": 0.2598388671875, + "step": 27340 + }, + { + "epoch": 0.23644412932010964, + "grad_norm": 9.51112139698787, + "learning_rate": 5.795454447202871e-06, + "loss": 0.22078781127929686, + "step": 27345 + }, + { + "epoch": 0.2364873628416529, + "grad_norm": 0.8399179364037195, + "learning_rate": 5.7953805007887635e-06, + "loss": 0.77889404296875, + "step": 27350 + }, + { + "epoch": 0.23653059636319618, + "grad_norm": 33.61345058485683, + "learning_rate": 5.79530654148261e-06, + "loss": 0.2940681457519531, + "step": 27355 + }, + { + "epoch": 0.23657382988473943, + "grad_norm": 5.576345827035105, + "learning_rate": 5.795232569284748e-06, + "loss": 0.0970550537109375, + "step": 27360 + }, + { + "epoch": 0.2366170634062827, + "grad_norm": 28.494758648825318, + "learning_rate": 5.795158584195521e-06, + "loss": 0.16560440063476561, + "step": 27365 + }, + { + "epoch": 0.23666029692782595, + "grad_norm": 12.818640593861627, + "learning_rate": 5.79508458621527e-06, + "loss": 0.47562446594238283, + "step": 27370 + }, + { + "epoch": 0.23670353044936923, + "grad_norm": 28.354229582470275, + "learning_rate": 5.7950105753443345e-06, + "loss": 0.225518798828125, + "step": 27375 + }, + { + "epoch": 0.2367467639709125, + "grad_norm": 1.389039544281789, + "learning_rate": 5.794936551583058e-06, + "loss": 0.11701278686523438, + "step": 27380 + }, + { + "epoch": 0.23678999749245574, + "grad_norm": 4.219323550519198, + "learning_rate": 5.794862514931781e-06, + "loss": 0.1369476318359375, + "step": 27385 + }, + { + "epoch": 0.236833231013999, + "grad_norm": 19.756943715581464, + "learning_rate": 5.794788465390845e-06, + "loss": 0.2743316650390625, + "step": 27390 + }, + { + "epoch": 0.23687646453554229, + "grad_norm": 11.985142030914176, + "learning_rate": 5.794714402960591e-06, + "loss": 0.072503662109375, + "step": 27395 + }, + { + "epoch": 0.23691969805708554, + "grad_norm": 11.983495432788633, + "learning_rate": 5.794640327641362e-06, + "loss": 0.14893646240234376, + "step": 27400 + }, + { + "epoch": 0.2369629315786288, + "grad_norm": 6.977642524612531, + "learning_rate": 5.794566239433499e-06, + "loss": 0.20125274658203124, + "step": 27405 + }, + { + "epoch": 0.23700616510017206, + "grad_norm": 18.785858357763313, + "learning_rate": 5.794492138337343e-06, + "loss": 0.1669586181640625, + "step": 27410 + }, + { + "epoch": 0.23704939862171534, + "grad_norm": 3.1633970839619217, + "learning_rate": 5.794418024353236e-06, + "loss": 0.08565444946289062, + "step": 27415 + }, + { + "epoch": 0.2370926321432586, + "grad_norm": 0.8391013155719648, + "learning_rate": 5.79434389748152e-06, + "loss": 0.22702789306640625, + "step": 27420 + }, + { + "epoch": 0.23713586566480185, + "grad_norm": 11.126230420414313, + "learning_rate": 5.794269757722537e-06, + "loss": 0.19439697265625, + "step": 27425 + }, + { + "epoch": 0.23717909918634514, + "grad_norm": 5.511553353490478, + "learning_rate": 5.794195605076629e-06, + "loss": 0.1157958984375, + "step": 27430 + }, + { + "epoch": 0.2372223327078884, + "grad_norm": 11.957804955095037, + "learning_rate": 5.794121439544138e-06, + "loss": 0.6569313049316406, + "step": 27435 + }, + { + "epoch": 0.23726556622943165, + "grad_norm": 36.38836277096856, + "learning_rate": 5.794047261125405e-06, + "loss": 0.15310125350952147, + "step": 27440 + }, + { + "epoch": 0.2373087997509749, + "grad_norm": 2.2750022254341116, + "learning_rate": 5.793973069820774e-06, + "loss": 0.3186065673828125, + "step": 27445 + }, + { + "epoch": 0.2373520332725182, + "grad_norm": 27.11903840266083, + "learning_rate": 5.793898865630585e-06, + "loss": 0.4097412109375, + "step": 27450 + }, + { + "epoch": 0.23739526679406145, + "grad_norm": 3.3797807059601017, + "learning_rate": 5.793824648555181e-06, + "loss": 0.087298583984375, + "step": 27455 + }, + { + "epoch": 0.2374385003156047, + "grad_norm": 7.821546236213164, + "learning_rate": 5.793750418594905e-06, + "loss": 0.1476726531982422, + "step": 27460 + }, + { + "epoch": 0.23748173383714796, + "grad_norm": 11.420851330825371, + "learning_rate": 5.7936761757501e-06, + "loss": 0.4818023681640625, + "step": 27465 + }, + { + "epoch": 0.23752496735869125, + "grad_norm": 4.879794015416048, + "learning_rate": 5.7936019200211064e-06, + "loss": 0.38110885620117185, + "step": 27470 + }, + { + "epoch": 0.2375682008802345, + "grad_norm": 20.755594684666278, + "learning_rate": 5.793527651408268e-06, + "loss": 0.08828659057617187, + "step": 27475 + }, + { + "epoch": 0.23761143440177776, + "grad_norm": 2.0318154711010656, + "learning_rate": 5.793453369911926e-06, + "loss": 0.156201171875, + "step": 27480 + }, + { + "epoch": 0.23765466792332102, + "grad_norm": 25.041561007458085, + "learning_rate": 5.7933790755324245e-06, + "loss": 0.1489410400390625, + "step": 27485 + }, + { + "epoch": 0.2376979014448643, + "grad_norm": 15.274278257039777, + "learning_rate": 5.793304768270106e-06, + "loss": 0.33431396484375, + "step": 27490 + }, + { + "epoch": 0.23774113496640756, + "grad_norm": 0.12572069253895085, + "learning_rate": 5.793230448125312e-06, + "loss": 0.321661376953125, + "step": 27495 + }, + { + "epoch": 0.23778436848795081, + "grad_norm": 1.3695849692796236, + "learning_rate": 5.793156115098386e-06, + "loss": 0.42546768188476564, + "step": 27500 + }, + { + "epoch": 0.23782760200949407, + "grad_norm": 4.741925692028939, + "learning_rate": 5.793081769189672e-06, + "loss": 0.066387939453125, + "step": 27505 + }, + { + "epoch": 0.23787083553103736, + "grad_norm": 13.114310330889534, + "learning_rate": 5.7930074103995105e-06, + "loss": 0.318267822265625, + "step": 27510 + }, + { + "epoch": 0.2379140690525806, + "grad_norm": 14.559393655615613, + "learning_rate": 5.792933038728246e-06, + "loss": 0.081524658203125, + "step": 27515 + }, + { + "epoch": 0.23795730257412387, + "grad_norm": 0.47190414084102444, + "learning_rate": 5.79285865417622e-06, + "loss": 0.14945831298828124, + "step": 27520 + }, + { + "epoch": 0.23800053609566713, + "grad_norm": 43.928524894829025, + "learning_rate": 5.792784256743777e-06, + "loss": 0.257464599609375, + "step": 27525 + }, + { + "epoch": 0.2380437696172104, + "grad_norm": 2.1493497669597867, + "learning_rate": 5.792709846431262e-06, + "loss": 0.187872314453125, + "step": 27530 + }, + { + "epoch": 0.23808700313875367, + "grad_norm": 6.082328295365382, + "learning_rate": 5.792635423239014e-06, + "loss": 0.21659698486328124, + "step": 27535 + }, + { + "epoch": 0.23813023666029692, + "grad_norm": 29.20225908789484, + "learning_rate": 5.792560987167378e-06, + "loss": 0.5309326171875, + "step": 27540 + }, + { + "epoch": 0.23817347018184018, + "grad_norm": 45.29519526067391, + "learning_rate": 5.792486538216698e-06, + "loss": 0.1304841995239258, + "step": 27545 + }, + { + "epoch": 0.23821670370338346, + "grad_norm": 9.297146767508389, + "learning_rate": 5.792412076387317e-06, + "loss": 0.3725555419921875, + "step": 27550 + }, + { + "epoch": 0.23825993722492672, + "grad_norm": 11.077047843922756, + "learning_rate": 5.792337601679579e-06, + "loss": 0.1405517578125, + "step": 27555 + }, + { + "epoch": 0.23830317074646998, + "grad_norm": 14.69217997220465, + "learning_rate": 5.792263114093825e-06, + "loss": 0.3445556640625, + "step": 27560 + }, + { + "epoch": 0.23834640426801323, + "grad_norm": 0.9932224282025441, + "learning_rate": 5.792188613630401e-06, + "loss": 0.071484375, + "step": 27565 + }, + { + "epoch": 0.23838963778955652, + "grad_norm": 4.404548844918125, + "learning_rate": 5.7921141002896504e-06, + "loss": 0.10904693603515625, + "step": 27570 + }, + { + "epoch": 0.23843287131109978, + "grad_norm": 0.4527893343705313, + "learning_rate": 5.792039574071916e-06, + "loss": 0.21516075134277343, + "step": 27575 + }, + { + "epoch": 0.23847610483264303, + "grad_norm": 4.5306223180522, + "learning_rate": 5.791965034977542e-06, + "loss": 0.07050704956054688, + "step": 27580 + }, + { + "epoch": 0.2385193383541863, + "grad_norm": 5.661839844569883, + "learning_rate": 5.791890483006871e-06, + "loss": 0.138397216796875, + "step": 27585 + }, + { + "epoch": 0.23856257187572957, + "grad_norm": 61.540036127199514, + "learning_rate": 5.791815918160248e-06, + "loss": 0.5327926635742187, + "step": 27590 + }, + { + "epoch": 0.23860580539727283, + "grad_norm": 16.54081711526809, + "learning_rate": 5.791741340438017e-06, + "loss": 0.272601318359375, + "step": 27595 + }, + { + "epoch": 0.2386490389188161, + "grad_norm": 10.62922673726957, + "learning_rate": 5.791666749840522e-06, + "loss": 0.29172210693359374, + "step": 27600 + }, + { + "epoch": 0.23869227244035934, + "grad_norm": 26.68320220482701, + "learning_rate": 5.791592146368106e-06, + "loss": 0.22366943359375, + "step": 27605 + }, + { + "epoch": 0.23873550596190263, + "grad_norm": 4.4273700273324, + "learning_rate": 5.791517530021114e-06, + "loss": 0.11707763671875, + "step": 27610 + }, + { + "epoch": 0.23877873948344588, + "grad_norm": 9.277961484326823, + "learning_rate": 5.79144290079989e-06, + "loss": 0.09337272644042968, + "step": 27615 + }, + { + "epoch": 0.23882197300498914, + "grad_norm": 17.963896450853778, + "learning_rate": 5.791368258704778e-06, + "loss": 0.27783203125, + "step": 27620 + }, + { + "epoch": 0.23886520652653243, + "grad_norm": 12.162281313746183, + "learning_rate": 5.791293603736122e-06, + "loss": 0.1291107177734375, + "step": 27625 + }, + { + "epoch": 0.23890844004807568, + "grad_norm": 5.205140295811351, + "learning_rate": 5.791218935894266e-06, + "loss": 0.100018310546875, + "step": 27630 + }, + { + "epoch": 0.23895167356961894, + "grad_norm": 28.996550598337844, + "learning_rate": 5.791144255179555e-06, + "loss": 0.11572532653808594, + "step": 27635 + }, + { + "epoch": 0.2389949070911622, + "grad_norm": 25.128610217764123, + "learning_rate": 5.791069561592335e-06, + "loss": 0.2008270263671875, + "step": 27640 + }, + { + "epoch": 0.23903814061270548, + "grad_norm": 2.593985805862072, + "learning_rate": 5.790994855132947e-06, + "loss": 0.0434783935546875, + "step": 27645 + }, + { + "epoch": 0.23908137413424874, + "grad_norm": 26.783035676218535, + "learning_rate": 5.790920135801738e-06, + "loss": 0.2851959228515625, + "step": 27650 + }, + { + "epoch": 0.239124607655792, + "grad_norm": 1.307137479535294, + "learning_rate": 5.7908454035990515e-06, + "loss": 0.17623291015625, + "step": 27655 + }, + { + "epoch": 0.23916784117733525, + "grad_norm": 7.484001733376172, + "learning_rate": 5.790770658525233e-06, + "loss": 0.06389007568359376, + "step": 27660 + }, + { + "epoch": 0.23921107469887853, + "grad_norm": 13.426167014899393, + "learning_rate": 5.790695900580627e-06, + "loss": 0.13908729553222657, + "step": 27665 + }, + { + "epoch": 0.2392543082204218, + "grad_norm": 7.101301425251592, + "learning_rate": 5.790621129765578e-06, + "loss": 0.134661865234375, + "step": 27670 + }, + { + "epoch": 0.23929754174196505, + "grad_norm": 36.67081788931647, + "learning_rate": 5.790546346080431e-06, + "loss": 0.221575927734375, + "step": 27675 + }, + { + "epoch": 0.2393407752635083, + "grad_norm": 44.370328911741495, + "learning_rate": 5.7904715495255305e-06, + "loss": 0.5073715209960937, + "step": 27680 + }, + { + "epoch": 0.2393840087850516, + "grad_norm": 18.07857084191915, + "learning_rate": 5.790396740101221e-06, + "loss": 0.1486175537109375, + "step": 27685 + }, + { + "epoch": 0.23942724230659485, + "grad_norm": 67.97677411948564, + "learning_rate": 5.79032191780785e-06, + "loss": 0.198681640625, + "step": 27690 + }, + { + "epoch": 0.2394704758281381, + "grad_norm": 2.5370166659274025, + "learning_rate": 5.79024708264576e-06, + "loss": 0.146661376953125, + "step": 27695 + }, + { + "epoch": 0.23951370934968136, + "grad_norm": 10.712435911813618, + "learning_rate": 5.790172234615297e-06, + "loss": 0.1270904541015625, + "step": 27700 + }, + { + "epoch": 0.23955694287122464, + "grad_norm": 1.0695429838776715, + "learning_rate": 5.790097373716806e-06, + "loss": 0.19345550537109374, + "step": 27705 + }, + { + "epoch": 0.2396001763927679, + "grad_norm": 5.224096498542562, + "learning_rate": 5.7900224999506336e-06, + "loss": 0.39610748291015624, + "step": 27710 + }, + { + "epoch": 0.23964340991431116, + "grad_norm": 42.6851148615918, + "learning_rate": 5.789947613317123e-06, + "loss": 0.200054931640625, + "step": 27715 + }, + { + "epoch": 0.2396866434358544, + "grad_norm": 0.5559092606574686, + "learning_rate": 5.789872713816621e-06, + "loss": 0.050071334838867186, + "step": 27720 + }, + { + "epoch": 0.2397298769573977, + "grad_norm": 12.435411033231565, + "learning_rate": 5.789797801449472e-06, + "loss": 0.14142532348632814, + "step": 27725 + }, + { + "epoch": 0.23977311047894095, + "grad_norm": 0.42826515118354364, + "learning_rate": 5.789722876216022e-06, + "loss": 0.101531982421875, + "step": 27730 + }, + { + "epoch": 0.2398163440004842, + "grad_norm": 9.242273007046276, + "learning_rate": 5.789647938116617e-06, + "loss": 0.26283950805664064, + "step": 27735 + }, + { + "epoch": 0.23985957752202747, + "grad_norm": 6.175479832811218, + "learning_rate": 5.789572987151603e-06, + "loss": 0.08931884765625, + "step": 27740 + }, + { + "epoch": 0.23990281104357075, + "grad_norm": 6.049073696134327, + "learning_rate": 5.789498023321323e-06, + "loss": 0.08547134399414062, + "step": 27745 + }, + { + "epoch": 0.239946044565114, + "grad_norm": 0.7075348411778318, + "learning_rate": 5.789423046626126e-06, + "loss": 0.11838035583496094, + "step": 27750 + }, + { + "epoch": 0.23998927808665727, + "grad_norm": 0.5125917417129183, + "learning_rate": 5.789348057066356e-06, + "loss": 0.2038848876953125, + "step": 27755 + }, + { + "epoch": 0.24003251160820052, + "grad_norm": 0.7847258187189897, + "learning_rate": 5.789273054642359e-06, + "loss": 0.118670654296875, + "step": 27760 + }, + { + "epoch": 0.2400757451297438, + "grad_norm": 6.395485751725717, + "learning_rate": 5.789198039354481e-06, + "loss": 0.1298248291015625, + "step": 27765 + }, + { + "epoch": 0.24011897865128706, + "grad_norm": 6.4791776467407995, + "learning_rate": 5.7891230112030684e-06, + "loss": 0.07385787963867188, + "step": 27770 + }, + { + "epoch": 0.24016221217283032, + "grad_norm": 51.68441964892459, + "learning_rate": 5.789047970188467e-06, + "loss": 0.196728515625, + "step": 27775 + }, + { + "epoch": 0.24020544569437358, + "grad_norm": 7.219247701670627, + "learning_rate": 5.7889729163110224e-06, + "loss": 0.13472681045532225, + "step": 27780 + }, + { + "epoch": 0.24024867921591686, + "grad_norm": 2.5933108155787967, + "learning_rate": 5.788897849571082e-06, + "loss": 0.09821548461914062, + "step": 27785 + }, + { + "epoch": 0.24029191273746012, + "grad_norm": 6.521820418658088, + "learning_rate": 5.788822769968991e-06, + "loss": 0.4197113037109375, + "step": 27790 + }, + { + "epoch": 0.24033514625900337, + "grad_norm": 24.103092015171637, + "learning_rate": 5.788747677505094e-06, + "loss": 0.1905181884765625, + "step": 27795 + }, + { + "epoch": 0.24037837978054666, + "grad_norm": 0.37771850643446475, + "learning_rate": 5.7886725721797414e-06, + "loss": 0.08507347106933594, + "step": 27800 + }, + { + "epoch": 0.24042161330208991, + "grad_norm": 33.84770552928257, + "learning_rate": 5.7885974539932765e-06, + "loss": 0.252581787109375, + "step": 27805 + }, + { + "epoch": 0.24046484682363317, + "grad_norm": 2.9327853378477093, + "learning_rate": 5.788522322946046e-06, + "loss": 0.0443878173828125, + "step": 27810 + }, + { + "epoch": 0.24050808034517643, + "grad_norm": 8.389591075167365, + "learning_rate": 5.788447179038398e-06, + "loss": 0.10768070220947265, + "step": 27815 + }, + { + "epoch": 0.2405513138667197, + "grad_norm": 6.484242946876001, + "learning_rate": 5.788372022270677e-06, + "loss": 0.2850456237792969, + "step": 27820 + }, + { + "epoch": 0.24059454738826297, + "grad_norm": 30.19589946733064, + "learning_rate": 5.788296852643232e-06, + "loss": 0.2813323974609375, + "step": 27825 + }, + { + "epoch": 0.24063778090980623, + "grad_norm": 76.41448644385858, + "learning_rate": 5.788221670156407e-06, + "loss": 0.24568328857421876, + "step": 27830 + }, + { + "epoch": 0.24068101443134948, + "grad_norm": 0.7126909295288918, + "learning_rate": 5.78814647481055e-06, + "loss": 0.11068954467773437, + "step": 27835 + }, + { + "epoch": 0.24072424795289277, + "grad_norm": 2.9719922103870067, + "learning_rate": 5.788071266606008e-06, + "loss": 0.0215606689453125, + "step": 27840 + }, + { + "epoch": 0.24076748147443602, + "grad_norm": 9.397286513866852, + "learning_rate": 5.787996045543128e-06, + "loss": 0.3946525573730469, + "step": 27845 + }, + { + "epoch": 0.24081071499597928, + "grad_norm": 32.26057564343752, + "learning_rate": 5.787920811622256e-06, + "loss": 0.20422897338867188, + "step": 27850 + }, + { + "epoch": 0.24085394851752254, + "grad_norm": 10.056129205296978, + "learning_rate": 5.787845564843741e-06, + "loss": 0.11298828125, + "step": 27855 + }, + { + "epoch": 0.24089718203906582, + "grad_norm": 9.66989543935112, + "learning_rate": 5.787770305207928e-06, + "loss": 0.21044921875, + "step": 27860 + }, + { + "epoch": 0.24094041556060908, + "grad_norm": 39.76812211994726, + "learning_rate": 5.787695032715164e-06, + "loss": 0.33517303466796877, + "step": 27865 + }, + { + "epoch": 0.24098364908215233, + "grad_norm": 9.638926377163953, + "learning_rate": 5.787619747365797e-06, + "loss": 0.10783424377441406, + "step": 27870 + }, + { + "epoch": 0.2410268826036956, + "grad_norm": 6.751347730918564, + "learning_rate": 5.787544449160174e-06, + "loss": 0.3219429016113281, + "step": 27875 + }, + { + "epoch": 0.24107011612523888, + "grad_norm": 1.8043601408137528, + "learning_rate": 5.7874691380986435e-06, + "loss": 0.07328147888183593, + "step": 27880 + }, + { + "epoch": 0.24111334964678213, + "grad_norm": 15.156194422742999, + "learning_rate": 5.78739381418155e-06, + "loss": 0.08429012298583985, + "step": 27885 + }, + { + "epoch": 0.2411565831683254, + "grad_norm": 4.211901547873107, + "learning_rate": 5.787318477409243e-06, + "loss": 0.22133026123046876, + "step": 27890 + }, + { + "epoch": 0.24119981668986865, + "grad_norm": 13.319141300627873, + "learning_rate": 5.7872431277820694e-06, + "loss": 0.1714996337890625, + "step": 27895 + }, + { + "epoch": 0.24124305021141193, + "grad_norm": 6.569955101000726, + "learning_rate": 5.7871677653003775e-06, + "loss": 0.24916610717773438, + "step": 27900 + }, + { + "epoch": 0.2412862837329552, + "grad_norm": 5.636664623829065, + "learning_rate": 5.787092389964513e-06, + "loss": 0.1895233154296875, + "step": 27905 + }, + { + "epoch": 0.24132951725449844, + "grad_norm": 0.04023520737530255, + "learning_rate": 5.787017001774826e-06, + "loss": 0.21677474975585936, + "step": 27910 + }, + { + "epoch": 0.2413727507760417, + "grad_norm": 2.3187651377553724, + "learning_rate": 5.7869416007316626e-06, + "loss": 0.2613193511962891, + "step": 27915 + }, + { + "epoch": 0.24141598429758498, + "grad_norm": 2.358676006649074, + "learning_rate": 5.78686618683537e-06, + "loss": 0.09232215881347657, + "step": 27920 + }, + { + "epoch": 0.24145921781912824, + "grad_norm": 77.1075867915545, + "learning_rate": 5.786790760086297e-06, + "loss": 0.53814697265625, + "step": 27925 + }, + { + "epoch": 0.2415024513406715, + "grad_norm": 17.948227801969068, + "learning_rate": 5.786715320484792e-06, + "loss": 0.165228271484375, + "step": 27930 + }, + { + "epoch": 0.24154568486221475, + "grad_norm": 2.1317422884282085, + "learning_rate": 5.786639868031201e-06, + "loss": 0.2148876190185547, + "step": 27935 + }, + { + "epoch": 0.24158891838375804, + "grad_norm": 2.3500570193290726, + "learning_rate": 5.786564402725874e-06, + "loss": 0.12383880615234374, + "step": 27940 + }, + { + "epoch": 0.2416321519053013, + "grad_norm": 10.701379122660827, + "learning_rate": 5.7864889245691575e-06, + "loss": 0.19169921875, + "step": 27945 + }, + { + "epoch": 0.24167538542684455, + "grad_norm": 3.8559036653776224, + "learning_rate": 5.786413433561402e-06, + "loss": 0.28248291015625, + "step": 27950 + }, + { + "epoch": 0.2417186189483878, + "grad_norm": 8.8970072123379, + "learning_rate": 5.7863379297029525e-06, + "loss": 0.441455078125, + "step": 27955 + }, + { + "epoch": 0.2417618524699311, + "grad_norm": 6.328668293977184, + "learning_rate": 5.786262412994158e-06, + "loss": 0.2012725830078125, + "step": 27960 + }, + { + "epoch": 0.24180508599147435, + "grad_norm": 6.370118238530931, + "learning_rate": 5.786186883435369e-06, + "loss": 0.08256759643554687, + "step": 27965 + }, + { + "epoch": 0.2418483195130176, + "grad_norm": 14.011438260613614, + "learning_rate": 5.786111341026931e-06, + "loss": 0.11995124816894531, + "step": 27970 + }, + { + "epoch": 0.24189155303456086, + "grad_norm": 8.723916340094476, + "learning_rate": 5.786035785769195e-06, + "loss": 0.1389373779296875, + "step": 27975 + }, + { + "epoch": 0.24193478655610415, + "grad_norm": 6.7870040855249965, + "learning_rate": 5.785960217662508e-06, + "loss": 0.1033172607421875, + "step": 27980 + }, + { + "epoch": 0.2419780200776474, + "grad_norm": 1.305531699922596, + "learning_rate": 5.785884636707217e-06, + "loss": 0.17755355834960937, + "step": 27985 + }, + { + "epoch": 0.24202125359919066, + "grad_norm": 31.968626225933004, + "learning_rate": 5.785809042903673e-06, + "loss": 0.22043914794921876, + "step": 27990 + }, + { + "epoch": 0.24206448712073395, + "grad_norm": 2.316330385632129, + "learning_rate": 5.7857334362522245e-06, + "loss": 0.17838134765625, + "step": 27995 + }, + { + "epoch": 0.2421077206422772, + "grad_norm": 5.767614802390153, + "learning_rate": 5.78565781675322e-06, + "loss": 0.5821533203125, + "step": 28000 + }, + { + "epoch": 0.24215095416382046, + "grad_norm": 20.78469136655347, + "learning_rate": 5.785582184407007e-06, + "loss": 0.08027496337890624, + "step": 28005 + }, + { + "epoch": 0.24219418768536372, + "grad_norm": 13.735244236328946, + "learning_rate": 5.785506539213935e-06, + "loss": 0.1500885009765625, + "step": 28010 + }, + { + "epoch": 0.242237421206907, + "grad_norm": 1.624971483156952, + "learning_rate": 5.785430881174352e-06, + "loss": 0.2360504150390625, + "step": 28015 + }, + { + "epoch": 0.24228065472845026, + "grad_norm": 1.568066633853273, + "learning_rate": 5.785355210288609e-06, + "loss": 0.1697784423828125, + "step": 28020 + }, + { + "epoch": 0.2423238882499935, + "grad_norm": 55.288800543511165, + "learning_rate": 5.7852795265570546e-06, + "loss": 0.20137176513671876, + "step": 28025 + }, + { + "epoch": 0.24236712177153677, + "grad_norm": 1.9654016941091363, + "learning_rate": 5.785203829980036e-06, + "loss": 0.2187408447265625, + "step": 28030 + }, + { + "epoch": 0.24241035529308005, + "grad_norm": 15.763631032110176, + "learning_rate": 5.7851281205579044e-06, + "loss": 0.22455253601074218, + "step": 28035 + }, + { + "epoch": 0.2424535888146233, + "grad_norm": 0.7747701047316692, + "learning_rate": 5.785052398291008e-06, + "loss": 0.1952840805053711, + "step": 28040 + }, + { + "epoch": 0.24249682233616657, + "grad_norm": 45.833862652990575, + "learning_rate": 5.784976663179694e-06, + "loss": 0.589862060546875, + "step": 28045 + }, + { + "epoch": 0.24254005585770982, + "grad_norm": 4.820171207055633, + "learning_rate": 5.784900915224316e-06, + "loss": 0.10682373046875, + "step": 28050 + }, + { + "epoch": 0.2425832893792531, + "grad_norm": 6.721030690719297, + "learning_rate": 5.78482515442522e-06, + "loss": 0.21986083984375, + "step": 28055 + }, + { + "epoch": 0.24262652290079637, + "grad_norm": 4.184624464110314, + "learning_rate": 5.7847493807827565e-06, + "loss": 0.10008087158203124, + "step": 28060 + }, + { + "epoch": 0.24266975642233962, + "grad_norm": 35.2245858278329, + "learning_rate": 5.784673594297275e-06, + "loss": 0.394097900390625, + "step": 28065 + }, + { + "epoch": 0.24271298994388288, + "grad_norm": 12.744627353794096, + "learning_rate": 5.784597794969126e-06, + "loss": 0.115283203125, + "step": 28070 + }, + { + "epoch": 0.24275622346542616, + "grad_norm": 0.06950905526147474, + "learning_rate": 5.784521982798657e-06, + "loss": 0.3044921875, + "step": 28075 + }, + { + "epoch": 0.24279945698696942, + "grad_norm": 21.483457898611817, + "learning_rate": 5.784446157786218e-06, + "loss": 0.31693458557128906, + "step": 28080 + }, + { + "epoch": 0.24284269050851268, + "grad_norm": 6.525436009924464, + "learning_rate": 5.78437031993216e-06, + "loss": 0.150140380859375, + "step": 28085 + }, + { + "epoch": 0.24288592403005593, + "grad_norm": 4.824245733788252, + "learning_rate": 5.784294469236833e-06, + "loss": 0.6782638549804687, + "step": 28090 + }, + { + "epoch": 0.24292915755159922, + "grad_norm": 23.254978774670445, + "learning_rate": 5.784218605700585e-06, + "loss": 0.10826339721679687, + "step": 28095 + }, + { + "epoch": 0.24297239107314247, + "grad_norm": 0.9876341876506995, + "learning_rate": 5.784142729323767e-06, + "loss": 0.15198974609375, + "step": 28100 + }, + { + "epoch": 0.24301562459468573, + "grad_norm": 4.9117449028547995, + "learning_rate": 5.78406684010673e-06, + "loss": 0.129925537109375, + "step": 28105 + }, + { + "epoch": 0.243058858116229, + "grad_norm": 5.363444045567891, + "learning_rate": 5.783990938049821e-06, + "loss": 0.12001953125, + "step": 28110 + }, + { + "epoch": 0.24310209163777227, + "grad_norm": 1.0334426308048474, + "learning_rate": 5.7839150231533915e-06, + "loss": 0.113726806640625, + "step": 28115 + }, + { + "epoch": 0.24314532515931553, + "grad_norm": 2.256917143063956, + "learning_rate": 5.783839095417793e-06, + "loss": 0.24288082122802734, + "step": 28120 + }, + { + "epoch": 0.24318855868085879, + "grad_norm": 0.49782322634216175, + "learning_rate": 5.783763154843374e-06, + "loss": 0.405731201171875, + "step": 28125 + }, + { + "epoch": 0.24323179220240204, + "grad_norm": 34.43899208112142, + "learning_rate": 5.783687201430486e-06, + "loss": 0.3185089111328125, + "step": 28130 + }, + { + "epoch": 0.24327502572394533, + "grad_norm": 25.61643506130396, + "learning_rate": 5.783611235179477e-06, + "loss": 0.252880859375, + "step": 28135 + }, + { + "epoch": 0.24331825924548858, + "grad_norm": 8.647794591588061, + "learning_rate": 5.783535256090701e-06, + "loss": 0.09014549255371093, + "step": 28140 + }, + { + "epoch": 0.24336149276703184, + "grad_norm": 27.489678704131016, + "learning_rate": 5.783459264164505e-06, + "loss": 0.48563232421875, + "step": 28145 + }, + { + "epoch": 0.2434047262885751, + "grad_norm": 2.214871766825055, + "learning_rate": 5.783383259401241e-06, + "loss": 0.20881729125976561, + "step": 28150 + }, + { + "epoch": 0.24344795981011838, + "grad_norm": 57.65926533239018, + "learning_rate": 5.78330724180126e-06, + "loss": 0.4527557373046875, + "step": 28155 + }, + { + "epoch": 0.24349119333166164, + "grad_norm": 29.08935252545856, + "learning_rate": 5.783231211364911e-06, + "loss": 0.205511474609375, + "step": 28160 + }, + { + "epoch": 0.2435344268532049, + "grad_norm": 5.43325008025698, + "learning_rate": 5.783155168092547e-06, + "loss": 0.11812744140625, + "step": 28165 + }, + { + "epoch": 0.24357766037474818, + "grad_norm": 0.4913127227946357, + "learning_rate": 5.7830791119845164e-06, + "loss": 0.2281768798828125, + "step": 28170 + }, + { + "epoch": 0.24362089389629143, + "grad_norm": 37.43799789147564, + "learning_rate": 5.7830030430411705e-06, + "loss": 0.4773403167724609, + "step": 28175 + }, + { + "epoch": 0.2436641274178347, + "grad_norm": 1.3174328446746635, + "learning_rate": 5.7829269612628614e-06, + "loss": 0.04790802001953125, + "step": 28180 + }, + { + "epoch": 0.24370736093937795, + "grad_norm": 2.3158837650785102, + "learning_rate": 5.782850866649937e-06, + "loss": 0.272686767578125, + "step": 28185 + }, + { + "epoch": 0.24375059446092123, + "grad_norm": 4.271567319793334, + "learning_rate": 5.782774759202753e-06, + "loss": 0.21544189453125, + "step": 28190 + }, + { + "epoch": 0.2437938279824645, + "grad_norm": 1.4236135470886695, + "learning_rate": 5.782698638921656e-06, + "loss": 0.3369476318359375, + "step": 28195 + }, + { + "epoch": 0.24383706150400775, + "grad_norm": 1.14556034518813, + "learning_rate": 5.782622505807e-06, + "loss": 0.10245819091796875, + "step": 28200 + }, + { + "epoch": 0.243880295025551, + "grad_norm": 17.06687213153166, + "learning_rate": 5.782546359859134e-06, + "loss": 0.14058265686035157, + "step": 28205 + }, + { + "epoch": 0.2439235285470943, + "grad_norm": 34.18117099131114, + "learning_rate": 5.78247020107841e-06, + "loss": 0.4722419738769531, + "step": 28210 + }, + { + "epoch": 0.24396676206863754, + "grad_norm": 2.076851610763787, + "learning_rate": 5.78239402946518e-06, + "loss": 0.07077102661132813, + "step": 28215 + }, + { + "epoch": 0.2440099955901808, + "grad_norm": 0.5230302318934055, + "learning_rate": 5.782317845019793e-06, + "loss": 0.09009170532226562, + "step": 28220 + }, + { + "epoch": 0.24405322911172406, + "grad_norm": 4.724530251557034, + "learning_rate": 5.782241647742604e-06, + "loss": 0.18688087463378905, + "step": 28225 + }, + { + "epoch": 0.24409646263326734, + "grad_norm": 28.653120493814452, + "learning_rate": 5.782165437633961e-06, + "loss": 0.1386444091796875, + "step": 28230 + }, + { + "epoch": 0.2441396961548106, + "grad_norm": 28.58447461707604, + "learning_rate": 5.782089214694217e-06, + "loss": 0.17660865783691407, + "step": 28235 + }, + { + "epoch": 0.24418292967635385, + "grad_norm": 5.533369060906264, + "learning_rate": 5.782012978923724e-06, + "loss": 0.089752197265625, + "step": 28240 + }, + { + "epoch": 0.2442261631978971, + "grad_norm": 0.586282121529776, + "learning_rate": 5.781936730322832e-06, + "loss": 0.0748016357421875, + "step": 28245 + }, + { + "epoch": 0.2442693967194404, + "grad_norm": 100.94370739731782, + "learning_rate": 5.781860468891894e-06, + "loss": 0.327880859375, + "step": 28250 + }, + { + "epoch": 0.24431263024098365, + "grad_norm": 12.45217105418419, + "learning_rate": 5.781784194631263e-06, + "loss": 0.1414783477783203, + "step": 28255 + }, + { + "epoch": 0.2443558637625269, + "grad_norm": 7.983001744528381, + "learning_rate": 5.781707907541286e-06, + "loss": 0.1477750778198242, + "step": 28260 + }, + { + "epoch": 0.24439909728407017, + "grad_norm": 39.88496651291761, + "learning_rate": 5.78163160762232e-06, + "loss": 0.48578720092773436, + "step": 28265 + }, + { + "epoch": 0.24444233080561345, + "grad_norm": 22.348737490090237, + "learning_rate": 5.7815552948747145e-06, + "loss": 0.198858642578125, + "step": 28270 + }, + { + "epoch": 0.2444855643271567, + "grad_norm": 0.34533208759568296, + "learning_rate": 5.781478969298822e-06, + "loss": 0.07511749267578124, + "step": 28275 + }, + { + "epoch": 0.24452879784869996, + "grad_norm": 6.116994826138441, + "learning_rate": 5.781402630894994e-06, + "loss": 0.0425323486328125, + "step": 28280 + }, + { + "epoch": 0.24457203137024322, + "grad_norm": 1.8151655995905054, + "learning_rate": 5.781326279663582e-06, + "loss": 0.1862091064453125, + "step": 28285 + }, + { + "epoch": 0.2446152648917865, + "grad_norm": 0.2036397941809441, + "learning_rate": 5.7812499156049405e-06, + "loss": 0.04553298950195313, + "step": 28290 + }, + { + "epoch": 0.24465849841332976, + "grad_norm": 13.519594462946033, + "learning_rate": 5.781173538719419e-06, + "loss": 0.126165771484375, + "step": 28295 + }, + { + "epoch": 0.24470173193487302, + "grad_norm": 41.700874899453666, + "learning_rate": 5.781097149007371e-06, + "loss": 0.4360630035400391, + "step": 28300 + }, + { + "epoch": 0.24474496545641627, + "grad_norm": 1.3915411292511666, + "learning_rate": 5.7810207464691486e-06, + "loss": 0.1824310302734375, + "step": 28305 + }, + { + "epoch": 0.24478819897795956, + "grad_norm": 3.7765562960238377, + "learning_rate": 5.780944331105105e-06, + "loss": 0.17388954162597656, + "step": 28310 + }, + { + "epoch": 0.24483143249950282, + "grad_norm": 8.740118753496391, + "learning_rate": 5.780867902915592e-06, + "loss": 0.19028244018554688, + "step": 28315 + }, + { + "epoch": 0.24487466602104607, + "grad_norm": 8.334113835514135, + "learning_rate": 5.78079146190096e-06, + "loss": 0.10658187866210937, + "step": 28320 + }, + { + "epoch": 0.24491789954258933, + "grad_norm": 4.448741220305298, + "learning_rate": 5.7807150080615655e-06, + "loss": 0.164996337890625, + "step": 28325 + }, + { + "epoch": 0.2449611330641326, + "grad_norm": 9.916101417235405, + "learning_rate": 5.780638541397759e-06, + "loss": 0.1502349853515625, + "step": 28330 + }, + { + "epoch": 0.24500436658567587, + "grad_norm": 0.3126911056771371, + "learning_rate": 5.780562061909893e-06, + "loss": 0.03165512084960938, + "step": 28335 + }, + { + "epoch": 0.24504760010721913, + "grad_norm": 4.3627072097429105, + "learning_rate": 5.780485569598319e-06, + "loss": 0.25863494873046877, + "step": 28340 + }, + { + "epoch": 0.24509083362876238, + "grad_norm": 29.16274792307327, + "learning_rate": 5.780409064463393e-06, + "loss": 0.3729888916015625, + "step": 28345 + }, + { + "epoch": 0.24513406715030567, + "grad_norm": 1.7379807064322552, + "learning_rate": 5.780332546505465e-06, + "loss": 0.0879425048828125, + "step": 28350 + }, + { + "epoch": 0.24517730067184892, + "grad_norm": 50.81687544461309, + "learning_rate": 5.78025601572489e-06, + "loss": 0.387841796875, + "step": 28355 + }, + { + "epoch": 0.24522053419339218, + "grad_norm": 7.345255582018083, + "learning_rate": 5.780179472122019e-06, + "loss": 0.5519622802734375, + "step": 28360 + }, + { + "epoch": 0.24526376771493547, + "grad_norm": 3.039574578301338, + "learning_rate": 5.780102915697207e-06, + "loss": 0.0930755615234375, + "step": 28365 + }, + { + "epoch": 0.24530700123647872, + "grad_norm": 2.165217415130714, + "learning_rate": 5.7800263464508055e-06, + "loss": 0.4692413330078125, + "step": 28370 + }, + { + "epoch": 0.24535023475802198, + "grad_norm": 5.7975143091398165, + "learning_rate": 5.779949764383167e-06, + "loss": 0.0691192626953125, + "step": 28375 + }, + { + "epoch": 0.24539346827956524, + "grad_norm": 4.065558891163086, + "learning_rate": 5.779873169494648e-06, + "loss": 0.09301376342773438, + "step": 28380 + }, + { + "epoch": 0.24543670180110852, + "grad_norm": 1.0857544861021402, + "learning_rate": 5.779796561785598e-06, + "loss": 0.12666015625, + "step": 28385 + }, + { + "epoch": 0.24547993532265178, + "grad_norm": 5.375752718967346, + "learning_rate": 5.779719941256372e-06, + "loss": 0.06722908020019532, + "step": 28390 + }, + { + "epoch": 0.24552316884419503, + "grad_norm": 6.584204623245593, + "learning_rate": 5.779643307907323e-06, + "loss": 0.16836013793945312, + "step": 28395 + }, + { + "epoch": 0.2455664023657383, + "grad_norm": 64.55948536136859, + "learning_rate": 5.779566661738806e-06, + "loss": 0.11494903564453125, + "step": 28400 + }, + { + "epoch": 0.24560963588728157, + "grad_norm": 16.772973169651, + "learning_rate": 5.779490002751172e-06, + "loss": 0.32027587890625, + "step": 28405 + }, + { + "epoch": 0.24565286940882483, + "grad_norm": 29.293233510991413, + "learning_rate": 5.779413330944776e-06, + "loss": 0.198992919921875, + "step": 28410 + }, + { + "epoch": 0.2456961029303681, + "grad_norm": 2.859636185945771, + "learning_rate": 5.779336646319972e-06, + "loss": 0.4352508544921875, + "step": 28415 + }, + { + "epoch": 0.24573933645191134, + "grad_norm": 7.118951238441063, + "learning_rate": 5.779259948877112e-06, + "loss": 0.19281005859375, + "step": 28420 + }, + { + "epoch": 0.24578256997345463, + "grad_norm": 21.172655234842907, + "learning_rate": 5.77918323861655e-06, + "loss": 0.1128875732421875, + "step": 28425 + }, + { + "epoch": 0.24582580349499789, + "grad_norm": 13.90617117128998, + "learning_rate": 5.779106515538642e-06, + "loss": 0.15898876190185546, + "step": 28430 + }, + { + "epoch": 0.24586903701654114, + "grad_norm": 11.089721470906746, + "learning_rate": 5.77902977964374e-06, + "loss": 0.15809326171875, + "step": 28435 + }, + { + "epoch": 0.2459122705380844, + "grad_norm": 2.5180334845588015, + "learning_rate": 5.778953030932198e-06, + "loss": 0.124169921875, + "step": 28440 + }, + { + "epoch": 0.24595550405962768, + "grad_norm": 17.018767066458974, + "learning_rate": 5.77887626940437e-06, + "loss": 0.148211669921875, + "step": 28445 + }, + { + "epoch": 0.24599873758117094, + "grad_norm": 22.48494441091361, + "learning_rate": 5.77879949506061e-06, + "loss": 0.25959320068359376, + "step": 28450 + }, + { + "epoch": 0.2460419711027142, + "grad_norm": 2.643978371740476, + "learning_rate": 5.778722707901273e-06, + "loss": 0.0648651123046875, + "step": 28455 + }, + { + "epoch": 0.24608520462425745, + "grad_norm": 0.3949696108443356, + "learning_rate": 5.778645907926712e-06, + "loss": 0.1116485595703125, + "step": 28460 + }, + { + "epoch": 0.24612843814580074, + "grad_norm": 1.4787043249988396, + "learning_rate": 5.778569095137282e-06, + "loss": 0.32642059326171874, + "step": 28465 + }, + { + "epoch": 0.246171671667344, + "grad_norm": 6.067829413029844, + "learning_rate": 5.7784922695333365e-06, + "loss": 0.13691864013671876, + "step": 28470 + }, + { + "epoch": 0.24621490518888725, + "grad_norm": 1.0794153956552757, + "learning_rate": 5.7784154311152306e-06, + "loss": 0.11591949462890624, + "step": 28475 + }, + { + "epoch": 0.2462581387104305, + "grad_norm": 3.89923966203732, + "learning_rate": 5.778338579883317e-06, + "loss": 0.46646728515625, + "step": 28480 + }, + { + "epoch": 0.2463013722319738, + "grad_norm": 29.125057462326016, + "learning_rate": 5.778261715837953e-06, + "loss": 0.144683837890625, + "step": 28485 + }, + { + "epoch": 0.24634460575351705, + "grad_norm": 33.892272687794964, + "learning_rate": 5.7781848389794905e-06, + "loss": 0.14799766540527343, + "step": 28490 + }, + { + "epoch": 0.2463878392750603, + "grad_norm": 0.3632091812062531, + "learning_rate": 5.778107949308285e-06, + "loss": 0.18647842407226561, + "step": 28495 + }, + { + "epoch": 0.24643107279660356, + "grad_norm": 10.037345389818139, + "learning_rate": 5.778031046824691e-06, + "loss": 0.0700469970703125, + "step": 28500 + }, + { + "epoch": 0.24647430631814685, + "grad_norm": 1.1188248234541633, + "learning_rate": 5.777954131529064e-06, + "loss": 0.06975173950195312, + "step": 28505 + }, + { + "epoch": 0.2465175398396901, + "grad_norm": 4.384356918356591, + "learning_rate": 5.777877203421758e-06, + "loss": 0.07465286254882812, + "step": 28510 + }, + { + "epoch": 0.24656077336123336, + "grad_norm": 1.036449293671985, + "learning_rate": 5.777800262503127e-06, + "loss": 0.31625213623046877, + "step": 28515 + }, + { + "epoch": 0.24660400688277662, + "grad_norm": 7.909757507622112, + "learning_rate": 5.777723308773527e-06, + "loss": 0.43544921875, + "step": 28520 + }, + { + "epoch": 0.2466472404043199, + "grad_norm": 4.819354411916319, + "learning_rate": 5.777646342233312e-06, + "loss": 0.24057769775390625, + "step": 28525 + }, + { + "epoch": 0.24669047392586316, + "grad_norm": 27.59949919725324, + "learning_rate": 5.777569362882838e-06, + "loss": 0.30075531005859374, + "step": 28530 + }, + { + "epoch": 0.24673370744740641, + "grad_norm": 6.484683677546474, + "learning_rate": 5.77749237072246e-06, + "loss": 0.1165771484375, + "step": 28535 + }, + { + "epoch": 0.2467769409689497, + "grad_norm": 11.510873640374177, + "learning_rate": 5.7774153657525325e-06, + "loss": 0.1683929443359375, + "step": 28540 + }, + { + "epoch": 0.24682017449049296, + "grad_norm": 6.86789178714051, + "learning_rate": 5.77733834797341e-06, + "loss": 0.0891143798828125, + "step": 28545 + }, + { + "epoch": 0.2468634080120362, + "grad_norm": 13.48966691083467, + "learning_rate": 5.777261317385448e-06, + "loss": 0.350347900390625, + "step": 28550 + }, + { + "epoch": 0.24690664153357947, + "grad_norm": 32.75805351138957, + "learning_rate": 5.777184273989004e-06, + "loss": 0.44470977783203125, + "step": 28555 + }, + { + "epoch": 0.24694987505512275, + "grad_norm": 12.970804336145777, + "learning_rate": 5.77710721778443e-06, + "loss": 0.2716419219970703, + "step": 28560 + }, + { + "epoch": 0.246993108576666, + "grad_norm": 1.2578194839412142, + "learning_rate": 5.777030148772084e-06, + "loss": 0.08487319946289062, + "step": 28565 + }, + { + "epoch": 0.24703634209820927, + "grad_norm": 1.0164391381518554, + "learning_rate": 5.77695306695232e-06, + "loss": 0.451806640625, + "step": 28570 + }, + { + "epoch": 0.24707957561975252, + "grad_norm": 11.95497429692006, + "learning_rate": 5.776875972325494e-06, + "loss": 0.31009063720703123, + "step": 28575 + }, + { + "epoch": 0.2471228091412958, + "grad_norm": 1.0888948945571923, + "learning_rate": 5.776798864891961e-06, + "loss": 0.1205841064453125, + "step": 28580 + }, + { + "epoch": 0.24716604266283906, + "grad_norm": 8.198049760057042, + "learning_rate": 5.776721744652077e-06, + "loss": 0.17703704833984374, + "step": 28585 + }, + { + "epoch": 0.24720927618438232, + "grad_norm": 3.645345971220577, + "learning_rate": 5.776644611606197e-06, + "loss": 0.41193389892578125, + "step": 28590 + }, + { + "epoch": 0.24725250970592558, + "grad_norm": 1.4697134312282916, + "learning_rate": 5.776567465754679e-06, + "loss": 0.404559326171875, + "step": 28595 + }, + { + "epoch": 0.24729574322746886, + "grad_norm": 15.19488737078448, + "learning_rate": 5.776490307097876e-06, + "loss": 0.04844589233398437, + "step": 28600 + }, + { + "epoch": 0.24733897674901212, + "grad_norm": 7.6713051745246235, + "learning_rate": 5.776413135636145e-06, + "loss": 0.2663749694824219, + "step": 28605 + }, + { + "epoch": 0.24738221027055537, + "grad_norm": 23.84203115202725, + "learning_rate": 5.776335951369842e-06, + "loss": 0.37112274169921877, + "step": 28610 + }, + { + "epoch": 0.24742544379209863, + "grad_norm": 6.677710631435228, + "learning_rate": 5.776258754299324e-06, + "loss": 0.11274871826171876, + "step": 28615 + }, + { + "epoch": 0.24746867731364192, + "grad_norm": 4.434378955063698, + "learning_rate": 5.776181544424944e-06, + "loss": 0.11595001220703124, + "step": 28620 + }, + { + "epoch": 0.24751191083518517, + "grad_norm": 8.525990830980989, + "learning_rate": 5.776104321747061e-06, + "loss": 0.240350341796875, + "step": 28625 + }, + { + "epoch": 0.24755514435672843, + "grad_norm": 2.0506666250712393, + "learning_rate": 5.776027086266031e-06, + "loss": 0.14521484375, + "step": 28630 + }, + { + "epoch": 0.24759837787827169, + "grad_norm": 0.8931409987666419, + "learning_rate": 5.7759498379822095e-06, + "loss": 0.15347900390625, + "step": 28635 + }, + { + "epoch": 0.24764161139981497, + "grad_norm": 11.149661247792853, + "learning_rate": 5.775872576895951e-06, + "loss": 0.2724029541015625, + "step": 28640 + }, + { + "epoch": 0.24768484492135823, + "grad_norm": 34.57026602756, + "learning_rate": 5.775795303007615e-06, + "loss": 0.2666534423828125, + "step": 28645 + }, + { + "epoch": 0.24772807844290148, + "grad_norm": 49.46385544464519, + "learning_rate": 5.775718016317556e-06, + "loss": 0.33467254638671873, + "step": 28650 + }, + { + "epoch": 0.24777131196444474, + "grad_norm": 2.462392890129905, + "learning_rate": 5.77564071682613e-06, + "loss": 0.25136375427246094, + "step": 28655 + }, + { + "epoch": 0.24781454548598802, + "grad_norm": 38.37158818841127, + "learning_rate": 5.775563404533694e-06, + "loss": 0.28252315521240234, + "step": 28660 + }, + { + "epoch": 0.24785777900753128, + "grad_norm": 7.8282800744442325, + "learning_rate": 5.775486079440605e-06, + "loss": 0.1300689697265625, + "step": 28665 + }, + { + "epoch": 0.24790101252907454, + "grad_norm": 56.10547081691838, + "learning_rate": 5.775408741547221e-06, + "loss": 0.382928466796875, + "step": 28670 + }, + { + "epoch": 0.2479442460506178, + "grad_norm": 5.414099344263033, + "learning_rate": 5.775331390853897e-06, + "loss": 0.127142333984375, + "step": 28675 + }, + { + "epoch": 0.24798747957216108, + "grad_norm": 8.710591146482269, + "learning_rate": 5.775254027360988e-06, + "loss": 0.0833251953125, + "step": 28680 + }, + { + "epoch": 0.24803071309370434, + "grad_norm": 4.622592083150609, + "learning_rate": 5.775176651068854e-06, + "loss": 0.1057525634765625, + "step": 28685 + }, + { + "epoch": 0.2480739466152476, + "grad_norm": 8.10303917902229, + "learning_rate": 5.77509926197785e-06, + "loss": 0.240185546875, + "step": 28690 + }, + { + "epoch": 0.24811718013679085, + "grad_norm": 22.900936075133362, + "learning_rate": 5.775021860088333e-06, + "loss": 0.142840576171875, + "step": 28695 + }, + { + "epoch": 0.24816041365833413, + "grad_norm": 2.7161864127933137, + "learning_rate": 5.7749444454006616e-06, + "loss": 0.0702545166015625, + "step": 28700 + }, + { + "epoch": 0.2482036471798774, + "grad_norm": 20.00004447331399, + "learning_rate": 5.77486701791519e-06, + "loss": 0.1563751220703125, + "step": 28705 + }, + { + "epoch": 0.24824688070142065, + "grad_norm": 34.647674755936805, + "learning_rate": 5.774789577632279e-06, + "loss": 0.15787353515625, + "step": 28710 + }, + { + "epoch": 0.2482901142229639, + "grad_norm": 18.271308516741243, + "learning_rate": 5.774712124552282e-06, + "loss": 0.3666404724121094, + "step": 28715 + }, + { + "epoch": 0.2483333477445072, + "grad_norm": 3.251234637057768, + "learning_rate": 5.774634658675559e-06, + "loss": 0.4139411926269531, + "step": 28720 + }, + { + "epoch": 0.24837658126605044, + "grad_norm": 14.061366726935182, + "learning_rate": 5.774557180002465e-06, + "loss": 0.1583019256591797, + "step": 28725 + }, + { + "epoch": 0.2484198147875937, + "grad_norm": 50.52958303401237, + "learning_rate": 5.774479688533358e-06, + "loss": 0.27613983154296873, + "step": 28730 + }, + { + "epoch": 0.24846304830913699, + "grad_norm": 25.24656805462761, + "learning_rate": 5.774402184268598e-06, + "loss": 0.25099716186523435, + "step": 28735 + }, + { + "epoch": 0.24850628183068024, + "grad_norm": 1.6671412858789563, + "learning_rate": 5.774324667208538e-06, + "loss": 0.18304443359375, + "step": 28740 + }, + { + "epoch": 0.2485495153522235, + "grad_norm": 3.9538459881707633, + "learning_rate": 5.774247137353539e-06, + "loss": 0.0952301025390625, + "step": 28745 + }, + { + "epoch": 0.24859274887376676, + "grad_norm": 11.124279224568212, + "learning_rate": 5.774169594703957e-06, + "loss": 0.481640625, + "step": 28750 + }, + { + "epoch": 0.24863598239531004, + "grad_norm": 1.9216855679301506, + "learning_rate": 5.77409203926015e-06, + "loss": 0.150347900390625, + "step": 28755 + }, + { + "epoch": 0.2486792159168533, + "grad_norm": 1.245725884850337, + "learning_rate": 5.774014471022476e-06, + "loss": 0.1674041748046875, + "step": 28760 + }, + { + "epoch": 0.24872244943839655, + "grad_norm": 5.24269855352438, + "learning_rate": 5.773936889991292e-06, + "loss": 0.09747161865234374, + "step": 28765 + }, + { + "epoch": 0.2487656829599398, + "grad_norm": 20.18918752237728, + "learning_rate": 5.773859296166957e-06, + "loss": 0.27490921020507814, + "step": 28770 + }, + { + "epoch": 0.2488089164814831, + "grad_norm": 3.53486832399456, + "learning_rate": 5.7737816895498265e-06, + "loss": 0.1900146484375, + "step": 28775 + }, + { + "epoch": 0.24885215000302635, + "grad_norm": 1.5419819462184072, + "learning_rate": 5.773704070140261e-06, + "loss": 0.246697998046875, + "step": 28780 + }, + { + "epoch": 0.2488953835245696, + "grad_norm": 3.073245809922823, + "learning_rate": 5.773626437938617e-06, + "loss": 0.1238189697265625, + "step": 28785 + }, + { + "epoch": 0.24893861704611286, + "grad_norm": 25.425974529626323, + "learning_rate": 5.773548792945253e-06, + "loss": 0.086358642578125, + "step": 28790 + }, + { + "epoch": 0.24898185056765615, + "grad_norm": 1.5165653038063938, + "learning_rate": 5.773471135160527e-06, + "loss": 0.24235076904296876, + "step": 28795 + }, + { + "epoch": 0.2490250840891994, + "grad_norm": 8.921840140488936, + "learning_rate": 5.773393464584797e-06, + "loss": 0.37371826171875, + "step": 28800 + }, + { + "epoch": 0.24906831761074266, + "grad_norm": 2.895051065420842, + "learning_rate": 5.7733157812184225e-06, + "loss": 0.1131591796875, + "step": 28805 + }, + { + "epoch": 0.24911155113228592, + "grad_norm": 17.48091106692099, + "learning_rate": 5.77323808506176e-06, + "loss": 0.16700286865234376, + "step": 28810 + }, + { + "epoch": 0.2491547846538292, + "grad_norm": 0.6663847320663734, + "learning_rate": 5.773160376115168e-06, + "loss": 0.09363288879394531, + "step": 28815 + }, + { + "epoch": 0.24919801817537246, + "grad_norm": 22.059481761634657, + "learning_rate": 5.773082654379006e-06, + "loss": 0.22235107421875, + "step": 28820 + }, + { + "epoch": 0.24924125169691572, + "grad_norm": 2.8739970034978173, + "learning_rate": 5.7730049198536315e-06, + "loss": 0.2867401123046875, + "step": 28825 + }, + { + "epoch": 0.24928448521845897, + "grad_norm": 5.037513723751982, + "learning_rate": 5.772927172539403e-06, + "loss": 0.258502197265625, + "step": 28830 + }, + { + "epoch": 0.24932771874000226, + "grad_norm": 37.654918195550835, + "learning_rate": 5.772849412436681e-06, + "loss": 0.25113677978515625, + "step": 28835 + }, + { + "epoch": 0.24937095226154551, + "grad_norm": 4.792917963137055, + "learning_rate": 5.772771639545821e-06, + "loss": 0.3919677734375, + "step": 28840 + }, + { + "epoch": 0.24941418578308877, + "grad_norm": 12.213310714083258, + "learning_rate": 5.772693853867184e-06, + "loss": 0.08634185791015625, + "step": 28845 + }, + { + "epoch": 0.24945741930463203, + "grad_norm": 9.324993698542436, + "learning_rate": 5.772616055401127e-06, + "loss": 0.3060150146484375, + "step": 28850 + }, + { + "epoch": 0.2495006528261753, + "grad_norm": 4.52002073898681, + "learning_rate": 5.77253824414801e-06, + "loss": 0.151434326171875, + "step": 28855 + }, + { + "epoch": 0.24954388634771857, + "grad_norm": 7.847990068106426, + "learning_rate": 5.7724604201081926e-06, + "loss": 0.4531585693359375, + "step": 28860 + }, + { + "epoch": 0.24958711986926183, + "grad_norm": 14.62117154217121, + "learning_rate": 5.772382583282032e-06, + "loss": 0.0723358154296875, + "step": 28865 + }, + { + "epoch": 0.24963035339080508, + "grad_norm": 19.41391683904441, + "learning_rate": 5.772304733669887e-06, + "loss": 0.36609725952148436, + "step": 28870 + }, + { + "epoch": 0.24967358691234837, + "grad_norm": 0.3766979354188318, + "learning_rate": 5.77222687127212e-06, + "loss": 0.10859832763671876, + "step": 28875 + }, + { + "epoch": 0.24971682043389162, + "grad_norm": 11.819920533908325, + "learning_rate": 5.7721489960890855e-06, + "loss": 0.1244140625, + "step": 28880 + }, + { + "epoch": 0.24976005395543488, + "grad_norm": 21.976872872329007, + "learning_rate": 5.772071108121145e-06, + "loss": 0.26454887390136717, + "step": 28885 + }, + { + "epoch": 0.24980328747697814, + "grad_norm": 1.4768103331210847, + "learning_rate": 5.771993207368658e-06, + "loss": 0.07836151123046875, + "step": 28890 + }, + { + "epoch": 0.24984652099852142, + "grad_norm": 10.134416620802462, + "learning_rate": 5.771915293831983e-06, + "loss": 0.2500465393066406, + "step": 28895 + }, + { + "epoch": 0.24988975452006468, + "grad_norm": 4.351879758536533, + "learning_rate": 5.77183736751148e-06, + "loss": 0.19702606201171874, + "step": 28900 + }, + { + "epoch": 0.24993298804160793, + "grad_norm": 25.657482446614672, + "learning_rate": 5.771759428407508e-06, + "loss": 0.1402069091796875, + "step": 28905 + }, + { + "epoch": 0.24997622156315122, + "grad_norm": 5.262186441135424, + "learning_rate": 5.771681476520426e-06, + "loss": 0.16983909606933595, + "step": 28910 + }, + { + "epoch": 0.2500194550846945, + "grad_norm": 8.725441775170625, + "learning_rate": 5.7716035118505935e-06, + "loss": 0.3147186279296875, + "step": 28915 + }, + { + "epoch": 0.25006268860623776, + "grad_norm": 3.9847019883978967, + "learning_rate": 5.771525534398371e-06, + "loss": 0.416156005859375, + "step": 28920 + }, + { + "epoch": 0.250105922127781, + "grad_norm": 9.372657388357101, + "learning_rate": 5.771447544164118e-06, + "loss": 0.1259307861328125, + "step": 28925 + }, + { + "epoch": 0.2501491556493243, + "grad_norm": 18.903037196983142, + "learning_rate": 5.771369541148194e-06, + "loss": 0.14823760986328124, + "step": 28930 + }, + { + "epoch": 0.2501923891708675, + "grad_norm": 7.473271916176461, + "learning_rate": 5.7712915253509586e-06, + "loss": 0.062297630310058597, + "step": 28935 + }, + { + "epoch": 0.2502356226924108, + "grad_norm": 12.980162403882947, + "learning_rate": 5.77121349677277e-06, + "loss": 0.1471609115600586, + "step": 28940 + }, + { + "epoch": 0.25027885621395407, + "grad_norm": 32.43109435180673, + "learning_rate": 5.771135455413991e-06, + "loss": 0.21134567260742188, + "step": 28945 + }, + { + "epoch": 0.2503220897354973, + "grad_norm": 5.368000264298575, + "learning_rate": 5.7710574012749796e-06, + "loss": 0.06659698486328125, + "step": 28950 + }, + { + "epoch": 0.2503653232570406, + "grad_norm": 4.689137610597109, + "learning_rate": 5.770979334356097e-06, + "loss": 0.1654388427734375, + "step": 28955 + }, + { + "epoch": 0.25040855677858387, + "grad_norm": 30.27192826437717, + "learning_rate": 5.770901254657701e-06, + "loss": 0.17940521240234375, + "step": 28960 + }, + { + "epoch": 0.2504517903001271, + "grad_norm": 2.8694562914081887, + "learning_rate": 5.770823162180155e-06, + "loss": 0.050201416015625, + "step": 28965 + }, + { + "epoch": 0.2504950238216704, + "grad_norm": 10.99075969285774, + "learning_rate": 5.770745056923817e-06, + "loss": 0.2941619873046875, + "step": 28970 + }, + { + "epoch": 0.2505382573432136, + "grad_norm": 1.1344005866469853, + "learning_rate": 5.770666938889046e-06, + "loss": 0.3014640808105469, + "step": 28975 + }, + { + "epoch": 0.2505814908647569, + "grad_norm": 16.040860079319284, + "learning_rate": 5.770588808076206e-06, + "loss": 0.22244796752929688, + "step": 28980 + }, + { + "epoch": 0.2506247243863002, + "grad_norm": 1.1262693775062962, + "learning_rate": 5.770510664485655e-06, + "loss": 0.1084381103515625, + "step": 28985 + }, + { + "epoch": 0.2506679579078434, + "grad_norm": 12.416994443070731, + "learning_rate": 5.7704325081177524e-06, + "loss": 0.2761260986328125, + "step": 28990 + }, + { + "epoch": 0.2507111914293867, + "grad_norm": 0.9550279211401246, + "learning_rate": 5.7703543389728605e-06, + "loss": 0.18339691162109376, + "step": 28995 + }, + { + "epoch": 0.25075442495093, + "grad_norm": 2.932586586248154, + "learning_rate": 5.77027615705134e-06, + "loss": 0.131878662109375, + "step": 29000 + }, + { + "epoch": 0.2507976584724732, + "grad_norm": 6.558656014082539, + "learning_rate": 5.77019796235355e-06, + "loss": 0.13450698852539061, + "step": 29005 + }, + { + "epoch": 0.2508408919940165, + "grad_norm": 1.580381710319267, + "learning_rate": 5.7701197548798516e-06, + "loss": 0.1766681671142578, + "step": 29010 + }, + { + "epoch": 0.2508841255155597, + "grad_norm": 4.730294179103588, + "learning_rate": 5.770041534630606e-06, + "loss": 0.44138336181640625, + "step": 29015 + }, + { + "epoch": 0.250927359037103, + "grad_norm": 16.549990314629706, + "learning_rate": 5.769963301606173e-06, + "loss": 0.1343658447265625, + "step": 29020 + }, + { + "epoch": 0.2509705925586463, + "grad_norm": 12.991925022101295, + "learning_rate": 5.769885055806914e-06, + "loss": 0.7170440673828125, + "step": 29025 + }, + { + "epoch": 0.2510138260801895, + "grad_norm": 14.2434308706553, + "learning_rate": 5.76980679723319e-06, + "loss": 0.2124237060546875, + "step": 29030 + }, + { + "epoch": 0.2510570596017328, + "grad_norm": 3.126372571951673, + "learning_rate": 5.769728525885363e-06, + "loss": 0.0826812744140625, + "step": 29035 + }, + { + "epoch": 0.2511002931232761, + "grad_norm": 6.704532635581851, + "learning_rate": 5.769650241763792e-06, + "loss": 0.15489959716796875, + "step": 29040 + }, + { + "epoch": 0.2511435266448193, + "grad_norm": 23.892168028652303, + "learning_rate": 5.769571944868838e-06, + "loss": 0.23927459716796876, + "step": 29045 + }, + { + "epoch": 0.2511867601663626, + "grad_norm": 15.26380408883283, + "learning_rate": 5.769493635200864e-06, + "loss": 0.17598114013671876, + "step": 29050 + }, + { + "epoch": 0.25122999368790583, + "grad_norm": 1.3173452325667914, + "learning_rate": 5.76941531276023e-06, + "loss": 0.244622802734375, + "step": 29055 + }, + { + "epoch": 0.2512732272094491, + "grad_norm": 41.09940315937749, + "learning_rate": 5.769336977547296e-06, + "loss": 0.5412143707275391, + "step": 29060 + }, + { + "epoch": 0.2513164607309924, + "grad_norm": 8.151408083804943, + "learning_rate": 5.769258629562425e-06, + "loss": 0.342364501953125, + "step": 29065 + }, + { + "epoch": 0.2513596942525356, + "grad_norm": 13.816299119645779, + "learning_rate": 5.769180268805979e-06, + "loss": 0.3051734924316406, + "step": 29070 + }, + { + "epoch": 0.2514029277740789, + "grad_norm": 15.590321353600897, + "learning_rate": 5.769101895278318e-06, + "loss": 0.2516021728515625, + "step": 29075 + }, + { + "epoch": 0.2514461612956222, + "grad_norm": 4.429381579408949, + "learning_rate": 5.769023508979803e-06, + "loss": 0.2335235595703125, + "step": 29080 + }, + { + "epoch": 0.2514893948171654, + "grad_norm": 40.96979977467336, + "learning_rate": 5.768945109910797e-06, + "loss": 0.48580169677734375, + "step": 29085 + }, + { + "epoch": 0.2515326283387087, + "grad_norm": 15.97664513444982, + "learning_rate": 5.76886669807166e-06, + "loss": 0.157830810546875, + "step": 29090 + }, + { + "epoch": 0.251575861860252, + "grad_norm": 0.3743770754120103, + "learning_rate": 5.768788273462755e-06, + "loss": 0.158721923828125, + "step": 29095 + }, + { + "epoch": 0.2516190953817952, + "grad_norm": 44.788213195918466, + "learning_rate": 5.7687098360844424e-06, + "loss": 0.46688385009765626, + "step": 29100 + }, + { + "epoch": 0.2516623289033385, + "grad_norm": 3.913820519180674, + "learning_rate": 5.768631385937085e-06, + "loss": 0.11032180786132813, + "step": 29105 + }, + { + "epoch": 0.25170556242488173, + "grad_norm": 16.623909644211366, + "learning_rate": 5.768552923021045e-06, + "loss": 0.17072906494140624, + "step": 29110 + }, + { + "epoch": 0.251748795946425, + "grad_norm": 5.283367215960307, + "learning_rate": 5.768474447336684e-06, + "loss": 0.21349258422851564, + "step": 29115 + }, + { + "epoch": 0.2517920294679683, + "grad_norm": 78.0067279511577, + "learning_rate": 5.768395958884362e-06, + "loss": 0.31197052001953124, + "step": 29120 + }, + { + "epoch": 0.25183526298951153, + "grad_norm": 38.521212982961075, + "learning_rate": 5.7683174576644436e-06, + "loss": 0.23261337280273436, + "step": 29125 + }, + { + "epoch": 0.2518784965110548, + "grad_norm": 10.423257481355819, + "learning_rate": 5.768238943677289e-06, + "loss": 0.107305908203125, + "step": 29130 + }, + { + "epoch": 0.2519217300325981, + "grad_norm": 52.26006500304185, + "learning_rate": 5.768160416923261e-06, + "loss": 0.6380279541015625, + "step": 29135 + }, + { + "epoch": 0.25196496355414133, + "grad_norm": 4.0857819890949525, + "learning_rate": 5.768081877402722e-06, + "loss": 0.08697509765625, + "step": 29140 + }, + { + "epoch": 0.2520081970756846, + "grad_norm": 21.090552026487206, + "learning_rate": 5.768003325116034e-06, + "loss": 0.14054183959960936, + "step": 29145 + }, + { + "epoch": 0.25205143059722784, + "grad_norm": 6.809673875654713, + "learning_rate": 5.767924760063559e-06, + "loss": 0.09228363037109374, + "step": 29150 + }, + { + "epoch": 0.25209466411877113, + "grad_norm": 5.61963311229075, + "learning_rate": 5.76784618224566e-06, + "loss": 0.1233367919921875, + "step": 29155 + }, + { + "epoch": 0.2521378976403144, + "grad_norm": 0.8889276730817209, + "learning_rate": 5.767767591662699e-06, + "loss": 0.03386688232421875, + "step": 29160 + }, + { + "epoch": 0.25218113116185764, + "grad_norm": 11.81909244789638, + "learning_rate": 5.767688988315039e-06, + "loss": 0.4025416374206543, + "step": 29165 + }, + { + "epoch": 0.2522243646834009, + "grad_norm": 15.606940564118442, + "learning_rate": 5.76761037220304e-06, + "loss": 0.166436767578125, + "step": 29170 + }, + { + "epoch": 0.2522675982049442, + "grad_norm": 2.3777118671782325, + "learning_rate": 5.767531743327068e-06, + "loss": 0.0454437255859375, + "step": 29175 + }, + { + "epoch": 0.25231083172648744, + "grad_norm": 0.3664310755271946, + "learning_rate": 5.7674531016874836e-06, + "loss": 0.203863525390625, + "step": 29180 + }, + { + "epoch": 0.2523540652480307, + "grad_norm": 5.406441774284333, + "learning_rate": 5.76737444728465e-06, + "loss": 0.4898651123046875, + "step": 29185 + }, + { + "epoch": 0.25239729876957395, + "grad_norm": 56.75536012739885, + "learning_rate": 5.76729578011893e-06, + "loss": 0.3194366455078125, + "step": 29190 + }, + { + "epoch": 0.25244053229111724, + "grad_norm": 2.6046652361670697, + "learning_rate": 5.767217100190687e-06, + "loss": 0.2139892578125, + "step": 29195 + }, + { + "epoch": 0.2524837658126605, + "grad_norm": 3.479079238077066, + "learning_rate": 5.7671384075002825e-06, + "loss": 0.1193115234375, + "step": 29200 + }, + { + "epoch": 0.25252699933420375, + "grad_norm": 30.989941414087987, + "learning_rate": 5.76705970204808e-06, + "loss": 0.3130340576171875, + "step": 29205 + }, + { + "epoch": 0.25257023285574703, + "grad_norm": 6.809222461779854, + "learning_rate": 5.766980983834444e-06, + "loss": 0.10194091796875, + "step": 29210 + }, + { + "epoch": 0.2526134663772903, + "grad_norm": 10.856252760597206, + "learning_rate": 5.766902252859735e-06, + "loss": 0.2112579345703125, + "step": 29215 + }, + { + "epoch": 0.25265669989883355, + "grad_norm": 25.547093427874888, + "learning_rate": 5.766823509124317e-06, + "loss": 0.18765182495117189, + "step": 29220 + }, + { + "epoch": 0.25269993342037683, + "grad_norm": 17.919263426144262, + "learning_rate": 5.766744752628555e-06, + "loss": 0.21938629150390626, + "step": 29225 + }, + { + "epoch": 0.25274316694192006, + "grad_norm": 9.2537873881635, + "learning_rate": 5.76666598337281e-06, + "loss": 0.05463409423828125, + "step": 29230 + }, + { + "epoch": 0.25278640046346335, + "grad_norm": 26.445043575388624, + "learning_rate": 5.766587201357446e-06, + "loss": 0.1118896484375, + "step": 29235 + }, + { + "epoch": 0.25282963398500663, + "grad_norm": 0.8156267340565544, + "learning_rate": 5.7665084065828256e-06, + "loss": 0.09242401123046876, + "step": 29240 + }, + { + "epoch": 0.25287286750654986, + "grad_norm": 0.6404504802433448, + "learning_rate": 5.766429599049313e-06, + "loss": 0.052239990234375, + "step": 29245 + }, + { + "epoch": 0.25291610102809314, + "grad_norm": 36.9107968296665, + "learning_rate": 5.766350778757272e-06, + "loss": 0.512042236328125, + "step": 29250 + }, + { + "epoch": 0.2529593345496364, + "grad_norm": 17.799516676486693, + "learning_rate": 5.766271945707066e-06, + "loss": 0.1800750732421875, + "step": 29255 + }, + { + "epoch": 0.25300256807117966, + "grad_norm": 19.18976337929278, + "learning_rate": 5.766193099899057e-06, + "loss": 0.140386962890625, + "step": 29260 + }, + { + "epoch": 0.25304580159272294, + "grad_norm": 18.663743669252412, + "learning_rate": 5.766114241333611e-06, + "loss": 0.3343502044677734, + "step": 29265 + }, + { + "epoch": 0.25308903511426617, + "grad_norm": 6.247660316471607, + "learning_rate": 5.76603537001109e-06, + "loss": 0.09810409545898438, + "step": 29270 + }, + { + "epoch": 0.25313226863580945, + "grad_norm": 2.9628753718381424, + "learning_rate": 5.765956485931858e-06, + "loss": 0.11565170288085938, + "step": 29275 + }, + { + "epoch": 0.25317550215735274, + "grad_norm": 8.674533513233136, + "learning_rate": 5.765877589096279e-06, + "loss": 0.2253387451171875, + "step": 29280 + }, + { + "epoch": 0.25321873567889597, + "grad_norm": 20.447635869847083, + "learning_rate": 5.765798679504718e-06, + "loss": 0.13531036376953126, + "step": 29285 + }, + { + "epoch": 0.25326196920043925, + "grad_norm": 11.021642334060832, + "learning_rate": 5.765719757157537e-06, + "loss": 0.22132415771484376, + "step": 29290 + }, + { + "epoch": 0.25330520272198254, + "grad_norm": 13.901717347704658, + "learning_rate": 5.765640822055101e-06, + "loss": 0.281658935546875, + "step": 29295 + }, + { + "epoch": 0.25334843624352577, + "grad_norm": 52.15464684452104, + "learning_rate": 5.7655618741977745e-06, + "loss": 0.3351654052734375, + "step": 29300 + }, + { + "epoch": 0.25339166976506905, + "grad_norm": 41.242602100041005, + "learning_rate": 5.76548291358592e-06, + "loss": 0.11052703857421875, + "step": 29305 + }, + { + "epoch": 0.25343490328661233, + "grad_norm": 0.7445338994475009, + "learning_rate": 5.7654039402199035e-06, + "loss": 0.114697265625, + "step": 29310 + }, + { + "epoch": 0.25347813680815556, + "grad_norm": 7.297178651765237, + "learning_rate": 5.765324954100088e-06, + "loss": 0.2009490966796875, + "step": 29315 + }, + { + "epoch": 0.25352137032969885, + "grad_norm": 25.73162264110032, + "learning_rate": 5.765245955226838e-06, + "loss": 0.23175506591796874, + "step": 29320 + }, + { + "epoch": 0.2535646038512421, + "grad_norm": 49.35418421178691, + "learning_rate": 5.765166943600519e-06, + "loss": 0.34062042236328127, + "step": 29325 + }, + { + "epoch": 0.25360783737278536, + "grad_norm": 40.1093373978513, + "learning_rate": 5.765087919221493e-06, + "loss": 0.326318359375, + "step": 29330 + }, + { + "epoch": 0.25365107089432865, + "grad_norm": 17.28283617777959, + "learning_rate": 5.7650088820901265e-06, + "loss": 0.19503021240234375, + "step": 29335 + }, + { + "epoch": 0.2536943044158719, + "grad_norm": 18.353746546257412, + "learning_rate": 5.764929832206784e-06, + "loss": 0.45242919921875, + "step": 29340 + }, + { + "epoch": 0.25373753793741516, + "grad_norm": 0.6307024621047778, + "learning_rate": 5.764850769571829e-06, + "loss": 0.157537841796875, + "step": 29345 + }, + { + "epoch": 0.25378077145895844, + "grad_norm": 0.6513657271218952, + "learning_rate": 5.764771694185626e-06, + "loss": 0.1105682373046875, + "step": 29350 + }, + { + "epoch": 0.25382400498050167, + "grad_norm": 9.327160779968882, + "learning_rate": 5.764692606048541e-06, + "loss": 0.33861827850341797, + "step": 29355 + }, + { + "epoch": 0.25386723850204496, + "grad_norm": 5.883291709632755, + "learning_rate": 5.764613505160937e-06, + "loss": 0.6610742568969726, + "step": 29360 + }, + { + "epoch": 0.2539104720235882, + "grad_norm": 23.759624238186998, + "learning_rate": 5.7645343915231804e-06, + "loss": 0.352099609375, + "step": 29365 + }, + { + "epoch": 0.25395370554513147, + "grad_norm": 40.20693763234125, + "learning_rate": 5.764455265135636e-06, + "loss": 0.2838584899902344, + "step": 29370 + }, + { + "epoch": 0.25399693906667475, + "grad_norm": 12.567024259227864, + "learning_rate": 5.764376125998667e-06, + "loss": 0.09740524291992188, + "step": 29375 + }, + { + "epoch": 0.254040172588218, + "grad_norm": 1.4603459460043642, + "learning_rate": 5.76429697411264e-06, + "loss": 0.129705810546875, + "step": 29380 + }, + { + "epoch": 0.25408340610976127, + "grad_norm": 19.169032342160317, + "learning_rate": 5.76421780947792e-06, + "loss": 0.13547210693359374, + "step": 29385 + }, + { + "epoch": 0.25412663963130455, + "grad_norm": 7.771301887039145, + "learning_rate": 5.764138632094871e-06, + "loss": 0.1396728515625, + "step": 29390 + }, + { + "epoch": 0.2541698731528478, + "grad_norm": 4.836215902103136, + "learning_rate": 5.76405944196386e-06, + "loss": 0.12916717529296876, + "step": 29395 + }, + { + "epoch": 0.25421310667439107, + "grad_norm": 7.851181636246193, + "learning_rate": 5.763980239085251e-06, + "loss": 0.2064697265625, + "step": 29400 + }, + { + "epoch": 0.2542563401959343, + "grad_norm": 3.337862721805027, + "learning_rate": 5.763901023459408e-06, + "loss": 0.10931396484375, + "step": 29405 + }, + { + "epoch": 0.2542995737174776, + "grad_norm": 41.3528271304279, + "learning_rate": 5.763821795086699e-06, + "loss": 0.22542572021484375, + "step": 29410 + }, + { + "epoch": 0.25434280723902086, + "grad_norm": 3.475286843004868, + "learning_rate": 5.763742553967487e-06, + "loss": 0.35099945068359373, + "step": 29415 + }, + { + "epoch": 0.2543860407605641, + "grad_norm": 2.3384886478468045, + "learning_rate": 5.763663300102139e-06, + "loss": 0.1105926513671875, + "step": 29420 + }, + { + "epoch": 0.2544292742821074, + "grad_norm": 13.022318020426795, + "learning_rate": 5.763584033491021e-06, + "loss": 0.4802490234375, + "step": 29425 + }, + { + "epoch": 0.25447250780365066, + "grad_norm": 6.9364576877458, + "learning_rate": 5.763504754134497e-06, + "loss": 0.076959228515625, + "step": 29430 + }, + { + "epoch": 0.2545157413251939, + "grad_norm": 26.38898720476315, + "learning_rate": 5.763425462032933e-06, + "loss": 0.2361083984375, + "step": 29435 + }, + { + "epoch": 0.2545589748467372, + "grad_norm": 0.82276626858143, + "learning_rate": 5.763346157186695e-06, + "loss": 0.08742828369140625, + "step": 29440 + }, + { + "epoch": 0.2546022083682804, + "grad_norm": 18.97312210360565, + "learning_rate": 5.763266839596149e-06, + "loss": 0.3041343688964844, + "step": 29445 + }, + { + "epoch": 0.2546454418898237, + "grad_norm": 41.860431544332855, + "learning_rate": 5.76318750926166e-06, + "loss": 0.2980949401855469, + "step": 29450 + }, + { + "epoch": 0.25468867541136697, + "grad_norm": 9.957896922890592, + "learning_rate": 5.7631081661835945e-06, + "loss": 0.15316162109375, + "step": 29455 + }, + { + "epoch": 0.2547319089329102, + "grad_norm": 16.961856224161668, + "learning_rate": 5.763028810362319e-06, + "loss": 0.12127838134765626, + "step": 29460 + }, + { + "epoch": 0.2547751424544535, + "grad_norm": 25.189435802936995, + "learning_rate": 5.762949441798198e-06, + "loss": 0.18568191528320313, + "step": 29465 + }, + { + "epoch": 0.25481837597599677, + "grad_norm": 29.142222525811096, + "learning_rate": 5.762870060491598e-06, + "loss": 0.40216064453125, + "step": 29470 + }, + { + "epoch": 0.25486160949754, + "grad_norm": 2.8708623496717003, + "learning_rate": 5.762790666442886e-06, + "loss": 0.14187393188476563, + "step": 29475 + }, + { + "epoch": 0.2549048430190833, + "grad_norm": 17.52378887109832, + "learning_rate": 5.762711259652428e-06, + "loss": 0.14496688842773436, + "step": 29480 + }, + { + "epoch": 0.25494807654062657, + "grad_norm": 1.8365575777762326, + "learning_rate": 5.762631840120589e-06, + "loss": 0.02051239013671875, + "step": 29485 + }, + { + "epoch": 0.2549913100621698, + "grad_norm": 3.7347877286310185, + "learning_rate": 5.762552407847736e-06, + "loss": 0.03965415954589844, + "step": 29490 + }, + { + "epoch": 0.2550345435837131, + "grad_norm": 20.611797825250576, + "learning_rate": 5.762472962834237e-06, + "loss": 0.22274322509765626, + "step": 29495 + }, + { + "epoch": 0.2550777771052563, + "grad_norm": 3.4862554824991214, + "learning_rate": 5.762393505080455e-06, + "loss": 0.16583709716796874, + "step": 29500 + }, + { + "epoch": 0.2551210106267996, + "grad_norm": 75.52112620104398, + "learning_rate": 5.762314034586758e-06, + "loss": 0.254327392578125, + "step": 29505 + }, + { + "epoch": 0.2551642441483429, + "grad_norm": 13.971601733500123, + "learning_rate": 5.762234551353514e-06, + "loss": 0.17677154541015624, + "step": 29510 + }, + { + "epoch": 0.2552074776698861, + "grad_norm": 9.776958276839682, + "learning_rate": 5.7621550553810875e-06, + "loss": 0.09286651611328126, + "step": 29515 + }, + { + "epoch": 0.2552507111914294, + "grad_norm": 32.24210731876242, + "learning_rate": 5.762075546669847e-06, + "loss": 0.14575653076171874, + "step": 29520 + }, + { + "epoch": 0.2552939447129727, + "grad_norm": 0.04661087505046665, + "learning_rate": 5.761996025220157e-06, + "loss": 0.08164043426513672, + "step": 29525 + }, + { + "epoch": 0.2553371782345159, + "grad_norm": 2.31558876865855, + "learning_rate": 5.761916491032387e-06, + "loss": 0.1740753173828125, + "step": 29530 + }, + { + "epoch": 0.2553804117560592, + "grad_norm": 69.93158099070753, + "learning_rate": 5.7618369441069004e-06, + "loss": 0.2901630401611328, + "step": 29535 + }, + { + "epoch": 0.2554236452776024, + "grad_norm": 11.389557823613464, + "learning_rate": 5.761757384444066e-06, + "loss": 0.38522186279296877, + "step": 29540 + }, + { + "epoch": 0.2554668787991457, + "grad_norm": 16.853660857414987, + "learning_rate": 5.761677812044251e-06, + "loss": 0.34514694213867186, + "step": 29545 + }, + { + "epoch": 0.255510112320689, + "grad_norm": 4.1175685386975935, + "learning_rate": 5.761598226907823e-06, + "loss": 0.1644775390625, + "step": 29550 + }, + { + "epoch": 0.2555533458422322, + "grad_norm": 16.793426723569706, + "learning_rate": 5.761518629035147e-06, + "loss": 0.21012420654296876, + "step": 29555 + }, + { + "epoch": 0.2555965793637755, + "grad_norm": 1.7696751291203605, + "learning_rate": 5.761439018426591e-06, + "loss": 0.5536859512329102, + "step": 29560 + }, + { + "epoch": 0.2556398128853188, + "grad_norm": 4.437682660906226, + "learning_rate": 5.761359395082522e-06, + "loss": 0.6608604431152344, + "step": 29565 + }, + { + "epoch": 0.255683046406862, + "grad_norm": 28.177824382427485, + "learning_rate": 5.761279759003309e-06, + "loss": 0.6071159362792968, + "step": 29570 + }, + { + "epoch": 0.2557262799284053, + "grad_norm": 7.713436298903985, + "learning_rate": 5.761200110189316e-06, + "loss": 0.1502655029296875, + "step": 29575 + }, + { + "epoch": 0.2557695134499485, + "grad_norm": 6.617569033931722, + "learning_rate": 5.761120448640912e-06, + "loss": 0.1188201904296875, + "step": 29580 + }, + { + "epoch": 0.2558127469714918, + "grad_norm": 16.61046363968155, + "learning_rate": 5.7610407743584655e-06, + "loss": 0.2644622802734375, + "step": 29585 + }, + { + "epoch": 0.2558559804930351, + "grad_norm": 5.014699945914891, + "learning_rate": 5.760961087342342e-06, + "loss": 0.103326416015625, + "step": 29590 + }, + { + "epoch": 0.2558992140145783, + "grad_norm": 6.415307819303781, + "learning_rate": 5.760881387592911e-06, + "loss": 0.07023468017578124, + "step": 29595 + }, + { + "epoch": 0.2559424475361216, + "grad_norm": 0.1897901683985417, + "learning_rate": 5.760801675110538e-06, + "loss": 0.11849784851074219, + "step": 29600 + }, + { + "epoch": 0.2559856810576649, + "grad_norm": 1.7229840672131418, + "learning_rate": 5.760721949895592e-06, + "loss": 0.0599700927734375, + "step": 29605 + }, + { + "epoch": 0.2560289145792081, + "grad_norm": 27.07807249630645, + "learning_rate": 5.760642211948441e-06, + "loss": 0.51617431640625, + "step": 29610 + }, + { + "epoch": 0.2560721481007514, + "grad_norm": 6.9312152455290565, + "learning_rate": 5.760562461269451e-06, + "loss": 0.39151611328125, + "step": 29615 + }, + { + "epoch": 0.25611538162229464, + "grad_norm": 2.6726479114292823, + "learning_rate": 5.760482697858991e-06, + "loss": 0.21251983642578126, + "step": 29620 + }, + { + "epoch": 0.2561586151438379, + "grad_norm": 3.1048807299365286, + "learning_rate": 5.760402921717429e-06, + "loss": 0.34662704467773436, + "step": 29625 + }, + { + "epoch": 0.2562018486653812, + "grad_norm": 9.333158621076267, + "learning_rate": 5.760323132845133e-06, + "loss": 0.3453094482421875, + "step": 29630 + }, + { + "epoch": 0.25624508218692443, + "grad_norm": 23.509357461702983, + "learning_rate": 5.76024333124247e-06, + "loss": 0.11982192993164062, + "step": 29635 + }, + { + "epoch": 0.2562883157084677, + "grad_norm": 4.44059693030619, + "learning_rate": 5.76016351690981e-06, + "loss": 0.10403594970703126, + "step": 29640 + }, + { + "epoch": 0.256331549230011, + "grad_norm": 4.600754295432633, + "learning_rate": 5.760083689847518e-06, + "loss": 0.0558685302734375, + "step": 29645 + }, + { + "epoch": 0.25637478275155423, + "grad_norm": 1.318367734940939, + "learning_rate": 5.7600038500559644e-06, + "loss": 0.4240570068359375, + "step": 29650 + }, + { + "epoch": 0.2564180162730975, + "grad_norm": 17.341889796344606, + "learning_rate": 5.759923997535517e-06, + "loss": 0.27141571044921875, + "step": 29655 + }, + { + "epoch": 0.2564612497946408, + "grad_norm": 0.7967160436139313, + "learning_rate": 5.759844132286544e-06, + "loss": 0.195050048828125, + "step": 29660 + }, + { + "epoch": 0.25650448331618403, + "grad_norm": 3.1856175997789586, + "learning_rate": 5.759764254309415e-06, + "loss": 0.0362457275390625, + "step": 29665 + }, + { + "epoch": 0.2565477168377273, + "grad_norm": 34.042843924022506, + "learning_rate": 5.7596843636044955e-06, + "loss": 0.17216949462890624, + "step": 29670 + }, + { + "epoch": 0.25659095035927054, + "grad_norm": 16.992235049475138, + "learning_rate": 5.759604460172156e-06, + "loss": 0.6001754760742187, + "step": 29675 + }, + { + "epoch": 0.2566341838808138, + "grad_norm": 15.685685055224264, + "learning_rate": 5.7595245440127645e-06, + "loss": 0.2537139892578125, + "step": 29680 + }, + { + "epoch": 0.2566774174023571, + "grad_norm": 18.004416223027043, + "learning_rate": 5.759444615126689e-06, + "loss": 0.44864349365234374, + "step": 29685 + }, + { + "epoch": 0.25672065092390034, + "grad_norm": 8.85174499262916, + "learning_rate": 5.7593646735143e-06, + "loss": 0.6319244384765625, + "step": 29690 + }, + { + "epoch": 0.2567638844454436, + "grad_norm": 47.593463467073605, + "learning_rate": 5.7592847191759645e-06, + "loss": 0.2611907958984375, + "step": 29695 + }, + { + "epoch": 0.2568071179669869, + "grad_norm": 3.3016881529762188, + "learning_rate": 5.759204752112052e-06, + "loss": 0.34439697265625, + "step": 29700 + }, + { + "epoch": 0.25685035148853014, + "grad_norm": 7.494875245832712, + "learning_rate": 5.759124772322931e-06, + "loss": 0.11764488220214844, + "step": 29705 + }, + { + "epoch": 0.2568935850100734, + "grad_norm": 2.929402444034915, + "learning_rate": 5.759044779808969e-06, + "loss": 0.17158203125, + "step": 29710 + }, + { + "epoch": 0.25693681853161665, + "grad_norm": 17.709943469174682, + "learning_rate": 5.758964774570537e-06, + "loss": 0.17371063232421874, + "step": 29715 + }, + { + "epoch": 0.25698005205315994, + "grad_norm": 2.503473466842343, + "learning_rate": 5.758884756608004e-06, + "loss": 0.2917144775390625, + "step": 29720 + }, + { + "epoch": 0.2570232855747032, + "grad_norm": 4.232182628331576, + "learning_rate": 5.758804725921738e-06, + "loss": 0.12505416870117186, + "step": 29725 + }, + { + "epoch": 0.25706651909624645, + "grad_norm": 0.6707699117218283, + "learning_rate": 5.7587246825121085e-06, + "loss": 0.05842437744140625, + "step": 29730 + }, + { + "epoch": 0.25710975261778973, + "grad_norm": 0.6568641949466163, + "learning_rate": 5.758644626379484e-06, + "loss": 0.10358734130859375, + "step": 29735 + }, + { + "epoch": 0.257152986139333, + "grad_norm": 0.7561845096055224, + "learning_rate": 5.758564557524234e-06, + "loss": 0.16720428466796874, + "step": 29740 + }, + { + "epoch": 0.25719621966087625, + "grad_norm": 0.31866293029578985, + "learning_rate": 5.7584844759467284e-06, + "loss": 0.04810333251953125, + "step": 29745 + }, + { + "epoch": 0.25723945318241953, + "grad_norm": 0.9936663701445886, + "learning_rate": 5.758404381647336e-06, + "loss": 0.2222076416015625, + "step": 29750 + }, + { + "epoch": 0.25728268670396276, + "grad_norm": 7.391274159117798, + "learning_rate": 5.758324274626427e-06, + "loss": 0.1760772705078125, + "step": 29755 + }, + { + "epoch": 0.25732592022550604, + "grad_norm": 2.95980879958975, + "learning_rate": 5.7582441548843685e-06, + "loss": 0.14092597961425782, + "step": 29760 + }, + { + "epoch": 0.25736915374704933, + "grad_norm": 22.55179908954351, + "learning_rate": 5.758164022421533e-06, + "loss": 0.07941741943359375, + "step": 29765 + }, + { + "epoch": 0.25741238726859256, + "grad_norm": 4.35055657277351, + "learning_rate": 5.758083877238289e-06, + "loss": 0.10885772705078126, + "step": 29770 + }, + { + "epoch": 0.25745562079013584, + "grad_norm": 440.95206326107893, + "learning_rate": 5.758003719335005e-06, + "loss": 0.22684326171875, + "step": 29775 + }, + { + "epoch": 0.2574988543116791, + "grad_norm": 24.945921513708342, + "learning_rate": 5.757923548712052e-06, + "loss": 0.2293010711669922, + "step": 29780 + }, + { + "epoch": 0.25754208783322236, + "grad_norm": 4.768396208742784, + "learning_rate": 5.7578433653698e-06, + "loss": 0.34122161865234374, + "step": 29785 + }, + { + "epoch": 0.25758532135476564, + "grad_norm": 35.06110598063697, + "learning_rate": 5.757763169308617e-06, + "loss": 0.2393280029296875, + "step": 29790 + }, + { + "epoch": 0.25762855487630887, + "grad_norm": 4.156979260469825, + "learning_rate": 5.757682960528875e-06, + "loss": 0.19118499755859375, + "step": 29795 + }, + { + "epoch": 0.25767178839785215, + "grad_norm": 0.04264374048322781, + "learning_rate": 5.757602739030942e-06, + "loss": 0.27152385711669924, + "step": 29800 + }, + { + "epoch": 0.25771502191939544, + "grad_norm": 5.855601421822506, + "learning_rate": 5.7575225048151886e-06, + "loss": 0.12941131591796876, + "step": 29805 + }, + { + "epoch": 0.25775825544093867, + "grad_norm": 0.3865288134414569, + "learning_rate": 5.757442257881986e-06, + "loss": 0.3184173583984375, + "step": 29810 + }, + { + "epoch": 0.25780148896248195, + "grad_norm": 1.0581834702060284, + "learning_rate": 5.757361998231703e-06, + "loss": 0.0970703125, + "step": 29815 + }, + { + "epoch": 0.25784472248402523, + "grad_norm": 12.74501701821418, + "learning_rate": 5.757281725864709e-06, + "loss": 0.1403839111328125, + "step": 29820 + }, + { + "epoch": 0.25788795600556846, + "grad_norm": 3.5104207992770555, + "learning_rate": 5.757201440781377e-06, + "loss": 0.03402786254882813, + "step": 29825 + }, + { + "epoch": 0.25793118952711175, + "grad_norm": 13.45855846091519, + "learning_rate": 5.757121142982074e-06, + "loss": 0.11767501831054687, + "step": 29830 + }, + { + "epoch": 0.25797442304865503, + "grad_norm": 0.7911338826184844, + "learning_rate": 5.757040832467173e-06, + "loss": 0.22488861083984374, + "step": 29835 + }, + { + "epoch": 0.25801765657019826, + "grad_norm": 2.9591096928033185, + "learning_rate": 5.756960509237043e-06, + "loss": 0.378533935546875, + "step": 29840 + }, + { + "epoch": 0.25806089009174155, + "grad_norm": 1.9043192649243936, + "learning_rate": 5.756880173292055e-06, + "loss": 0.1422821044921875, + "step": 29845 + }, + { + "epoch": 0.2581041236132848, + "grad_norm": 46.94767003552795, + "learning_rate": 5.756799824632579e-06, + "loss": 0.186529541015625, + "step": 29850 + }, + { + "epoch": 0.25814735713482806, + "grad_norm": 36.04220878155575, + "learning_rate": 5.756719463258986e-06, + "loss": 0.5102798461914062, + "step": 29855 + }, + { + "epoch": 0.25819059065637134, + "grad_norm": 21.724416633674338, + "learning_rate": 5.756639089171647e-06, + "loss": 0.20252914428710939, + "step": 29860 + }, + { + "epoch": 0.2582338241779146, + "grad_norm": 4.214967144373272, + "learning_rate": 5.7565587023709305e-06, + "loss": 0.12202377319335937, + "step": 29865 + }, + { + "epoch": 0.25827705769945786, + "grad_norm": 4.826748426519226, + "learning_rate": 5.756478302857209e-06, + "loss": 0.339697265625, + "step": 29870 + }, + { + "epoch": 0.25832029122100114, + "grad_norm": 24.9754864112719, + "learning_rate": 5.756397890630854e-06, + "loss": 0.0592315673828125, + "step": 29875 + }, + { + "epoch": 0.25836352474254437, + "grad_norm": 0.38463931128030626, + "learning_rate": 5.756317465692236e-06, + "loss": 0.0347076416015625, + "step": 29880 + }, + { + "epoch": 0.25840675826408765, + "grad_norm": 1.5691433821742808, + "learning_rate": 5.756237028041724e-06, + "loss": 0.14308948516845704, + "step": 29885 + }, + { + "epoch": 0.2584499917856309, + "grad_norm": 10.853307162938158, + "learning_rate": 5.756156577679692e-06, + "loss": 0.21156883239746094, + "step": 29890 + }, + { + "epoch": 0.25849322530717417, + "grad_norm": 2.9637036256543987, + "learning_rate": 5.756076114606509e-06, + "loss": 0.12814254760742189, + "step": 29895 + }, + { + "epoch": 0.25853645882871745, + "grad_norm": 0.6505591748405969, + "learning_rate": 5.755995638822545e-06, + "loss": 0.1569915771484375, + "step": 29900 + }, + { + "epoch": 0.2585796923502607, + "grad_norm": 2.293786606088865, + "learning_rate": 5.755915150328174e-06, + "loss": 0.09914703369140625, + "step": 29905 + }, + { + "epoch": 0.25862292587180397, + "grad_norm": 9.818822767006699, + "learning_rate": 5.755834649123765e-06, + "loss": 0.13575057983398436, + "step": 29910 + }, + { + "epoch": 0.25866615939334725, + "grad_norm": 1.295016276666932, + "learning_rate": 5.755754135209691e-06, + "loss": 0.13411502838134765, + "step": 29915 + }, + { + "epoch": 0.2587093929148905, + "grad_norm": 0.34298728692224467, + "learning_rate": 5.755673608586322e-06, + "loss": 0.07199325561523437, + "step": 29920 + }, + { + "epoch": 0.25875262643643376, + "grad_norm": 12.105976828874155, + "learning_rate": 5.75559306925403e-06, + "loss": 0.12635650634765624, + "step": 29925 + }, + { + "epoch": 0.258795859957977, + "grad_norm": 15.58479720041487, + "learning_rate": 5.755512517213186e-06, + "loss": 0.2811004638671875, + "step": 29930 + }, + { + "epoch": 0.2588390934795203, + "grad_norm": 0.564216703497639, + "learning_rate": 5.755431952464162e-06, + "loss": 0.3648223876953125, + "step": 29935 + }, + { + "epoch": 0.25888232700106356, + "grad_norm": 15.174255431842207, + "learning_rate": 5.755351375007328e-06, + "loss": 0.35789337158203127, + "step": 29940 + }, + { + "epoch": 0.2589255605226068, + "grad_norm": 1.2757619870969914, + "learning_rate": 5.755270784843059e-06, + "loss": 0.23293991088867189, + "step": 29945 + }, + { + "epoch": 0.2589687940441501, + "grad_norm": 5.43700831019197, + "learning_rate": 5.755190181971723e-06, + "loss": 0.18602447509765624, + "step": 29950 + }, + { + "epoch": 0.25901202756569336, + "grad_norm": 3.5876465537645674, + "learning_rate": 5.7551095663936945e-06, + "loss": 0.19518890380859374, + "step": 29955 + }, + { + "epoch": 0.2590552610872366, + "grad_norm": 0.5651620226746399, + "learning_rate": 5.755028938109343e-06, + "loss": 0.11735076904296875, + "step": 29960 + }, + { + "epoch": 0.2590984946087799, + "grad_norm": 7.191108366094181, + "learning_rate": 5.754948297119043e-06, + "loss": 0.137335205078125, + "step": 29965 + }, + { + "epoch": 0.2591417281303231, + "grad_norm": 8.116152705652151, + "learning_rate": 5.7548676434231635e-06, + "loss": 0.132867431640625, + "step": 29970 + }, + { + "epoch": 0.2591849616518664, + "grad_norm": 16.984779332235973, + "learning_rate": 5.754786977022078e-06, + "loss": 0.34679412841796875, + "step": 29975 + }, + { + "epoch": 0.25922819517340967, + "grad_norm": 5.323848770010101, + "learning_rate": 5.754706297916159e-06, + "loss": 0.08420486450195312, + "step": 29980 + }, + { + "epoch": 0.2592714286949529, + "grad_norm": 14.676252800026582, + "learning_rate": 5.754625606105777e-06, + "loss": 0.3102748870849609, + "step": 29985 + }, + { + "epoch": 0.2593146622164962, + "grad_norm": 42.89511865182231, + "learning_rate": 5.754544901591306e-06, + "loss": 0.3118408203125, + "step": 29990 + }, + { + "epoch": 0.25935789573803947, + "grad_norm": 3.4008490967988587, + "learning_rate": 5.754464184373117e-06, + "loss": 0.07225875854492188, + "step": 29995 + }, + { + "epoch": 0.2594011292595827, + "grad_norm": 7.306246873794658, + "learning_rate": 5.7543834544515836e-06, + "loss": 0.037923431396484374, + "step": 30000 + }, + { + "epoch": 0.259444362781126, + "grad_norm": 2.604598888052297, + "learning_rate": 5.754302711827076e-06, + "loss": 0.269073486328125, + "step": 30005 + }, + { + "epoch": 0.2594875963026692, + "grad_norm": 7.531879259862805, + "learning_rate": 5.7542219564999675e-06, + "loss": 0.12037353515625, + "step": 30010 + }, + { + "epoch": 0.2595308298242125, + "grad_norm": 4.372346148131458, + "learning_rate": 5.754141188470631e-06, + "loss": 0.07664642333984376, + "step": 30015 + }, + { + "epoch": 0.2595740633457558, + "grad_norm": 8.015684957441726, + "learning_rate": 5.754060407739439e-06, + "loss": 0.1669097900390625, + "step": 30020 + }, + { + "epoch": 0.259617296867299, + "grad_norm": 0.4244771195273673, + "learning_rate": 5.753979614306763e-06, + "loss": 0.034641265869140625, + "step": 30025 + }, + { + "epoch": 0.2596605303888423, + "grad_norm": 9.776400525780574, + "learning_rate": 5.753898808172977e-06, + "loss": 0.141448974609375, + "step": 30030 + }, + { + "epoch": 0.2597037639103856, + "grad_norm": 9.532579907003075, + "learning_rate": 5.753817989338453e-06, + "loss": 0.1674589157104492, + "step": 30035 + }, + { + "epoch": 0.2597469974319288, + "grad_norm": 1.893011781503566, + "learning_rate": 5.753737157803564e-06, + "loss": 0.141552734375, + "step": 30040 + }, + { + "epoch": 0.2597902309534721, + "grad_norm": 31.527492307665984, + "learning_rate": 5.753656313568682e-06, + "loss": 0.16613998413085937, + "step": 30045 + }, + { + "epoch": 0.2598334644750154, + "grad_norm": 42.8793140249001, + "learning_rate": 5.7535754566341805e-06, + "loss": 0.28173828125, + "step": 30050 + }, + { + "epoch": 0.2598766979965586, + "grad_norm": 16.791385014128885, + "learning_rate": 5.753494587000432e-06, + "loss": 0.23429412841796876, + "step": 30055 + }, + { + "epoch": 0.2599199315181019, + "grad_norm": 10.59698448230264, + "learning_rate": 5.7534137046678105e-06, + "loss": 0.09347457885742187, + "step": 30060 + }, + { + "epoch": 0.2599631650396451, + "grad_norm": 0.9207298255379748, + "learning_rate": 5.753332809636688e-06, + "loss": 0.4300956726074219, + "step": 30065 + }, + { + "epoch": 0.2600063985611884, + "grad_norm": 3.8170526853301165, + "learning_rate": 5.753251901907438e-06, + "loss": 0.1107757568359375, + "step": 30070 + }, + { + "epoch": 0.2600496320827317, + "grad_norm": 2.4903758347350236, + "learning_rate": 5.753170981480433e-06, + "loss": 0.179248046875, + "step": 30075 + }, + { + "epoch": 0.2600928656042749, + "grad_norm": 1.3076905584855576, + "learning_rate": 5.753090048356047e-06, + "loss": 0.28118133544921875, + "step": 30080 + }, + { + "epoch": 0.2601360991258182, + "grad_norm": 15.510709555678384, + "learning_rate": 5.753009102534653e-06, + "loss": 0.59049072265625, + "step": 30085 + }, + { + "epoch": 0.2601793326473615, + "grad_norm": 4.162287320593462, + "learning_rate": 5.752928144016624e-06, + "loss": 0.1039215087890625, + "step": 30090 + }, + { + "epoch": 0.2602225661689047, + "grad_norm": 1.349939578975803, + "learning_rate": 5.752847172802335e-06, + "loss": 0.071954345703125, + "step": 30095 + }, + { + "epoch": 0.260265799690448, + "grad_norm": 5.336238024103412, + "learning_rate": 5.752766188892156e-06, + "loss": 0.1474365234375, + "step": 30100 + }, + { + "epoch": 0.2603090332119912, + "grad_norm": 0.6628467216859765, + "learning_rate": 5.752685192286464e-06, + "loss": 0.23746490478515625, + "step": 30105 + }, + { + "epoch": 0.2603522667335345, + "grad_norm": 64.66951204590966, + "learning_rate": 5.75260418298563e-06, + "loss": 0.4545654296875, + "step": 30110 + }, + { + "epoch": 0.2603955002550778, + "grad_norm": 11.660694925232553, + "learning_rate": 5.75252316099003e-06, + "loss": 0.09368896484375, + "step": 30115 + }, + { + "epoch": 0.260438733776621, + "grad_norm": 10.71945676786599, + "learning_rate": 5.752442126300036e-06, + "loss": 0.053957366943359376, + "step": 30120 + }, + { + "epoch": 0.2604819672981643, + "grad_norm": 15.57369351388551, + "learning_rate": 5.752361078916022e-06, + "loss": 0.3252407073974609, + "step": 30125 + }, + { + "epoch": 0.2605252008197076, + "grad_norm": 0.4619500480300233, + "learning_rate": 5.752280018838361e-06, + "loss": 0.14481620788574218, + "step": 30130 + }, + { + "epoch": 0.2605684343412508, + "grad_norm": 12.63599673042512, + "learning_rate": 5.752198946067429e-06, + "loss": 0.264007568359375, + "step": 30135 + }, + { + "epoch": 0.2606116678627941, + "grad_norm": 5.117505462942349, + "learning_rate": 5.752117860603599e-06, + "loss": 0.11352386474609374, + "step": 30140 + }, + { + "epoch": 0.26065490138433733, + "grad_norm": 16.17568808582178, + "learning_rate": 5.752036762447243e-06, + "loss": 0.3080963134765625, + "step": 30145 + }, + { + "epoch": 0.2606981349058806, + "grad_norm": 10.877658256796169, + "learning_rate": 5.751955651598737e-06, + "loss": 0.07359695434570312, + "step": 30150 + }, + { + "epoch": 0.2607413684274239, + "grad_norm": 8.238231974933043, + "learning_rate": 5.751874528058456e-06, + "loss": 0.20023193359375, + "step": 30155 + }, + { + "epoch": 0.26078460194896713, + "grad_norm": 10.915544283282182, + "learning_rate": 5.751793391826772e-06, + "loss": 0.0909637451171875, + "step": 30160 + }, + { + "epoch": 0.2608278354705104, + "grad_norm": 1.2466819609826143, + "learning_rate": 5.75171224290406e-06, + "loss": 0.02739410400390625, + "step": 30165 + }, + { + "epoch": 0.2608710689920537, + "grad_norm": 12.434476570719442, + "learning_rate": 5.751631081290694e-06, + "loss": 0.1626729965209961, + "step": 30170 + }, + { + "epoch": 0.26091430251359693, + "grad_norm": 14.575077660545348, + "learning_rate": 5.7515499069870495e-06, + "loss": 0.17551422119140625, + "step": 30175 + }, + { + "epoch": 0.2609575360351402, + "grad_norm": 15.602068375428177, + "learning_rate": 5.751468719993499e-06, + "loss": 0.1321990966796875, + "step": 30180 + }, + { + "epoch": 0.26100076955668344, + "grad_norm": 4.583573638270261, + "learning_rate": 5.751387520310418e-06, + "loss": 0.299945068359375, + "step": 30185 + }, + { + "epoch": 0.2610440030782267, + "grad_norm": 8.93537169366545, + "learning_rate": 5.751306307938182e-06, + "loss": 0.4007545471191406, + "step": 30190 + }, + { + "epoch": 0.26108723659977, + "grad_norm": 13.819521832277394, + "learning_rate": 5.751225082877163e-06, + "loss": 0.1173095703125, + "step": 30195 + }, + { + "epoch": 0.26113047012131324, + "grad_norm": 2.5247370509971816, + "learning_rate": 5.751143845127738e-06, + "loss": 0.24638671875, + "step": 30200 + }, + { + "epoch": 0.2611737036428565, + "grad_norm": 0.40179825229070604, + "learning_rate": 5.75106259469028e-06, + "loss": 0.34019775390625, + "step": 30205 + }, + { + "epoch": 0.2612169371643998, + "grad_norm": 2.310487644405855, + "learning_rate": 5.750981331565164e-06, + "loss": 0.2226593017578125, + "step": 30210 + }, + { + "epoch": 0.26126017068594304, + "grad_norm": 6.086945667817048, + "learning_rate": 5.750900055752767e-06, + "loss": 0.0350189208984375, + "step": 30215 + }, + { + "epoch": 0.2613034042074863, + "grad_norm": 0.6082174677585074, + "learning_rate": 5.75081876725346e-06, + "loss": 0.03491363525390625, + "step": 30220 + }, + { + "epoch": 0.2613466377290296, + "grad_norm": 23.387868078612588, + "learning_rate": 5.75073746606762e-06, + "loss": 0.11654052734375, + "step": 30225 + }, + { + "epoch": 0.26138987125057284, + "grad_norm": 20.25148172826352, + "learning_rate": 5.750656152195624e-06, + "loss": 0.31194343566894533, + "step": 30230 + }, + { + "epoch": 0.2614331047721161, + "grad_norm": 4.067066671728714, + "learning_rate": 5.750574825637843e-06, + "loss": 0.0768707275390625, + "step": 30235 + }, + { + "epoch": 0.26147633829365935, + "grad_norm": 41.88678431704598, + "learning_rate": 5.750493486394654e-06, + "loss": 0.46420440673828123, + "step": 30240 + }, + { + "epoch": 0.26151957181520263, + "grad_norm": 6.497219730959117, + "learning_rate": 5.750412134466433e-06, + "loss": 0.0747802734375, + "step": 30245 + }, + { + "epoch": 0.2615628053367459, + "grad_norm": 5.418550542757546, + "learning_rate": 5.750330769853554e-06, + "loss": 0.23946380615234375, + "step": 30250 + }, + { + "epoch": 0.26160603885828915, + "grad_norm": 24.732654309698226, + "learning_rate": 5.750249392556392e-06, + "loss": 0.147198486328125, + "step": 30255 + }, + { + "epoch": 0.26164927237983243, + "grad_norm": 37.687674273995896, + "learning_rate": 5.750168002575323e-06, + "loss": 0.1994384765625, + "step": 30260 + }, + { + "epoch": 0.2616925059013757, + "grad_norm": 37.785859509248695, + "learning_rate": 5.750086599910723e-06, + "loss": 0.40816650390625, + "step": 30265 + }, + { + "epoch": 0.26173573942291894, + "grad_norm": 41.54675032806076, + "learning_rate": 5.750005184562966e-06, + "loss": 0.5261890411376953, + "step": 30270 + }, + { + "epoch": 0.26177897294446223, + "grad_norm": 13.774854476510095, + "learning_rate": 5.749923756532428e-06, + "loss": 0.148773193359375, + "step": 30275 + }, + { + "epoch": 0.26182220646600546, + "grad_norm": 13.488060472969469, + "learning_rate": 5.749842315819484e-06, + "loss": 0.133563232421875, + "step": 30280 + }, + { + "epoch": 0.26186543998754874, + "grad_norm": 27.713013597390507, + "learning_rate": 5.749760862424511e-06, + "loss": 0.2439422607421875, + "step": 30285 + }, + { + "epoch": 0.261908673509092, + "grad_norm": 1.249172159349194, + "learning_rate": 5.749679396347884e-06, + "loss": 0.080755615234375, + "step": 30290 + }, + { + "epoch": 0.26195190703063526, + "grad_norm": 2.6207083158364863, + "learning_rate": 5.749597917589978e-06, + "loss": 0.15970916748046876, + "step": 30295 + }, + { + "epoch": 0.26199514055217854, + "grad_norm": 1.944292722817126, + "learning_rate": 5.749516426151171e-06, + "loss": 0.10115966796875, + "step": 30300 + }, + { + "epoch": 0.2620383740737218, + "grad_norm": 47.91258500950611, + "learning_rate": 5.749434922031835e-06, + "loss": 0.3956329345703125, + "step": 30305 + }, + { + "epoch": 0.26208160759526505, + "grad_norm": 16.554486090231077, + "learning_rate": 5.7493534052323486e-06, + "loss": 0.392877197265625, + "step": 30310 + }, + { + "epoch": 0.26212484111680834, + "grad_norm": 3.8874737968844424, + "learning_rate": 5.7492718757530876e-06, + "loss": 0.1005126953125, + "step": 30315 + }, + { + "epoch": 0.26216807463835157, + "grad_norm": 45.9622624072379, + "learning_rate": 5.749190333594428e-06, + "loss": 0.4865833282470703, + "step": 30320 + }, + { + "epoch": 0.26221130815989485, + "grad_norm": 3.6532382272500032, + "learning_rate": 5.749108778756744e-06, + "loss": 0.1821807861328125, + "step": 30325 + }, + { + "epoch": 0.26225454168143814, + "grad_norm": 25.009158683386808, + "learning_rate": 5.749027211240415e-06, + "loss": 0.3415245056152344, + "step": 30330 + }, + { + "epoch": 0.26229777520298136, + "grad_norm": 29.73681089281918, + "learning_rate": 5.748945631045815e-06, + "loss": 0.1181610107421875, + "step": 30335 + }, + { + "epoch": 0.26234100872452465, + "grad_norm": 22.760920138743252, + "learning_rate": 5.74886403817332e-06, + "loss": 0.09289970397949218, + "step": 30340 + }, + { + "epoch": 0.26238424224606793, + "grad_norm": 9.747508588809438, + "learning_rate": 5.7487824326233075e-06, + "loss": 0.0868408203125, + "step": 30345 + }, + { + "epoch": 0.26242747576761116, + "grad_norm": 2.606255664539968, + "learning_rate": 5.748700814396153e-06, + "loss": 0.09660873413085938, + "step": 30350 + }, + { + "epoch": 0.26247070928915445, + "grad_norm": 1.0565337363920615, + "learning_rate": 5.748619183492234e-06, + "loss": 0.10028076171875, + "step": 30355 + }, + { + "epoch": 0.2625139428106977, + "grad_norm": 2.1035599333296955, + "learning_rate": 5.748537539911926e-06, + "loss": 0.1082000732421875, + "step": 30360 + }, + { + "epoch": 0.26255717633224096, + "grad_norm": 1.4930529001688806, + "learning_rate": 5.748455883655605e-06, + "loss": 0.10718154907226562, + "step": 30365 + }, + { + "epoch": 0.26260040985378424, + "grad_norm": 10.524297374069578, + "learning_rate": 5.748374214723649e-06, + "loss": 0.1332550048828125, + "step": 30370 + }, + { + "epoch": 0.2626436433753275, + "grad_norm": 1.9438452900774768, + "learning_rate": 5.748292533116433e-06, + "loss": 0.092584228515625, + "step": 30375 + }, + { + "epoch": 0.26268687689687076, + "grad_norm": 18.996381448961465, + "learning_rate": 5.748210838834335e-06, + "loss": 0.2692291259765625, + "step": 30380 + }, + { + "epoch": 0.26273011041841404, + "grad_norm": 11.24856008937148, + "learning_rate": 5.748129131877732e-06, + "loss": 0.1716583251953125, + "step": 30385 + }, + { + "epoch": 0.26277334393995727, + "grad_norm": 42.77557694419618, + "learning_rate": 5.748047412247e-06, + "loss": 0.603765869140625, + "step": 30390 + }, + { + "epoch": 0.26281657746150056, + "grad_norm": 24.611810265466712, + "learning_rate": 5.747965679942516e-06, + "loss": 0.108160400390625, + "step": 30395 + }, + { + "epoch": 0.26285981098304384, + "grad_norm": 3.9383168788487555, + "learning_rate": 5.747883934964657e-06, + "loss": 0.5453475952148438, + "step": 30400 + }, + { + "epoch": 0.26290304450458707, + "grad_norm": 43.15156095121709, + "learning_rate": 5.747802177313801e-06, + "loss": 0.26343536376953125, + "step": 30405 + }, + { + "epoch": 0.26294627802613035, + "grad_norm": 6.656897898815735, + "learning_rate": 5.7477204069903236e-06, + "loss": 0.30601348876953127, + "step": 30410 + }, + { + "epoch": 0.2629895115476736, + "grad_norm": 0.9855673572538496, + "learning_rate": 5.747638623994603e-06, + "loss": 0.261065673828125, + "step": 30415 + }, + { + "epoch": 0.26303274506921687, + "grad_norm": 16.44580153249226, + "learning_rate": 5.747556828327014e-06, + "loss": 0.1736480712890625, + "step": 30420 + }, + { + "epoch": 0.26307597859076015, + "grad_norm": 15.092101661615954, + "learning_rate": 5.7474750199879374e-06, + "loss": 0.1785003662109375, + "step": 30425 + }, + { + "epoch": 0.2631192121123034, + "grad_norm": 2.446725512801093, + "learning_rate": 5.747393198977748e-06, + "loss": 0.23663330078125, + "step": 30430 + }, + { + "epoch": 0.26316244563384666, + "grad_norm": 17.170692456827044, + "learning_rate": 5.7473113652968245e-06, + "loss": 0.20866928100585938, + "step": 30435 + }, + { + "epoch": 0.26320567915538995, + "grad_norm": 27.651522458407044, + "learning_rate": 5.747229518945544e-06, + "loss": 0.20612945556640624, + "step": 30440 + }, + { + "epoch": 0.2632489126769332, + "grad_norm": 23.45208133220222, + "learning_rate": 5.747147659924282e-06, + "loss": 0.2093994140625, + "step": 30445 + }, + { + "epoch": 0.26329214619847646, + "grad_norm": 1.0230788338128973, + "learning_rate": 5.74706578823342e-06, + "loss": 0.23432998657226561, + "step": 30450 + }, + { + "epoch": 0.2633353797200197, + "grad_norm": 5.320185016314561, + "learning_rate": 5.746983903873332e-06, + "loss": 0.1809967041015625, + "step": 30455 + }, + { + "epoch": 0.263378613241563, + "grad_norm": 2.273459739719861, + "learning_rate": 5.746902006844398e-06, + "loss": 0.06806182861328125, + "step": 30460 + }, + { + "epoch": 0.26342184676310626, + "grad_norm": 0.9621786632349815, + "learning_rate": 5.746820097146993e-06, + "loss": 0.2089202880859375, + "step": 30465 + }, + { + "epoch": 0.2634650802846495, + "grad_norm": 28.75689338631207, + "learning_rate": 5.746738174781498e-06, + "loss": 0.2684165954589844, + "step": 30470 + }, + { + "epoch": 0.2635083138061928, + "grad_norm": 0.5569859652188214, + "learning_rate": 5.746656239748288e-06, + "loss": 0.168695068359375, + "step": 30475 + }, + { + "epoch": 0.26355154732773606, + "grad_norm": 0.33808411482234535, + "learning_rate": 5.746574292047743e-06, + "loss": 0.2586475372314453, + "step": 30480 + }, + { + "epoch": 0.2635947808492793, + "grad_norm": 0.13885330574858556, + "learning_rate": 5.74649233168024e-06, + "loss": 0.15664749145507811, + "step": 30485 + }, + { + "epoch": 0.26363801437082257, + "grad_norm": 3.46773651357115, + "learning_rate": 5.746410358646156e-06, + "loss": 0.28536300659179686, + "step": 30490 + }, + { + "epoch": 0.2636812478923658, + "grad_norm": 5.253408429797069, + "learning_rate": 5.746328372945871e-06, + "loss": 0.230230712890625, + "step": 30495 + }, + { + "epoch": 0.2637244814139091, + "grad_norm": 0.6908658219204618, + "learning_rate": 5.746246374579762e-06, + "loss": 0.4334381103515625, + "step": 30500 + }, + { + "epoch": 0.26376771493545237, + "grad_norm": 38.61022892549641, + "learning_rate": 5.746164363548208e-06, + "loss": 0.3236968994140625, + "step": 30505 + }, + { + "epoch": 0.2638109484569956, + "grad_norm": 31.681505188915498, + "learning_rate": 5.746082339851586e-06, + "loss": 0.3047882080078125, + "step": 30510 + }, + { + "epoch": 0.2638541819785389, + "grad_norm": 1.5770727311750719, + "learning_rate": 5.746000303490274e-06, + "loss": 0.13722381591796876, + "step": 30515 + }, + { + "epoch": 0.26389741550008217, + "grad_norm": 2.7027372959463274, + "learning_rate": 5.7459182544646525e-06, + "loss": 0.023770904541015624, + "step": 30520 + }, + { + "epoch": 0.2639406490216254, + "grad_norm": 47.13013987460967, + "learning_rate": 5.745836192775099e-06, + "loss": 0.6732421875, + "step": 30525 + }, + { + "epoch": 0.2639838825431687, + "grad_norm": 58.190990717898764, + "learning_rate": 5.745754118421991e-06, + "loss": 0.23560791015625, + "step": 30530 + }, + { + "epoch": 0.2640271160647119, + "grad_norm": 39.46398876633675, + "learning_rate": 5.745672031405707e-06, + "loss": 0.28219451904296877, + "step": 30535 + }, + { + "epoch": 0.2640703495862552, + "grad_norm": 4.5163729840257965, + "learning_rate": 5.745589931726627e-06, + "loss": 0.1858642578125, + "step": 30540 + }, + { + "epoch": 0.2641135831077985, + "grad_norm": 5.430492210215022, + "learning_rate": 5.745507819385128e-06, + "loss": 0.1961883544921875, + "step": 30545 + }, + { + "epoch": 0.2641568166293417, + "grad_norm": 4.754952566805617, + "learning_rate": 5.74542569438159e-06, + "loss": 0.08917884826660157, + "step": 30550 + }, + { + "epoch": 0.264200050150885, + "grad_norm": 0.23059919954144387, + "learning_rate": 5.7453435567163916e-06, + "loss": 0.39433135986328127, + "step": 30555 + }, + { + "epoch": 0.2642432836724283, + "grad_norm": 5.171491911956582, + "learning_rate": 5.745261406389912e-06, + "loss": 0.199981689453125, + "step": 30560 + }, + { + "epoch": 0.2642865171939715, + "grad_norm": 19.755929742861955, + "learning_rate": 5.7451792434025285e-06, + "loss": 0.4105224609375, + "step": 30565 + }, + { + "epoch": 0.2643297507155148, + "grad_norm": 7.999416841807281, + "learning_rate": 5.745097067754621e-06, + "loss": 0.1909820556640625, + "step": 30570 + }, + { + "epoch": 0.2643729842370581, + "grad_norm": 0.8623430920354125, + "learning_rate": 5.745014879446569e-06, + "loss": 0.3771816253662109, + "step": 30575 + }, + { + "epoch": 0.2644162177586013, + "grad_norm": 7.9206390714045085, + "learning_rate": 5.744932678478749e-06, + "loss": 0.18575515747070312, + "step": 30580 + }, + { + "epoch": 0.2644594512801446, + "grad_norm": 0.3890104124630792, + "learning_rate": 5.744850464851543e-06, + "loss": 0.27759780883789065, + "step": 30585 + }, + { + "epoch": 0.2645026848016878, + "grad_norm": 124.70252115807136, + "learning_rate": 5.74476823856533e-06, + "loss": 0.10306472778320312, + "step": 30590 + }, + { + "epoch": 0.2645459183232311, + "grad_norm": 10.67188516999093, + "learning_rate": 5.744685999620488e-06, + "loss": 0.4142578125, + "step": 30595 + }, + { + "epoch": 0.2645891518447744, + "grad_norm": 3.9043848561469083, + "learning_rate": 5.744603748017397e-06, + "loss": 0.390594482421875, + "step": 30600 + }, + { + "epoch": 0.2646323853663176, + "grad_norm": 1.4215273309864176, + "learning_rate": 5.744521483756435e-06, + "loss": 0.31139678955078126, + "step": 30605 + }, + { + "epoch": 0.2646756188878609, + "grad_norm": 2.9215625544903423, + "learning_rate": 5.744439206837983e-06, + "loss": 0.0300872802734375, + "step": 30610 + }, + { + "epoch": 0.2647188524094042, + "grad_norm": 23.07701908956356, + "learning_rate": 5.74435691726242e-06, + "loss": 0.18850860595703126, + "step": 30615 + }, + { + "epoch": 0.2647620859309474, + "grad_norm": 24.899559243380693, + "learning_rate": 5.744274615030126e-06, + "loss": 0.15344696044921874, + "step": 30620 + }, + { + "epoch": 0.2648053194524907, + "grad_norm": 8.617740719944987, + "learning_rate": 5.744192300141479e-06, + "loss": 0.09106330871582032, + "step": 30625 + }, + { + "epoch": 0.2648485529740339, + "grad_norm": 0.3225655623374317, + "learning_rate": 5.74410997259686e-06, + "loss": 0.1894805908203125, + "step": 30630 + }, + { + "epoch": 0.2648917864955772, + "grad_norm": 3.0686569504095838, + "learning_rate": 5.744027632396648e-06, + "loss": 0.0929443359375, + "step": 30635 + }, + { + "epoch": 0.2649350200171205, + "grad_norm": 24.89242384071751, + "learning_rate": 5.7439452795412225e-06, + "loss": 0.140362548828125, + "step": 30640 + }, + { + "epoch": 0.2649782535386637, + "grad_norm": 9.663746526385196, + "learning_rate": 5.743862914030964e-06, + "loss": 0.13154830932617187, + "step": 30645 + }, + { + "epoch": 0.265021487060207, + "grad_norm": 10.782038265754899, + "learning_rate": 5.743780535866254e-06, + "loss": 0.4873291015625, + "step": 30650 + }, + { + "epoch": 0.2650647205817503, + "grad_norm": 9.92815458418164, + "learning_rate": 5.743698145047469e-06, + "loss": 0.12787094116210937, + "step": 30655 + }, + { + "epoch": 0.2651079541032935, + "grad_norm": 9.667435613583821, + "learning_rate": 5.74361574157499e-06, + "loss": 0.12224044799804687, + "step": 30660 + }, + { + "epoch": 0.2651511876248368, + "grad_norm": 28.466029434618367, + "learning_rate": 5.743533325449198e-06, + "loss": 0.13540802001953126, + "step": 30665 + }, + { + "epoch": 0.26519442114638003, + "grad_norm": 23.524910909522966, + "learning_rate": 5.743450896670475e-06, + "loss": 0.23020095825195314, + "step": 30670 + }, + { + "epoch": 0.2652376546679233, + "grad_norm": 25.9035243328799, + "learning_rate": 5.7433684552391966e-06, + "loss": 0.128173828125, + "step": 30675 + }, + { + "epoch": 0.2652808881894666, + "grad_norm": 1.9094955001931577, + "learning_rate": 5.743286001155746e-06, + "loss": 0.25857696533203123, + "step": 30680 + }, + { + "epoch": 0.26532412171100983, + "grad_norm": 9.69419950267812, + "learning_rate": 5.743203534420502e-06, + "loss": 0.41520843505859373, + "step": 30685 + }, + { + "epoch": 0.2653673552325531, + "grad_norm": 13.071978244788607, + "learning_rate": 5.7431210550338465e-06, + "loss": 0.08757476806640625, + "step": 30690 + }, + { + "epoch": 0.2654105887540964, + "grad_norm": 23.03675347989723, + "learning_rate": 5.743038562996159e-06, + "loss": 0.08588714599609375, + "step": 30695 + }, + { + "epoch": 0.26545382227563963, + "grad_norm": 6.639053870456424, + "learning_rate": 5.74295605830782e-06, + "loss": 0.21708831787109376, + "step": 30700 + }, + { + "epoch": 0.2654970557971829, + "grad_norm": 3.768602709936817, + "learning_rate": 5.74287354096921e-06, + "loss": 0.17144393920898438, + "step": 30705 + }, + { + "epoch": 0.26554028931872614, + "grad_norm": 1.9269190205142104, + "learning_rate": 5.742791010980709e-06, + "loss": 0.09385528564453124, + "step": 30710 + }, + { + "epoch": 0.2655835228402694, + "grad_norm": 0.36387897883676473, + "learning_rate": 5.742708468342699e-06, + "loss": 0.15714263916015625, + "step": 30715 + }, + { + "epoch": 0.2656267563618127, + "grad_norm": 16.717251537784442, + "learning_rate": 5.742625913055559e-06, + "loss": 0.2172119140625, + "step": 30720 + }, + { + "epoch": 0.26566998988335594, + "grad_norm": 4.291401489174598, + "learning_rate": 5.742543345119672e-06, + "loss": 0.1885162353515625, + "step": 30725 + }, + { + "epoch": 0.2657132234048992, + "grad_norm": 29.12736797785495, + "learning_rate": 5.7424607645354155e-06, + "loss": 0.10394973754882812, + "step": 30730 + }, + { + "epoch": 0.2657564569264425, + "grad_norm": 1.2463907758497976, + "learning_rate": 5.742378171303174e-06, + "loss": 0.03209228515625, + "step": 30735 + }, + { + "epoch": 0.26579969044798574, + "grad_norm": 0.3173856986527362, + "learning_rate": 5.742295565423327e-06, + "loss": 0.27222747802734376, + "step": 30740 + }, + { + "epoch": 0.265842923969529, + "grad_norm": 0.17017445727614436, + "learning_rate": 5.742212946896254e-06, + "loss": 0.23506622314453124, + "step": 30745 + }, + { + "epoch": 0.26588615749107225, + "grad_norm": 5.664732570833638, + "learning_rate": 5.742130315722337e-06, + "loss": 0.0276702880859375, + "step": 30750 + }, + { + "epoch": 0.26592939101261553, + "grad_norm": 3.3331572331101915, + "learning_rate": 5.742047671901959e-06, + "loss": 0.2069549560546875, + "step": 30755 + }, + { + "epoch": 0.2659726245341588, + "grad_norm": 13.307528768430622, + "learning_rate": 5.741965015435497e-06, + "loss": 0.0787750244140625, + "step": 30760 + }, + { + "epoch": 0.26601585805570205, + "grad_norm": 4.8609057014864545, + "learning_rate": 5.741882346323336e-06, + "loss": 0.03371124267578125, + "step": 30765 + }, + { + "epoch": 0.26605909157724533, + "grad_norm": 4.353890493208695, + "learning_rate": 5.741799664565856e-06, + "loss": 0.07994537353515625, + "step": 30770 + }, + { + "epoch": 0.2661023250987886, + "grad_norm": 9.013848998000746, + "learning_rate": 5.7417169701634375e-06, + "loss": 0.15694656372070312, + "step": 30775 + }, + { + "epoch": 0.26614555862033185, + "grad_norm": 0.3663921328596339, + "learning_rate": 5.741634263116463e-06, + "loss": 0.02852935791015625, + "step": 30780 + }, + { + "epoch": 0.26618879214187513, + "grad_norm": 0.19502662972034046, + "learning_rate": 5.741551543425314e-06, + "loss": 0.22038803100585938, + "step": 30785 + }, + { + "epoch": 0.2662320256634184, + "grad_norm": 5.0917826587789525, + "learning_rate": 5.741468811090371e-06, + "loss": 0.17068252563476563, + "step": 30790 + }, + { + "epoch": 0.26627525918496164, + "grad_norm": 2.5984439502732926, + "learning_rate": 5.741386066112016e-06, + "loss": 0.0907135009765625, + "step": 30795 + }, + { + "epoch": 0.26631849270650493, + "grad_norm": 1.6333033603717815, + "learning_rate": 5.74130330849063e-06, + "loss": 0.3612030029296875, + "step": 30800 + }, + { + "epoch": 0.26636172622804816, + "grad_norm": 2.5794961565468135, + "learning_rate": 5.741220538226597e-06, + "loss": 0.14111328125, + "step": 30805 + }, + { + "epoch": 0.26640495974959144, + "grad_norm": 0.4890045115727388, + "learning_rate": 5.741137755320297e-06, + "loss": 0.16052780151367188, + "step": 30810 + }, + { + "epoch": 0.2664481932711347, + "grad_norm": 22.135372170953325, + "learning_rate": 5.741054959772112e-06, + "loss": 0.263262939453125, + "step": 30815 + }, + { + "epoch": 0.26649142679267795, + "grad_norm": 0.11384241746351914, + "learning_rate": 5.7409721515824225e-06, + "loss": 0.09681758880615235, + "step": 30820 + }, + { + "epoch": 0.26653466031422124, + "grad_norm": 0.605759931208478, + "learning_rate": 5.740889330751611e-06, + "loss": 0.13554115295410157, + "step": 30825 + }, + { + "epoch": 0.2665778938357645, + "grad_norm": 5.429069242757235, + "learning_rate": 5.740806497280062e-06, + "loss": 0.076470947265625, + "step": 30830 + }, + { + "epoch": 0.26662112735730775, + "grad_norm": 56.896756092320224, + "learning_rate": 5.740723651168155e-06, + "loss": 0.5082305908203125, + "step": 30835 + }, + { + "epoch": 0.26666436087885104, + "grad_norm": 17.10800582781601, + "learning_rate": 5.740640792416272e-06, + "loss": 0.1675628662109375, + "step": 30840 + }, + { + "epoch": 0.26670759440039427, + "grad_norm": 17.215088198943537, + "learning_rate": 5.740557921024797e-06, + "loss": 0.4441349029541016, + "step": 30845 + }, + { + "epoch": 0.26675082792193755, + "grad_norm": 4.57655010620311, + "learning_rate": 5.74047503699411e-06, + "loss": 0.04629707336425781, + "step": 30850 + }, + { + "epoch": 0.26679406144348083, + "grad_norm": 28.76842484366671, + "learning_rate": 5.740392140324594e-06, + "loss": 0.213140869140625, + "step": 30855 + }, + { + "epoch": 0.26683729496502406, + "grad_norm": 5.166780051737187, + "learning_rate": 5.740309231016632e-06, + "loss": 0.18491668701171876, + "step": 30860 + }, + { + "epoch": 0.26688052848656735, + "grad_norm": 26.860592433744127, + "learning_rate": 5.740226309070607e-06, + "loss": 0.229437255859375, + "step": 30865 + }, + { + "epoch": 0.26692376200811063, + "grad_norm": 0.8404569630158436, + "learning_rate": 5.7401433744868994e-06, + "loss": 0.12336053848266601, + "step": 30870 + }, + { + "epoch": 0.26696699552965386, + "grad_norm": 9.47004081348072, + "learning_rate": 5.740060427265893e-06, + "loss": 0.30980377197265624, + "step": 30875 + }, + { + "epoch": 0.26701022905119715, + "grad_norm": 31.46912323302063, + "learning_rate": 5.73997746740797e-06, + "loss": 0.27079925537109373, + "step": 30880 + }, + { + "epoch": 0.2670534625727404, + "grad_norm": 11.840839550328443, + "learning_rate": 5.739894494913513e-06, + "loss": 0.3347900390625, + "step": 30885 + }, + { + "epoch": 0.26709669609428366, + "grad_norm": 42.21411922205161, + "learning_rate": 5.739811509782904e-06, + "loss": 0.1373077392578125, + "step": 30890 + }, + { + "epoch": 0.26713992961582694, + "grad_norm": 30.83362938179092, + "learning_rate": 5.739728512016528e-06, + "loss": 0.11308441162109376, + "step": 30895 + }, + { + "epoch": 0.26718316313737017, + "grad_norm": 10.872617510726762, + "learning_rate": 5.7396455016147645e-06, + "loss": 0.10272216796875, + "step": 30900 + }, + { + "epoch": 0.26722639665891346, + "grad_norm": 5.5015055349516615, + "learning_rate": 5.739562478577999e-06, + "loss": 0.0803314208984375, + "step": 30905 + }, + { + "epoch": 0.26726963018045674, + "grad_norm": 3.7096352546406766, + "learning_rate": 5.739479442906613e-06, + "loss": 0.25795974731445315, + "step": 30910 + }, + { + "epoch": 0.26731286370199997, + "grad_norm": 22.430691323644684, + "learning_rate": 5.73939639460099e-06, + "loss": 0.4974273681640625, + "step": 30915 + }, + { + "epoch": 0.26735609722354325, + "grad_norm": 3.3690376259466346, + "learning_rate": 5.7393133336615135e-06, + "loss": 0.05025148391723633, + "step": 30920 + }, + { + "epoch": 0.2673993307450865, + "grad_norm": 7.858340053260505, + "learning_rate": 5.739230260088565e-06, + "loss": 0.1769989013671875, + "step": 30925 + }, + { + "epoch": 0.26744256426662977, + "grad_norm": 31.5607534163857, + "learning_rate": 5.7391471738825284e-06, + "loss": 0.1851806640625, + "step": 30930 + }, + { + "epoch": 0.26748579778817305, + "grad_norm": 2.6268581060584095, + "learning_rate": 5.739064075043787e-06, + "loss": 0.1010406494140625, + "step": 30935 + }, + { + "epoch": 0.2675290313097163, + "grad_norm": 1.1198104123007715, + "learning_rate": 5.7389809635727265e-06, + "loss": 0.17273941040039062, + "step": 30940 + }, + { + "epoch": 0.26757226483125957, + "grad_norm": 33.73530458436193, + "learning_rate": 5.738897839469725e-06, + "loss": 0.21693267822265624, + "step": 30945 + }, + { + "epoch": 0.26761549835280285, + "grad_norm": 5.90693151943036, + "learning_rate": 5.738814702735171e-06, + "loss": 0.07642135620117188, + "step": 30950 + }, + { + "epoch": 0.2676587318743461, + "grad_norm": 35.04417443179033, + "learning_rate": 5.738731553369444e-06, + "loss": 0.44452056884765623, + "step": 30955 + }, + { + "epoch": 0.26770196539588936, + "grad_norm": 1.6599807933305506, + "learning_rate": 5.7386483913729294e-06, + "loss": 0.194244384765625, + "step": 30960 + }, + { + "epoch": 0.26774519891743265, + "grad_norm": 20.172768560821673, + "learning_rate": 5.7385652167460095e-06, + "loss": 0.115350341796875, + "step": 30965 + }, + { + "epoch": 0.2677884324389759, + "grad_norm": 6.165717986111432, + "learning_rate": 5.73848202948907e-06, + "loss": 0.1061056137084961, + "step": 30970 + }, + { + "epoch": 0.26783166596051916, + "grad_norm": 18.60747344988736, + "learning_rate": 5.7383988296024935e-06, + "loss": 0.176812744140625, + "step": 30975 + }, + { + "epoch": 0.2678748994820624, + "grad_norm": 50.72043212545709, + "learning_rate": 5.7383156170866625e-06, + "loss": 0.31130828857421877, + "step": 30980 + }, + { + "epoch": 0.2679181330036057, + "grad_norm": 7.406804758936221, + "learning_rate": 5.738232391941963e-06, + "loss": 0.2771484375, + "step": 30985 + }, + { + "epoch": 0.26796136652514896, + "grad_norm": 1.9562812386649961, + "learning_rate": 5.738149154168777e-06, + "loss": 0.0770721435546875, + "step": 30990 + }, + { + "epoch": 0.2680046000466922, + "grad_norm": 20.38787313519327, + "learning_rate": 5.73806590376749e-06, + "loss": 0.09655609130859374, + "step": 30995 + }, + { + "epoch": 0.26804783356823547, + "grad_norm": 11.723148115304985, + "learning_rate": 5.737982640738484e-06, + "loss": 0.29624481201171876, + "step": 31000 + }, + { + "epoch": 0.26809106708977876, + "grad_norm": 27.571244377008266, + "learning_rate": 5.7378993650821434e-06, + "loss": 0.2317638397216797, + "step": 31005 + }, + { + "epoch": 0.268134300611322, + "grad_norm": 11.262872630454044, + "learning_rate": 5.7378160767988535e-06, + "loss": 0.336358642578125, + "step": 31010 + }, + { + "epoch": 0.26817753413286527, + "grad_norm": 15.763470409297408, + "learning_rate": 5.737732775888998e-06, + "loss": 0.136322021484375, + "step": 31015 + }, + { + "epoch": 0.2682207676544085, + "grad_norm": 5.299599209607282, + "learning_rate": 5.737649462352961e-06, + "loss": 0.18907623291015624, + "step": 31020 + }, + { + "epoch": 0.2682640011759518, + "grad_norm": 3.6293490929823453, + "learning_rate": 5.737566136191127e-06, + "loss": 0.15220718383789061, + "step": 31025 + }, + { + "epoch": 0.26830723469749507, + "grad_norm": 1.1222383380135594, + "learning_rate": 5.737482797403878e-06, + "loss": 0.272308349609375, + "step": 31030 + }, + { + "epoch": 0.2683504682190383, + "grad_norm": 35.858672605167364, + "learning_rate": 5.737399445991602e-06, + "loss": 0.168621826171875, + "step": 31035 + }, + { + "epoch": 0.2683937017405816, + "grad_norm": 21.09529442049369, + "learning_rate": 5.737316081954681e-06, + "loss": 0.3181396484375, + "step": 31040 + }, + { + "epoch": 0.26843693526212486, + "grad_norm": 17.96260898347085, + "learning_rate": 5.7372327052935e-06, + "loss": 0.22872543334960938, + "step": 31045 + }, + { + "epoch": 0.2684801687836681, + "grad_norm": 35.38421709138963, + "learning_rate": 5.737149316008444e-06, + "loss": 0.16314697265625, + "step": 31050 + }, + { + "epoch": 0.2685234023052114, + "grad_norm": 19.92854306704819, + "learning_rate": 5.737065914099897e-06, + "loss": 0.3243095397949219, + "step": 31055 + }, + { + "epoch": 0.2685666358267546, + "grad_norm": 3.7554147565008873, + "learning_rate": 5.736982499568244e-06, + "loss": 0.11832809448242188, + "step": 31060 + }, + { + "epoch": 0.2686098693482979, + "grad_norm": 7.729779755534693, + "learning_rate": 5.736899072413869e-06, + "loss": 0.35574188232421877, + "step": 31065 + }, + { + "epoch": 0.2686531028698412, + "grad_norm": 3.8336459042585926, + "learning_rate": 5.736815632637158e-06, + "loss": 0.07878036499023437, + "step": 31070 + }, + { + "epoch": 0.2686963363913844, + "grad_norm": 1.9441796016029733, + "learning_rate": 5.736732180238495e-06, + "loss": 0.3977783203125, + "step": 31075 + }, + { + "epoch": 0.2687395699129277, + "grad_norm": 11.133457299194326, + "learning_rate": 5.736648715218264e-06, + "loss": 0.23771209716796876, + "step": 31080 + }, + { + "epoch": 0.268782803434471, + "grad_norm": 26.33926897040628, + "learning_rate": 5.7365652375768526e-06, + "loss": 0.20130538940429688, + "step": 31085 + }, + { + "epoch": 0.2688260369560142, + "grad_norm": 10.76317218442397, + "learning_rate": 5.736481747314643e-06, + "loss": 0.1687835693359375, + "step": 31090 + }, + { + "epoch": 0.2688692704775575, + "grad_norm": 33.69948451314141, + "learning_rate": 5.736398244432022e-06, + "loss": 0.2576469421386719, + "step": 31095 + }, + { + "epoch": 0.2689125039991007, + "grad_norm": 27.80098854543606, + "learning_rate": 5.736314728929374e-06, + "loss": 0.208489990234375, + "step": 31100 + }, + { + "epoch": 0.268955737520644, + "grad_norm": 25.415319174199933, + "learning_rate": 5.736231200807084e-06, + "loss": 0.2372833251953125, + "step": 31105 + }, + { + "epoch": 0.2689989710421873, + "grad_norm": 3.9279386018698044, + "learning_rate": 5.736147660065537e-06, + "loss": 0.3100341796875, + "step": 31110 + }, + { + "epoch": 0.2690422045637305, + "grad_norm": 5.174843057521366, + "learning_rate": 5.736064106705119e-06, + "loss": 0.1075439453125, + "step": 31115 + }, + { + "epoch": 0.2690854380852738, + "grad_norm": 21.595036077102108, + "learning_rate": 5.7359805407262155e-06, + "loss": 0.275164794921875, + "step": 31120 + }, + { + "epoch": 0.2691286716068171, + "grad_norm": 19.46316067252262, + "learning_rate": 5.7358969621292125e-06, + "loss": 0.38109893798828126, + "step": 31125 + }, + { + "epoch": 0.2691719051283603, + "grad_norm": 5.270362200570669, + "learning_rate": 5.7358133709144926e-06, + "loss": 0.14887237548828125, + "step": 31130 + }, + { + "epoch": 0.2692151386499036, + "grad_norm": 0.7661794563001942, + "learning_rate": 5.735729767082444e-06, + "loss": 0.156353759765625, + "step": 31135 + }, + { + "epoch": 0.2692583721714469, + "grad_norm": 20.063731351401454, + "learning_rate": 5.7356461506334516e-06, + "loss": 0.23224258422851562, + "step": 31140 + }, + { + "epoch": 0.2693016056929901, + "grad_norm": 5.2402912663686285, + "learning_rate": 5.7355625215679005e-06, + "loss": 0.32866973876953126, + "step": 31145 + }, + { + "epoch": 0.2693448392145334, + "grad_norm": 0.6962345481327783, + "learning_rate": 5.7354788798861774e-06, + "loss": 0.02962150573730469, + "step": 31150 + }, + { + "epoch": 0.2693880727360766, + "grad_norm": 33.410727170381726, + "learning_rate": 5.735395225588667e-06, + "loss": 0.23961868286132812, + "step": 31155 + }, + { + "epoch": 0.2694313062576199, + "grad_norm": 13.467575683512303, + "learning_rate": 5.735311558675755e-06, + "loss": 0.15928955078125, + "step": 31160 + }, + { + "epoch": 0.2694745397791632, + "grad_norm": 24.410251618218737, + "learning_rate": 5.735227879147828e-06, + "loss": 0.2908319473266602, + "step": 31165 + }, + { + "epoch": 0.2695177733007064, + "grad_norm": 27.675060670412694, + "learning_rate": 5.735144187005273e-06, + "loss": 0.2281513214111328, + "step": 31170 + }, + { + "epoch": 0.2695610068222497, + "grad_norm": 0.9618461163143971, + "learning_rate": 5.735060482248473e-06, + "loss": 0.10220947265625, + "step": 31175 + }, + { + "epoch": 0.269604240343793, + "grad_norm": 44.26298703134139, + "learning_rate": 5.734976764877817e-06, + "loss": 0.39098968505859377, + "step": 31180 + }, + { + "epoch": 0.2696474738653362, + "grad_norm": 27.0399252764708, + "learning_rate": 5.734893034893689e-06, + "loss": 0.33927001953125, + "step": 31185 + }, + { + "epoch": 0.2696907073868795, + "grad_norm": 7.513808075382055, + "learning_rate": 5.734809292296476e-06, + "loss": 0.1206451416015625, + "step": 31190 + }, + { + "epoch": 0.26973394090842273, + "grad_norm": 21.965349405284346, + "learning_rate": 5.734725537086564e-06, + "loss": 0.402239990234375, + "step": 31195 + }, + { + "epoch": 0.269777174429966, + "grad_norm": 16.143655052818648, + "learning_rate": 5.73464176926434e-06, + "loss": 0.07190170288085937, + "step": 31200 + }, + { + "epoch": 0.2698204079515093, + "grad_norm": 19.819539886131967, + "learning_rate": 5.7345579888301895e-06, + "loss": 0.5303730010986328, + "step": 31205 + }, + { + "epoch": 0.26986364147305253, + "grad_norm": 11.584867886118563, + "learning_rate": 5.7344741957844995e-06, + "loss": 0.11827163696289063, + "step": 31210 + }, + { + "epoch": 0.2699068749945958, + "grad_norm": 4.974151152612101, + "learning_rate": 5.734390390127655e-06, + "loss": 0.25469818115234377, + "step": 31215 + }, + { + "epoch": 0.2699501085161391, + "grad_norm": 1.2327598340620685, + "learning_rate": 5.734306571860046e-06, + "loss": 0.332525634765625, + "step": 31220 + }, + { + "epoch": 0.2699933420376823, + "grad_norm": 6.158477858842794, + "learning_rate": 5.734222740982055e-06, + "loss": 0.2885444641113281, + "step": 31225 + }, + { + "epoch": 0.2700365755592256, + "grad_norm": 13.617748347860053, + "learning_rate": 5.734138897494071e-06, + "loss": 0.2972412109375, + "step": 31230 + }, + { + "epoch": 0.27007980908076884, + "grad_norm": 4.025818603733158, + "learning_rate": 5.73405504139648e-06, + "loss": 0.10365219116210937, + "step": 31235 + }, + { + "epoch": 0.2701230426023121, + "grad_norm": 9.899230198469613, + "learning_rate": 5.733971172689668e-06, + "loss": 0.2979644775390625, + "step": 31240 + }, + { + "epoch": 0.2701662761238554, + "grad_norm": 13.18447578152077, + "learning_rate": 5.733887291374023e-06, + "loss": 0.14419822692871093, + "step": 31245 + }, + { + "epoch": 0.27020950964539864, + "grad_norm": 277.3846404291768, + "learning_rate": 5.733803397449931e-06, + "loss": 0.23106842041015624, + "step": 31250 + }, + { + "epoch": 0.2702527431669419, + "grad_norm": 1.716084050248521, + "learning_rate": 5.733719490917779e-06, + "loss": 0.076519775390625, + "step": 31255 + }, + { + "epoch": 0.2702959766884852, + "grad_norm": 0.4738150433749284, + "learning_rate": 5.733635571777956e-06, + "loss": 0.107720947265625, + "step": 31260 + }, + { + "epoch": 0.27033921021002844, + "grad_norm": 29.321043620683476, + "learning_rate": 5.733551640030846e-06, + "loss": 0.3714630126953125, + "step": 31265 + }, + { + "epoch": 0.2703824437315717, + "grad_norm": 11.046453336139102, + "learning_rate": 5.7334676956768366e-06, + "loss": 0.18145675659179689, + "step": 31270 + }, + { + "epoch": 0.27042567725311495, + "grad_norm": 9.149498105187726, + "learning_rate": 5.733383738716316e-06, + "loss": 0.047528076171875, + "step": 31275 + }, + { + "epoch": 0.27046891077465823, + "grad_norm": 1.8861379134061806, + "learning_rate": 5.7332997691496726e-06, + "loss": 0.1344736099243164, + "step": 31280 + }, + { + "epoch": 0.2705121442962015, + "grad_norm": 3.2034016247660726, + "learning_rate": 5.733215786977291e-06, + "loss": 0.18874282836914064, + "step": 31285 + }, + { + "epoch": 0.27055537781774475, + "grad_norm": 9.474545814032254, + "learning_rate": 5.7331317921995605e-06, + "loss": 0.117279052734375, + "step": 31290 + }, + { + "epoch": 0.27059861133928803, + "grad_norm": 14.521184094600539, + "learning_rate": 5.733047784816867e-06, + "loss": 0.08510627746582031, + "step": 31295 + }, + { + "epoch": 0.2706418448608313, + "grad_norm": 8.780398712794154, + "learning_rate": 5.732963764829599e-06, + "loss": 0.302349853515625, + "step": 31300 + }, + { + "epoch": 0.27068507838237454, + "grad_norm": 0.622348363372006, + "learning_rate": 5.732879732238143e-06, + "loss": 0.03056182861328125, + "step": 31305 + }, + { + "epoch": 0.27072831190391783, + "grad_norm": 11.162566621051534, + "learning_rate": 5.7327956870428875e-06, + "loss": 0.3021026611328125, + "step": 31310 + }, + { + "epoch": 0.2707715454254611, + "grad_norm": 0.03112105790230428, + "learning_rate": 5.73271162924422e-06, + "loss": 0.04739952087402344, + "step": 31315 + }, + { + "epoch": 0.27081477894700434, + "grad_norm": 22.690261690322732, + "learning_rate": 5.732627558842529e-06, + "loss": 0.19280548095703126, + "step": 31320 + }, + { + "epoch": 0.2708580124685476, + "grad_norm": 0.3382470509196583, + "learning_rate": 5.732543475838198e-06, + "loss": 0.08912506103515624, + "step": 31325 + }, + { + "epoch": 0.27090124599009086, + "grad_norm": 18.94350674762538, + "learning_rate": 5.7324593802316205e-06, + "loss": 0.7699005126953125, + "step": 31330 + }, + { + "epoch": 0.27094447951163414, + "grad_norm": 4.267822790902178, + "learning_rate": 5.732375272023181e-06, + "loss": 0.052880477905273435, + "step": 31335 + }, + { + "epoch": 0.2709877130331774, + "grad_norm": 36.082321843114784, + "learning_rate": 5.732291151213268e-06, + "loss": 0.15158767700195314, + "step": 31340 + }, + { + "epoch": 0.27103094655472065, + "grad_norm": 1.4316045577120204, + "learning_rate": 5.73220701780227e-06, + "loss": 0.2023101806640625, + "step": 31345 + }, + { + "epoch": 0.27107418007626394, + "grad_norm": 0.09575174477089153, + "learning_rate": 5.732122871790575e-06, + "loss": 0.16213760375976563, + "step": 31350 + }, + { + "epoch": 0.2711174135978072, + "grad_norm": 40.269701537823174, + "learning_rate": 5.73203871317857e-06, + "loss": 0.19520721435546876, + "step": 31355 + }, + { + "epoch": 0.27116064711935045, + "grad_norm": 6.853378488028208, + "learning_rate": 5.731954541966645e-06, + "loss": 0.4399538040161133, + "step": 31360 + }, + { + "epoch": 0.27120388064089374, + "grad_norm": 4.633821031310853, + "learning_rate": 5.731870358155186e-06, + "loss": 0.13265228271484375, + "step": 31365 + }, + { + "epoch": 0.27124711416243696, + "grad_norm": 10.766332503684227, + "learning_rate": 5.731786161744583e-06, + "loss": 0.3593841552734375, + "step": 31370 + }, + { + "epoch": 0.27129034768398025, + "grad_norm": 1.5888686860680463, + "learning_rate": 5.731701952735223e-06, + "loss": 0.15896835327148437, + "step": 31375 + }, + { + "epoch": 0.27133358120552353, + "grad_norm": 17.304424082883596, + "learning_rate": 5.731617731127495e-06, + "loss": 0.207855224609375, + "step": 31380 + }, + { + "epoch": 0.27137681472706676, + "grad_norm": 7.149688374022452, + "learning_rate": 5.7315334969217875e-06, + "loss": 0.0521636962890625, + "step": 31385 + }, + { + "epoch": 0.27142004824861005, + "grad_norm": 19.34160971904754, + "learning_rate": 5.731449250118489e-06, + "loss": 0.33407440185546877, + "step": 31390 + }, + { + "epoch": 0.27146328177015333, + "grad_norm": 1.412003085807246, + "learning_rate": 5.731364990717988e-06, + "loss": 0.2329925537109375, + "step": 31395 + }, + { + "epoch": 0.27150651529169656, + "grad_norm": 11.860909493688645, + "learning_rate": 5.731280718720673e-06, + "loss": 0.7028640747070313, + "step": 31400 + }, + { + "epoch": 0.27154974881323984, + "grad_norm": 5.2146840837061585, + "learning_rate": 5.731196434126932e-06, + "loss": 0.07879791259765626, + "step": 31405 + }, + { + "epoch": 0.2715929823347831, + "grad_norm": 1.0775782466297046, + "learning_rate": 5.731112136937155e-06, + "loss": 0.2397857666015625, + "step": 31410 + }, + { + "epoch": 0.27163621585632636, + "grad_norm": 97.40330320264052, + "learning_rate": 5.73102782715173e-06, + "loss": 0.502911376953125, + "step": 31415 + }, + { + "epoch": 0.27167944937786964, + "grad_norm": 1.8867162230926564, + "learning_rate": 5.730943504771046e-06, + "loss": 0.05935745239257813, + "step": 31420 + }, + { + "epoch": 0.27172268289941287, + "grad_norm": 13.089177943500875, + "learning_rate": 5.730859169795492e-06, + "loss": 0.17148056030273437, + "step": 31425 + }, + { + "epoch": 0.27176591642095616, + "grad_norm": 12.931989942375067, + "learning_rate": 5.730774822225456e-06, + "loss": 0.259344482421875, + "step": 31430 + }, + { + "epoch": 0.27180914994249944, + "grad_norm": 18.25724274423517, + "learning_rate": 5.730690462061329e-06, + "loss": 0.10871162414550781, + "step": 31435 + }, + { + "epoch": 0.27185238346404267, + "grad_norm": 7.0811179720156705, + "learning_rate": 5.730606089303499e-06, + "loss": 0.4454063415527344, + "step": 31440 + }, + { + "epoch": 0.27189561698558595, + "grad_norm": 2.5096160015170494, + "learning_rate": 5.730521703952354e-06, + "loss": 0.20881195068359376, + "step": 31445 + }, + { + "epoch": 0.2719388505071292, + "grad_norm": 10.132267587572926, + "learning_rate": 5.730437306008285e-06, + "loss": 0.3061199188232422, + "step": 31450 + }, + { + "epoch": 0.27198208402867247, + "grad_norm": 38.97211340682371, + "learning_rate": 5.730352895471679e-06, + "loss": 0.304327392578125, + "step": 31455 + }, + { + "epoch": 0.27202531755021575, + "grad_norm": 20.98947153989833, + "learning_rate": 5.730268472342928e-06, + "loss": 0.24736328125, + "step": 31460 + }, + { + "epoch": 0.272068551071759, + "grad_norm": 9.32378255410406, + "learning_rate": 5.730184036622419e-06, + "loss": 0.25134124755859377, + "step": 31465 + }, + { + "epoch": 0.27211178459330226, + "grad_norm": 24.823085666964847, + "learning_rate": 5.730099588310544e-06, + "loss": 0.1588653564453125, + "step": 31470 + }, + { + "epoch": 0.27215501811484555, + "grad_norm": 0.9418839257050223, + "learning_rate": 5.730015127407689e-06, + "loss": 0.45565643310546877, + "step": 31475 + }, + { + "epoch": 0.2721982516363888, + "grad_norm": 3.1920671165094756, + "learning_rate": 5.729930653914248e-06, + "loss": 0.201556396484375, + "step": 31480 + }, + { + "epoch": 0.27224148515793206, + "grad_norm": 6.223185052473056, + "learning_rate": 5.729846167830606e-06, + "loss": 0.16552200317382812, + "step": 31485 + }, + { + "epoch": 0.2722847186794753, + "grad_norm": 2.7716294261881966, + "learning_rate": 5.7297616691571555e-06, + "loss": 0.1025543212890625, + "step": 31490 + }, + { + "epoch": 0.2723279522010186, + "grad_norm": 2.6350774542824893, + "learning_rate": 5.729677157894285e-06, + "loss": 0.12281494140625, + "step": 31495 + }, + { + "epoch": 0.27237118572256186, + "grad_norm": 24.749316657441003, + "learning_rate": 5.729592634042385e-06, + "loss": 0.211859130859375, + "step": 31500 + }, + { + "epoch": 0.2724144192441051, + "grad_norm": 8.99473346873071, + "learning_rate": 5.729508097601845e-06, + "loss": 0.269732666015625, + "step": 31505 + }, + { + "epoch": 0.2724576527656484, + "grad_norm": 4.72301131504132, + "learning_rate": 5.729423548573055e-06, + "loss": 0.19531478881835937, + "step": 31510 + }, + { + "epoch": 0.27250088628719166, + "grad_norm": 9.201809317664049, + "learning_rate": 5.729338986956404e-06, + "loss": 0.25788116455078125, + "step": 31515 + }, + { + "epoch": 0.2725441198087349, + "grad_norm": 18.413491384821594, + "learning_rate": 5.7292544127522836e-06, + "loss": 0.29080810546875, + "step": 31520 + }, + { + "epoch": 0.27258735333027817, + "grad_norm": 6.774687616215611, + "learning_rate": 5.729169825961083e-06, + "loss": 0.177313232421875, + "step": 31525 + }, + { + "epoch": 0.27263058685182145, + "grad_norm": 33.48652472807826, + "learning_rate": 5.729085226583191e-06, + "loss": 0.14017333984375, + "step": 31530 + }, + { + "epoch": 0.2726738203733647, + "grad_norm": 5.612971493951357, + "learning_rate": 5.729000614619e-06, + "loss": 0.2866302490234375, + "step": 31535 + }, + { + "epoch": 0.27271705389490797, + "grad_norm": 12.803500612410845, + "learning_rate": 5.7289159900688994e-06, + "loss": 0.19721393585205077, + "step": 31540 + }, + { + "epoch": 0.2727602874164512, + "grad_norm": 5.787647427139542, + "learning_rate": 5.728831352933279e-06, + "loss": 0.270220947265625, + "step": 31545 + }, + { + "epoch": 0.2728035209379945, + "grad_norm": 4.089205738910445, + "learning_rate": 5.72874670321253e-06, + "loss": 0.11370849609375, + "step": 31550 + }, + { + "epoch": 0.27284675445953777, + "grad_norm": 6.444297112781752, + "learning_rate": 5.728662040907042e-06, + "loss": 0.185992431640625, + "step": 31555 + }, + { + "epoch": 0.272889987981081, + "grad_norm": 3.4393007663921975, + "learning_rate": 5.728577366017206e-06, + "loss": 0.3650360107421875, + "step": 31560 + }, + { + "epoch": 0.2729332215026243, + "grad_norm": 40.42144566711978, + "learning_rate": 5.7284926785434125e-06, + "loss": 0.200927734375, + "step": 31565 + }, + { + "epoch": 0.27297645502416756, + "grad_norm": 15.318673553496053, + "learning_rate": 5.728407978486051e-06, + "loss": 0.692425537109375, + "step": 31570 + }, + { + "epoch": 0.2730196885457108, + "grad_norm": 6.636121953967052, + "learning_rate": 5.728323265845513e-06, + "loss": 0.1232391357421875, + "step": 31575 + }, + { + "epoch": 0.2730629220672541, + "grad_norm": 2.3168580832554935, + "learning_rate": 5.728238540622191e-06, + "loss": 0.02389068603515625, + "step": 31580 + }, + { + "epoch": 0.2731061555887973, + "grad_norm": 0.7478253674706687, + "learning_rate": 5.728153802816472e-06, + "loss": 0.09937591552734375, + "step": 31585 + }, + { + "epoch": 0.2731493891103406, + "grad_norm": 31.11684255177956, + "learning_rate": 5.728069052428749e-06, + "loss": 0.238848876953125, + "step": 31590 + }, + { + "epoch": 0.2731926226318839, + "grad_norm": 30.77864916378178, + "learning_rate": 5.727984289459414e-06, + "loss": 0.2076934814453125, + "step": 31595 + }, + { + "epoch": 0.2732358561534271, + "grad_norm": 5.096084977242137, + "learning_rate": 5.7278995139088546e-06, + "loss": 0.08912353515625, + "step": 31600 + }, + { + "epoch": 0.2732790896749704, + "grad_norm": 1.681848335336501, + "learning_rate": 5.727814725777465e-06, + "loss": 0.22795524597167968, + "step": 31605 + }, + { + "epoch": 0.2733223231965137, + "grad_norm": 18.512797412762744, + "learning_rate": 5.7277299250656335e-06, + "loss": 0.2573333740234375, + "step": 31610 + }, + { + "epoch": 0.2733655567180569, + "grad_norm": 33.78127496885005, + "learning_rate": 5.727645111773754e-06, + "loss": 0.267572021484375, + "step": 31615 + }, + { + "epoch": 0.2734087902396002, + "grad_norm": 35.99257004381976, + "learning_rate": 5.727560285902216e-06, + "loss": 0.263580322265625, + "step": 31620 + }, + { + "epoch": 0.2734520237611434, + "grad_norm": 15.099830646660505, + "learning_rate": 5.72747544745141e-06, + "loss": 0.32912139892578124, + "step": 31625 + }, + { + "epoch": 0.2734952572826867, + "grad_norm": 1.7181505667111883, + "learning_rate": 5.72739059642173e-06, + "loss": 0.0990966796875, + "step": 31630 + }, + { + "epoch": 0.27353849080423, + "grad_norm": 163.08458379995923, + "learning_rate": 5.727305732813564e-06, + "loss": 0.2575347900390625, + "step": 31635 + }, + { + "epoch": 0.2735817243257732, + "grad_norm": 21.143220335633874, + "learning_rate": 5.727220856627306e-06, + "loss": 0.25069580078125, + "step": 31640 + }, + { + "epoch": 0.2736249578473165, + "grad_norm": 14.301839484991103, + "learning_rate": 5.7271359678633454e-06, + "loss": 0.5784614562988282, + "step": 31645 + }, + { + "epoch": 0.2736681913688598, + "grad_norm": 1.5796608016513387, + "learning_rate": 5.727051066522076e-06, + "loss": 0.0586090087890625, + "step": 31650 + }, + { + "epoch": 0.273711424890403, + "grad_norm": 1.5646149116515489, + "learning_rate": 5.726966152603887e-06, + "loss": 0.0873382568359375, + "step": 31655 + }, + { + "epoch": 0.2737546584119463, + "grad_norm": 4.035534128782784, + "learning_rate": 5.726881226109172e-06, + "loss": 0.309722900390625, + "step": 31660 + }, + { + "epoch": 0.2737978919334895, + "grad_norm": 1.574186978001206, + "learning_rate": 5.72679628703832e-06, + "loss": 0.12066650390625, + "step": 31665 + }, + { + "epoch": 0.2738411254550328, + "grad_norm": 11.165092335949769, + "learning_rate": 5.7267113353917255e-06, + "loss": 0.11803131103515625, + "step": 31670 + }, + { + "epoch": 0.2738843589765761, + "grad_norm": 8.859064909363159, + "learning_rate": 5.72662637116978e-06, + "loss": 0.22994537353515626, + "step": 31675 + }, + { + "epoch": 0.2739275924981193, + "grad_norm": 5.2315046041182995, + "learning_rate": 5.726541394372873e-06, + "loss": 0.20070724487304686, + "step": 31680 + }, + { + "epoch": 0.2739708260196626, + "grad_norm": 18.34064259480861, + "learning_rate": 5.7264564050014e-06, + "loss": 0.18148651123046874, + "step": 31685 + }, + { + "epoch": 0.2740140595412059, + "grad_norm": 9.484936188335471, + "learning_rate": 5.72637140305575e-06, + "loss": 0.13032569885253906, + "step": 31690 + }, + { + "epoch": 0.2740572930627491, + "grad_norm": 0.7665133203035983, + "learning_rate": 5.726286388536315e-06, + "loss": 0.05547065734863281, + "step": 31695 + }, + { + "epoch": 0.2741005265842924, + "grad_norm": 1.6042910173917861, + "learning_rate": 5.72620136144349e-06, + "loss": 0.202191162109375, + "step": 31700 + }, + { + "epoch": 0.2741437601058357, + "grad_norm": 0.46872426599319134, + "learning_rate": 5.726116321777663e-06, + "loss": 0.05638427734375, + "step": 31705 + }, + { + "epoch": 0.2741869936273789, + "grad_norm": 34.02602510288825, + "learning_rate": 5.726031269539229e-06, + "loss": 0.28038330078125, + "step": 31710 + }, + { + "epoch": 0.2742302271489222, + "grad_norm": 0.3463602640333943, + "learning_rate": 5.7259462047285805e-06, + "loss": 0.11385927200317383, + "step": 31715 + }, + { + "epoch": 0.27427346067046543, + "grad_norm": 12.3090876191201, + "learning_rate": 5.725861127346109e-06, + "loss": 0.36522979736328126, + "step": 31720 + }, + { + "epoch": 0.2743166941920087, + "grad_norm": 14.597858775968598, + "learning_rate": 5.725776037392207e-06, + "loss": 0.1496734619140625, + "step": 31725 + }, + { + "epoch": 0.274359927713552, + "grad_norm": 1.7453015846401856, + "learning_rate": 5.725690934867266e-06, + "loss": 0.113690185546875, + "step": 31730 + }, + { + "epoch": 0.2744031612350952, + "grad_norm": 14.315306411877163, + "learning_rate": 5.72560581977168e-06, + "loss": 0.07223968505859375, + "step": 31735 + }, + { + "epoch": 0.2744463947566385, + "grad_norm": 20.244391125205706, + "learning_rate": 5.72552069210584e-06, + "loss": 0.2447021484375, + "step": 31740 + }, + { + "epoch": 0.2744896282781818, + "grad_norm": 50.71827117599829, + "learning_rate": 5.725435551870139e-06, + "loss": 0.18838119506835938, + "step": 31745 + }, + { + "epoch": 0.274532861799725, + "grad_norm": 2.3329722031269378, + "learning_rate": 5.725350399064971e-06, + "loss": 0.06443023681640625, + "step": 31750 + }, + { + "epoch": 0.2745760953212683, + "grad_norm": 1.605437322726801, + "learning_rate": 5.725265233690728e-06, + "loss": 0.3654975891113281, + "step": 31755 + }, + { + "epoch": 0.27461932884281154, + "grad_norm": 3.200415367116863, + "learning_rate": 5.725180055747802e-06, + "loss": 0.2110626220703125, + "step": 31760 + }, + { + "epoch": 0.2746625623643548, + "grad_norm": 1.6799593387330238, + "learning_rate": 5.725094865236587e-06, + "loss": 0.10357666015625, + "step": 31765 + }, + { + "epoch": 0.2747057958858981, + "grad_norm": 32.43064856974728, + "learning_rate": 5.725009662157474e-06, + "loss": 0.32321014404296877, + "step": 31770 + }, + { + "epoch": 0.27474902940744134, + "grad_norm": 2.5329708395202664, + "learning_rate": 5.724924446510859e-06, + "loss": 0.23895721435546874, + "step": 31775 + }, + { + "epoch": 0.2747922629289846, + "grad_norm": 1.8288711309972994, + "learning_rate": 5.724839218297132e-06, + "loss": 0.15690231323242188, + "step": 31780 + }, + { + "epoch": 0.2748354964505279, + "grad_norm": 21.67280485570514, + "learning_rate": 5.724753977516688e-06, + "loss": 0.1578369140625, + "step": 31785 + }, + { + "epoch": 0.27487872997207113, + "grad_norm": 10.114359809680524, + "learning_rate": 5.724668724169919e-06, + "loss": 0.205487060546875, + "step": 31790 + }, + { + "epoch": 0.2749219634936144, + "grad_norm": 6.709528462843119, + "learning_rate": 5.724583458257218e-06, + "loss": 0.335333251953125, + "step": 31795 + }, + { + "epoch": 0.27496519701515765, + "grad_norm": 29.674363326546285, + "learning_rate": 5.72449817977898e-06, + "loss": 0.1316680908203125, + "step": 31800 + }, + { + "epoch": 0.27500843053670093, + "grad_norm": 0.8984012643216521, + "learning_rate": 5.724412888735596e-06, + "loss": 0.23228607177734376, + "step": 31805 + }, + { + "epoch": 0.2750516640582442, + "grad_norm": 12.051146116347807, + "learning_rate": 5.7243275851274615e-06, + "loss": 0.3758995056152344, + "step": 31810 + }, + { + "epoch": 0.27509489757978745, + "grad_norm": 1.5124040407191777, + "learning_rate": 5.7242422689549684e-06, + "loss": 0.090484619140625, + "step": 31815 + }, + { + "epoch": 0.27513813110133073, + "grad_norm": 19.46319805316371, + "learning_rate": 5.72415694021851e-06, + "loss": 0.16665496826171874, + "step": 31820 + }, + { + "epoch": 0.275181364622874, + "grad_norm": 24.672223111652944, + "learning_rate": 5.724071598918482e-06, + "loss": 0.42730560302734377, + "step": 31825 + }, + { + "epoch": 0.27522459814441724, + "grad_norm": 8.322457282323295, + "learning_rate": 5.723986245055275e-06, + "loss": 0.1641143798828125, + "step": 31830 + }, + { + "epoch": 0.2752678316659605, + "grad_norm": 64.53100821825177, + "learning_rate": 5.723900878629285e-06, + "loss": 0.4423675537109375, + "step": 31835 + }, + { + "epoch": 0.27531106518750376, + "grad_norm": 2.8786973346577813, + "learning_rate": 5.723815499640905e-06, + "loss": 0.09694290161132812, + "step": 31840 + }, + { + "epoch": 0.27535429870904704, + "grad_norm": 7.637107866147896, + "learning_rate": 5.723730108090528e-06, + "loss": 0.19507484436035155, + "step": 31845 + }, + { + "epoch": 0.2753975322305903, + "grad_norm": 24.685429257008767, + "learning_rate": 5.723644703978548e-06, + "loss": 0.5511474609375, + "step": 31850 + }, + { + "epoch": 0.27544076575213355, + "grad_norm": 54.87182580373786, + "learning_rate": 5.72355928730536e-06, + "loss": 0.144512939453125, + "step": 31855 + }, + { + "epoch": 0.27548399927367684, + "grad_norm": 16.440873416315856, + "learning_rate": 5.723473858071358e-06, + "loss": 0.31173171997070315, + "step": 31860 + }, + { + "epoch": 0.2755272327952201, + "grad_norm": 6.726825788890021, + "learning_rate": 5.723388416276934e-06, + "loss": 0.044683837890625, + "step": 31865 + }, + { + "epoch": 0.27557046631676335, + "grad_norm": 16.55019080677238, + "learning_rate": 5.723302961922484e-06, + "loss": 0.25384368896484377, + "step": 31870 + }, + { + "epoch": 0.27561369983830664, + "grad_norm": 2.263278137521522, + "learning_rate": 5.723217495008401e-06, + "loss": 0.05697059631347656, + "step": 31875 + }, + { + "epoch": 0.2756569333598499, + "grad_norm": 19.809364557191277, + "learning_rate": 5.72313201553508e-06, + "loss": 0.184686279296875, + "step": 31880 + }, + { + "epoch": 0.27570016688139315, + "grad_norm": 8.197029431013751, + "learning_rate": 5.723046523502914e-06, + "loss": 0.07705898284912109, + "step": 31885 + }, + { + "epoch": 0.27574340040293643, + "grad_norm": 5.036153387328914, + "learning_rate": 5.7229610189122985e-06, + "loss": 0.12087478637695312, + "step": 31890 + }, + { + "epoch": 0.27578663392447966, + "grad_norm": 5.019170058254573, + "learning_rate": 5.722875501763627e-06, + "loss": 0.17538833618164062, + "step": 31895 + }, + { + "epoch": 0.27582986744602295, + "grad_norm": 16.004279531664604, + "learning_rate": 5.722789972057294e-06, + "loss": 0.21605224609375, + "step": 31900 + }, + { + "epoch": 0.27587310096756623, + "grad_norm": 1.616914798352473, + "learning_rate": 5.7227044297936954e-06, + "loss": 0.190484619140625, + "step": 31905 + }, + { + "epoch": 0.27591633448910946, + "grad_norm": 6.3419879395216086, + "learning_rate": 5.7226188749732245e-06, + "loss": 0.15172271728515624, + "step": 31910 + }, + { + "epoch": 0.27595956801065274, + "grad_norm": 40.23312701825373, + "learning_rate": 5.722533307596275e-06, + "loss": 0.16686248779296875, + "step": 31915 + }, + { + "epoch": 0.27600280153219603, + "grad_norm": 5.097389006786313, + "learning_rate": 5.722447727663243e-06, + "loss": 0.2459716796875, + "step": 31920 + }, + { + "epoch": 0.27604603505373926, + "grad_norm": 0.16533500155219263, + "learning_rate": 5.722362135174523e-06, + "loss": 0.36361846923828123, + "step": 31925 + }, + { + "epoch": 0.27608926857528254, + "grad_norm": 0.6487884860682734, + "learning_rate": 5.722276530130508e-06, + "loss": 0.0903717041015625, + "step": 31930 + }, + { + "epoch": 0.27613250209682577, + "grad_norm": 11.280032322904697, + "learning_rate": 5.7221909125315954e-06, + "loss": 0.279351806640625, + "step": 31935 + }, + { + "epoch": 0.27617573561836906, + "grad_norm": 19.34248568376106, + "learning_rate": 5.722105282378178e-06, + "loss": 0.621136474609375, + "step": 31940 + }, + { + "epoch": 0.27621896913991234, + "grad_norm": 22.710368191175807, + "learning_rate": 5.722019639670653e-06, + "loss": 0.21048583984375, + "step": 31945 + }, + { + "epoch": 0.27626220266145557, + "grad_norm": 6.553151142938818, + "learning_rate": 5.721933984409412e-06, + "loss": 0.123565673828125, + "step": 31950 + }, + { + "epoch": 0.27630543618299885, + "grad_norm": 3.335278363803823, + "learning_rate": 5.721848316594853e-06, + "loss": 0.28380794525146485, + "step": 31955 + }, + { + "epoch": 0.27634866970454214, + "grad_norm": 4.793654229989738, + "learning_rate": 5.72176263622737e-06, + "loss": 0.30471343994140626, + "step": 31960 + }, + { + "epoch": 0.27639190322608537, + "grad_norm": 11.084429449841085, + "learning_rate": 5.721676943307359e-06, + "loss": 0.141424560546875, + "step": 31965 + }, + { + "epoch": 0.27643513674762865, + "grad_norm": 8.704193728831285, + "learning_rate": 5.721591237835213e-06, + "loss": 0.14345703125, + "step": 31970 + }, + { + "epoch": 0.2764783702691719, + "grad_norm": 0.7707979158545646, + "learning_rate": 5.72150551981133e-06, + "loss": 0.163134765625, + "step": 31975 + }, + { + "epoch": 0.27652160379071516, + "grad_norm": 18.325027751125567, + "learning_rate": 5.721419789236103e-06, + "loss": 0.3139228820800781, + "step": 31980 + }, + { + "epoch": 0.27656483731225845, + "grad_norm": 0.48526023377027855, + "learning_rate": 5.7213340461099294e-06, + "loss": 0.09347991943359375, + "step": 31985 + }, + { + "epoch": 0.2766080708338017, + "grad_norm": 8.83199753418615, + "learning_rate": 5.721248290433203e-06, + "loss": 0.1200408935546875, + "step": 31990 + }, + { + "epoch": 0.27665130435534496, + "grad_norm": 22.18116723101392, + "learning_rate": 5.72116252220632e-06, + "loss": 0.1872650146484375, + "step": 31995 + }, + { + "epoch": 0.27669453787688825, + "grad_norm": 18.802833810082895, + "learning_rate": 5.721076741429675e-06, + "loss": 0.13647937774658203, + "step": 32000 + }, + { + "epoch": 0.2767377713984315, + "grad_norm": 5.469253868648913, + "learning_rate": 5.720990948103666e-06, + "loss": 0.3206066131591797, + "step": 32005 + }, + { + "epoch": 0.27678100491997476, + "grad_norm": 7.315116690771275, + "learning_rate": 5.720905142228686e-06, + "loss": 0.0597076416015625, + "step": 32010 + }, + { + "epoch": 0.276824238441518, + "grad_norm": 15.803471288788522, + "learning_rate": 5.720819323805134e-06, + "loss": 0.35952911376953123, + "step": 32015 + }, + { + "epoch": 0.2768674719630613, + "grad_norm": 11.827378606739263, + "learning_rate": 5.720733492833401e-06, + "loss": 0.168597412109375, + "step": 32020 + }, + { + "epoch": 0.27691070548460456, + "grad_norm": 1.9619962352327587, + "learning_rate": 5.720647649313887e-06, + "loss": 0.15268707275390625, + "step": 32025 + }, + { + "epoch": 0.2769539390061478, + "grad_norm": 4.765917032026281, + "learning_rate": 5.720561793246986e-06, + "loss": 0.1899850845336914, + "step": 32030 + }, + { + "epoch": 0.27699717252769107, + "grad_norm": 6.224047647606458, + "learning_rate": 5.7204759246330945e-06, + "loss": 0.20098419189453126, + "step": 32035 + }, + { + "epoch": 0.27704040604923436, + "grad_norm": 8.308276172246606, + "learning_rate": 5.720390043472609e-06, + "loss": 0.123309326171875, + "step": 32040 + }, + { + "epoch": 0.2770836395707776, + "grad_norm": 9.280535226526338, + "learning_rate": 5.720304149765924e-06, + "loss": 0.14666328430175782, + "step": 32045 + }, + { + "epoch": 0.27712687309232087, + "grad_norm": 1.0543223040498644, + "learning_rate": 5.720218243513438e-06, + "loss": 0.09599227905273437, + "step": 32050 + }, + { + "epoch": 0.27717010661386415, + "grad_norm": 2.433263409600901, + "learning_rate": 5.720132324715544e-06, + "loss": 0.07257080078125, + "step": 32055 + }, + { + "epoch": 0.2772133401354074, + "grad_norm": 3.3803482595432794, + "learning_rate": 5.7200463933726415e-06, + "loss": 0.12025146484375, + "step": 32060 + }, + { + "epoch": 0.27725657365695067, + "grad_norm": 5.040460765727538, + "learning_rate": 5.719960449485125e-06, + "loss": 0.2569755554199219, + "step": 32065 + }, + { + "epoch": 0.2772998071784939, + "grad_norm": 31.245289105909364, + "learning_rate": 5.719874493053391e-06, + "loss": 0.3293853759765625, + "step": 32070 + }, + { + "epoch": 0.2773430407000372, + "grad_norm": 0.1919454058778401, + "learning_rate": 5.719788524077836e-06, + "loss": 0.26252403259277346, + "step": 32075 + }, + { + "epoch": 0.27738627422158046, + "grad_norm": 5.066452219835852, + "learning_rate": 5.719702542558857e-06, + "loss": 0.1575286865234375, + "step": 32080 + }, + { + "epoch": 0.2774295077431237, + "grad_norm": 25.66131194172717, + "learning_rate": 5.719616548496851e-06, + "loss": 0.36829376220703125, + "step": 32085 + }, + { + "epoch": 0.277472741264667, + "grad_norm": 0.4101987574830099, + "learning_rate": 5.719530541892213e-06, + "loss": 0.1734405517578125, + "step": 32090 + }, + { + "epoch": 0.27751597478621026, + "grad_norm": 20.21230430855127, + "learning_rate": 5.719444522745339e-06, + "loss": 0.34990768432617186, + "step": 32095 + }, + { + "epoch": 0.2775592083077535, + "grad_norm": 20.978265523186607, + "learning_rate": 5.7193584910566285e-06, + "loss": 0.17490386962890625, + "step": 32100 + }, + { + "epoch": 0.2776024418292968, + "grad_norm": 15.395485068207414, + "learning_rate": 5.719272446826477e-06, + "loss": 0.100494384765625, + "step": 32105 + }, + { + "epoch": 0.27764567535084, + "grad_norm": 19.15980078397313, + "learning_rate": 5.719186390055281e-06, + "loss": 0.14674072265625, + "step": 32110 + }, + { + "epoch": 0.2776889088723833, + "grad_norm": 1.8574380493183251, + "learning_rate": 5.719100320743439e-06, + "loss": 0.30010528564453126, + "step": 32115 + }, + { + "epoch": 0.2777321423939266, + "grad_norm": 25.627458698107233, + "learning_rate": 5.7190142388913455e-06, + "loss": 0.18763885498046876, + "step": 32120 + }, + { + "epoch": 0.2777753759154698, + "grad_norm": 1.6596291908256589, + "learning_rate": 5.718928144499397e-06, + "loss": 0.054449462890625, + "step": 32125 + }, + { + "epoch": 0.2778186094370131, + "grad_norm": 0.581982798828866, + "learning_rate": 5.718842037567994e-06, + "loss": 0.0216796875, + "step": 32130 + }, + { + "epoch": 0.27786184295855637, + "grad_norm": 7.2605284913688095, + "learning_rate": 5.718755918097532e-06, + "loss": 0.14845008850097657, + "step": 32135 + }, + { + "epoch": 0.2779050764800996, + "grad_norm": 19.76182257642276, + "learning_rate": 5.718669786088407e-06, + "loss": 0.12671003341674805, + "step": 32140 + }, + { + "epoch": 0.2779483100016429, + "grad_norm": 31.06804142284207, + "learning_rate": 5.718583641541018e-06, + "loss": 0.1337249755859375, + "step": 32145 + }, + { + "epoch": 0.2779915435231861, + "grad_norm": 38.75725436543846, + "learning_rate": 5.718497484455761e-06, + "loss": 0.6437191009521485, + "step": 32150 + }, + { + "epoch": 0.2780347770447294, + "grad_norm": 8.964001322238195, + "learning_rate": 5.7184113148330335e-06, + "loss": 0.1546112060546875, + "step": 32155 + }, + { + "epoch": 0.2780780105662727, + "grad_norm": 6.16145903763676, + "learning_rate": 5.718325132673233e-06, + "loss": 0.1750274658203125, + "step": 32160 + }, + { + "epoch": 0.2781212440878159, + "grad_norm": 13.417316186574453, + "learning_rate": 5.7182389379767584e-06, + "loss": 0.241839599609375, + "step": 32165 + }, + { + "epoch": 0.2781644776093592, + "grad_norm": 2.165068151402627, + "learning_rate": 5.718152730744005e-06, + "loss": 0.22142486572265624, + "step": 32170 + }, + { + "epoch": 0.2782077111309025, + "grad_norm": 6.636352013171224, + "learning_rate": 5.718066510975372e-06, + "loss": 0.3294158935546875, + "step": 32175 + }, + { + "epoch": 0.2782509446524457, + "grad_norm": 2.192903091160774, + "learning_rate": 5.717980278671256e-06, + "loss": 0.21551475524902344, + "step": 32180 + }, + { + "epoch": 0.278294178173989, + "grad_norm": 22.475934196045582, + "learning_rate": 5.717894033832055e-06, + "loss": 0.13656578063964844, + "step": 32185 + }, + { + "epoch": 0.2783374116955322, + "grad_norm": 7.990240072320231, + "learning_rate": 5.717807776458168e-06, + "loss": 0.171405029296875, + "step": 32190 + }, + { + "epoch": 0.2783806452170755, + "grad_norm": 63.875152527045785, + "learning_rate": 5.7177215065499906e-06, + "loss": 0.16770248413085936, + "step": 32195 + }, + { + "epoch": 0.2784238787386188, + "grad_norm": 26.478565431999982, + "learning_rate": 5.717635224107921e-06, + "loss": 0.2940887451171875, + "step": 32200 + }, + { + "epoch": 0.278467112260162, + "grad_norm": 1.1371229776599698, + "learning_rate": 5.717548929132359e-06, + "loss": 0.20928497314453126, + "step": 32205 + }, + { + "epoch": 0.2785103457817053, + "grad_norm": 3.175764489320583, + "learning_rate": 5.717462621623701e-06, + "loss": 0.098370361328125, + "step": 32210 + }, + { + "epoch": 0.2785535793032486, + "grad_norm": 17.58340419806856, + "learning_rate": 5.717376301582346e-06, + "loss": 0.19754791259765625, + "step": 32215 + }, + { + "epoch": 0.2785968128247918, + "grad_norm": 0.10101063352412547, + "learning_rate": 5.717289969008692e-06, + "loss": 0.1103759765625, + "step": 32220 + }, + { + "epoch": 0.2786400463463351, + "grad_norm": 2.3257599453736306, + "learning_rate": 5.717203623903136e-06, + "loss": 0.1345916748046875, + "step": 32225 + }, + { + "epoch": 0.27868327986787833, + "grad_norm": 10.97563378771435, + "learning_rate": 5.717117266266077e-06, + "loss": 0.35165252685546877, + "step": 32230 + }, + { + "epoch": 0.2787265133894216, + "grad_norm": 4.09161011671208, + "learning_rate": 5.717030896097914e-06, + "loss": 0.0720947265625, + "step": 32235 + }, + { + "epoch": 0.2787697469109649, + "grad_norm": 8.545006045094173, + "learning_rate": 5.716944513399044e-06, + "loss": 0.10869064331054687, + "step": 32240 + }, + { + "epoch": 0.27881298043250813, + "grad_norm": 7.233327999924011, + "learning_rate": 5.716858118169865e-06, + "loss": 0.10413055419921875, + "step": 32245 + }, + { + "epoch": 0.2788562139540514, + "grad_norm": 2.309963054211625, + "learning_rate": 5.716771710410778e-06, + "loss": 0.07564430236816407, + "step": 32250 + }, + { + "epoch": 0.2788994474755947, + "grad_norm": 25.934715244783995, + "learning_rate": 5.71668529012218e-06, + "loss": 0.17907791137695311, + "step": 32255 + }, + { + "epoch": 0.2789426809971379, + "grad_norm": 4.034628411437151, + "learning_rate": 5.716598857304468e-06, + "loss": 0.06996536254882812, + "step": 32260 + }, + { + "epoch": 0.2789859145186812, + "grad_norm": 22.288058048039453, + "learning_rate": 5.716512411958043e-06, + "loss": 0.2477783203125, + "step": 32265 + }, + { + "epoch": 0.2790291480402245, + "grad_norm": 4.768037540785927, + "learning_rate": 5.716425954083303e-06, + "loss": 0.17568893432617189, + "step": 32270 + }, + { + "epoch": 0.2790723815617677, + "grad_norm": 6.689161852380759, + "learning_rate": 5.716339483680646e-06, + "loss": 0.16432876586914064, + "step": 32275 + }, + { + "epoch": 0.279115615083311, + "grad_norm": 16.45850913339697, + "learning_rate": 5.716253000750472e-06, + "loss": 0.23707952499389648, + "step": 32280 + }, + { + "epoch": 0.27915884860485424, + "grad_norm": 3.126225769685289, + "learning_rate": 5.716166505293179e-06, + "loss": 0.06726036071777344, + "step": 32285 + }, + { + "epoch": 0.2792020821263975, + "grad_norm": 13.125515644052006, + "learning_rate": 5.716079997309166e-06, + "loss": 0.38322982788085935, + "step": 32290 + }, + { + "epoch": 0.2792453156479408, + "grad_norm": 6.207457353949518, + "learning_rate": 5.715993476798831e-06, + "loss": 0.27607574462890627, + "step": 32295 + }, + { + "epoch": 0.27928854916948403, + "grad_norm": 3.0646111379931655, + "learning_rate": 5.715906943762575e-06, + "loss": 0.11673507690429688, + "step": 32300 + }, + { + "epoch": 0.2793317826910273, + "grad_norm": 73.55315322035763, + "learning_rate": 5.715820398200796e-06, + "loss": 0.4196647644042969, + "step": 32305 + }, + { + "epoch": 0.2793750162125706, + "grad_norm": 26.20352680409145, + "learning_rate": 5.715733840113894e-06, + "loss": 0.2528961181640625, + "step": 32310 + }, + { + "epoch": 0.27941824973411383, + "grad_norm": 43.565087732568124, + "learning_rate": 5.715647269502266e-06, + "loss": 0.8544479370117187, + "step": 32315 + }, + { + "epoch": 0.2794614832556571, + "grad_norm": 5.287852245919932, + "learning_rate": 5.7155606863663145e-06, + "loss": 0.5761810302734375, + "step": 32320 + }, + { + "epoch": 0.27950471677720035, + "grad_norm": 22.58820284542382, + "learning_rate": 5.715474090706436e-06, + "loss": 0.31085205078125, + "step": 32325 + }, + { + "epoch": 0.27954795029874363, + "grad_norm": 1.0596048975626524, + "learning_rate": 5.7153874825230315e-06, + "loss": 0.060662460327148435, + "step": 32330 + }, + { + "epoch": 0.2795911838202869, + "grad_norm": 2.0854513808737174, + "learning_rate": 5.7153008618165e-06, + "loss": 0.049782943725585935, + "step": 32335 + }, + { + "epoch": 0.27963441734183014, + "grad_norm": 43.10460823955474, + "learning_rate": 5.71521422858724e-06, + "loss": 0.1304473876953125, + "step": 32340 + }, + { + "epoch": 0.27967765086337343, + "grad_norm": 0.9413175089775286, + "learning_rate": 5.715127582835651e-06, + "loss": 0.24437713623046875, + "step": 32345 + }, + { + "epoch": 0.2797208843849167, + "grad_norm": 7.388057534052577, + "learning_rate": 5.715040924562136e-06, + "loss": 0.064837646484375, + "step": 32350 + }, + { + "epoch": 0.27976411790645994, + "grad_norm": 10.214414955948797, + "learning_rate": 5.714954253767091e-06, + "loss": 0.13851051330566405, + "step": 32355 + }, + { + "epoch": 0.2798073514280032, + "grad_norm": 1.054478673199317, + "learning_rate": 5.714867570450917e-06, + "loss": 0.12401046752929687, + "step": 32360 + }, + { + "epoch": 0.27985058494954645, + "grad_norm": 2.5398659408436752, + "learning_rate": 5.714780874614012e-06, + "loss": 0.13538818359375, + "step": 32365 + }, + { + "epoch": 0.27989381847108974, + "grad_norm": 30.454418988561667, + "learning_rate": 5.71469416625678e-06, + "loss": 0.300408935546875, + "step": 32370 + }, + { + "epoch": 0.279937051992633, + "grad_norm": 12.041182195840655, + "learning_rate": 5.714607445379617e-06, + "loss": 0.121917724609375, + "step": 32375 + }, + { + "epoch": 0.27998028551417625, + "grad_norm": 10.063513492472175, + "learning_rate": 5.714520711982925e-06, + "loss": 0.27914886474609374, + "step": 32380 + }, + { + "epoch": 0.28002351903571954, + "grad_norm": 3.8147848454575364, + "learning_rate": 5.7144339660671024e-06, + "loss": 0.12117233276367187, + "step": 32385 + }, + { + "epoch": 0.2800667525572628, + "grad_norm": 3.3660655368611736, + "learning_rate": 5.714347207632551e-06, + "loss": 0.1408905029296875, + "step": 32390 + }, + { + "epoch": 0.28010998607880605, + "grad_norm": 8.16620653960762, + "learning_rate": 5.714260436679671e-06, + "loss": 0.41626739501953125, + "step": 32395 + }, + { + "epoch": 0.28015321960034933, + "grad_norm": 8.035130364061754, + "learning_rate": 5.71417365320886e-06, + "loss": 0.053338623046875, + "step": 32400 + }, + { + "epoch": 0.28019645312189256, + "grad_norm": 26.986436006948903, + "learning_rate": 5.714086857220521e-06, + "loss": 0.316802978515625, + "step": 32405 + }, + { + "epoch": 0.28023968664343585, + "grad_norm": 10.621947731041617, + "learning_rate": 5.7140000487150525e-06, + "loss": 0.305731201171875, + "step": 32410 + }, + { + "epoch": 0.28028292016497913, + "grad_norm": 3.639874853338867, + "learning_rate": 5.713913227692856e-06, + "loss": 0.5780059814453125, + "step": 32415 + }, + { + "epoch": 0.28032615368652236, + "grad_norm": 3.9071225216129917, + "learning_rate": 5.713826394154331e-06, + "loss": 0.19503860473632811, + "step": 32420 + }, + { + "epoch": 0.28036938720806565, + "grad_norm": 11.564864478373112, + "learning_rate": 5.7137395480998795e-06, + "loss": 0.27798309326171877, + "step": 32425 + }, + { + "epoch": 0.28041262072960893, + "grad_norm": 2.016992221341489, + "learning_rate": 5.7136526895299e-06, + "loss": 0.2826042175292969, + "step": 32430 + }, + { + "epoch": 0.28045585425115216, + "grad_norm": 13.929348302993764, + "learning_rate": 5.713565818444794e-06, + "loss": 0.28160400390625, + "step": 32435 + }, + { + "epoch": 0.28049908777269544, + "grad_norm": 11.43562141624273, + "learning_rate": 5.713478934844962e-06, + "loss": 0.1321441650390625, + "step": 32440 + }, + { + "epoch": 0.28054232129423873, + "grad_norm": 2.1206244339334135, + "learning_rate": 5.713392038730807e-06, + "loss": 0.06275138854980469, + "step": 32445 + }, + { + "epoch": 0.28058555481578196, + "grad_norm": 12.20201253878195, + "learning_rate": 5.713305130102725e-06, + "loss": 0.27338180541992185, + "step": 32450 + }, + { + "epoch": 0.28062878833732524, + "grad_norm": 13.019763749419822, + "learning_rate": 5.713218208961121e-06, + "loss": 0.1313507080078125, + "step": 32455 + }, + { + "epoch": 0.28067202185886847, + "grad_norm": 2.7055950543431515, + "learning_rate": 5.713131275306394e-06, + "loss": 0.10678482055664062, + "step": 32460 + }, + { + "epoch": 0.28071525538041175, + "grad_norm": 25.968688515992113, + "learning_rate": 5.713044329138946e-06, + "loss": 0.24709320068359375, + "step": 32465 + }, + { + "epoch": 0.28075848890195504, + "grad_norm": 6.79051000544415, + "learning_rate": 5.712957370459176e-06, + "loss": 0.23637313842773439, + "step": 32470 + }, + { + "epoch": 0.28080172242349827, + "grad_norm": 5.65417380213552, + "learning_rate": 5.712870399267487e-06, + "loss": 0.16740989685058594, + "step": 32475 + }, + { + "epoch": 0.28084495594504155, + "grad_norm": 28.09239458516723, + "learning_rate": 5.71278341556428e-06, + "loss": 0.25714874267578125, + "step": 32480 + }, + { + "epoch": 0.28088818946658484, + "grad_norm": 6.095141948940669, + "learning_rate": 5.712696419349954e-06, + "loss": 0.13420257568359376, + "step": 32485 + }, + { + "epoch": 0.28093142298812807, + "grad_norm": 1.2444554306009037, + "learning_rate": 5.712609410624914e-06, + "loss": 0.21563491821289063, + "step": 32490 + }, + { + "epoch": 0.28097465650967135, + "grad_norm": 4.568990717615066, + "learning_rate": 5.712522389389558e-06, + "loss": 0.2839569091796875, + "step": 32495 + }, + { + "epoch": 0.2810178900312146, + "grad_norm": 2.478483455921944, + "learning_rate": 5.712435355644288e-06, + "loss": 0.3125701904296875, + "step": 32500 + }, + { + "epoch": 0.28106112355275786, + "grad_norm": 17.679654768715203, + "learning_rate": 5.7123483093895066e-06, + "loss": 0.4348876953125, + "step": 32505 + }, + { + "epoch": 0.28110435707430115, + "grad_norm": 23.30130054671403, + "learning_rate": 5.712261250625614e-06, + "loss": 0.18791046142578124, + "step": 32510 + }, + { + "epoch": 0.2811475905958444, + "grad_norm": 5.105702089559686, + "learning_rate": 5.712174179353012e-06, + "loss": 0.325189208984375, + "step": 32515 + }, + { + "epoch": 0.28119082411738766, + "grad_norm": 10.411968011005264, + "learning_rate": 5.712087095572102e-06, + "loss": 0.2858062744140625, + "step": 32520 + }, + { + "epoch": 0.28123405763893095, + "grad_norm": 27.86046125591227, + "learning_rate": 5.711999999283286e-06, + "loss": 0.18492431640625, + "step": 32525 + }, + { + "epoch": 0.2812772911604742, + "grad_norm": 26.500277505834028, + "learning_rate": 5.711912890486967e-06, + "loss": 0.34012451171875, + "step": 32530 + }, + { + "epoch": 0.28132052468201746, + "grad_norm": 36.68806654791653, + "learning_rate": 5.711825769183544e-06, + "loss": 0.3759880065917969, + "step": 32535 + }, + { + "epoch": 0.2813637582035607, + "grad_norm": 4.5797717561889355, + "learning_rate": 5.711738635373421e-06, + "loss": 0.09210662841796875, + "step": 32540 + }, + { + "epoch": 0.28140699172510397, + "grad_norm": 8.022816396113987, + "learning_rate": 5.711651489056999e-06, + "loss": 0.230126953125, + "step": 32545 + }, + { + "epoch": 0.28145022524664726, + "grad_norm": 51.8227983216737, + "learning_rate": 5.7115643302346794e-06, + "loss": 0.4008636474609375, + "step": 32550 + }, + { + "epoch": 0.2814934587681905, + "grad_norm": 0.37070743898483705, + "learning_rate": 5.7114771589068655e-06, + "loss": 0.2116241455078125, + "step": 32555 + }, + { + "epoch": 0.28153669228973377, + "grad_norm": 1.1800233390115662, + "learning_rate": 5.711389975073958e-06, + "loss": 0.129443359375, + "step": 32560 + }, + { + "epoch": 0.28157992581127705, + "grad_norm": 3.936851713543133, + "learning_rate": 5.711302778736359e-06, + "loss": 0.08626251220703125, + "step": 32565 + }, + { + "epoch": 0.2816231593328203, + "grad_norm": 18.676976817563094, + "learning_rate": 5.711215569894472e-06, + "loss": 0.293634033203125, + "step": 32570 + }, + { + "epoch": 0.28166639285436357, + "grad_norm": 1.7125852478986785, + "learning_rate": 5.711128348548698e-06, + "loss": 0.12762584686279296, + "step": 32575 + }, + { + "epoch": 0.2817096263759068, + "grad_norm": 39.53236938387904, + "learning_rate": 5.71104111469944e-06, + "loss": 0.3841644287109375, + "step": 32580 + }, + { + "epoch": 0.2817528598974501, + "grad_norm": 1.0668672761008609, + "learning_rate": 5.7109538683471e-06, + "loss": 0.14012298583984376, + "step": 32585 + }, + { + "epoch": 0.28179609341899337, + "grad_norm": 15.250433205682867, + "learning_rate": 5.71086660949208e-06, + "loss": 0.07717437744140625, + "step": 32590 + }, + { + "epoch": 0.2818393269405366, + "grad_norm": 45.770589540601826, + "learning_rate": 5.710779338134781e-06, + "loss": 0.34495086669921876, + "step": 32595 + }, + { + "epoch": 0.2818825604620799, + "grad_norm": 8.703266660042013, + "learning_rate": 5.71069205427561e-06, + "loss": 0.22355880737304687, + "step": 32600 + }, + { + "epoch": 0.28192579398362316, + "grad_norm": 15.812534155546066, + "learning_rate": 5.710604757914964e-06, + "loss": 0.16200485229492187, + "step": 32605 + }, + { + "epoch": 0.2819690275051664, + "grad_norm": 6.997895922500608, + "learning_rate": 5.710517449053251e-06, + "loss": 0.122174072265625, + "step": 32610 + }, + { + "epoch": 0.2820122610267097, + "grad_norm": 37.42059754545604, + "learning_rate": 5.71043012769087e-06, + "loss": 0.1215576171875, + "step": 32615 + }, + { + "epoch": 0.28205549454825296, + "grad_norm": 39.30872718366661, + "learning_rate": 5.7103427938282245e-06, + "loss": 0.4510852813720703, + "step": 32620 + }, + { + "epoch": 0.2820987280697962, + "grad_norm": 19.770407500819196, + "learning_rate": 5.710255447465717e-06, + "loss": 0.24208984375, + "step": 32625 + }, + { + "epoch": 0.2821419615913395, + "grad_norm": 13.869004927513473, + "learning_rate": 5.710168088603753e-06, + "loss": 0.21141204833984376, + "step": 32630 + }, + { + "epoch": 0.2821851951128827, + "grad_norm": 0.31965879029590505, + "learning_rate": 5.710080717242731e-06, + "loss": 0.31683197021484377, + "step": 32635 + }, + { + "epoch": 0.282228428634426, + "grad_norm": 2.741773151870257, + "learning_rate": 5.709993333383058e-06, + "loss": 0.2747528076171875, + "step": 32640 + }, + { + "epoch": 0.28227166215596927, + "grad_norm": 5.104390651573195, + "learning_rate": 5.709905937025134e-06, + "loss": 0.0775634765625, + "step": 32645 + }, + { + "epoch": 0.2823148956775125, + "grad_norm": 10.669111524863775, + "learning_rate": 5.709818528169364e-06, + "loss": 0.1397308349609375, + "step": 32650 + }, + { + "epoch": 0.2823581291990558, + "grad_norm": 14.139087540093223, + "learning_rate": 5.709731106816149e-06, + "loss": 0.21715011596679687, + "step": 32655 + }, + { + "epoch": 0.28240136272059907, + "grad_norm": 17.823006910569784, + "learning_rate": 5.709643672965895e-06, + "loss": 0.15727081298828124, + "step": 32660 + }, + { + "epoch": 0.2824445962421423, + "grad_norm": 0.590627342535353, + "learning_rate": 5.709556226619003e-06, + "loss": 0.134521484375, + "step": 32665 + }, + { + "epoch": 0.2824878297636856, + "grad_norm": 4.4671728987028825, + "learning_rate": 5.709468767775878e-06, + "loss": 0.04238471984863281, + "step": 32670 + }, + { + "epoch": 0.2825310632852288, + "grad_norm": 35.985675763935795, + "learning_rate": 5.7093812964369214e-06, + "loss": 0.260986328125, + "step": 32675 + }, + { + "epoch": 0.2825742968067721, + "grad_norm": 0.44463317350206033, + "learning_rate": 5.709293812602538e-06, + "loss": 0.2628448486328125, + "step": 32680 + }, + { + "epoch": 0.2826175303283154, + "grad_norm": 25.429955598889954, + "learning_rate": 5.709206316273132e-06, + "loss": 0.15642318725585938, + "step": 32685 + }, + { + "epoch": 0.2826607638498586, + "grad_norm": 9.36693623872853, + "learning_rate": 5.709118807449104e-06, + "loss": 0.392431640625, + "step": 32690 + }, + { + "epoch": 0.2827039973714019, + "grad_norm": 7.877511577816259, + "learning_rate": 5.709031286130861e-06, + "loss": 0.05874481201171875, + "step": 32695 + }, + { + "epoch": 0.2827472308929452, + "grad_norm": 39.510837187285574, + "learning_rate": 5.7089437523188046e-06, + "loss": 0.3364471435546875, + "step": 32700 + }, + { + "epoch": 0.2827904644144884, + "grad_norm": 7.285813737654926, + "learning_rate": 5.7088562060133385e-06, + "loss": 0.1646240234375, + "step": 32705 + }, + { + "epoch": 0.2828336979360317, + "grad_norm": 0.14828393396203526, + "learning_rate": 5.708768647214867e-06, + "loss": 0.20598602294921875, + "step": 32710 + }, + { + "epoch": 0.2828769314575749, + "grad_norm": 6.4322999540119765, + "learning_rate": 5.708681075923794e-06, + "loss": 0.2443267822265625, + "step": 32715 + }, + { + "epoch": 0.2829201649791182, + "grad_norm": 13.064540170540335, + "learning_rate": 5.708593492140524e-06, + "loss": 0.14674415588378906, + "step": 32720 + }, + { + "epoch": 0.2829633985006615, + "grad_norm": 48.55003456033517, + "learning_rate": 5.70850589586546e-06, + "loss": 0.3035728454589844, + "step": 32725 + }, + { + "epoch": 0.2830066320222047, + "grad_norm": 1.089588046358207, + "learning_rate": 5.708418287099005e-06, + "loss": 0.08963623046875, + "step": 32730 + }, + { + "epoch": 0.283049865543748, + "grad_norm": 13.954465566103513, + "learning_rate": 5.708330665841564e-06, + "loss": 0.241998291015625, + "step": 32735 + }, + { + "epoch": 0.2830930990652913, + "grad_norm": 9.917094836812392, + "learning_rate": 5.708243032093542e-06, + "loss": 0.2755950927734375, + "step": 32740 + }, + { + "epoch": 0.2831363325868345, + "grad_norm": 20.07947111852866, + "learning_rate": 5.708155385855343e-06, + "loss": 0.383282470703125, + "step": 32745 + }, + { + "epoch": 0.2831795661083778, + "grad_norm": 1.1452115618482586, + "learning_rate": 5.708067727127371e-06, + "loss": 0.18856582641601563, + "step": 32750 + }, + { + "epoch": 0.28322279962992103, + "grad_norm": 27.587219887266215, + "learning_rate": 5.707980055910029e-06, + "loss": 0.1089630126953125, + "step": 32755 + }, + { + "epoch": 0.2832660331514643, + "grad_norm": 2.672558874969092, + "learning_rate": 5.7078923722037215e-06, + "loss": 0.14276885986328125, + "step": 32760 + }, + { + "epoch": 0.2833092666730076, + "grad_norm": 43.711343891778476, + "learning_rate": 5.707804676008855e-06, + "loss": 0.555999755859375, + "step": 32765 + }, + { + "epoch": 0.2833525001945508, + "grad_norm": 28.23017121412429, + "learning_rate": 5.707716967325832e-06, + "loss": 0.3826507568359375, + "step": 32770 + }, + { + "epoch": 0.2833957337160941, + "grad_norm": 3.5020022243317026, + "learning_rate": 5.7076292461550585e-06, + "loss": 0.10234146118164063, + "step": 32775 + }, + { + "epoch": 0.2834389672376374, + "grad_norm": 13.516560237239316, + "learning_rate": 5.707541512496937e-06, + "loss": 0.12988433837890626, + "step": 32780 + }, + { + "epoch": 0.2834822007591806, + "grad_norm": 10.308129413217209, + "learning_rate": 5.707453766351874e-06, + "loss": 0.21295318603515626, + "step": 32785 + }, + { + "epoch": 0.2835254342807239, + "grad_norm": 7.973608912770049, + "learning_rate": 5.707366007720272e-06, + "loss": 0.3406829833984375, + "step": 32790 + }, + { + "epoch": 0.2835686678022672, + "grad_norm": 13.787851510140909, + "learning_rate": 5.707278236602539e-06, + "loss": 0.08129425048828125, + "step": 32795 + }, + { + "epoch": 0.2836119013238104, + "grad_norm": 9.76222762997871, + "learning_rate": 5.707190452999077e-06, + "loss": 0.13535919189453124, + "step": 32800 + }, + { + "epoch": 0.2836551348453537, + "grad_norm": 65.04650387454451, + "learning_rate": 5.707102656910293e-06, + "loss": 0.21133804321289062, + "step": 32805 + }, + { + "epoch": 0.28369836836689694, + "grad_norm": 37.08748139614883, + "learning_rate": 5.707014848336589e-06, + "loss": 0.43520317077636717, + "step": 32810 + }, + { + "epoch": 0.2837416018884402, + "grad_norm": 0.05346165003750084, + "learning_rate": 5.706927027278373e-06, + "loss": 0.41904449462890625, + "step": 32815 + }, + { + "epoch": 0.2837848354099835, + "grad_norm": 11.34812810505826, + "learning_rate": 5.706839193736048e-06, + "loss": 0.1090118408203125, + "step": 32820 + }, + { + "epoch": 0.28382806893152673, + "grad_norm": 13.629772959844923, + "learning_rate": 5.706751347710021e-06, + "loss": 0.12837982177734375, + "step": 32825 + }, + { + "epoch": 0.28387130245307, + "grad_norm": 8.468898025529402, + "learning_rate": 5.706663489200694e-06, + "loss": 0.1421255111694336, + "step": 32830 + }, + { + "epoch": 0.2839145359746133, + "grad_norm": 15.496019614918318, + "learning_rate": 5.706575618208476e-06, + "loss": 0.238848876953125, + "step": 32835 + }, + { + "epoch": 0.28395776949615653, + "grad_norm": 2.84391503192815, + "learning_rate": 5.7064877347337695e-06, + "loss": 0.1758636474609375, + "step": 32840 + }, + { + "epoch": 0.2840010030176998, + "grad_norm": 3.2517050921341037, + "learning_rate": 5.706399838776981e-06, + "loss": 0.22673568725585938, + "step": 32845 + }, + { + "epoch": 0.28404423653924304, + "grad_norm": 31.369221008010044, + "learning_rate": 5.706311930338516e-06, + "loss": 0.13800888061523436, + "step": 32850 + }, + { + "epoch": 0.28408747006078633, + "grad_norm": 2.907877791516347, + "learning_rate": 5.706224009418779e-06, + "loss": 0.471551513671875, + "step": 32855 + }, + { + "epoch": 0.2841307035823296, + "grad_norm": 28.67000097291913, + "learning_rate": 5.706136076018175e-06, + "loss": 0.22627429962158202, + "step": 32860 + }, + { + "epoch": 0.28417393710387284, + "grad_norm": 11.218747233636499, + "learning_rate": 5.706048130137113e-06, + "loss": 0.2661346435546875, + "step": 32865 + }, + { + "epoch": 0.2842171706254161, + "grad_norm": 10.753219793706716, + "learning_rate": 5.705960171775994e-06, + "loss": 0.11700897216796875, + "step": 32870 + }, + { + "epoch": 0.2842604041469594, + "grad_norm": 5.916833103531452, + "learning_rate": 5.705872200935227e-06, + "loss": 0.2022308349609375, + "step": 32875 + }, + { + "epoch": 0.28430363766850264, + "grad_norm": 1.6362635846366547, + "learning_rate": 5.705784217615216e-06, + "loss": 0.1314483642578125, + "step": 32880 + }, + { + "epoch": 0.2843468711900459, + "grad_norm": 10.504199174871326, + "learning_rate": 5.705696221816367e-06, + "loss": 0.23538589477539062, + "step": 32885 + }, + { + "epoch": 0.28439010471158915, + "grad_norm": 2.987855072375423, + "learning_rate": 5.705608213539086e-06, + "loss": 0.4448890686035156, + "step": 32890 + }, + { + "epoch": 0.28443333823313244, + "grad_norm": 29.21687223111923, + "learning_rate": 5.705520192783779e-06, + "loss": 0.2450439453125, + "step": 32895 + }, + { + "epoch": 0.2844765717546757, + "grad_norm": 9.787823099029314, + "learning_rate": 5.7054321595508525e-06, + "loss": 0.19371337890625, + "step": 32900 + }, + { + "epoch": 0.28451980527621895, + "grad_norm": 5.9932195238511285, + "learning_rate": 5.705344113840712e-06, + "loss": 0.3146453857421875, + "step": 32905 + }, + { + "epoch": 0.28456303879776224, + "grad_norm": 2.6796627328914293, + "learning_rate": 5.7052560556537635e-06, + "loss": 0.4286102294921875, + "step": 32910 + }, + { + "epoch": 0.2846062723193055, + "grad_norm": 8.044639067810932, + "learning_rate": 5.705167984990413e-06, + "loss": 0.06683807373046875, + "step": 32915 + }, + { + "epoch": 0.28464950584084875, + "grad_norm": 29.586545579603957, + "learning_rate": 5.705079901851066e-06, + "loss": 0.211187744140625, + "step": 32920 + }, + { + "epoch": 0.28469273936239203, + "grad_norm": 2.8064318534854875, + "learning_rate": 5.7049918062361305e-06, + "loss": 0.24440231323242187, + "step": 32925 + }, + { + "epoch": 0.28473597288393526, + "grad_norm": 16.613350554648214, + "learning_rate": 5.704903698146011e-06, + "loss": 0.23409423828125, + "step": 32930 + }, + { + "epoch": 0.28477920640547855, + "grad_norm": 2.088141328713423, + "learning_rate": 5.704815577581115e-06, + "loss": 0.0308868408203125, + "step": 32935 + }, + { + "epoch": 0.28482243992702183, + "grad_norm": 39.468363262305026, + "learning_rate": 5.704727444541849e-06, + "loss": 0.24175567626953126, + "step": 32940 + }, + { + "epoch": 0.28486567344856506, + "grad_norm": 1.1689775532691995, + "learning_rate": 5.704639299028619e-06, + "loss": 0.08531494140625, + "step": 32945 + }, + { + "epoch": 0.28490890697010834, + "grad_norm": 1.1091300062177882, + "learning_rate": 5.704551141041831e-06, + "loss": 0.057073974609375, + "step": 32950 + }, + { + "epoch": 0.28495214049165163, + "grad_norm": 8.13237020810355, + "learning_rate": 5.7044629705818925e-06, + "loss": 0.17168731689453126, + "step": 32955 + }, + { + "epoch": 0.28499537401319486, + "grad_norm": 16.691178855339967, + "learning_rate": 5.704374787649209e-06, + "loss": 0.24508895874023437, + "step": 32960 + }, + { + "epoch": 0.28503860753473814, + "grad_norm": 0.2195041006671929, + "learning_rate": 5.704286592244189e-06, + "loss": 0.09951705932617187, + "step": 32965 + }, + { + "epoch": 0.28508184105628137, + "grad_norm": 1.526646723995614, + "learning_rate": 5.7041983843672375e-06, + "loss": 0.03782806396484375, + "step": 32970 + }, + { + "epoch": 0.28512507457782466, + "grad_norm": 0.2195253942548394, + "learning_rate": 5.704110164018763e-06, + "loss": 0.554351806640625, + "step": 32975 + }, + { + "epoch": 0.28516830809936794, + "grad_norm": 20.011577859383525, + "learning_rate": 5.70402193119917e-06, + "loss": 0.242803955078125, + "step": 32980 + }, + { + "epoch": 0.28521154162091117, + "grad_norm": 5.1918976760987, + "learning_rate": 5.703933685908868e-06, + "loss": 0.1389404296875, + "step": 32985 + }, + { + "epoch": 0.28525477514245445, + "grad_norm": 6.525054634971255, + "learning_rate": 5.703845428148262e-06, + "loss": 0.157269287109375, + "step": 32990 + }, + { + "epoch": 0.28529800866399774, + "grad_norm": 3.3187135496837104, + "learning_rate": 5.703757157917759e-06, + "loss": 0.17435302734375, + "step": 32995 + }, + { + "epoch": 0.28534124218554097, + "grad_norm": 29.586168055226697, + "learning_rate": 5.703668875217769e-06, + "loss": 0.4050445556640625, + "step": 33000 + }, + { + "epoch": 0.28538447570708425, + "grad_norm": 3.262894950051958, + "learning_rate": 5.703580580048695e-06, + "loss": 0.08810958862304688, + "step": 33005 + }, + { + "epoch": 0.28542770922862754, + "grad_norm": 1.0703694871734428, + "learning_rate": 5.703492272410948e-06, + "loss": 0.17995147705078124, + "step": 33010 + }, + { + "epoch": 0.28547094275017076, + "grad_norm": 22.085793627743264, + "learning_rate": 5.703403952304932e-06, + "loss": 0.1957061767578125, + "step": 33015 + }, + { + "epoch": 0.28551417627171405, + "grad_norm": 3.2684959308732013, + "learning_rate": 5.7033156197310565e-06, + "loss": 0.262152099609375, + "step": 33020 + }, + { + "epoch": 0.2855574097932573, + "grad_norm": 0.22339222132868838, + "learning_rate": 5.703227274689727e-06, + "loss": 0.20795745849609376, + "step": 33025 + }, + { + "epoch": 0.28560064331480056, + "grad_norm": 6.42565464153906, + "learning_rate": 5.703138917181354e-06, + "loss": 0.109368896484375, + "step": 33030 + }, + { + "epoch": 0.28564387683634385, + "grad_norm": 8.194117582978805, + "learning_rate": 5.703050547206343e-06, + "loss": 0.18115882873535155, + "step": 33035 + }, + { + "epoch": 0.2856871103578871, + "grad_norm": 11.633410195234877, + "learning_rate": 5.702962164765101e-06, + "loss": 0.07072982788085938, + "step": 33040 + }, + { + "epoch": 0.28573034387943036, + "grad_norm": 0.28479769376880765, + "learning_rate": 5.702873769858037e-06, + "loss": 0.12855224609375, + "step": 33045 + }, + { + "epoch": 0.28577357740097364, + "grad_norm": 2.068738291336417, + "learning_rate": 5.702785362485557e-06, + "loss": 0.1085174560546875, + "step": 33050 + }, + { + "epoch": 0.2858168109225169, + "grad_norm": 4.577813153051502, + "learning_rate": 5.7026969426480714e-06, + "loss": 0.049463081359863284, + "step": 33055 + }, + { + "epoch": 0.28586004444406016, + "grad_norm": 23.55356804600776, + "learning_rate": 5.702608510345985e-06, + "loss": 0.204461669921875, + "step": 33060 + }, + { + "epoch": 0.2859032779656034, + "grad_norm": 13.564752647431467, + "learning_rate": 5.702520065579707e-06, + "loss": 0.20189437866210938, + "step": 33065 + }, + { + "epoch": 0.28594651148714667, + "grad_norm": 33.551520836808486, + "learning_rate": 5.702431608349645e-06, + "loss": 0.3769683837890625, + "step": 33070 + }, + { + "epoch": 0.28598974500868996, + "grad_norm": 29.32112428351551, + "learning_rate": 5.702343138656208e-06, + "loss": 0.38226318359375, + "step": 33075 + }, + { + "epoch": 0.2860329785302332, + "grad_norm": 12.712454483729777, + "learning_rate": 5.702254656499803e-06, + "loss": 0.297271728515625, + "step": 33080 + }, + { + "epoch": 0.28607621205177647, + "grad_norm": 0.32666726853493105, + "learning_rate": 5.702166161880839e-06, + "loss": 0.12231979370117188, + "step": 33085 + }, + { + "epoch": 0.28611944557331975, + "grad_norm": 5.817938297525336, + "learning_rate": 5.702077654799722e-06, + "loss": 0.04729766845703125, + "step": 33090 + }, + { + "epoch": 0.286162679094863, + "grad_norm": 0.39411704418995336, + "learning_rate": 5.701989135256863e-06, + "loss": 0.1337432861328125, + "step": 33095 + }, + { + "epoch": 0.28620591261640627, + "grad_norm": 14.819828853266454, + "learning_rate": 5.701900603252667e-06, + "loss": 0.20048294067382813, + "step": 33100 + }, + { + "epoch": 0.2862491461379495, + "grad_norm": 12.998007774964035, + "learning_rate": 5.7018120587875455e-06, + "loss": 0.25064659118652344, + "step": 33105 + }, + { + "epoch": 0.2862923796594928, + "grad_norm": 4.090159019678311, + "learning_rate": 5.701723501861905e-06, + "loss": 0.1040863037109375, + "step": 33110 + }, + { + "epoch": 0.28633561318103606, + "grad_norm": 97.46885626974723, + "learning_rate": 5.701634932476155e-06, + "loss": 0.6391143798828125, + "step": 33115 + }, + { + "epoch": 0.2863788467025793, + "grad_norm": 7.084253056082746, + "learning_rate": 5.7015463506307034e-06, + "loss": 0.14660186767578126, + "step": 33120 + }, + { + "epoch": 0.2864220802241226, + "grad_norm": 29.72366424256011, + "learning_rate": 5.701457756325959e-06, + "loss": 0.3483673095703125, + "step": 33125 + }, + { + "epoch": 0.28646531374566586, + "grad_norm": 2.5638303960911832, + "learning_rate": 5.701369149562328e-06, + "loss": 0.235455322265625, + "step": 33130 + }, + { + "epoch": 0.2865085472672091, + "grad_norm": 18.331701419815587, + "learning_rate": 5.701280530340224e-06, + "loss": 0.24447784423828126, + "step": 33135 + }, + { + "epoch": 0.2865517807887524, + "grad_norm": 16.514262276349186, + "learning_rate": 5.701191898660052e-06, + "loss": 0.2832206726074219, + "step": 33140 + }, + { + "epoch": 0.2865950143102956, + "grad_norm": 0.38971399699619824, + "learning_rate": 5.701103254522221e-06, + "loss": 0.3162109375, + "step": 33145 + }, + { + "epoch": 0.2866382478318389, + "grad_norm": 31.236432733498326, + "learning_rate": 5.70101459792714e-06, + "loss": 0.23926219940185547, + "step": 33150 + }, + { + "epoch": 0.2866814813533822, + "grad_norm": 4.912047834441746, + "learning_rate": 5.70092592887522e-06, + "loss": 0.39260711669921877, + "step": 33155 + }, + { + "epoch": 0.2867247148749254, + "grad_norm": 8.86092586280775, + "learning_rate": 5.7008372473668676e-06, + "loss": 0.10263214111328126, + "step": 33160 + }, + { + "epoch": 0.2867679483964687, + "grad_norm": 1.4337327431728768, + "learning_rate": 5.700748553402492e-06, + "loss": 0.12291107177734376, + "step": 33165 + }, + { + "epoch": 0.28681118191801197, + "grad_norm": 3.291314682204805, + "learning_rate": 5.700659846982503e-06, + "loss": 0.11803207397460938, + "step": 33170 + }, + { + "epoch": 0.2868544154395552, + "grad_norm": 2.823085276543035, + "learning_rate": 5.700571128107309e-06, + "loss": 0.18003997802734376, + "step": 33175 + }, + { + "epoch": 0.2868976489610985, + "grad_norm": 24.21976598430517, + "learning_rate": 5.70048239677732e-06, + "loss": 0.07034912109375, + "step": 33180 + }, + { + "epoch": 0.28694088248264177, + "grad_norm": 35.034026735829315, + "learning_rate": 5.700393652992945e-06, + "loss": 0.37071685791015624, + "step": 33185 + }, + { + "epoch": 0.286984116004185, + "grad_norm": 0.45867251863925473, + "learning_rate": 5.700304896754593e-06, + "loss": 0.20634613037109376, + "step": 33190 + }, + { + "epoch": 0.2870273495257283, + "grad_norm": 7.502313170231103, + "learning_rate": 5.700216128062673e-06, + "loss": 0.1986724853515625, + "step": 33195 + }, + { + "epoch": 0.2870705830472715, + "grad_norm": 11.715989020776934, + "learning_rate": 5.700127346917595e-06, + "loss": 0.2061450958251953, + "step": 33200 + }, + { + "epoch": 0.2871138165688148, + "grad_norm": 0.28733133059838273, + "learning_rate": 5.7000385533197685e-06, + "loss": 0.14855194091796875, + "step": 33205 + }, + { + "epoch": 0.2871570500903581, + "grad_norm": 6.510084066761343, + "learning_rate": 5.6999497472696025e-06, + "loss": 0.12490158081054688, + "step": 33210 + }, + { + "epoch": 0.2872002836119013, + "grad_norm": 7.355307717148653, + "learning_rate": 5.6998609287675075e-06, + "loss": 0.13460693359375, + "step": 33215 + }, + { + "epoch": 0.2872435171334446, + "grad_norm": 11.652206831404218, + "learning_rate": 5.699772097813892e-06, + "loss": 0.27275390625, + "step": 33220 + }, + { + "epoch": 0.2872867506549879, + "grad_norm": 22.361416579811387, + "learning_rate": 5.699683254409166e-06, + "loss": 0.1928375244140625, + "step": 33225 + }, + { + "epoch": 0.2873299841765311, + "grad_norm": 10.896575625226083, + "learning_rate": 5.69959439855374e-06, + "loss": 0.0882354736328125, + "step": 33230 + }, + { + "epoch": 0.2873732176980744, + "grad_norm": 14.35052750837095, + "learning_rate": 5.699505530248022e-06, + "loss": 0.22389545440673828, + "step": 33235 + }, + { + "epoch": 0.2874164512196176, + "grad_norm": 25.866104366934856, + "learning_rate": 5.699416649492423e-06, + "loss": 0.32111663818359376, + "step": 33240 + }, + { + "epoch": 0.2874596847411609, + "grad_norm": 1.796756950219203, + "learning_rate": 5.699327756287354e-06, + "loss": 0.13537750244140626, + "step": 33245 + }, + { + "epoch": 0.2875029182627042, + "grad_norm": 4.912939879095256, + "learning_rate": 5.699238850633224e-06, + "loss": 0.3700469970703125, + "step": 33250 + }, + { + "epoch": 0.2875461517842474, + "grad_norm": 6.4775640134820485, + "learning_rate": 5.699149932530442e-06, + "loss": 0.05599822998046875, + "step": 33255 + }, + { + "epoch": 0.2875893853057907, + "grad_norm": 30.851926761820266, + "learning_rate": 5.69906100197942e-06, + "loss": 0.2613250732421875, + "step": 33260 + }, + { + "epoch": 0.287632618827334, + "grad_norm": 3.183724554821298, + "learning_rate": 5.698972058980566e-06, + "loss": 0.165869140625, + "step": 33265 + }, + { + "epoch": 0.2876758523488772, + "grad_norm": 20.81642107308581, + "learning_rate": 5.698883103534292e-06, + "loss": 0.2375030517578125, + "step": 33270 + }, + { + "epoch": 0.2877190858704205, + "grad_norm": 3.345573156257276, + "learning_rate": 5.698794135641007e-06, + "loss": 0.239898681640625, + "step": 33275 + }, + { + "epoch": 0.28776231939196373, + "grad_norm": 27.932329252532806, + "learning_rate": 5.698705155301123e-06, + "loss": 0.22573623657226563, + "step": 33280 + }, + { + "epoch": 0.287805552913507, + "grad_norm": 1.2752796381703333, + "learning_rate": 5.698616162515049e-06, + "loss": 0.426544189453125, + "step": 33285 + }, + { + "epoch": 0.2878487864350503, + "grad_norm": 1.687700347843407, + "learning_rate": 5.698527157283195e-06, + "loss": 0.34682159423828124, + "step": 33290 + }, + { + "epoch": 0.2878920199565935, + "grad_norm": 19.776252384870673, + "learning_rate": 5.698438139605973e-06, + "loss": 0.0898193359375, + "step": 33295 + }, + { + "epoch": 0.2879352534781368, + "grad_norm": 3.6787266156824865, + "learning_rate": 5.6983491094837925e-06, + "loss": 0.49025115966796873, + "step": 33300 + }, + { + "epoch": 0.2879784869996801, + "grad_norm": 0.1467937560962666, + "learning_rate": 5.698260066917065e-06, + "loss": 0.03944091796875, + "step": 33305 + }, + { + "epoch": 0.2880217205212233, + "grad_norm": 5.44655701345261, + "learning_rate": 5.698171011906199e-06, + "loss": 0.089617919921875, + "step": 33310 + }, + { + "epoch": 0.2880649540427666, + "grad_norm": 16.725080992701592, + "learning_rate": 5.698081944451607e-06, + "loss": 0.17837600708007811, + "step": 33315 + }, + { + "epoch": 0.28810818756430984, + "grad_norm": 32.80055376160702, + "learning_rate": 5.6979928645537e-06, + "loss": 0.550518798828125, + "step": 33320 + }, + { + "epoch": 0.2881514210858531, + "grad_norm": 36.670290127344686, + "learning_rate": 5.697903772212888e-06, + "loss": 0.4624908447265625, + "step": 33325 + }, + { + "epoch": 0.2881946546073964, + "grad_norm": 40.30189345735284, + "learning_rate": 5.697814667429583e-06, + "loss": 0.1570037841796875, + "step": 33330 + }, + { + "epoch": 0.28823788812893963, + "grad_norm": 1.1932194922743187, + "learning_rate": 5.6977255502041945e-06, + "loss": 0.07532958984375, + "step": 33335 + }, + { + "epoch": 0.2882811216504829, + "grad_norm": 13.909136783683726, + "learning_rate": 5.697636420537134e-06, + "loss": 0.1241851806640625, + "step": 33340 + }, + { + "epoch": 0.2883243551720262, + "grad_norm": 19.291544219883058, + "learning_rate": 5.697547278428813e-06, + "loss": 0.163677978515625, + "step": 33345 + }, + { + "epoch": 0.28836758869356943, + "grad_norm": 26.486785382457292, + "learning_rate": 5.697458123879642e-06, + "loss": 0.297821044921875, + "step": 33350 + }, + { + "epoch": 0.2884108222151127, + "grad_norm": 21.48693013049611, + "learning_rate": 5.697368956890033e-06, + "loss": 0.15594635009765626, + "step": 33355 + }, + { + "epoch": 0.288454055736656, + "grad_norm": 9.080259934080875, + "learning_rate": 5.697279777460397e-06, + "loss": 0.23847808837890624, + "step": 33360 + }, + { + "epoch": 0.28849728925819923, + "grad_norm": 2.462317008258063, + "learning_rate": 5.697190585591144e-06, + "loss": 0.25338058471679686, + "step": 33365 + }, + { + "epoch": 0.2885405227797425, + "grad_norm": 20.124334745758528, + "learning_rate": 5.697101381282686e-06, + "loss": 0.17150802612304689, + "step": 33370 + }, + { + "epoch": 0.28858375630128574, + "grad_norm": 24.68433887603342, + "learning_rate": 5.697012164535436e-06, + "loss": 0.3161865234375, + "step": 33375 + }, + { + "epoch": 0.288626989822829, + "grad_norm": 43.81687863933759, + "learning_rate": 5.696922935349804e-06, + "loss": 0.24813079833984375, + "step": 33380 + }, + { + "epoch": 0.2886702233443723, + "grad_norm": 9.258798472554933, + "learning_rate": 5.6968336937262015e-06, + "loss": 0.1300048828125, + "step": 33385 + }, + { + "epoch": 0.28871345686591554, + "grad_norm": 34.58005685162237, + "learning_rate": 5.696744439665039e-06, + "loss": 0.20184326171875, + "step": 33390 + }, + { + "epoch": 0.2887566903874588, + "grad_norm": 3.232677010843073, + "learning_rate": 5.696655173166732e-06, + "loss": 0.22578125, + "step": 33395 + }, + { + "epoch": 0.2887999239090021, + "grad_norm": 3.2852715702498574, + "learning_rate": 5.696565894231688e-06, + "loss": 0.18096542358398438, + "step": 33400 + }, + { + "epoch": 0.28884315743054534, + "grad_norm": 34.132754378702906, + "learning_rate": 5.69647660286032e-06, + "loss": 0.16133041381835939, + "step": 33405 + }, + { + "epoch": 0.2888863909520886, + "grad_norm": 5.096482025915024, + "learning_rate": 5.696387299053041e-06, + "loss": 0.0630279541015625, + "step": 33410 + }, + { + "epoch": 0.28892962447363185, + "grad_norm": 0.5015254246718197, + "learning_rate": 5.696297982810262e-06, + "loss": 0.19015789031982422, + "step": 33415 + }, + { + "epoch": 0.28897285799517514, + "grad_norm": 4.186716216202129, + "learning_rate": 5.696208654132395e-06, + "loss": 0.36768798828125, + "step": 33420 + }, + { + "epoch": 0.2890160915167184, + "grad_norm": 15.003822823678794, + "learning_rate": 5.696119313019851e-06, + "loss": 0.1642578125, + "step": 33425 + }, + { + "epoch": 0.28905932503826165, + "grad_norm": 6.60205391963604, + "learning_rate": 5.696029959473044e-06, + "loss": 0.268017578125, + "step": 33430 + }, + { + "epoch": 0.28910255855980493, + "grad_norm": 9.713377937653734, + "learning_rate": 5.695940593492385e-06, + "loss": 0.1553619384765625, + "step": 33435 + }, + { + "epoch": 0.2891457920813482, + "grad_norm": 1.33378090556234, + "learning_rate": 5.695851215078285e-06, + "loss": 0.036540985107421875, + "step": 33440 + }, + { + "epoch": 0.28918902560289145, + "grad_norm": 21.68980144983463, + "learning_rate": 5.69576182423116e-06, + "loss": 0.49278411865234373, + "step": 33445 + }, + { + "epoch": 0.28923225912443473, + "grad_norm": 2.916957828881203, + "learning_rate": 5.695672420951418e-06, + "loss": 0.0891693115234375, + "step": 33450 + }, + { + "epoch": 0.28927549264597796, + "grad_norm": 4.2863167586126485, + "learning_rate": 5.6955830052394725e-06, + "loss": 0.1130523681640625, + "step": 33455 + }, + { + "epoch": 0.28931872616752125, + "grad_norm": 13.57729688519755, + "learning_rate": 5.6954935770957366e-06, + "loss": 0.5418354034423828, + "step": 33460 + }, + { + "epoch": 0.28936195968906453, + "grad_norm": 6.170165732011743, + "learning_rate": 5.695404136520622e-06, + "loss": 0.24418869018554687, + "step": 33465 + }, + { + "epoch": 0.28940519321060776, + "grad_norm": 2.8265396567769985, + "learning_rate": 5.695314683514543e-06, + "loss": 0.10562744140625, + "step": 33470 + }, + { + "epoch": 0.28944842673215104, + "grad_norm": 0.31582033816882626, + "learning_rate": 5.69522521807791e-06, + "loss": 0.04518890380859375, + "step": 33475 + }, + { + "epoch": 0.2894916602536943, + "grad_norm": 7.199936003277751, + "learning_rate": 5.695135740211137e-06, + "loss": 0.32108154296875, + "step": 33480 + }, + { + "epoch": 0.28953489377523756, + "grad_norm": 18.777029510732294, + "learning_rate": 5.6950462499146356e-06, + "loss": 0.339483642578125, + "step": 33485 + }, + { + "epoch": 0.28957812729678084, + "grad_norm": 6.194375342531329, + "learning_rate": 5.69495674718882e-06, + "loss": 0.13616943359375, + "step": 33490 + }, + { + "epoch": 0.28962136081832407, + "grad_norm": 23.93353986066997, + "learning_rate": 5.694867232034101e-06, + "loss": 0.34452056884765625, + "step": 33495 + }, + { + "epoch": 0.28966459433986735, + "grad_norm": 23.944275639927593, + "learning_rate": 5.694777704450893e-06, + "loss": 0.24107208251953124, + "step": 33500 + }, + { + "epoch": 0.28970782786141064, + "grad_norm": 10.546905998745904, + "learning_rate": 5.694688164439608e-06, + "loss": 0.2106201171875, + "step": 33505 + }, + { + "epoch": 0.28975106138295387, + "grad_norm": 5.091261181164597, + "learning_rate": 5.69459861200066e-06, + "loss": 0.08439178466796875, + "step": 33510 + }, + { + "epoch": 0.28979429490449715, + "grad_norm": 14.143110852245126, + "learning_rate": 5.69450904713446e-06, + "loss": 0.030326461791992186, + "step": 33515 + }, + { + "epoch": 0.28983752842604044, + "grad_norm": 2.78907604321231, + "learning_rate": 5.694419469841423e-06, + "loss": 0.04992218017578125, + "step": 33520 + }, + { + "epoch": 0.28988076194758366, + "grad_norm": 5.489076114797912, + "learning_rate": 5.6943298801219615e-06, + "loss": 0.2540283203125, + "step": 33525 + }, + { + "epoch": 0.28992399546912695, + "grad_norm": 2.5255763729510226, + "learning_rate": 5.694240277976489e-06, + "loss": 0.0812530517578125, + "step": 33530 + }, + { + "epoch": 0.2899672289906702, + "grad_norm": 7.885947492233533, + "learning_rate": 5.694150663405418e-06, + "loss": 0.231622314453125, + "step": 33535 + }, + { + "epoch": 0.29001046251221346, + "grad_norm": 7.507737552819898, + "learning_rate": 5.694061036409162e-06, + "loss": 0.1727294921875, + "step": 33540 + }, + { + "epoch": 0.29005369603375675, + "grad_norm": 31.463606934135523, + "learning_rate": 5.693971396988135e-06, + "loss": 0.24188499450683593, + "step": 33545 + }, + { + "epoch": 0.2900969295553, + "grad_norm": 0.5325576276002963, + "learning_rate": 5.6938817451427504e-06, + "loss": 0.513555908203125, + "step": 33550 + }, + { + "epoch": 0.29014016307684326, + "grad_norm": 3.4972354987318917, + "learning_rate": 5.69379208087342e-06, + "loss": 0.3267333984375, + "step": 33555 + }, + { + "epoch": 0.29018339659838654, + "grad_norm": 1.3304656296082424, + "learning_rate": 5.6937024041805595e-06, + "loss": 0.21893730163574218, + "step": 33560 + }, + { + "epoch": 0.2902266301199298, + "grad_norm": 2.5249899163478995, + "learning_rate": 5.693612715064581e-06, + "loss": 0.05639801025390625, + "step": 33565 + }, + { + "epoch": 0.29026986364147306, + "grad_norm": 13.749975165614652, + "learning_rate": 5.693523013525899e-06, + "loss": 0.134722900390625, + "step": 33570 + }, + { + "epoch": 0.29031309716301634, + "grad_norm": 27.05753621652113, + "learning_rate": 5.693433299564927e-06, + "loss": 0.153125, + "step": 33575 + }, + { + "epoch": 0.29035633068455957, + "grad_norm": 3.6704720592125124, + "learning_rate": 5.693343573182079e-06, + "loss": 0.2042266845703125, + "step": 33580 + }, + { + "epoch": 0.29039956420610286, + "grad_norm": 19.104786091609917, + "learning_rate": 5.693253834377767e-06, + "loss": 0.531683349609375, + "step": 33585 + }, + { + "epoch": 0.2904427977276461, + "grad_norm": 2.2092817209559152, + "learning_rate": 5.693164083152407e-06, + "loss": 0.01503753662109375, + "step": 33590 + }, + { + "epoch": 0.29048603124918937, + "grad_norm": 9.526729610055352, + "learning_rate": 5.6930743195064125e-06, + "loss": 0.29969940185546873, + "step": 33595 + }, + { + "epoch": 0.29052926477073265, + "grad_norm": 7.923243691089042, + "learning_rate": 5.692984543440197e-06, + "loss": 0.37840423583984373, + "step": 33600 + }, + { + "epoch": 0.2905724982922759, + "grad_norm": 9.880239017165346, + "learning_rate": 5.6928947549541745e-06, + "loss": 0.2914886474609375, + "step": 33605 + }, + { + "epoch": 0.29061573181381917, + "grad_norm": 0.408415590263555, + "learning_rate": 5.69280495404876e-06, + "loss": 0.16610565185546874, + "step": 33610 + }, + { + "epoch": 0.29065896533536245, + "grad_norm": 13.436570213033889, + "learning_rate": 5.6927151407243665e-06, + "loss": 0.36967926025390624, + "step": 33615 + }, + { + "epoch": 0.2907021988569057, + "grad_norm": 16.275852266303925, + "learning_rate": 5.692625314981409e-06, + "loss": 0.26663360595703123, + "step": 33620 + }, + { + "epoch": 0.29074543237844896, + "grad_norm": 5.636604508723783, + "learning_rate": 5.692535476820302e-06, + "loss": 0.19129791259765624, + "step": 33625 + }, + { + "epoch": 0.2907886658999922, + "grad_norm": 8.124942583638502, + "learning_rate": 5.692445626241458e-06, + "loss": 0.545587158203125, + "step": 33630 + }, + { + "epoch": 0.2908318994215355, + "grad_norm": 26.246185598257764, + "learning_rate": 5.692355763245294e-06, + "loss": 0.3249603271484375, + "step": 33635 + }, + { + "epoch": 0.29087513294307876, + "grad_norm": 2.8557441892263777, + "learning_rate": 5.692265887832222e-06, + "loss": 0.10539360046386718, + "step": 33640 + }, + { + "epoch": 0.290918366464622, + "grad_norm": 10.818371188297617, + "learning_rate": 5.692176000002658e-06, + "loss": 0.07129325866699218, + "step": 33645 + }, + { + "epoch": 0.2909615999861653, + "grad_norm": 1.508668325354867, + "learning_rate": 5.692086099757015e-06, + "loss": 0.22872467041015626, + "step": 33650 + }, + { + "epoch": 0.29100483350770856, + "grad_norm": 3.000721240022243, + "learning_rate": 5.69199618709571e-06, + "loss": 0.2736083984375, + "step": 33655 + }, + { + "epoch": 0.2910480670292518, + "grad_norm": 4.929571976237845, + "learning_rate": 5.691906262019157e-06, + "loss": 0.0923919677734375, + "step": 33660 + }, + { + "epoch": 0.2910913005507951, + "grad_norm": 4.405596126428721, + "learning_rate": 5.691816324527769e-06, + "loss": 0.0675323486328125, + "step": 33665 + }, + { + "epoch": 0.2911345340723383, + "grad_norm": 28.587510942197166, + "learning_rate": 5.691726374621962e-06, + "loss": 0.23180694580078126, + "step": 33670 + }, + { + "epoch": 0.2911777675938816, + "grad_norm": 3.628662738403044, + "learning_rate": 5.691636412302152e-06, + "loss": 0.16596603393554688, + "step": 33675 + }, + { + "epoch": 0.29122100111542487, + "grad_norm": 4.361315031070349, + "learning_rate": 5.6915464375687515e-06, + "loss": 0.437164306640625, + "step": 33680 + }, + { + "epoch": 0.2912642346369681, + "grad_norm": 4.013227645446693, + "learning_rate": 5.691456450422178e-06, + "loss": 0.0687896728515625, + "step": 33685 + }, + { + "epoch": 0.2913074681585114, + "grad_norm": 14.588657836106961, + "learning_rate": 5.691366450862845e-06, + "loss": 0.49312667846679686, + "step": 33690 + }, + { + "epoch": 0.29135070168005467, + "grad_norm": 9.164641040421909, + "learning_rate": 5.691276438891167e-06, + "loss": 0.32521514892578124, + "step": 33695 + }, + { + "epoch": 0.2913939352015979, + "grad_norm": 3.991912810493246, + "learning_rate": 5.691186414507559e-06, + "loss": 0.24092884063720704, + "step": 33700 + }, + { + "epoch": 0.2914371687231412, + "grad_norm": 2.645085582618612, + "learning_rate": 5.691096377712438e-06, + "loss": 0.0641845703125, + "step": 33705 + }, + { + "epoch": 0.2914804022446844, + "grad_norm": 8.084780044095792, + "learning_rate": 5.6910063285062185e-06, + "loss": 0.13496856689453124, + "step": 33710 + }, + { + "epoch": 0.2915236357662277, + "grad_norm": 134.07129872630844, + "learning_rate": 5.6909162668893155e-06, + "loss": 0.392938232421875, + "step": 33715 + }, + { + "epoch": 0.291566869287771, + "grad_norm": 38.57223656053756, + "learning_rate": 5.690826192862145e-06, + "loss": 0.199462890625, + "step": 33720 + }, + { + "epoch": 0.2916101028093142, + "grad_norm": 2.313493366829992, + "learning_rate": 5.690736106425121e-06, + "loss": 0.44449691772460936, + "step": 33725 + }, + { + "epoch": 0.2916533363308575, + "grad_norm": 12.655942185968831, + "learning_rate": 5.69064600757866e-06, + "loss": 0.1485118865966797, + "step": 33730 + }, + { + "epoch": 0.2916965698524008, + "grad_norm": 0.3271509957653004, + "learning_rate": 5.690555896323177e-06, + "loss": 0.06971893310546876, + "step": 33735 + }, + { + "epoch": 0.291739803373944, + "grad_norm": 0.24233907139442837, + "learning_rate": 5.69046577265909e-06, + "loss": 0.27260665893554686, + "step": 33740 + }, + { + "epoch": 0.2917830368954873, + "grad_norm": 9.410682600276735, + "learning_rate": 5.69037563658681e-06, + "loss": 0.0853912353515625, + "step": 33745 + }, + { + "epoch": 0.2918262704170306, + "grad_norm": 13.520227041576687, + "learning_rate": 5.690285488106756e-06, + "loss": 0.3064117431640625, + "step": 33750 + }, + { + "epoch": 0.2918695039385738, + "grad_norm": 40.99836841676305, + "learning_rate": 5.690195327219344e-06, + "loss": 0.442041015625, + "step": 33755 + }, + { + "epoch": 0.2919127374601171, + "grad_norm": 7.904087635178896, + "learning_rate": 5.690105153924987e-06, + "loss": 0.188446044921875, + "step": 33760 + }, + { + "epoch": 0.2919559709816603, + "grad_norm": 9.926373351183747, + "learning_rate": 5.690014968224104e-06, + "loss": 0.12303695678710938, + "step": 33765 + }, + { + "epoch": 0.2919992045032036, + "grad_norm": 3.844399839083764, + "learning_rate": 5.68992477011711e-06, + "loss": 0.27854766845703127, + "step": 33770 + }, + { + "epoch": 0.2920424380247469, + "grad_norm": 3.429417357421809, + "learning_rate": 5.6898345596044205e-06, + "loss": 0.24515380859375, + "step": 33775 + }, + { + "epoch": 0.2920856715462901, + "grad_norm": 12.434072533942471, + "learning_rate": 5.689744336686451e-06, + "loss": 0.28612060546875, + "step": 33780 + }, + { + "epoch": 0.2921289050678334, + "grad_norm": 4.378553436530737, + "learning_rate": 5.6896541013636196e-06, + "loss": 0.4055419921875, + "step": 33785 + }, + { + "epoch": 0.2921721385893767, + "grad_norm": 1.8493645500341631, + "learning_rate": 5.689563853636339e-06, + "loss": 0.16937103271484374, + "step": 33790 + }, + { + "epoch": 0.2922153721109199, + "grad_norm": 5.316200116932208, + "learning_rate": 5.6894735935050296e-06, + "loss": 0.12227096557617187, + "step": 33795 + }, + { + "epoch": 0.2922586056324632, + "grad_norm": 0.2730232463362653, + "learning_rate": 5.689383320970105e-06, + "loss": 0.3871063232421875, + "step": 33800 + }, + { + "epoch": 0.2923018391540064, + "grad_norm": 0.6425868634123458, + "learning_rate": 5.689293036031982e-06, + "loss": 0.15066070556640626, + "step": 33805 + }, + { + "epoch": 0.2923450726755497, + "grad_norm": 5.1012539302122715, + "learning_rate": 5.6892027386910765e-06, + "loss": 0.21847000122070312, + "step": 33810 + }, + { + "epoch": 0.292388306197093, + "grad_norm": 5.639900325042001, + "learning_rate": 5.689112428947806e-06, + "loss": 0.11846046447753907, + "step": 33815 + }, + { + "epoch": 0.2924315397186362, + "grad_norm": 11.59586394764112, + "learning_rate": 5.689022106802587e-06, + "loss": 0.12459449768066407, + "step": 33820 + }, + { + "epoch": 0.2924747732401795, + "grad_norm": 16.527822174185392, + "learning_rate": 5.688931772255836e-06, + "loss": 0.13749771118164061, + "step": 33825 + }, + { + "epoch": 0.2925180067617228, + "grad_norm": 5.4111887649577195, + "learning_rate": 5.688841425307969e-06, + "loss": 0.1644317626953125, + "step": 33830 + }, + { + "epoch": 0.292561240283266, + "grad_norm": 15.576471386756012, + "learning_rate": 5.688751065959402e-06, + "loss": 0.456744384765625, + "step": 33835 + }, + { + "epoch": 0.2926044738048093, + "grad_norm": 16.96352048643928, + "learning_rate": 5.688660694210553e-06, + "loss": 0.19111709594726561, + "step": 33840 + }, + { + "epoch": 0.29264770732635254, + "grad_norm": 59.52946436625829, + "learning_rate": 5.6885703100618395e-06, + "loss": 0.439794921875, + "step": 33845 + }, + { + "epoch": 0.2926909408478958, + "grad_norm": 2.7005805774012472, + "learning_rate": 5.688479913513677e-06, + "loss": 0.1333984375, + "step": 33850 + }, + { + "epoch": 0.2927341743694391, + "grad_norm": 11.98865809268237, + "learning_rate": 5.688389504566482e-06, + "loss": 0.2267303466796875, + "step": 33855 + }, + { + "epoch": 0.29277740789098233, + "grad_norm": 5.477070448839911, + "learning_rate": 5.688299083220672e-06, + "loss": 0.11705169677734376, + "step": 33860 + }, + { + "epoch": 0.2928206414125256, + "grad_norm": 40.34040083196526, + "learning_rate": 5.688208649476665e-06, + "loss": 0.21879196166992188, + "step": 33865 + }, + { + "epoch": 0.2928638749340689, + "grad_norm": 9.343129345294374, + "learning_rate": 5.688118203334877e-06, + "loss": 0.3324981689453125, + "step": 33870 + }, + { + "epoch": 0.29290710845561213, + "grad_norm": 34.956997704746705, + "learning_rate": 5.688027744795725e-06, + "loss": 0.22610321044921874, + "step": 33875 + }, + { + "epoch": 0.2929503419771554, + "grad_norm": 20.675501359881068, + "learning_rate": 5.687937273859627e-06, + "loss": 0.1704864501953125, + "step": 33880 + }, + { + "epoch": 0.29299357549869864, + "grad_norm": 1.413621784842577, + "learning_rate": 5.687846790527e-06, + "loss": 0.24580230712890624, + "step": 33885 + }, + { + "epoch": 0.29303680902024193, + "grad_norm": 9.672764228241178, + "learning_rate": 5.687756294798259e-06, + "loss": 0.344970703125, + "step": 33890 + }, + { + "epoch": 0.2930800425417852, + "grad_norm": 38.25272097317197, + "learning_rate": 5.687665786673826e-06, + "loss": 0.5703887939453125, + "step": 33895 + }, + { + "epoch": 0.29312327606332844, + "grad_norm": 0.4040594021465325, + "learning_rate": 5.687575266154115e-06, + "loss": 0.42715301513671877, + "step": 33900 + }, + { + "epoch": 0.2931665095848717, + "grad_norm": 3.5191216197817425, + "learning_rate": 5.687484733239545e-06, + "loss": 0.15567169189453126, + "step": 33905 + }, + { + "epoch": 0.293209743106415, + "grad_norm": 7.635931267792977, + "learning_rate": 5.687394187930532e-06, + "loss": 0.30401077270507815, + "step": 33910 + }, + { + "epoch": 0.29325297662795824, + "grad_norm": 28.322175474657065, + "learning_rate": 5.687303630227495e-06, + "loss": 0.18917083740234375, + "step": 33915 + }, + { + "epoch": 0.2932962101495015, + "grad_norm": 6.657547462646137, + "learning_rate": 5.68721306013085e-06, + "loss": 0.3208274841308594, + "step": 33920 + }, + { + "epoch": 0.2933394436710448, + "grad_norm": 1.1134911161539522, + "learning_rate": 5.687122477641017e-06, + "loss": 0.20806884765625, + "step": 33925 + }, + { + "epoch": 0.29338267719258804, + "grad_norm": 5.2319867764394115, + "learning_rate": 5.687031882758412e-06, + "loss": 0.2698211669921875, + "step": 33930 + }, + { + "epoch": 0.2934259107141313, + "grad_norm": 21.38336559449887, + "learning_rate": 5.686941275483454e-06, + "loss": 0.338299560546875, + "step": 33935 + }, + { + "epoch": 0.29346914423567455, + "grad_norm": 3.5925079203907013, + "learning_rate": 5.68685065581656e-06, + "loss": 0.17408294677734376, + "step": 33940 + }, + { + "epoch": 0.29351237775721783, + "grad_norm": 26.33147021444267, + "learning_rate": 5.686760023758148e-06, + "loss": 0.4359649658203125, + "step": 33945 + }, + { + "epoch": 0.2935556112787611, + "grad_norm": 1.5468080303946978, + "learning_rate": 5.686669379308635e-06, + "loss": 0.1898193359375, + "step": 33950 + }, + { + "epoch": 0.29359884480030435, + "grad_norm": 5.645011075529381, + "learning_rate": 5.686578722468442e-06, + "loss": 0.09316864013671874, + "step": 33955 + }, + { + "epoch": 0.29364207832184763, + "grad_norm": 0.24864074033220662, + "learning_rate": 5.686488053237985e-06, + "loss": 0.13046112060546874, + "step": 33960 + }, + { + "epoch": 0.2936853118433909, + "grad_norm": 0.23956080570536833, + "learning_rate": 5.686397371617682e-06, + "loss": 0.14480094909667968, + "step": 33965 + }, + { + "epoch": 0.29372854536493415, + "grad_norm": 13.147845593609391, + "learning_rate": 5.6863066776079525e-06, + "loss": 0.34705810546875, + "step": 33970 + }, + { + "epoch": 0.29377177888647743, + "grad_norm": 43.803274747549075, + "learning_rate": 5.686215971209213e-06, + "loss": 0.25447998046875, + "step": 33975 + }, + { + "epoch": 0.29381501240802066, + "grad_norm": 35.2382699145814, + "learning_rate": 5.686125252421884e-06, + "loss": 0.341815185546875, + "step": 33980 + }, + { + "epoch": 0.29385824592956394, + "grad_norm": 3.8452721251125874, + "learning_rate": 5.686034521246381e-06, + "loss": 0.40015411376953125, + "step": 33985 + }, + { + "epoch": 0.29390147945110723, + "grad_norm": 4.839794650517175, + "learning_rate": 5.685943777683125e-06, + "loss": 0.38532257080078125, + "step": 33990 + }, + { + "epoch": 0.29394471297265046, + "grad_norm": 4.104657477092446, + "learning_rate": 5.685853021732534e-06, + "loss": 0.1909393310546875, + "step": 33995 + }, + { + "epoch": 0.29398794649419374, + "grad_norm": 18.172523711512433, + "learning_rate": 5.6857622533950265e-06, + "loss": 0.13529586791992188, + "step": 34000 + }, + { + "epoch": 0.294031180015737, + "grad_norm": 14.10938740983107, + "learning_rate": 5.68567147267102e-06, + "loss": 0.080902099609375, + "step": 34005 + }, + { + "epoch": 0.29407441353728025, + "grad_norm": 55.40604637479861, + "learning_rate": 5.685580679560934e-06, + "loss": 0.32576828002929686, + "step": 34010 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 12.136575535808388, + "learning_rate": 5.685489874065187e-06, + "loss": 0.2473602294921875, + "step": 34015 + }, + { + "epoch": 0.29416088058036677, + "grad_norm": 1.8969529120146893, + "learning_rate": 5.685399056184199e-06, + "loss": 0.225347900390625, + "step": 34020 + }, + { + "epoch": 0.29420411410191005, + "grad_norm": 2.3659718787043875, + "learning_rate": 5.685308225918386e-06, + "loss": 0.0818206787109375, + "step": 34025 + }, + { + "epoch": 0.29424734762345334, + "grad_norm": 18.420259077493174, + "learning_rate": 5.68521738326817e-06, + "loss": 0.2438568115234375, + "step": 34030 + }, + { + "epoch": 0.29429058114499657, + "grad_norm": 12.926299362743634, + "learning_rate": 5.685126528233969e-06, + "loss": 0.28155059814453126, + "step": 34035 + }, + { + "epoch": 0.29433381466653985, + "grad_norm": 0.7162991100298077, + "learning_rate": 5.685035660816202e-06, + "loss": 0.14172592163085937, + "step": 34040 + }, + { + "epoch": 0.29437704818808313, + "grad_norm": 0.9194917162652421, + "learning_rate": 5.6849447810152865e-06, + "loss": 0.140155029296875, + "step": 34045 + }, + { + "epoch": 0.29442028170962636, + "grad_norm": 1.8165216417729684, + "learning_rate": 5.684853888831644e-06, + "loss": 0.16573543548583985, + "step": 34050 + }, + { + "epoch": 0.29446351523116965, + "grad_norm": 1.2107875388025, + "learning_rate": 5.684762984265692e-06, + "loss": 0.0446014404296875, + "step": 34055 + }, + { + "epoch": 0.2945067487527129, + "grad_norm": 8.6288133630707, + "learning_rate": 5.68467206731785e-06, + "loss": 0.11940231323242187, + "step": 34060 + }, + { + "epoch": 0.29454998227425616, + "grad_norm": 2.449658723998932, + "learning_rate": 5.6845811379885385e-06, + "loss": 0.0643829345703125, + "step": 34065 + }, + { + "epoch": 0.29459321579579945, + "grad_norm": 21.06712399402287, + "learning_rate": 5.684490196278176e-06, + "loss": 0.3224395751953125, + "step": 34070 + }, + { + "epoch": 0.2946364493173427, + "grad_norm": 3.3523998965019652, + "learning_rate": 5.684399242187181e-06, + "loss": 0.1151519775390625, + "step": 34075 + }, + { + "epoch": 0.29467968283888596, + "grad_norm": 0.3915787207905951, + "learning_rate": 5.6843082757159745e-06, + "loss": 0.6311553955078125, + "step": 34080 + }, + { + "epoch": 0.29472291636042924, + "grad_norm": 35.35248187597768, + "learning_rate": 5.6842172968649754e-06, + "loss": 0.32987060546875, + "step": 34085 + }, + { + "epoch": 0.2947661498819725, + "grad_norm": 6.846668319254251, + "learning_rate": 5.684126305634603e-06, + "loss": 0.2558319091796875, + "step": 34090 + }, + { + "epoch": 0.29480938340351576, + "grad_norm": 0.6418094364235108, + "learning_rate": 5.684035302025278e-06, + "loss": 0.10352935791015624, + "step": 34095 + }, + { + "epoch": 0.29485261692505904, + "grad_norm": 17.263927129520553, + "learning_rate": 5.683944286037418e-06, + "loss": 0.1230621337890625, + "step": 34100 + }, + { + "epoch": 0.29489585044660227, + "grad_norm": 20.08906139491862, + "learning_rate": 5.683853257671446e-06, + "loss": 0.156988525390625, + "step": 34105 + }, + { + "epoch": 0.29493908396814555, + "grad_norm": 5.58389938350375, + "learning_rate": 5.683762216927779e-06, + "loss": 0.13759841918945312, + "step": 34110 + }, + { + "epoch": 0.2949823174896888, + "grad_norm": 12.999744540284329, + "learning_rate": 5.683671163806837e-06, + "loss": 0.10542869567871094, + "step": 34115 + }, + { + "epoch": 0.29502555101123207, + "grad_norm": 17.196896287618006, + "learning_rate": 5.683580098309042e-06, + "loss": 0.14595489501953124, + "step": 34120 + }, + { + "epoch": 0.29506878453277535, + "grad_norm": 6.31097162507488, + "learning_rate": 5.683489020434812e-06, + "loss": 0.407257080078125, + "step": 34125 + }, + { + "epoch": 0.2951120180543186, + "grad_norm": 23.003290563648328, + "learning_rate": 5.683397930184568e-06, + "loss": 0.20386962890625, + "step": 34130 + }, + { + "epoch": 0.29515525157586187, + "grad_norm": 17.06652143482866, + "learning_rate": 5.68330682755873e-06, + "loss": 0.36103515625, + "step": 34135 + }, + { + "epoch": 0.29519848509740515, + "grad_norm": 13.282906363451644, + "learning_rate": 5.683215712557717e-06, + "loss": 0.2558174133300781, + "step": 34140 + }, + { + "epoch": 0.2952417186189484, + "grad_norm": 15.546696066532103, + "learning_rate": 5.6831245851819516e-06, + "loss": 0.17690095901489258, + "step": 34145 + }, + { + "epoch": 0.29528495214049166, + "grad_norm": 5.743101324716272, + "learning_rate": 5.683033445431852e-06, + "loss": 0.1181060791015625, + "step": 34150 + }, + { + "epoch": 0.2953281856620349, + "grad_norm": 1.7501013892855801, + "learning_rate": 5.682942293307839e-06, + "loss": 0.13154144287109376, + "step": 34155 + }, + { + "epoch": 0.2953714191835782, + "grad_norm": 5.07864116973134, + "learning_rate": 5.6828511288103336e-06, + "loss": 0.21521434783935547, + "step": 34160 + }, + { + "epoch": 0.29541465270512146, + "grad_norm": 22.156601165576543, + "learning_rate": 5.682759951939756e-06, + "loss": 0.1688873291015625, + "step": 34165 + }, + { + "epoch": 0.2954578862266647, + "grad_norm": 23.11200124796792, + "learning_rate": 5.682668762696527e-06, + "loss": 0.20124855041503906, + "step": 34170 + }, + { + "epoch": 0.295501119748208, + "grad_norm": 3.6683940148154224, + "learning_rate": 5.682577561081065e-06, + "loss": 0.141986083984375, + "step": 34175 + }, + { + "epoch": 0.29554435326975126, + "grad_norm": 4.284751441528241, + "learning_rate": 5.682486347093794e-06, + "loss": 0.0601165771484375, + "step": 34180 + }, + { + "epoch": 0.2955875867912945, + "grad_norm": 1.4019928602299427, + "learning_rate": 5.682395120735132e-06, + "loss": 0.1020904541015625, + "step": 34185 + }, + { + "epoch": 0.29563082031283777, + "grad_norm": 3.115505005304172, + "learning_rate": 5.682303882005501e-06, + "loss": 0.44250946044921874, + "step": 34190 + }, + { + "epoch": 0.295674053834381, + "grad_norm": 34.97788719600421, + "learning_rate": 5.682212630905321e-06, + "loss": 0.238739013671875, + "step": 34195 + }, + { + "epoch": 0.2957172873559243, + "grad_norm": 1.0054118575059467, + "learning_rate": 5.682121367435014e-06, + "loss": 0.1296478271484375, + "step": 34200 + }, + { + "epoch": 0.29576052087746757, + "grad_norm": 7.784437961975888, + "learning_rate": 5.682030091595e-06, + "loss": 0.10695819854736328, + "step": 34205 + }, + { + "epoch": 0.2958037543990108, + "grad_norm": 39.89565531813655, + "learning_rate": 5.6819388033857015e-06, + "loss": 0.56259765625, + "step": 34210 + }, + { + "epoch": 0.2958469879205541, + "grad_norm": 2.4983971692418145, + "learning_rate": 5.681847502807537e-06, + "loss": 0.04539699554443359, + "step": 34215 + }, + { + "epoch": 0.29589022144209737, + "grad_norm": 8.287083852530335, + "learning_rate": 5.681756189860929e-06, + "loss": 0.0752105712890625, + "step": 34220 + }, + { + "epoch": 0.2959334549636406, + "grad_norm": 2.75822736404354, + "learning_rate": 5.681664864546298e-06, + "loss": 0.14481658935546876, + "step": 34225 + }, + { + "epoch": 0.2959766884851839, + "grad_norm": 9.36235035706145, + "learning_rate": 5.681573526864066e-06, + "loss": 0.2532695770263672, + "step": 34230 + }, + { + "epoch": 0.2960199220067271, + "grad_norm": 21.19876292328851, + "learning_rate": 5.681482176814654e-06, + "loss": 0.33121337890625, + "step": 34235 + }, + { + "epoch": 0.2960631555282704, + "grad_norm": 9.223508673573031, + "learning_rate": 5.681390814398483e-06, + "loss": 0.3188873291015625, + "step": 34240 + }, + { + "epoch": 0.2961063890498137, + "grad_norm": 9.567362984959969, + "learning_rate": 5.681299439615974e-06, + "loss": 0.40946044921875, + "step": 34245 + }, + { + "epoch": 0.2961496225713569, + "grad_norm": 11.878766863970394, + "learning_rate": 5.681208052467549e-06, + "loss": 0.0675628662109375, + "step": 34250 + }, + { + "epoch": 0.2961928560929002, + "grad_norm": 6.148994254407652, + "learning_rate": 5.68111665295363e-06, + "loss": 0.24381256103515625, + "step": 34255 + }, + { + "epoch": 0.2962360896144435, + "grad_norm": 0.8753111442712252, + "learning_rate": 5.681025241074638e-06, + "loss": 0.5514907836914062, + "step": 34260 + }, + { + "epoch": 0.2962793231359867, + "grad_norm": 21.700161157475442, + "learning_rate": 5.680933816830994e-06, + "loss": 0.1461273193359375, + "step": 34265 + }, + { + "epoch": 0.29632255665753, + "grad_norm": 1.4734905642773652, + "learning_rate": 5.680842380223119e-06, + "loss": 0.04891357421875, + "step": 34270 + }, + { + "epoch": 0.2963657901790732, + "grad_norm": 0.3609329973194765, + "learning_rate": 5.680750931251437e-06, + "loss": 0.2005706787109375, + "step": 34275 + }, + { + "epoch": 0.2964090237006165, + "grad_norm": 21.55272509126759, + "learning_rate": 5.680659469916369e-06, + "loss": 0.105352783203125, + "step": 34280 + }, + { + "epoch": 0.2964522572221598, + "grad_norm": 2.10306861007823, + "learning_rate": 5.680567996218337e-06, + "loss": 0.08672027587890625, + "step": 34285 + }, + { + "epoch": 0.296495490743703, + "grad_norm": 11.74895068905094, + "learning_rate": 5.6804765101577605e-06, + "loss": 0.45545654296875, + "step": 34290 + }, + { + "epoch": 0.2965387242652463, + "grad_norm": 31.03225430298496, + "learning_rate": 5.680385011735064e-06, + "loss": 0.20176467895507813, + "step": 34295 + }, + { + "epoch": 0.2965819577867896, + "grad_norm": 4.131975404243687, + "learning_rate": 5.6802935009506685e-06, + "loss": 0.04156417846679687, + "step": 34300 + }, + { + "epoch": 0.2966251913083328, + "grad_norm": 0.047747252006656776, + "learning_rate": 5.6802019778049965e-06, + "loss": 0.3324871063232422, + "step": 34305 + }, + { + "epoch": 0.2966684248298761, + "grad_norm": 58.60179253271194, + "learning_rate": 5.68011044229847e-06, + "loss": 0.507159423828125, + "step": 34310 + }, + { + "epoch": 0.2967116583514194, + "grad_norm": 13.959632566991315, + "learning_rate": 5.680018894431511e-06, + "loss": 0.32169952392578127, + "step": 34315 + }, + { + "epoch": 0.2967548918729626, + "grad_norm": 6.429618728105332, + "learning_rate": 5.6799273342045405e-06, + "loss": 0.1150054931640625, + "step": 34320 + }, + { + "epoch": 0.2967981253945059, + "grad_norm": 40.812462889666676, + "learning_rate": 5.679835761617982e-06, + "loss": 0.3071929931640625, + "step": 34325 + }, + { + "epoch": 0.2968413589160491, + "grad_norm": 13.353441074687801, + "learning_rate": 5.679744176672259e-06, + "loss": 0.12593994140625, + "step": 34330 + }, + { + "epoch": 0.2968845924375924, + "grad_norm": 4.470456500255615, + "learning_rate": 5.679652579367793e-06, + "loss": 0.0522186279296875, + "step": 34335 + }, + { + "epoch": 0.2969278259591357, + "grad_norm": 62.60030785753438, + "learning_rate": 5.679560969705005e-06, + "loss": 0.21423492431640626, + "step": 34340 + }, + { + "epoch": 0.2969710594806789, + "grad_norm": 3.0391391768986287, + "learning_rate": 5.679469347684318e-06, + "loss": 0.4622894287109375, + "step": 34345 + }, + { + "epoch": 0.2970142930022222, + "grad_norm": 1.9561560214752416, + "learning_rate": 5.679377713306156e-06, + "loss": 0.21803817749023438, + "step": 34350 + }, + { + "epoch": 0.2970575265237655, + "grad_norm": 18.543090806398187, + "learning_rate": 5.67928606657094e-06, + "loss": 0.20023193359375, + "step": 34355 + }, + { + "epoch": 0.2971007600453087, + "grad_norm": 22.888253065097924, + "learning_rate": 5.679194407479095e-06, + "loss": 0.08294296264648438, + "step": 34360 + }, + { + "epoch": 0.297143993566852, + "grad_norm": 1.6246335174440751, + "learning_rate": 5.6791027360310405e-06, + "loss": 0.08369789123535157, + "step": 34365 + }, + { + "epoch": 0.29718722708839523, + "grad_norm": 3.9100664944648402, + "learning_rate": 5.679011052227202e-06, + "loss": 0.2866161346435547, + "step": 34370 + }, + { + "epoch": 0.2972304606099385, + "grad_norm": 13.509495425518981, + "learning_rate": 5.678919356068001e-06, + "loss": 0.34231929779052733, + "step": 34375 + }, + { + "epoch": 0.2972736941314818, + "grad_norm": 3.398321641550167, + "learning_rate": 5.67882764755386e-06, + "loss": 0.3171539306640625, + "step": 34380 + }, + { + "epoch": 0.29731692765302503, + "grad_norm": 38.74897620506276, + "learning_rate": 5.678735926685203e-06, + "loss": 0.24707412719726562, + "step": 34385 + }, + { + "epoch": 0.2973601611745683, + "grad_norm": 12.416307531425028, + "learning_rate": 5.678644193462453e-06, + "loss": 0.11701202392578125, + "step": 34390 + }, + { + "epoch": 0.2974033946961116, + "grad_norm": 19.453784388921104, + "learning_rate": 5.678552447886032e-06, + "loss": 0.328179931640625, + "step": 34395 + }, + { + "epoch": 0.29744662821765483, + "grad_norm": 7.198784571500187, + "learning_rate": 5.6784606899563645e-06, + "loss": 0.8016189575195313, + "step": 34400 + }, + { + "epoch": 0.2974898617391981, + "grad_norm": 9.673107462005666, + "learning_rate": 5.6783689196738725e-06, + "loss": 0.28012847900390625, + "step": 34405 + }, + { + "epoch": 0.29753309526074134, + "grad_norm": 0.4104691456174364, + "learning_rate": 5.6782771370389795e-06, + "loss": 0.04216842651367188, + "step": 34410 + }, + { + "epoch": 0.2975763287822846, + "grad_norm": 15.147691933489211, + "learning_rate": 5.678185342052109e-06, + "loss": 0.170208740234375, + "step": 34415 + }, + { + "epoch": 0.2976195623038279, + "grad_norm": 0.7524652879942642, + "learning_rate": 5.678093534713685e-06, + "loss": 0.20321044921875, + "step": 34420 + }, + { + "epoch": 0.29766279582537114, + "grad_norm": 1.2660668088732354, + "learning_rate": 5.67800171502413e-06, + "loss": 0.1063323974609375, + "step": 34425 + }, + { + "epoch": 0.2977060293469144, + "grad_norm": 22.467690407474382, + "learning_rate": 5.677909882983867e-06, + "loss": 0.28209228515625, + "step": 34430 + }, + { + "epoch": 0.2977492628684577, + "grad_norm": 11.441400207283769, + "learning_rate": 5.677818038593322e-06, + "loss": 0.122015380859375, + "step": 34435 + }, + { + "epoch": 0.29779249639000094, + "grad_norm": 1.6436835697514547, + "learning_rate": 5.6777261818529155e-06, + "loss": 0.13248748779296876, + "step": 34440 + }, + { + "epoch": 0.2978357299115442, + "grad_norm": 3.6078592265604885, + "learning_rate": 5.677634312763073e-06, + "loss": 0.187701416015625, + "step": 34445 + }, + { + "epoch": 0.29787896343308745, + "grad_norm": 1.9911268760350629, + "learning_rate": 5.677542431324217e-06, + "loss": 0.0812835693359375, + "step": 34450 + }, + { + "epoch": 0.29792219695463074, + "grad_norm": 28.999465213982525, + "learning_rate": 5.677450537536772e-06, + "loss": 0.2211458206176758, + "step": 34455 + }, + { + "epoch": 0.297965430476174, + "grad_norm": 9.678687897013555, + "learning_rate": 5.677358631401163e-06, + "loss": 0.19158935546875, + "step": 34460 + }, + { + "epoch": 0.29800866399771725, + "grad_norm": 50.62278891633109, + "learning_rate": 5.677266712917811e-06, + "loss": 0.33668212890625, + "step": 34465 + }, + { + "epoch": 0.29805189751926053, + "grad_norm": 6.504494161087869, + "learning_rate": 5.677174782087142e-06, + "loss": 0.15554962158203126, + "step": 34470 + }, + { + "epoch": 0.2980951310408038, + "grad_norm": 11.005933886621666, + "learning_rate": 5.677082838909579e-06, + "loss": 0.14350967407226561, + "step": 34475 + }, + { + "epoch": 0.29813836456234705, + "grad_norm": 7.768771650049046, + "learning_rate": 5.6769908833855475e-06, + "loss": 0.16822433471679688, + "step": 34480 + }, + { + "epoch": 0.29818159808389033, + "grad_norm": 0.5951340762700712, + "learning_rate": 5.676898915515469e-06, + "loss": 0.14523849487304688, + "step": 34485 + }, + { + "epoch": 0.2982248316054336, + "grad_norm": 35.182634378895756, + "learning_rate": 5.67680693529977e-06, + "loss": 0.41044464111328127, + "step": 34490 + }, + { + "epoch": 0.29826806512697684, + "grad_norm": 0.8652133818112682, + "learning_rate": 5.676714942738875e-06, + "loss": 0.2955963134765625, + "step": 34495 + }, + { + "epoch": 0.29831129864852013, + "grad_norm": 12.082467837740797, + "learning_rate": 5.676622937833206e-06, + "loss": 0.21320343017578125, + "step": 34500 + }, + { + "epoch": 0.29835453217006336, + "grad_norm": 2.803133544266185, + "learning_rate": 5.676530920583189e-06, + "loss": 0.1234130859375, + "step": 34505 + }, + { + "epoch": 0.29839776569160664, + "grad_norm": 1.7632419281840805, + "learning_rate": 5.676438890989248e-06, + "loss": 0.22994384765625, + "step": 34510 + }, + { + "epoch": 0.2984409992131499, + "grad_norm": 5.697340060472345, + "learning_rate": 5.676346849051807e-06, + "loss": 0.07831039428710937, + "step": 34515 + }, + { + "epoch": 0.29848423273469316, + "grad_norm": 2.616508870277796, + "learning_rate": 5.676254794771291e-06, + "loss": 0.2387420654296875, + "step": 34520 + }, + { + "epoch": 0.29852746625623644, + "grad_norm": 7.205421598668792, + "learning_rate": 5.676162728148124e-06, + "loss": 0.1625091552734375, + "step": 34525 + }, + { + "epoch": 0.2985706997777797, + "grad_norm": 2.5047331907602457, + "learning_rate": 5.676070649182732e-06, + "loss": 0.3664947509765625, + "step": 34530 + }, + { + "epoch": 0.29861393329932295, + "grad_norm": 2.035022961761645, + "learning_rate": 5.675978557875537e-06, + "loss": 0.1472320556640625, + "step": 34535 + }, + { + "epoch": 0.29865716682086624, + "grad_norm": 11.897066730237764, + "learning_rate": 5.675886454226966e-06, + "loss": 0.23612747192382813, + "step": 34540 + }, + { + "epoch": 0.29870040034240947, + "grad_norm": 13.853343616048612, + "learning_rate": 5.675794338237444e-06, + "loss": 0.08306503295898438, + "step": 34545 + }, + { + "epoch": 0.29874363386395275, + "grad_norm": 19.312965731138263, + "learning_rate": 5.675702209907395e-06, + "loss": 0.18164825439453125, + "step": 34550 + }, + { + "epoch": 0.29878686738549604, + "grad_norm": 15.056555624681003, + "learning_rate": 5.675610069237242e-06, + "loss": 0.07759246826171876, + "step": 34555 + }, + { + "epoch": 0.29883010090703926, + "grad_norm": 4.548061263730798, + "learning_rate": 5.675517916227413e-06, + "loss": 0.158953857421875, + "step": 34560 + }, + { + "epoch": 0.29887333442858255, + "grad_norm": 3.9621887797949467, + "learning_rate": 5.675425750878332e-06, + "loss": 0.025885009765625, + "step": 34565 + }, + { + "epoch": 0.29891656795012583, + "grad_norm": 33.33086173406417, + "learning_rate": 5.675333573190423e-06, + "loss": 0.12742462158203124, + "step": 34570 + }, + { + "epoch": 0.29895980147166906, + "grad_norm": 46.05595930112337, + "learning_rate": 5.675241383164114e-06, + "loss": 0.2804561614990234, + "step": 34575 + }, + { + "epoch": 0.29900303499321235, + "grad_norm": 0.9214966860443448, + "learning_rate": 5.675149180799826e-06, + "loss": 0.12234840393066407, + "step": 34580 + }, + { + "epoch": 0.2990462685147556, + "grad_norm": 14.306659130504734, + "learning_rate": 5.6750569660979865e-06, + "loss": 0.1038848876953125, + "step": 34585 + }, + { + "epoch": 0.29908950203629886, + "grad_norm": 22.97934735918612, + "learning_rate": 5.674964739059021e-06, + "loss": 0.1753662109375, + "step": 34590 + }, + { + "epoch": 0.29913273555784214, + "grad_norm": 29.850719395759175, + "learning_rate": 5.674872499683356e-06, + "loss": 0.16058502197265626, + "step": 34595 + }, + { + "epoch": 0.2991759690793854, + "grad_norm": 7.2804903025971575, + "learning_rate": 5.674780247971413e-06, + "loss": 0.2369171142578125, + "step": 34600 + }, + { + "epoch": 0.29921920260092866, + "grad_norm": 41.97358981058354, + "learning_rate": 5.674687983923621e-06, + "loss": 0.555908203125, + "step": 34605 + }, + { + "epoch": 0.29926243612247194, + "grad_norm": 15.172065903839187, + "learning_rate": 5.674595707540404e-06, + "loss": 0.06495437622070313, + "step": 34610 + }, + { + "epoch": 0.29930566964401517, + "grad_norm": 28.151696480550125, + "learning_rate": 5.674503418822189e-06, + "loss": 0.17069244384765625, + "step": 34615 + }, + { + "epoch": 0.29934890316555846, + "grad_norm": 4.025617838344525, + "learning_rate": 5.674411117769399e-06, + "loss": 0.16402206420898438, + "step": 34620 + }, + { + "epoch": 0.2993921366871017, + "grad_norm": 2.7780900154241177, + "learning_rate": 5.6743188043824625e-06, + "loss": 0.349566650390625, + "step": 34625 + }, + { + "epoch": 0.29943537020864497, + "grad_norm": 3.8718529313791725, + "learning_rate": 5.674226478661803e-06, + "loss": 0.1331512451171875, + "step": 34630 + }, + { + "epoch": 0.29947860373018825, + "grad_norm": 12.165099820651346, + "learning_rate": 5.674134140607848e-06, + "loss": 0.07729034423828125, + "step": 34635 + }, + { + "epoch": 0.2995218372517315, + "grad_norm": 28.366324685764067, + "learning_rate": 5.674041790221023e-06, + "loss": 0.1336181640625, + "step": 34640 + }, + { + "epoch": 0.29956507077327477, + "grad_norm": 9.659431274254002, + "learning_rate": 5.6739494275017525e-06, + "loss": 0.13797607421875, + "step": 34645 + }, + { + "epoch": 0.29960830429481805, + "grad_norm": 6.218538021567126, + "learning_rate": 5.673857052450464e-06, + "loss": 0.07051849365234375, + "step": 34650 + }, + { + "epoch": 0.2996515378163613, + "grad_norm": 35.546670395258246, + "learning_rate": 5.673764665067583e-06, + "loss": 0.35740814208984373, + "step": 34655 + }, + { + "epoch": 0.29969477133790456, + "grad_norm": 14.164567903858753, + "learning_rate": 5.673672265353535e-06, + "loss": 0.2872661590576172, + "step": 34660 + }, + { + "epoch": 0.29973800485944785, + "grad_norm": 35.15672092520423, + "learning_rate": 5.673579853308748e-06, + "loss": 0.2941314697265625, + "step": 34665 + }, + { + "epoch": 0.2997812383809911, + "grad_norm": 3.8897897432389184, + "learning_rate": 5.673487428933646e-06, + "loss": 0.03223114013671875, + "step": 34670 + }, + { + "epoch": 0.29982447190253436, + "grad_norm": 39.02552766782976, + "learning_rate": 5.673394992228656e-06, + "loss": 0.2166667938232422, + "step": 34675 + }, + { + "epoch": 0.2998677054240776, + "grad_norm": 60.81062765988923, + "learning_rate": 5.673302543194205e-06, + "loss": 0.17969512939453125, + "step": 34680 + }, + { + "epoch": 0.2999109389456209, + "grad_norm": 1.079922334839676, + "learning_rate": 5.673210081830719e-06, + "loss": 0.28128814697265625, + "step": 34685 + }, + { + "epoch": 0.29995417246716416, + "grad_norm": 8.629535316926882, + "learning_rate": 5.673117608138624e-06, + "loss": 0.12959442138671876, + "step": 34690 + }, + { + "epoch": 0.2999974059887074, + "grad_norm": 17.96553095455947, + "learning_rate": 5.6730251221183475e-06, + "loss": 0.1948760986328125, + "step": 34695 + }, + { + "epoch": 0.3000406395102507, + "grad_norm": 0.4538035547993137, + "learning_rate": 5.672932623770314e-06, + "loss": 0.23369903564453126, + "step": 34700 + }, + { + "epoch": 0.30008387303179396, + "grad_norm": 55.57717596845839, + "learning_rate": 5.672840113094953e-06, + "loss": 0.58336181640625, + "step": 34705 + }, + { + "epoch": 0.3001271065533372, + "grad_norm": 22.15306534636674, + "learning_rate": 5.672747590092689e-06, + "loss": 0.20770492553710937, + "step": 34710 + }, + { + "epoch": 0.30017034007488047, + "grad_norm": 5.762735399076602, + "learning_rate": 5.672655054763949e-06, + "loss": 0.11160049438476563, + "step": 34715 + }, + { + "epoch": 0.3002135735964237, + "grad_norm": 21.882677544312262, + "learning_rate": 5.67256250710916e-06, + "loss": 0.2051025390625, + "step": 34720 + }, + { + "epoch": 0.300256807117967, + "grad_norm": 6.542771382133509, + "learning_rate": 5.67246994712875e-06, + "loss": 0.0416168212890625, + "step": 34725 + }, + { + "epoch": 0.30030004063951027, + "grad_norm": 2.554927948728634, + "learning_rate": 5.672377374823143e-06, + "loss": 0.11236572265625, + "step": 34730 + }, + { + "epoch": 0.3003432741610535, + "grad_norm": 3.4736543852409363, + "learning_rate": 5.672284790192769e-06, + "loss": 0.060150146484375, + "step": 34735 + }, + { + "epoch": 0.3003865076825968, + "grad_norm": 0.2594356202452105, + "learning_rate": 5.672192193238052e-06, + "loss": 0.11951885223388672, + "step": 34740 + }, + { + "epoch": 0.30042974120414007, + "grad_norm": 1.3984489030034999, + "learning_rate": 5.6720995839594225e-06, + "loss": 0.09430389404296875, + "step": 34745 + }, + { + "epoch": 0.3004729747256833, + "grad_norm": 0.22881445514364243, + "learning_rate": 5.672006962357305e-06, + "loss": 0.3120231628417969, + "step": 34750 + }, + { + "epoch": 0.3005162082472266, + "grad_norm": 16.751505487492572, + "learning_rate": 5.671914328432127e-06, + "loss": 0.25976409912109377, + "step": 34755 + }, + { + "epoch": 0.3005594417687698, + "grad_norm": 4.790169527420734, + "learning_rate": 5.671821682184318e-06, + "loss": 0.1595184326171875, + "step": 34760 + }, + { + "epoch": 0.3006026752903131, + "grad_norm": 5.341400162034357, + "learning_rate": 5.671729023614302e-06, + "loss": 0.13567657470703126, + "step": 34765 + }, + { + "epoch": 0.3006459088118564, + "grad_norm": 28.26662394300077, + "learning_rate": 5.671636352722508e-06, + "loss": 0.3214691162109375, + "step": 34770 + }, + { + "epoch": 0.3006891423333996, + "grad_norm": 19.28493609365237, + "learning_rate": 5.671543669509363e-06, + "loss": 0.2199432373046875, + "step": 34775 + }, + { + "epoch": 0.3007323758549429, + "grad_norm": 3.9069459173886183, + "learning_rate": 5.6714509739752955e-06, + "loss": 0.1336639404296875, + "step": 34780 + }, + { + "epoch": 0.3007756093764862, + "grad_norm": 0.17980925098928152, + "learning_rate": 5.671358266120732e-06, + "loss": 0.17446365356445312, + "step": 34785 + }, + { + "epoch": 0.3008188428980294, + "grad_norm": 1.0156426412187651, + "learning_rate": 5.6712655459460995e-06, + "loss": 0.10347938537597656, + "step": 34790 + }, + { + "epoch": 0.3008620764195727, + "grad_norm": 1.7404437142377285, + "learning_rate": 5.671172813451827e-06, + "loss": 0.0777130126953125, + "step": 34795 + }, + { + "epoch": 0.3009053099411159, + "grad_norm": 14.052006653871263, + "learning_rate": 5.671080068638341e-06, + "loss": 0.06843414306640624, + "step": 34800 + }, + { + "epoch": 0.3009485434626592, + "grad_norm": 2.8045963010727855, + "learning_rate": 5.67098731150607e-06, + "loss": 0.1463409423828125, + "step": 34805 + }, + { + "epoch": 0.3009917769842025, + "grad_norm": 73.07879977832773, + "learning_rate": 5.670894542055442e-06, + "loss": 0.311395263671875, + "step": 34810 + }, + { + "epoch": 0.3010350105057457, + "grad_norm": 0.281300885468111, + "learning_rate": 5.670801760286884e-06, + "loss": 0.14144134521484375, + "step": 34815 + }, + { + "epoch": 0.301078244027289, + "grad_norm": 12.882559590039907, + "learning_rate": 5.670708966200824e-06, + "loss": 0.4072845458984375, + "step": 34820 + }, + { + "epoch": 0.3011214775488323, + "grad_norm": 5.143130402814036, + "learning_rate": 5.67061615979769e-06, + "loss": 0.5367263793945313, + "step": 34825 + }, + { + "epoch": 0.3011647110703755, + "grad_norm": 39.52442818688151, + "learning_rate": 5.670523341077912e-06, + "loss": 0.1276092529296875, + "step": 34830 + }, + { + "epoch": 0.3012079445919188, + "grad_norm": 8.306586106154596, + "learning_rate": 5.6704305100419145e-06, + "loss": 0.2180206298828125, + "step": 34835 + }, + { + "epoch": 0.3012511781134621, + "grad_norm": 4.7710910397138875, + "learning_rate": 5.670337666690128e-06, + "loss": 0.230322265625, + "step": 34840 + }, + { + "epoch": 0.3012944116350053, + "grad_norm": 23.732757566947992, + "learning_rate": 5.6702448110229795e-06, + "loss": 0.20233688354492188, + "step": 34845 + }, + { + "epoch": 0.3013376451565486, + "grad_norm": 0.3727477808471082, + "learning_rate": 5.670151943040899e-06, + "loss": 0.1092041015625, + "step": 34850 + }, + { + "epoch": 0.3013808786780918, + "grad_norm": 7.452996687820868, + "learning_rate": 5.670059062744313e-06, + "loss": 0.08661880493164062, + "step": 34855 + }, + { + "epoch": 0.3014241121996351, + "grad_norm": 52.06496588884591, + "learning_rate": 5.669966170133651e-06, + "loss": 0.6269954681396485, + "step": 34860 + }, + { + "epoch": 0.3014673457211784, + "grad_norm": 11.096562203999577, + "learning_rate": 5.66987326520934e-06, + "loss": 0.243951416015625, + "step": 34865 + }, + { + "epoch": 0.3015105792427216, + "grad_norm": 2.247239304043515, + "learning_rate": 5.6697803479718105e-06, + "loss": 0.08568878173828125, + "step": 34870 + }, + { + "epoch": 0.3015538127642649, + "grad_norm": 5.7065477775885975, + "learning_rate": 5.6696874184214895e-06, + "loss": 0.0750518798828125, + "step": 34875 + }, + { + "epoch": 0.3015970462858082, + "grad_norm": 42.26255318515956, + "learning_rate": 5.669594476558806e-06, + "loss": 0.40162506103515627, + "step": 34880 + }, + { + "epoch": 0.3016402798073514, + "grad_norm": 2.3998561291087306, + "learning_rate": 5.6695015223841895e-06, + "loss": 0.08488273620605469, + "step": 34885 + }, + { + "epoch": 0.3016835133288947, + "grad_norm": 5.824004235734912, + "learning_rate": 5.669408555898067e-06, + "loss": 0.118768310546875, + "step": 34890 + }, + { + "epoch": 0.30172674685043793, + "grad_norm": 6.627691539581229, + "learning_rate": 5.669315577100867e-06, + "loss": 0.25333709716796876, + "step": 34895 + }, + { + "epoch": 0.3017699803719812, + "grad_norm": 1.1507148101832796, + "learning_rate": 5.669222585993021e-06, + "loss": 0.1464263916015625, + "step": 34900 + }, + { + "epoch": 0.3018132138935245, + "grad_norm": 5.192070869430769, + "learning_rate": 5.669129582574956e-06, + "loss": 0.17772216796875, + "step": 34905 + }, + { + "epoch": 0.30185644741506773, + "grad_norm": 31.187576922512154, + "learning_rate": 5.669036566847101e-06, + "loss": 0.3774139404296875, + "step": 34910 + }, + { + "epoch": 0.301899680936611, + "grad_norm": 21.678179882729737, + "learning_rate": 5.668943538809886e-06, + "loss": 0.16199951171875, + "step": 34915 + }, + { + "epoch": 0.3019429144581543, + "grad_norm": 10.30089489929703, + "learning_rate": 5.668850498463738e-06, + "loss": 0.397369384765625, + "step": 34920 + }, + { + "epoch": 0.30198614797969753, + "grad_norm": 3.2871699696449976, + "learning_rate": 5.668757445809088e-06, + "loss": 0.16460189819335938, + "step": 34925 + }, + { + "epoch": 0.3020293815012408, + "grad_norm": 5.99785300746752, + "learning_rate": 5.668664380846364e-06, + "loss": 0.2347808837890625, + "step": 34930 + }, + { + "epoch": 0.30207261502278404, + "grad_norm": 1.046732045975379, + "learning_rate": 5.668571303575996e-06, + "loss": 0.27876739501953124, + "step": 34935 + }, + { + "epoch": 0.3021158485443273, + "grad_norm": 2.584032085547988, + "learning_rate": 5.6684782139984135e-06, + "loss": 0.32061767578125, + "step": 34940 + }, + { + "epoch": 0.3021590820658706, + "grad_norm": 11.523750451629063, + "learning_rate": 5.668385112114044e-06, + "loss": 0.22623291015625, + "step": 34945 + }, + { + "epoch": 0.30220231558741384, + "grad_norm": 59.31194477767776, + "learning_rate": 5.6682919979233185e-06, + "loss": 0.444256591796875, + "step": 34950 + }, + { + "epoch": 0.3022455491089571, + "grad_norm": 9.357637679532726, + "learning_rate": 5.6681988714266675e-06, + "loss": 0.14892196655273438, + "step": 34955 + }, + { + "epoch": 0.3022887826305004, + "grad_norm": 4.133187323389542, + "learning_rate": 5.6681057326245174e-06, + "loss": 0.2078125, + "step": 34960 + }, + { + "epoch": 0.30233201615204364, + "grad_norm": 13.762336245012905, + "learning_rate": 5.6680125815173e-06, + "loss": 0.264996337890625, + "step": 34965 + }, + { + "epoch": 0.3023752496735869, + "grad_norm": 1.2911239367008245, + "learning_rate": 5.667919418105444e-06, + "loss": 0.029022216796875, + "step": 34970 + }, + { + "epoch": 0.30241848319513015, + "grad_norm": 474.66048993268737, + "learning_rate": 5.6678262423893805e-06, + "loss": 0.3221099853515625, + "step": 34975 + }, + { + "epoch": 0.30246171671667343, + "grad_norm": 2.8639914023844475, + "learning_rate": 5.6677330543695366e-06, + "loss": 0.09135055541992188, + "step": 34980 + }, + { + "epoch": 0.3025049502382167, + "grad_norm": 0.6577623491819314, + "learning_rate": 5.667639854046345e-06, + "loss": 0.2634422302246094, + "step": 34985 + }, + { + "epoch": 0.30254818375975995, + "grad_norm": 13.5209336588666, + "learning_rate": 5.667546641420233e-06, + "loss": 0.18964691162109376, + "step": 34990 + }, + { + "epoch": 0.30259141728130323, + "grad_norm": 0.659479646588496, + "learning_rate": 5.667453416491632e-06, + "loss": 0.32535152435302733, + "step": 34995 + }, + { + "epoch": 0.3026346508028465, + "grad_norm": 13.27634599000302, + "learning_rate": 5.667360179260972e-06, + "loss": 0.21422958374023438, + "step": 35000 + }, + { + "epoch": 0.30267788432438975, + "grad_norm": 1.1213298069725013, + "learning_rate": 5.667266929728682e-06, + "loss": 0.22517852783203124, + "step": 35005 + }, + { + "epoch": 0.30272111784593303, + "grad_norm": 3.301777096074967, + "learning_rate": 5.667173667895192e-06, + "loss": 0.147198486328125, + "step": 35010 + }, + { + "epoch": 0.30276435136747626, + "grad_norm": 20.87128990756931, + "learning_rate": 5.6670803937609344e-06, + "loss": 0.2418701171875, + "step": 35015 + }, + { + "epoch": 0.30280758488901954, + "grad_norm": 10.931269523833214, + "learning_rate": 5.6669871073263364e-06, + "loss": 0.188641357421875, + "step": 35020 + }, + { + "epoch": 0.3028508184105628, + "grad_norm": 1.3098896399979645, + "learning_rate": 5.66689380859183e-06, + "loss": 0.1360076904296875, + "step": 35025 + }, + { + "epoch": 0.30289405193210606, + "grad_norm": 33.4756313887948, + "learning_rate": 5.666800497557845e-06, + "loss": 0.25598878860473634, + "step": 35030 + }, + { + "epoch": 0.30293728545364934, + "grad_norm": 15.65544442107718, + "learning_rate": 5.666707174224811e-06, + "loss": 0.10102996826171876, + "step": 35035 + }, + { + "epoch": 0.3029805189751926, + "grad_norm": 4.525221039591773, + "learning_rate": 5.66661383859316e-06, + "loss": 0.1405975341796875, + "step": 35040 + }, + { + "epoch": 0.30302375249673585, + "grad_norm": 21.310115304471164, + "learning_rate": 5.666520490663321e-06, + "loss": 0.14934844970703126, + "step": 35045 + }, + { + "epoch": 0.30306698601827914, + "grad_norm": 7.8917181391243725, + "learning_rate": 5.666427130435725e-06, + "loss": 0.15859832763671874, + "step": 35050 + }, + { + "epoch": 0.3031102195398224, + "grad_norm": 12.662415287992298, + "learning_rate": 5.666333757910804e-06, + "loss": 0.058935546875, + "step": 35055 + }, + { + "epoch": 0.30315345306136565, + "grad_norm": 1.9222810136133353, + "learning_rate": 5.666240373088987e-06, + "loss": 0.17731819152832032, + "step": 35060 + }, + { + "epoch": 0.30319668658290894, + "grad_norm": 6.883580505872698, + "learning_rate": 5.6661469759707045e-06, + "loss": 0.2491943359375, + "step": 35065 + }, + { + "epoch": 0.30323992010445217, + "grad_norm": 4.440141138537543, + "learning_rate": 5.666053566556387e-06, + "loss": 0.31768455505371096, + "step": 35070 + }, + { + "epoch": 0.30328315362599545, + "grad_norm": 3.6381498639569583, + "learning_rate": 5.665960144846467e-06, + "loss": 0.261724853515625, + "step": 35075 + }, + { + "epoch": 0.30332638714753873, + "grad_norm": 7.0254728582588895, + "learning_rate": 5.665866710841374e-06, + "loss": 0.0742340087890625, + "step": 35080 + }, + { + "epoch": 0.30336962066908196, + "grad_norm": 1.9043529193868316, + "learning_rate": 5.665773264541539e-06, + "loss": 0.20015029907226561, + "step": 35085 + }, + { + "epoch": 0.30341285419062525, + "grad_norm": 5.759499739517881, + "learning_rate": 5.665679805947394e-06, + "loss": 0.2622161865234375, + "step": 35090 + }, + { + "epoch": 0.30345608771216853, + "grad_norm": 20.431513773907767, + "learning_rate": 5.665586335059368e-06, + "loss": 0.234552001953125, + "step": 35095 + }, + { + "epoch": 0.30349932123371176, + "grad_norm": 8.23786992945655, + "learning_rate": 5.665492851877895e-06, + "loss": 0.17816200256347656, + "step": 35100 + }, + { + "epoch": 0.30354255475525505, + "grad_norm": 26.581308053503093, + "learning_rate": 5.6653993564034035e-06, + "loss": 0.3366790771484375, + "step": 35105 + }, + { + "epoch": 0.3035857882767983, + "grad_norm": 4.953107121683199, + "learning_rate": 5.6653058486363265e-06, + "loss": 0.1934661865234375, + "step": 35110 + }, + { + "epoch": 0.30362902179834156, + "grad_norm": 8.971100722171613, + "learning_rate": 5.665212328577094e-06, + "loss": 0.12429733276367187, + "step": 35115 + }, + { + "epoch": 0.30367225531988484, + "grad_norm": 71.41147322796365, + "learning_rate": 5.665118796226138e-06, + "loss": 0.25504302978515625, + "step": 35120 + }, + { + "epoch": 0.30371548884142807, + "grad_norm": 19.320141985828606, + "learning_rate": 5.665025251583889e-06, + "loss": 0.35218505859375, + "step": 35125 + }, + { + "epoch": 0.30375872236297136, + "grad_norm": 61.58607639116055, + "learning_rate": 5.664931694650779e-06, + "loss": 0.3119842529296875, + "step": 35130 + }, + { + "epoch": 0.30380195588451464, + "grad_norm": 2.8008726321178465, + "learning_rate": 5.664838125427239e-06, + "loss": 0.05623817443847656, + "step": 35135 + }, + { + "epoch": 0.30384518940605787, + "grad_norm": 3.632043127927079, + "learning_rate": 5.664744543913702e-06, + "loss": 0.167462158203125, + "step": 35140 + }, + { + "epoch": 0.30388842292760115, + "grad_norm": 7.3420488462990745, + "learning_rate": 5.664650950110598e-06, + "loss": 0.043267822265625, + "step": 35145 + }, + { + "epoch": 0.3039316564491444, + "grad_norm": 0.42214910877613027, + "learning_rate": 5.6645573440183605e-06, + "loss": 0.13721389770507814, + "step": 35150 + }, + { + "epoch": 0.30397488997068767, + "grad_norm": 6.067384264999855, + "learning_rate": 5.664463725637419e-06, + "loss": 0.0748565673828125, + "step": 35155 + }, + { + "epoch": 0.30401812349223095, + "grad_norm": 1.9011625291848007, + "learning_rate": 5.664370094968207e-06, + "loss": 0.1072265625, + "step": 35160 + }, + { + "epoch": 0.3040613570137742, + "grad_norm": 1.1584929829708241, + "learning_rate": 5.664276452011154e-06, + "loss": 0.05488872528076172, + "step": 35165 + }, + { + "epoch": 0.30410459053531746, + "grad_norm": 23.292563347224647, + "learning_rate": 5.664182796766694e-06, + "loss": 0.3613983154296875, + "step": 35170 + }, + { + "epoch": 0.30414782405686075, + "grad_norm": 1.021624092577504, + "learning_rate": 5.6640891292352596e-06, + "loss": 0.1593017578125, + "step": 35175 + }, + { + "epoch": 0.304191057578404, + "grad_norm": 1.0244197632171737, + "learning_rate": 5.663995449417281e-06, + "loss": 0.0771514892578125, + "step": 35180 + }, + { + "epoch": 0.30423429109994726, + "grad_norm": 20.29497604295653, + "learning_rate": 5.66390175731319e-06, + "loss": 0.19703369140625, + "step": 35185 + }, + { + "epoch": 0.3042775246214905, + "grad_norm": 43.36874692651735, + "learning_rate": 5.66380805292342e-06, + "loss": 0.3633697509765625, + "step": 35190 + }, + { + "epoch": 0.3043207581430338, + "grad_norm": 11.115786812122952, + "learning_rate": 5.6637143362484026e-06, + "loss": 0.11295166015625, + "step": 35195 + }, + { + "epoch": 0.30436399166457706, + "grad_norm": 0.39070339159600415, + "learning_rate": 5.66362060728857e-06, + "loss": 0.13957290649414061, + "step": 35200 + }, + { + "epoch": 0.3044072251861203, + "grad_norm": 0.4928103286548035, + "learning_rate": 5.663526866044355e-06, + "loss": 0.09272003173828125, + "step": 35205 + }, + { + "epoch": 0.3044504587076636, + "grad_norm": 10.925238637608475, + "learning_rate": 5.663433112516188e-06, + "loss": 0.21419219970703124, + "step": 35210 + }, + { + "epoch": 0.30449369222920686, + "grad_norm": 22.934177083767466, + "learning_rate": 5.663339346704504e-06, + "loss": 0.12279052734375, + "step": 35215 + }, + { + "epoch": 0.3045369257507501, + "grad_norm": 10.996896642683982, + "learning_rate": 5.6632455686097345e-06, + "loss": 0.09518070220947265, + "step": 35220 + }, + { + "epoch": 0.30458015927229337, + "grad_norm": 7.707432320966506, + "learning_rate": 5.663151778232311e-06, + "loss": 0.38422088623046874, + "step": 35225 + }, + { + "epoch": 0.30462339279383666, + "grad_norm": 26.40964360373981, + "learning_rate": 5.663057975572667e-06, + "loss": 0.10617218017578126, + "step": 35230 + }, + { + "epoch": 0.3046666263153799, + "grad_norm": 25.349243539962398, + "learning_rate": 5.6629641606312355e-06, + "loss": 0.548681640625, + "step": 35235 + }, + { + "epoch": 0.30470985983692317, + "grad_norm": 2.1691813520796637, + "learning_rate": 5.662870333408448e-06, + "loss": 0.28469161987304686, + "step": 35240 + }, + { + "epoch": 0.3047530933584664, + "grad_norm": 26.655936852544684, + "learning_rate": 5.6627764939047375e-06, + "loss": 0.18533172607421874, + "step": 35245 + }, + { + "epoch": 0.3047963268800097, + "grad_norm": 9.477563709531317, + "learning_rate": 5.662682642120538e-06, + "loss": 0.0487274169921875, + "step": 35250 + }, + { + "epoch": 0.30483956040155297, + "grad_norm": 13.945242454610137, + "learning_rate": 5.6625887780562815e-06, + "loss": 0.38195114135742186, + "step": 35255 + }, + { + "epoch": 0.3048827939230962, + "grad_norm": 2.32472097093018, + "learning_rate": 5.6624949017124e-06, + "loss": 0.15382843017578124, + "step": 35260 + }, + { + "epoch": 0.3049260274446395, + "grad_norm": 11.706890926957671, + "learning_rate": 5.662401013089327e-06, + "loss": 0.180816650390625, + "step": 35265 + }, + { + "epoch": 0.30496926096618276, + "grad_norm": 16.905526898420195, + "learning_rate": 5.662307112187497e-06, + "loss": 0.11253662109375, + "step": 35270 + }, + { + "epoch": 0.305012494487726, + "grad_norm": 32.80074937134953, + "learning_rate": 5.662213199007341e-06, + "loss": 0.09976425170898437, + "step": 35275 + }, + { + "epoch": 0.3050557280092693, + "grad_norm": 34.49755104942082, + "learning_rate": 5.6621192735492936e-06, + "loss": 0.32784576416015626, + "step": 35280 + }, + { + "epoch": 0.3050989615308125, + "grad_norm": 19.125634175774824, + "learning_rate": 5.662025335813787e-06, + "loss": 0.137396240234375, + "step": 35285 + }, + { + "epoch": 0.3051421950523558, + "grad_norm": 20.40930518230756, + "learning_rate": 5.661931385801255e-06, + "loss": 0.19941635131835939, + "step": 35290 + }, + { + "epoch": 0.3051854285738991, + "grad_norm": 27.064818507709564, + "learning_rate": 5.66183742351213e-06, + "loss": 0.2498138427734375, + "step": 35295 + }, + { + "epoch": 0.3052286620954423, + "grad_norm": 0.861596617923565, + "learning_rate": 5.661743448946847e-06, + "loss": 0.218243408203125, + "step": 35300 + }, + { + "epoch": 0.3052718956169856, + "grad_norm": 1.8824557319977313, + "learning_rate": 5.661649462105837e-06, + "loss": 0.1149139404296875, + "step": 35305 + }, + { + "epoch": 0.3053151291385289, + "grad_norm": 22.954295926868863, + "learning_rate": 5.661555462989536e-06, + "loss": 0.14272670745849608, + "step": 35310 + }, + { + "epoch": 0.3053583626600721, + "grad_norm": 43.69549540741199, + "learning_rate": 5.661461451598376e-06, + "loss": 0.346588134765625, + "step": 35315 + }, + { + "epoch": 0.3054015961816154, + "grad_norm": 37.6318493862111, + "learning_rate": 5.661367427932791e-06, + "loss": 0.299310302734375, + "step": 35320 + }, + { + "epoch": 0.3054448297031586, + "grad_norm": 17.941769559268987, + "learning_rate": 5.661273391993215e-06, + "loss": 0.17588768005371094, + "step": 35325 + }, + { + "epoch": 0.3054880632247019, + "grad_norm": 1.5154747838154436, + "learning_rate": 5.66117934378008e-06, + "loss": 0.411907958984375, + "step": 35330 + }, + { + "epoch": 0.3055312967462452, + "grad_norm": 0.9180008572510325, + "learning_rate": 5.661085283293822e-06, + "loss": 0.120550537109375, + "step": 35335 + }, + { + "epoch": 0.3055745302677884, + "grad_norm": 43.29838244043593, + "learning_rate": 5.660991210534874e-06, + "loss": 0.24273834228515626, + "step": 35340 + }, + { + "epoch": 0.3056177637893317, + "grad_norm": 2.816168986125297, + "learning_rate": 5.6608971255036694e-06, + "loss": 0.203350830078125, + "step": 35345 + }, + { + "epoch": 0.305660997310875, + "grad_norm": 2.3667880931834455, + "learning_rate": 5.660803028200642e-06, + "loss": 0.09771270751953125, + "step": 35350 + }, + { + "epoch": 0.3057042308324182, + "grad_norm": 5.560273138001805, + "learning_rate": 5.660708918626226e-06, + "loss": 0.148736572265625, + "step": 35355 + }, + { + "epoch": 0.3057474643539615, + "grad_norm": 32.520295978920146, + "learning_rate": 5.660614796780857e-06, + "loss": 0.2249481201171875, + "step": 35360 + }, + { + "epoch": 0.3057906978755047, + "grad_norm": 7.110949092116069, + "learning_rate": 5.660520662664967e-06, + "loss": 0.142791748046875, + "step": 35365 + }, + { + "epoch": 0.305833931397048, + "grad_norm": 3.1027955256451003, + "learning_rate": 5.66042651627899e-06, + "loss": 0.056136703491210936, + "step": 35370 + }, + { + "epoch": 0.3058771649185913, + "grad_norm": 17.799710964387526, + "learning_rate": 5.660332357623361e-06, + "loss": 0.19279251098632813, + "step": 35375 + }, + { + "epoch": 0.3059203984401345, + "grad_norm": 0.6083118557078853, + "learning_rate": 5.660238186698515e-06, + "loss": 0.6414466857910156, + "step": 35380 + }, + { + "epoch": 0.3059636319616778, + "grad_norm": 20.530503073534824, + "learning_rate": 5.660144003504885e-06, + "loss": 0.087744140625, + "step": 35385 + }, + { + "epoch": 0.3060068654832211, + "grad_norm": 12.945883942559654, + "learning_rate": 5.660049808042907e-06, + "loss": 0.0767852783203125, + "step": 35390 + }, + { + "epoch": 0.3060500990047643, + "grad_norm": 2.845586654560123, + "learning_rate": 5.659955600313013e-06, + "loss": 0.09300384521484376, + "step": 35395 + }, + { + "epoch": 0.3060933325263076, + "grad_norm": 12.462719576780337, + "learning_rate": 5.659861380315639e-06, + "loss": 0.1655853271484375, + "step": 35400 + }, + { + "epoch": 0.3061365660478509, + "grad_norm": 32.68519211678814, + "learning_rate": 5.65976714805122e-06, + "loss": 0.2723876953125, + "step": 35405 + }, + { + "epoch": 0.3061797995693941, + "grad_norm": 15.655484204366173, + "learning_rate": 5.659672903520189e-06, + "loss": 0.17003326416015624, + "step": 35410 + }, + { + "epoch": 0.3062230330909374, + "grad_norm": 25.19273840237559, + "learning_rate": 5.659578646722983e-06, + "loss": 0.1945709228515625, + "step": 35415 + }, + { + "epoch": 0.30626626661248063, + "grad_norm": 26.46479298656278, + "learning_rate": 5.659484377660034e-06, + "loss": 0.18184928894042968, + "step": 35420 + }, + { + "epoch": 0.3063095001340239, + "grad_norm": 0.2935756825578229, + "learning_rate": 5.659390096331779e-06, + "loss": 0.24453334808349608, + "step": 35425 + }, + { + "epoch": 0.3063527336555672, + "grad_norm": 31.893324486236914, + "learning_rate": 5.659295802738651e-06, + "loss": 0.1405487060546875, + "step": 35430 + }, + { + "epoch": 0.30639596717711043, + "grad_norm": 4.966901994284098, + "learning_rate": 5.6592014968810865e-06, + "loss": 0.45253677368164064, + "step": 35435 + }, + { + "epoch": 0.3064392006986537, + "grad_norm": 9.979196087251553, + "learning_rate": 5.6591071787595185e-06, + "loss": 0.09457817077636718, + "step": 35440 + }, + { + "epoch": 0.306482434220197, + "grad_norm": 30.493042702035734, + "learning_rate": 5.659012848374384e-06, + "loss": 0.1599506378173828, + "step": 35445 + }, + { + "epoch": 0.3065256677417402, + "grad_norm": 16.178823510054087, + "learning_rate": 5.658918505726117e-06, + "loss": 0.20618133544921874, + "step": 35450 + }, + { + "epoch": 0.3065689012632835, + "grad_norm": 16.005169973508927, + "learning_rate": 5.658824150815154e-06, + "loss": 0.2524566650390625, + "step": 35455 + }, + { + "epoch": 0.30661213478482674, + "grad_norm": 3.695171049082386, + "learning_rate": 5.658729783641927e-06, + "loss": 0.1703929901123047, + "step": 35460 + }, + { + "epoch": 0.30665536830637, + "grad_norm": 6.842178821492992, + "learning_rate": 5.658635404206875e-06, + "loss": 0.051213645935058595, + "step": 35465 + }, + { + "epoch": 0.3066986018279133, + "grad_norm": 10.471607052460891, + "learning_rate": 5.65854101251043e-06, + "loss": 0.576617431640625, + "step": 35470 + }, + { + "epoch": 0.30674183534945654, + "grad_norm": 4.0190411350708635, + "learning_rate": 5.658446608553029e-06, + "loss": 0.0903076171875, + "step": 35475 + }, + { + "epoch": 0.3067850688709998, + "grad_norm": 0.41973305895506346, + "learning_rate": 5.658352192335107e-06, + "loss": 0.17041854858398436, + "step": 35480 + }, + { + "epoch": 0.3068283023925431, + "grad_norm": 1.3924297531555687, + "learning_rate": 5.6582577638571e-06, + "loss": 0.148284912109375, + "step": 35485 + }, + { + "epoch": 0.30687153591408634, + "grad_norm": 14.794803875259138, + "learning_rate": 5.658163323119443e-06, + "loss": 0.35540924072265623, + "step": 35490 + }, + { + "epoch": 0.3069147694356296, + "grad_norm": 27.42225337267716, + "learning_rate": 5.658068870122572e-06, + "loss": 0.169329833984375, + "step": 35495 + }, + { + "epoch": 0.30695800295717285, + "grad_norm": 13.224365791620812, + "learning_rate": 5.657974404866922e-06, + "loss": 0.20861434936523438, + "step": 35500 + }, + { + "epoch": 0.30700123647871613, + "grad_norm": 3.6136097595546897, + "learning_rate": 5.657879927352928e-06, + "loss": 0.059114837646484376, + "step": 35505 + }, + { + "epoch": 0.3070444700002594, + "grad_norm": 3.9958270510892357, + "learning_rate": 5.657785437581029e-06, + "loss": 0.412896728515625, + "step": 35510 + }, + { + "epoch": 0.30708770352180265, + "grad_norm": 1.7509442726913815, + "learning_rate": 5.657690935551656e-06, + "loss": 0.3031829833984375, + "step": 35515 + }, + { + "epoch": 0.30713093704334593, + "grad_norm": 5.003644931844607, + "learning_rate": 5.657596421265248e-06, + "loss": 0.13743667602539061, + "step": 35520 + }, + { + "epoch": 0.3071741705648892, + "grad_norm": 9.230460218521278, + "learning_rate": 5.65750189472224e-06, + "loss": 0.4206672668457031, + "step": 35525 + }, + { + "epoch": 0.30721740408643244, + "grad_norm": 5.285796362599028, + "learning_rate": 5.6574073559230685e-06, + "loss": 0.19463424682617186, + "step": 35530 + }, + { + "epoch": 0.30726063760797573, + "grad_norm": 3.1397342410011966, + "learning_rate": 5.657312804868169e-06, + "loss": 0.07633209228515625, + "step": 35535 + }, + { + "epoch": 0.30730387112951896, + "grad_norm": 27.63998502023215, + "learning_rate": 5.657218241557978e-06, + "loss": 0.22066802978515626, + "step": 35540 + }, + { + "epoch": 0.30734710465106224, + "grad_norm": 3.5900108813580274, + "learning_rate": 5.65712366599293e-06, + "loss": 0.0927703857421875, + "step": 35545 + }, + { + "epoch": 0.3073903381726055, + "grad_norm": 1.7455720711400569, + "learning_rate": 5.657029078173464e-06, + "loss": 0.0636383056640625, + "step": 35550 + }, + { + "epoch": 0.30743357169414876, + "grad_norm": 0.6848608875585004, + "learning_rate": 5.656934478100013e-06, + "loss": 0.17392425537109374, + "step": 35555 + }, + { + "epoch": 0.30747680521569204, + "grad_norm": 26.120845225278348, + "learning_rate": 5.6568398657730165e-06, + "loss": 0.2119781494140625, + "step": 35560 + }, + { + "epoch": 0.3075200387372353, + "grad_norm": 7.845920317411341, + "learning_rate": 5.656745241192909e-06, + "loss": 0.276104736328125, + "step": 35565 + }, + { + "epoch": 0.30756327225877855, + "grad_norm": 1.0548228095834302, + "learning_rate": 5.656650604360127e-06, + "loss": 0.38927135467529295, + "step": 35570 + }, + { + "epoch": 0.30760650578032184, + "grad_norm": 1.193137591783013, + "learning_rate": 5.656555955275107e-06, + "loss": 0.435362434387207, + "step": 35575 + }, + { + "epoch": 0.3076497393018651, + "grad_norm": 33.11581767355793, + "learning_rate": 5.6564612939382856e-06, + "loss": 0.419647216796875, + "step": 35580 + }, + { + "epoch": 0.30769297282340835, + "grad_norm": 1.2745737609087506, + "learning_rate": 5.656366620350099e-06, + "loss": 0.095233154296875, + "step": 35585 + }, + { + "epoch": 0.30773620634495163, + "grad_norm": 1.3258008144871847, + "learning_rate": 5.656271934510985e-06, + "loss": 0.24014892578125, + "step": 35590 + }, + { + "epoch": 0.30777943986649486, + "grad_norm": 16.791078425666157, + "learning_rate": 5.6561772364213796e-06, + "loss": 0.162744140625, + "step": 35595 + }, + { + "epoch": 0.30782267338803815, + "grad_norm": 2.9180324962266813, + "learning_rate": 5.656082526081719e-06, + "loss": 0.038762664794921874, + "step": 35600 + }, + { + "epoch": 0.30786590690958143, + "grad_norm": 4.681177065317217, + "learning_rate": 5.655987803492441e-06, + "loss": 0.1269256591796875, + "step": 35605 + }, + { + "epoch": 0.30790914043112466, + "grad_norm": 3.1963266231704424, + "learning_rate": 5.655893068653982e-06, + "loss": 0.5990036010742188, + "step": 35610 + }, + { + "epoch": 0.30795237395266795, + "grad_norm": 13.84927974204587, + "learning_rate": 5.655798321566778e-06, + "loss": 0.22926025390625, + "step": 35615 + }, + { + "epoch": 0.30799560747421123, + "grad_norm": 2.435317772800361, + "learning_rate": 5.655703562231267e-06, + "loss": 0.111700439453125, + "step": 35620 + }, + { + "epoch": 0.30803884099575446, + "grad_norm": 20.013596446862742, + "learning_rate": 5.655608790647887e-06, + "loss": 0.1384746551513672, + "step": 35625 + }, + { + "epoch": 0.30808207451729774, + "grad_norm": 36.8021974849765, + "learning_rate": 5.655514006817073e-06, + "loss": 0.24968185424804687, + "step": 35630 + }, + { + "epoch": 0.308125308038841, + "grad_norm": 2.6778933717545055, + "learning_rate": 5.6554192107392625e-06, + "loss": 0.121026611328125, + "step": 35635 + }, + { + "epoch": 0.30816854156038426, + "grad_norm": 1.1129647617930678, + "learning_rate": 5.655324402414895e-06, + "loss": 0.1717041015625, + "step": 35640 + }, + { + "epoch": 0.30821177508192754, + "grad_norm": 15.006409395337485, + "learning_rate": 5.655229581844404e-06, + "loss": 0.08816375732421874, + "step": 35645 + }, + { + "epoch": 0.30825500860347077, + "grad_norm": 39.12378886687061, + "learning_rate": 5.65513474902823e-06, + "loss": 0.22104339599609374, + "step": 35650 + }, + { + "epoch": 0.30829824212501405, + "grad_norm": 13.943311033441574, + "learning_rate": 5.655039903966808e-06, + "loss": 0.238079833984375, + "step": 35655 + }, + { + "epoch": 0.30834147564655734, + "grad_norm": 1.3212645738970212, + "learning_rate": 5.654945046660578e-06, + "loss": 0.15815353393554688, + "step": 35660 + }, + { + "epoch": 0.30838470916810057, + "grad_norm": 21.162548538491734, + "learning_rate": 5.654850177109975e-06, + "loss": 0.2176300048828125, + "step": 35665 + }, + { + "epoch": 0.30842794268964385, + "grad_norm": 1.0508967774515285, + "learning_rate": 5.654755295315439e-06, + "loss": 0.047137451171875, + "step": 35670 + }, + { + "epoch": 0.3084711762111871, + "grad_norm": 0.6215462861224903, + "learning_rate": 5.654660401277404e-06, + "loss": 0.08768463134765625, + "step": 35675 + }, + { + "epoch": 0.30851440973273037, + "grad_norm": 0.4276727968384022, + "learning_rate": 5.654565494996311e-06, + "loss": 0.1435028076171875, + "step": 35680 + }, + { + "epoch": 0.30855764325427365, + "grad_norm": 0.8388071004212652, + "learning_rate": 5.654470576472597e-06, + "loss": 0.14579544067382813, + "step": 35685 + }, + { + "epoch": 0.3086008767758169, + "grad_norm": 17.491759171308427, + "learning_rate": 5.654375645706699e-06, + "loss": 0.10400238037109374, + "step": 35690 + }, + { + "epoch": 0.30864411029736016, + "grad_norm": 0.9228017355801842, + "learning_rate": 5.654280702699054e-06, + "loss": 0.36126346588134767, + "step": 35695 + }, + { + "epoch": 0.30868734381890345, + "grad_norm": 31.7666051254643, + "learning_rate": 5.654185747450102e-06, + "loss": 0.2362396240234375, + "step": 35700 + }, + { + "epoch": 0.3087305773404467, + "grad_norm": 1.0647281253878442, + "learning_rate": 5.654090779960279e-06, + "loss": 0.13757095336914063, + "step": 35705 + }, + { + "epoch": 0.30877381086198996, + "grad_norm": 7.882973440038241, + "learning_rate": 5.653995800230025e-06, + "loss": 0.071246337890625, + "step": 35710 + }, + { + "epoch": 0.3088170443835332, + "grad_norm": 21.86270587537461, + "learning_rate": 5.653900808259776e-06, + "loss": 0.148797607421875, + "step": 35715 + }, + { + "epoch": 0.3088602779050765, + "grad_norm": 53.151675883101284, + "learning_rate": 5.653805804049971e-06, + "loss": 0.261798095703125, + "step": 35720 + }, + { + "epoch": 0.30890351142661976, + "grad_norm": 15.063247119929244, + "learning_rate": 5.653710787601049e-06, + "loss": 0.06905326843261719, + "step": 35725 + }, + { + "epoch": 0.308946744948163, + "grad_norm": 0.276773113085488, + "learning_rate": 5.6536157589134456e-06, + "loss": 0.371527099609375, + "step": 35730 + }, + { + "epoch": 0.3089899784697063, + "grad_norm": 45.78613335563515, + "learning_rate": 5.653520717987602e-06, + "loss": 0.32842388153076174, + "step": 35735 + }, + { + "epoch": 0.30903321199124956, + "grad_norm": 16.89773266241962, + "learning_rate": 5.653425664823955e-06, + "loss": 0.12626953125, + "step": 35740 + }, + { + "epoch": 0.3090764455127928, + "grad_norm": 5.24368949038201, + "learning_rate": 5.653330599422944e-06, + "loss": 0.09044342041015625, + "step": 35745 + }, + { + "epoch": 0.30911967903433607, + "grad_norm": 3.7773282697960697, + "learning_rate": 5.653235521785006e-06, + "loss": 0.15821571350097657, + "step": 35750 + }, + { + "epoch": 0.3091629125558793, + "grad_norm": 23.284543196500863, + "learning_rate": 5.6531404319105815e-06, + "loss": 0.35816802978515627, + "step": 35755 + }, + { + "epoch": 0.3092061460774226, + "grad_norm": 4.381798853477227, + "learning_rate": 5.653045329800107e-06, + "loss": 0.160528564453125, + "step": 35760 + }, + { + "epoch": 0.30924937959896587, + "grad_norm": 4.17562441586429, + "learning_rate": 5.652950215454021e-06, + "loss": 0.11064910888671875, + "step": 35765 + }, + { + "epoch": 0.3092926131205091, + "grad_norm": 2.112331663373736, + "learning_rate": 5.6528550888727635e-06, + "loss": 0.07792816162109376, + "step": 35770 + }, + { + "epoch": 0.3093358466420524, + "grad_norm": 1.7588150401407079, + "learning_rate": 5.6527599500567725e-06, + "loss": 0.08489913940429687, + "step": 35775 + }, + { + "epoch": 0.30937908016359567, + "grad_norm": 2.2827763543026367, + "learning_rate": 5.6526647990064874e-06, + "loss": 0.24631500244140625, + "step": 35780 + }, + { + "epoch": 0.3094223136851389, + "grad_norm": 1.6051869858387453, + "learning_rate": 5.652569635722347e-06, + "loss": 0.013746833801269532, + "step": 35785 + }, + { + "epoch": 0.3094655472066822, + "grad_norm": 27.665007317663093, + "learning_rate": 5.6524744602047895e-06, + "loss": 0.563250732421875, + "step": 35790 + }, + { + "epoch": 0.30950878072822546, + "grad_norm": 1.3193890785866282, + "learning_rate": 5.652379272454253e-06, + "loss": 0.07656936645507813, + "step": 35795 + }, + { + "epoch": 0.3095520142497687, + "grad_norm": 30.295669988429655, + "learning_rate": 5.652284072471179e-06, + "loss": 0.3625457763671875, + "step": 35800 + }, + { + "epoch": 0.309595247771312, + "grad_norm": 0.7042859377286909, + "learning_rate": 5.652188860256005e-06, + "loss": 0.05523529052734375, + "step": 35805 + }, + { + "epoch": 0.3096384812928552, + "grad_norm": 4.367515521238749, + "learning_rate": 5.652093635809171e-06, + "loss": 0.14855728149414063, + "step": 35810 + }, + { + "epoch": 0.3096817148143985, + "grad_norm": 59.171829153440164, + "learning_rate": 5.651998399131114e-06, + "loss": 0.5069282531738282, + "step": 35815 + }, + { + "epoch": 0.3097249483359418, + "grad_norm": 26.827016233318073, + "learning_rate": 5.651903150222276e-06, + "loss": 0.419158935546875, + "step": 35820 + }, + { + "epoch": 0.309768181857485, + "grad_norm": 0.8551608145247368, + "learning_rate": 5.651807889083095e-06, + "loss": 0.15762786865234374, + "step": 35825 + }, + { + "epoch": 0.3098114153790283, + "grad_norm": 2.7142475452184343, + "learning_rate": 5.651712615714009e-06, + "loss": 0.176397705078125, + "step": 35830 + }, + { + "epoch": 0.30985464890057157, + "grad_norm": 2.0780233215064814, + "learning_rate": 5.65161733011546e-06, + "loss": 0.37719268798828126, + "step": 35835 + }, + { + "epoch": 0.3098978824221148, + "grad_norm": 5.441142691133919, + "learning_rate": 5.651522032287886e-06, + "loss": 0.296893310546875, + "step": 35840 + }, + { + "epoch": 0.3099411159436581, + "grad_norm": 7.955627082224128, + "learning_rate": 5.651426722231726e-06, + "loss": 0.23102493286132814, + "step": 35845 + }, + { + "epoch": 0.3099843494652013, + "grad_norm": 15.762541786187976, + "learning_rate": 5.65133139994742e-06, + "loss": 0.23237133026123047, + "step": 35850 + }, + { + "epoch": 0.3100275829867446, + "grad_norm": 4.108372571422783, + "learning_rate": 5.651236065435409e-06, + "loss": 0.06200485229492188, + "step": 35855 + }, + { + "epoch": 0.3100708165082879, + "grad_norm": 1.2382487033769058, + "learning_rate": 5.651140718696131e-06, + "loss": 0.3012939453125, + "step": 35860 + }, + { + "epoch": 0.3101140500298311, + "grad_norm": 3.8088731201327906, + "learning_rate": 5.651045359730027e-06, + "loss": 0.16054611206054686, + "step": 35865 + }, + { + "epoch": 0.3101572835513744, + "grad_norm": 7.271513831169026, + "learning_rate": 5.650949988537536e-06, + "loss": 0.26195068359375, + "step": 35870 + }, + { + "epoch": 0.3102005170729177, + "grad_norm": 6.275267487609342, + "learning_rate": 5.650854605119095e-06, + "loss": 0.22418975830078125, + "step": 35875 + }, + { + "epoch": 0.3102437505944609, + "grad_norm": 4.690909083124023, + "learning_rate": 5.65075920947515e-06, + "loss": 0.090850830078125, + "step": 35880 + }, + { + "epoch": 0.3102869841160042, + "grad_norm": 0.8727075411218912, + "learning_rate": 5.650663801606137e-06, + "loss": 0.265606689453125, + "step": 35885 + }, + { + "epoch": 0.3103302176375474, + "grad_norm": 3.1805182277947828, + "learning_rate": 5.6505683815124966e-06, + "loss": 0.9016815185546875, + "step": 35890 + }, + { + "epoch": 0.3103734511590907, + "grad_norm": 3.320197008748215, + "learning_rate": 5.650472949194669e-06, + "loss": 0.4172271728515625, + "step": 35895 + }, + { + "epoch": 0.310416684680634, + "grad_norm": 4.569285362877149, + "learning_rate": 5.6503775046530944e-06, + "loss": 0.1626220703125, + "step": 35900 + }, + { + "epoch": 0.3104599182021772, + "grad_norm": 17.583999066633126, + "learning_rate": 5.650282047888213e-06, + "loss": 0.10985527038574219, + "step": 35905 + }, + { + "epoch": 0.3105031517237205, + "grad_norm": 18.42776364844614, + "learning_rate": 5.6501865789004646e-06, + "loss": 0.23340377807617188, + "step": 35910 + }, + { + "epoch": 0.3105463852452638, + "grad_norm": 14.981159390927727, + "learning_rate": 5.65009109769029e-06, + "loss": 0.079620361328125, + "step": 35915 + }, + { + "epoch": 0.310589618766807, + "grad_norm": 12.673499048523494, + "learning_rate": 5.64999560425813e-06, + "loss": 0.1598388671875, + "step": 35920 + }, + { + "epoch": 0.3106328522883503, + "grad_norm": 9.798490636832435, + "learning_rate": 5.649900098604424e-06, + "loss": 0.35782318115234374, + "step": 35925 + }, + { + "epoch": 0.31067608580989353, + "grad_norm": 67.94585098219278, + "learning_rate": 5.649804580729613e-06, + "loss": 0.371142578125, + "step": 35930 + }, + { + "epoch": 0.3107193193314368, + "grad_norm": 80.51791567071045, + "learning_rate": 5.649709050634137e-06, + "loss": 0.0751678466796875, + "step": 35935 + }, + { + "epoch": 0.3107625528529801, + "grad_norm": 1.0132014115896686, + "learning_rate": 5.649613508318437e-06, + "loss": 0.0813812255859375, + "step": 35940 + }, + { + "epoch": 0.31080578637452333, + "grad_norm": 4.108827932344216, + "learning_rate": 5.649517953782954e-06, + "loss": 0.1635009765625, + "step": 35945 + }, + { + "epoch": 0.3108490198960666, + "grad_norm": 0.4533054835723854, + "learning_rate": 5.649422387028129e-06, + "loss": 0.2252145767211914, + "step": 35950 + }, + { + "epoch": 0.3108922534176099, + "grad_norm": 0.37494896925670923, + "learning_rate": 5.649326808054401e-06, + "loss": 0.1462493896484375, + "step": 35955 + }, + { + "epoch": 0.3109354869391531, + "grad_norm": 2.2426873681604147, + "learning_rate": 5.6492312168622116e-06, + "loss": 0.04672164916992187, + "step": 35960 + }, + { + "epoch": 0.3109787204606964, + "grad_norm": 0.33217059822535333, + "learning_rate": 5.649135613452003e-06, + "loss": 0.039020538330078125, + "step": 35965 + }, + { + "epoch": 0.3110219539822397, + "grad_norm": 34.700948012464266, + "learning_rate": 5.6490399978242145e-06, + "loss": 0.611224365234375, + "step": 35970 + }, + { + "epoch": 0.3110651875037829, + "grad_norm": 1.7255558586130653, + "learning_rate": 5.6489443699792876e-06, + "loss": 0.5377662658691407, + "step": 35975 + }, + { + "epoch": 0.3111084210253262, + "grad_norm": 6.8569837140045085, + "learning_rate": 5.648848729917663e-06, + "loss": 0.04552001953125, + "step": 35980 + }, + { + "epoch": 0.31115165454686944, + "grad_norm": 6.579587935507502, + "learning_rate": 5.648753077639783e-06, + "loss": 0.08783340454101562, + "step": 35985 + }, + { + "epoch": 0.3111948880684127, + "grad_norm": 52.6893497951623, + "learning_rate": 5.648657413146087e-06, + "loss": 0.6396354675292969, + "step": 35990 + }, + { + "epoch": 0.311238121589956, + "grad_norm": 12.880564470179982, + "learning_rate": 5.648561736437017e-06, + "loss": 0.23861846923828126, + "step": 35995 + }, + { + "epoch": 0.31128135511149924, + "grad_norm": 17.948763864051575, + "learning_rate": 5.648466047513015e-06, + "loss": 0.313690185546875, + "step": 36000 + }, + { + "epoch": 0.3113245886330425, + "grad_norm": 4.396402466213762, + "learning_rate": 5.648370346374521e-06, + "loss": 0.07823028564453124, + "step": 36005 + }, + { + "epoch": 0.3113678221545858, + "grad_norm": 28.062225305099084, + "learning_rate": 5.6482746330219775e-06, + "loss": 0.33451385498046876, + "step": 36010 + }, + { + "epoch": 0.31141105567612903, + "grad_norm": 0.871066410083372, + "learning_rate": 5.648178907455825e-06, + "loss": 0.1694061279296875, + "step": 36015 + }, + { + "epoch": 0.3114542891976723, + "grad_norm": 0.27395170317944195, + "learning_rate": 5.6480831696765045e-06, + "loss": 0.10774765014648438, + "step": 36020 + }, + { + "epoch": 0.31149752271921555, + "grad_norm": 40.451465154358225, + "learning_rate": 5.64798741968446e-06, + "loss": 0.193865966796875, + "step": 36025 + }, + { + "epoch": 0.31154075624075883, + "grad_norm": 2.9563311613417373, + "learning_rate": 5.647891657480132e-06, + "loss": 0.04491119384765625, + "step": 36030 + }, + { + "epoch": 0.3115839897623021, + "grad_norm": 5.651078993740017, + "learning_rate": 5.647795883063959e-06, + "loss": 0.24135169982910157, + "step": 36035 + }, + { + "epoch": 0.31162722328384534, + "grad_norm": 18.38015509167032, + "learning_rate": 5.647700096436388e-06, + "loss": 0.09198074340820313, + "step": 36040 + }, + { + "epoch": 0.31167045680538863, + "grad_norm": 0.9689115736545411, + "learning_rate": 5.647604297597857e-06, + "loss": 0.19177093505859374, + "step": 36045 + }, + { + "epoch": 0.3117136903269319, + "grad_norm": 9.39445058454502, + "learning_rate": 5.647508486548809e-06, + "loss": 0.28518524169921877, + "step": 36050 + }, + { + "epoch": 0.31175692384847514, + "grad_norm": 14.875254062883432, + "learning_rate": 5.6474126632896855e-06, + "loss": 0.3167236328125, + "step": 36055 + }, + { + "epoch": 0.3118001573700184, + "grad_norm": 1.4832160729854087, + "learning_rate": 5.6473168278209285e-06, + "loss": 0.24754638671875, + "step": 36060 + }, + { + "epoch": 0.31184339089156166, + "grad_norm": 8.529532511470594, + "learning_rate": 5.64722098014298e-06, + "loss": 0.246746826171875, + "step": 36065 + }, + { + "epoch": 0.31188662441310494, + "grad_norm": 15.042938369051177, + "learning_rate": 5.647125120256284e-06, + "loss": 0.2708282470703125, + "step": 36070 + }, + { + "epoch": 0.3119298579346482, + "grad_norm": 39.738791967390085, + "learning_rate": 5.64702924816128e-06, + "loss": 0.15892677307128905, + "step": 36075 + }, + { + "epoch": 0.31197309145619145, + "grad_norm": 8.822524894233053, + "learning_rate": 5.64693336385841e-06, + "loss": 0.25106239318847656, + "step": 36080 + }, + { + "epoch": 0.31201632497773474, + "grad_norm": 0.7586349854726611, + "learning_rate": 5.646837467348118e-06, + "loss": 0.16045074462890624, + "step": 36085 + }, + { + "epoch": 0.312059558499278, + "grad_norm": 32.94686149724854, + "learning_rate": 5.646741558630845e-06, + "loss": 0.22035980224609375, + "step": 36090 + }, + { + "epoch": 0.31210279202082125, + "grad_norm": 9.163254157606886, + "learning_rate": 5.646645637707035e-06, + "loss": 0.101788330078125, + "step": 36095 + }, + { + "epoch": 0.31214602554236454, + "grad_norm": 6.510071913721599, + "learning_rate": 5.646549704577128e-06, + "loss": 0.129290771484375, + "step": 36100 + }, + { + "epoch": 0.31218925906390776, + "grad_norm": 116.16321791316622, + "learning_rate": 5.6464537592415684e-06, + "loss": 0.19212646484375, + "step": 36105 + }, + { + "epoch": 0.31223249258545105, + "grad_norm": 0.5976687412649849, + "learning_rate": 5.646357801700797e-06, + "loss": 0.3103675842285156, + "step": 36110 + }, + { + "epoch": 0.31227572610699433, + "grad_norm": 3.9637629162909818, + "learning_rate": 5.646261831955259e-06, + "loss": 0.22373046875, + "step": 36115 + }, + { + "epoch": 0.31231895962853756, + "grad_norm": 1.3880492387265897, + "learning_rate": 5.646165850005394e-06, + "loss": 0.15402069091796874, + "step": 36120 + }, + { + "epoch": 0.31236219315008085, + "grad_norm": 5.4828735795492145, + "learning_rate": 5.6460698558516455e-06, + "loss": 0.2273773193359375, + "step": 36125 + }, + { + "epoch": 0.31240542667162413, + "grad_norm": 17.603808840311643, + "learning_rate": 5.6459738494944565e-06, + "loss": 0.4681827545166016, + "step": 36130 + }, + { + "epoch": 0.31244866019316736, + "grad_norm": 2.9948500540435194, + "learning_rate": 5.645877830934271e-06, + "loss": 0.3147247314453125, + "step": 36135 + }, + { + "epoch": 0.31249189371471064, + "grad_norm": 13.181586227499906, + "learning_rate": 5.645781800171531e-06, + "loss": 0.1822998046875, + "step": 36140 + }, + { + "epoch": 0.31253512723625393, + "grad_norm": 35.44935757706715, + "learning_rate": 5.645685757206678e-06, + "loss": 0.28118438720703126, + "step": 36145 + }, + { + "epoch": 0.31257836075779716, + "grad_norm": 6.866385269614804, + "learning_rate": 5.645589702040157e-06, + "loss": 0.4679962158203125, + "step": 36150 + }, + { + "epoch": 0.31262159427934044, + "grad_norm": 6.258179600413498, + "learning_rate": 5.6454936346724095e-06, + "loss": 0.08848876953125, + "step": 36155 + }, + { + "epoch": 0.31266482780088367, + "grad_norm": 6.609674540378534, + "learning_rate": 5.645397555103879e-06, + "loss": 0.3138427734375, + "step": 36160 + }, + { + "epoch": 0.31270806132242696, + "grad_norm": 0.8102593565836679, + "learning_rate": 5.645301463335009e-06, + "loss": 0.1062744140625, + "step": 36165 + }, + { + "epoch": 0.31275129484397024, + "grad_norm": 4.645971545028843, + "learning_rate": 5.645205359366242e-06, + "loss": 0.183880615234375, + "step": 36170 + }, + { + "epoch": 0.31279452836551347, + "grad_norm": 36.91984643295803, + "learning_rate": 5.645109243198021e-06, + "loss": 0.34923782348632815, + "step": 36175 + }, + { + "epoch": 0.31283776188705675, + "grad_norm": 8.813146398192123, + "learning_rate": 5.645013114830791e-06, + "loss": 0.23626289367675782, + "step": 36180 + }, + { + "epoch": 0.31288099540860004, + "grad_norm": 45.79609558856121, + "learning_rate": 5.644916974264995e-06, + "loss": 0.37676239013671875, + "step": 36185 + }, + { + "epoch": 0.31292422893014327, + "grad_norm": 14.208456919684124, + "learning_rate": 5.644820821501075e-06, + "loss": 0.232720947265625, + "step": 36190 + }, + { + "epoch": 0.31296746245168655, + "grad_norm": 3.500228252597335, + "learning_rate": 5.6447246565394744e-06, + "loss": 0.189013671875, + "step": 36195 + }, + { + "epoch": 0.3130106959732298, + "grad_norm": 28.167777341154366, + "learning_rate": 5.644628479380637e-06, + "loss": 0.241424560546875, + "step": 36200 + }, + { + "epoch": 0.31305392949477306, + "grad_norm": 1.787782682286997, + "learning_rate": 5.644532290025008e-06, + "loss": 0.21682281494140626, + "step": 36205 + }, + { + "epoch": 0.31309716301631635, + "grad_norm": 12.178411201516411, + "learning_rate": 5.644436088473029e-06, + "loss": 0.23642578125, + "step": 36210 + }, + { + "epoch": 0.3131403965378596, + "grad_norm": 15.016891324928007, + "learning_rate": 5.644339874725145e-06, + "loss": 0.1345245361328125, + "step": 36215 + }, + { + "epoch": 0.31318363005940286, + "grad_norm": 4.035252208264001, + "learning_rate": 5.644243648781799e-06, + "loss": 0.225762939453125, + "step": 36220 + }, + { + "epoch": 0.31322686358094615, + "grad_norm": 0.06373940898256869, + "learning_rate": 5.644147410643434e-06, + "loss": 0.3137836456298828, + "step": 36225 + }, + { + "epoch": 0.3132700971024894, + "grad_norm": 6.732154451421975, + "learning_rate": 5.644051160310496e-06, + "loss": 0.17800865173339844, + "step": 36230 + }, + { + "epoch": 0.31331333062403266, + "grad_norm": 32.0738358842478, + "learning_rate": 5.6439548977834266e-06, + "loss": 0.5329544067382812, + "step": 36235 + }, + { + "epoch": 0.3133565641455759, + "grad_norm": 0.31015658134433177, + "learning_rate": 5.643858623062672e-06, + "loss": 0.05234222412109375, + "step": 36240 + }, + { + "epoch": 0.3133997976671192, + "grad_norm": 2.4533704962127665, + "learning_rate": 5.643762336148674e-06, + "loss": 0.082830810546875, + "step": 36245 + }, + { + "epoch": 0.31344303118866246, + "grad_norm": 30.446875370192284, + "learning_rate": 5.643666037041878e-06, + "loss": 0.156317138671875, + "step": 36250 + }, + { + "epoch": 0.3134862647102057, + "grad_norm": 22.59186184607426, + "learning_rate": 5.6435697257427274e-06, + "loss": 0.154193115234375, + "step": 36255 + }, + { + "epoch": 0.31352949823174897, + "grad_norm": 22.113672650194246, + "learning_rate": 5.643473402251668e-06, + "loss": 0.4265174865722656, + "step": 36260 + }, + { + "epoch": 0.31357273175329226, + "grad_norm": 2.982494745874196, + "learning_rate": 5.643377066569142e-06, + "loss": 0.12387924194335938, + "step": 36265 + }, + { + "epoch": 0.3136159652748355, + "grad_norm": 2.3625252608486376, + "learning_rate": 5.643280718695595e-06, + "loss": 0.1287078857421875, + "step": 36270 + }, + { + "epoch": 0.31365919879637877, + "grad_norm": 4.792698543283166, + "learning_rate": 5.643184358631471e-06, + "loss": 0.06996383666992187, + "step": 36275 + }, + { + "epoch": 0.313702432317922, + "grad_norm": 4.261428823026283, + "learning_rate": 5.643087986377214e-06, + "loss": 0.041042327880859375, + "step": 36280 + }, + { + "epoch": 0.3137456658394653, + "grad_norm": 43.80032548821768, + "learning_rate": 5.642991601933268e-06, + "loss": 0.49761962890625, + "step": 36285 + }, + { + "epoch": 0.31378889936100857, + "grad_norm": 3.284105798227991, + "learning_rate": 5.642895205300079e-06, + "loss": 0.1706634521484375, + "step": 36290 + }, + { + "epoch": 0.3138321328825518, + "grad_norm": 37.35041494792823, + "learning_rate": 5.6427987964780915e-06, + "loss": 0.12188491821289063, + "step": 36295 + }, + { + "epoch": 0.3138753664040951, + "grad_norm": 2.0416188854972477, + "learning_rate": 5.6427023754677486e-06, + "loss": 0.2066934585571289, + "step": 36300 + }, + { + "epoch": 0.31391859992563836, + "grad_norm": 1.5476805739570194, + "learning_rate": 5.642605942269496e-06, + "loss": 0.32511558532714846, + "step": 36305 + }, + { + "epoch": 0.3139618334471816, + "grad_norm": 17.946087009309814, + "learning_rate": 5.642509496883778e-06, + "loss": 0.20484161376953125, + "step": 36310 + }, + { + "epoch": 0.3140050669687249, + "grad_norm": 2.917186044861263, + "learning_rate": 5.642413039311041e-06, + "loss": 0.11628646850585937, + "step": 36315 + }, + { + "epoch": 0.31404830049026816, + "grad_norm": 29.288636179482594, + "learning_rate": 5.642316569551727e-06, + "loss": 0.6002525329589844, + "step": 36320 + }, + { + "epoch": 0.3140915340118114, + "grad_norm": 0.3218126998330867, + "learning_rate": 5.642220087606283e-06, + "loss": 0.13141994476318358, + "step": 36325 + }, + { + "epoch": 0.3141347675333547, + "grad_norm": 12.866872720949146, + "learning_rate": 5.642123593475154e-06, + "loss": 0.09108505249023438, + "step": 36330 + }, + { + "epoch": 0.3141780010548979, + "grad_norm": 32.13328792170105, + "learning_rate": 5.642027087158783e-06, + "loss": 0.372222900390625, + "step": 36335 + }, + { + "epoch": 0.3142212345764412, + "grad_norm": 9.26601930729928, + "learning_rate": 5.641930568657618e-06, + "loss": 0.17247314453125, + "step": 36340 + }, + { + "epoch": 0.3142644680979845, + "grad_norm": 20.43849709973703, + "learning_rate": 5.641834037972103e-06, + "loss": 0.4549278259277344, + "step": 36345 + }, + { + "epoch": 0.3143077016195277, + "grad_norm": 1.2512491575188984, + "learning_rate": 5.6417374951026814e-06, + "loss": 0.17237548828125, + "step": 36350 + }, + { + "epoch": 0.314350935141071, + "grad_norm": 40.476381255753715, + "learning_rate": 5.641640940049801e-06, + "loss": 0.45835723876953127, + "step": 36355 + }, + { + "epoch": 0.31439416866261427, + "grad_norm": 0.2981844840518028, + "learning_rate": 5.641544372813905e-06, + "loss": 0.0301849365234375, + "step": 36360 + }, + { + "epoch": 0.3144374021841575, + "grad_norm": 7.221158281667303, + "learning_rate": 5.641447793395441e-06, + "loss": 0.07885360717773438, + "step": 36365 + }, + { + "epoch": 0.3144806357057008, + "grad_norm": 6.325572689155293, + "learning_rate": 5.641351201794852e-06, + "loss": 0.14932861328125, + "step": 36370 + }, + { + "epoch": 0.314523869227244, + "grad_norm": 3.206698499184135, + "learning_rate": 5.6412545980125856e-06, + "loss": 0.12652587890625, + "step": 36375 + }, + { + "epoch": 0.3145671027487873, + "grad_norm": 4.713709723179758, + "learning_rate": 5.641157982049085e-06, + "loss": 0.16484909057617186, + "step": 36380 + }, + { + "epoch": 0.3146103362703306, + "grad_norm": 2.089452868471237, + "learning_rate": 5.641061353904798e-06, + "loss": 0.121881103515625, + "step": 36385 + }, + { + "epoch": 0.3146535697918738, + "grad_norm": 2.754354188522602, + "learning_rate": 5.640964713580169e-06, + "loss": 0.168505859375, + "step": 36390 + }, + { + "epoch": 0.3146968033134171, + "grad_norm": 0.8950330493261255, + "learning_rate": 5.640868061075645e-06, + "loss": 0.40098876953125, + "step": 36395 + }, + { + "epoch": 0.3147400368349604, + "grad_norm": 0.4801988225700556, + "learning_rate": 5.640771396391671e-06, + "loss": 0.20274658203125, + "step": 36400 + }, + { + "epoch": 0.3147832703565036, + "grad_norm": 8.480271102529388, + "learning_rate": 5.640674719528692e-06, + "loss": 0.33365020751953123, + "step": 36405 + }, + { + "epoch": 0.3148265038780469, + "grad_norm": 2.536201971278039, + "learning_rate": 5.640578030487154e-06, + "loss": 0.0491363525390625, + "step": 36410 + }, + { + "epoch": 0.3148697373995901, + "grad_norm": 9.186433161777597, + "learning_rate": 5.640481329267504e-06, + "loss": 0.17728195190429688, + "step": 36415 + }, + { + "epoch": 0.3149129709211334, + "grad_norm": 4.90574359214063, + "learning_rate": 5.6403846158701875e-06, + "loss": 0.20897216796875, + "step": 36420 + }, + { + "epoch": 0.3149562044426767, + "grad_norm": 2.4936230686675014, + "learning_rate": 5.640287890295651e-06, + "loss": 0.2325897216796875, + "step": 36425 + }, + { + "epoch": 0.3149994379642199, + "grad_norm": 3.3725928355445354, + "learning_rate": 5.640191152544339e-06, + "loss": 0.25898094177246095, + "step": 36430 + }, + { + "epoch": 0.3150426714857632, + "grad_norm": 1.657794842875944, + "learning_rate": 5.6400944026166995e-06, + "loss": 0.133551025390625, + "step": 36435 + }, + { + "epoch": 0.3150859050073065, + "grad_norm": 1.0141261667352985, + "learning_rate": 5.6399976405131776e-06, + "loss": 0.16023387908935546, + "step": 36440 + }, + { + "epoch": 0.3151291385288497, + "grad_norm": 10.907440111957683, + "learning_rate": 5.639900866234219e-06, + "loss": 0.08040313720703125, + "step": 36445 + }, + { + "epoch": 0.315172372050393, + "grad_norm": 22.531728783349585, + "learning_rate": 5.639804079780272e-06, + "loss": 0.245355224609375, + "step": 36450 + }, + { + "epoch": 0.31521560557193623, + "grad_norm": 0.9096181339172125, + "learning_rate": 5.639707281151782e-06, + "loss": 0.110662841796875, + "step": 36455 + }, + { + "epoch": 0.3152588390934795, + "grad_norm": 17.950695764462854, + "learning_rate": 5.639610470349194e-06, + "loss": 0.2546776294708252, + "step": 36460 + }, + { + "epoch": 0.3153020726150228, + "grad_norm": 2.685138306219179, + "learning_rate": 5.639513647372957e-06, + "loss": 0.1288177490234375, + "step": 36465 + }, + { + "epoch": 0.31534530613656603, + "grad_norm": 17.42598833662186, + "learning_rate": 5.639416812223516e-06, + "loss": 0.2991455078125, + "step": 36470 + }, + { + "epoch": 0.3153885396581093, + "grad_norm": 19.368913645441506, + "learning_rate": 5.639319964901318e-06, + "loss": 0.04398651123046875, + "step": 36475 + }, + { + "epoch": 0.3154317731796526, + "grad_norm": 43.58490721621504, + "learning_rate": 5.639223105406809e-06, + "loss": 0.1546539306640625, + "step": 36480 + }, + { + "epoch": 0.3154750067011958, + "grad_norm": 3.3335590993021227, + "learning_rate": 5.639126233740437e-06, + "loss": 0.08689498901367188, + "step": 36485 + }, + { + "epoch": 0.3155182402227391, + "grad_norm": 18.13657394565849, + "learning_rate": 5.639029349902647e-06, + "loss": 0.11264114379882813, + "step": 36490 + }, + { + "epoch": 0.31556147374428234, + "grad_norm": 0.8397286120018297, + "learning_rate": 5.638932453893888e-06, + "loss": 0.451751708984375, + "step": 36495 + }, + { + "epoch": 0.3156047072658256, + "grad_norm": 5.7705802173409255, + "learning_rate": 5.638835545714604e-06, + "loss": 0.14169464111328126, + "step": 36500 + }, + { + "epoch": 0.3156479407873689, + "grad_norm": 3.1536206395363, + "learning_rate": 5.6387386253652446e-06, + "loss": 0.06692085266113282, + "step": 36505 + }, + { + "epoch": 0.31569117430891214, + "grad_norm": 4.20674074441131, + "learning_rate": 5.638641692846256e-06, + "loss": 0.0938232421875, + "step": 36510 + }, + { + "epoch": 0.3157344078304554, + "grad_norm": 8.894683780298621, + "learning_rate": 5.638544748158085e-06, + "loss": 0.3994758605957031, + "step": 36515 + }, + { + "epoch": 0.3157776413519987, + "grad_norm": 1.50396223432959, + "learning_rate": 5.638447791301179e-06, + "loss": 0.145977783203125, + "step": 36520 + }, + { + "epoch": 0.31582087487354193, + "grad_norm": 0.36466861244038723, + "learning_rate": 5.638350822275984e-06, + "loss": 0.039174652099609374, + "step": 36525 + }, + { + "epoch": 0.3158641083950852, + "grad_norm": 10.208112344346508, + "learning_rate": 5.6382538410829495e-06, + "loss": 0.069036865234375, + "step": 36530 + }, + { + "epoch": 0.3159073419166285, + "grad_norm": 5.915102940717511, + "learning_rate": 5.63815684772252e-06, + "loss": 0.14216232299804688, + "step": 36535 + }, + { + "epoch": 0.31595057543817173, + "grad_norm": 0.3130655450425709, + "learning_rate": 5.6380598421951455e-06, + "loss": 0.0346038818359375, + "step": 36540 + }, + { + "epoch": 0.315993808959715, + "grad_norm": 5.396197733349963, + "learning_rate": 5.6379628245012714e-06, + "loss": 0.12600555419921874, + "step": 36545 + }, + { + "epoch": 0.31603704248125825, + "grad_norm": 3.3708028081627237, + "learning_rate": 5.637865794641346e-06, + "loss": 0.11471633911132813, + "step": 36550 + }, + { + "epoch": 0.31608027600280153, + "grad_norm": 18.19234128107114, + "learning_rate": 5.6377687526158164e-06, + "loss": 0.35556488037109374, + "step": 36555 + }, + { + "epoch": 0.3161235095243448, + "grad_norm": 0.4815261690161264, + "learning_rate": 5.637671698425131e-06, + "loss": 0.14770050048828126, + "step": 36560 + }, + { + "epoch": 0.31616674304588804, + "grad_norm": 7.990095016803775, + "learning_rate": 5.6375746320697366e-06, + "loss": 0.28543243408203123, + "step": 36565 + }, + { + "epoch": 0.31620997656743133, + "grad_norm": 2.033777081354122, + "learning_rate": 5.637477553550081e-06, + "loss": 0.215740966796875, + "step": 36570 + }, + { + "epoch": 0.3162532100889746, + "grad_norm": 36.12961234712313, + "learning_rate": 5.637380462866611e-06, + "loss": 0.2902008056640625, + "step": 36575 + }, + { + "epoch": 0.31629644361051784, + "grad_norm": 4.576131489056342, + "learning_rate": 5.6372833600197765e-06, + "loss": 0.13670148849487304, + "step": 36580 + }, + { + "epoch": 0.3163396771320611, + "grad_norm": 2.0866067637754293, + "learning_rate": 5.637186245010024e-06, + "loss": 0.18409423828125, + "step": 36585 + }, + { + "epoch": 0.31638291065360435, + "grad_norm": 34.106229566009006, + "learning_rate": 5.637089117837801e-06, + "loss": 0.14426193237304688, + "step": 36590 + }, + { + "epoch": 0.31642614417514764, + "grad_norm": 14.24875272700083, + "learning_rate": 5.636991978503557e-06, + "loss": 0.12037487030029297, + "step": 36595 + }, + { + "epoch": 0.3164693776966909, + "grad_norm": 0.8860143522812555, + "learning_rate": 5.636894827007739e-06, + "loss": 0.08239402770996093, + "step": 36600 + }, + { + "epoch": 0.31651261121823415, + "grad_norm": 5.920384930116886, + "learning_rate": 5.636797663350794e-06, + "loss": 0.08118515014648438, + "step": 36605 + }, + { + "epoch": 0.31655584473977744, + "grad_norm": 1.117847190597515, + "learning_rate": 5.636700487533171e-06, + "loss": 0.099072265625, + "step": 36610 + }, + { + "epoch": 0.3165990782613207, + "grad_norm": 11.17665934439689, + "learning_rate": 5.636603299555319e-06, + "loss": 0.112200927734375, + "step": 36615 + }, + { + "epoch": 0.31664231178286395, + "grad_norm": 9.828591333333971, + "learning_rate": 5.636506099417686e-06, + "loss": 0.1046905517578125, + "step": 36620 + }, + { + "epoch": 0.31668554530440723, + "grad_norm": 17.882056783847933, + "learning_rate": 5.63640888712072e-06, + "loss": 0.10691566467285156, + "step": 36625 + }, + { + "epoch": 0.31672877882595046, + "grad_norm": 0.21643379831206602, + "learning_rate": 5.636311662664868e-06, + "loss": 0.31190185546875, + "step": 36630 + }, + { + "epoch": 0.31677201234749375, + "grad_norm": 5.9691526143060845, + "learning_rate": 5.63621442605058e-06, + "loss": 0.21194534301757811, + "step": 36635 + }, + { + "epoch": 0.31681524586903703, + "grad_norm": 39.99810488755685, + "learning_rate": 5.636117177278304e-06, + "loss": 0.35436210632324217, + "step": 36640 + }, + { + "epoch": 0.31685847939058026, + "grad_norm": 0.6135429585966441, + "learning_rate": 5.6360199163484886e-06, + "loss": 0.09726486206054688, + "step": 36645 + }, + { + "epoch": 0.31690171291212355, + "grad_norm": 0.891535450161232, + "learning_rate": 5.635922643261583e-06, + "loss": 0.130206298828125, + "step": 36650 + }, + { + "epoch": 0.31694494643366683, + "grad_norm": 4.797436722577689, + "learning_rate": 5.6358253580180335e-06, + "loss": 0.1116241455078125, + "step": 36655 + }, + { + "epoch": 0.31698817995521006, + "grad_norm": 22.168859884920085, + "learning_rate": 5.6357280606182915e-06, + "loss": 0.33324851989746096, + "step": 36660 + }, + { + "epoch": 0.31703141347675334, + "grad_norm": 5.766207484818412, + "learning_rate": 5.635630751062804e-06, + "loss": 0.08855438232421875, + "step": 36665 + }, + { + "epoch": 0.31707464699829657, + "grad_norm": 21.774691574787568, + "learning_rate": 5.635533429352021e-06, + "loss": 0.35531005859375, + "step": 36670 + }, + { + "epoch": 0.31711788051983986, + "grad_norm": 0.33552872995205063, + "learning_rate": 5.6354360954863905e-06, + "loss": 0.1425729751586914, + "step": 36675 + }, + { + "epoch": 0.31716111404138314, + "grad_norm": 4.919450751233194, + "learning_rate": 5.635338749466361e-06, + "loss": 0.27325849533081054, + "step": 36680 + }, + { + "epoch": 0.31720434756292637, + "grad_norm": 2.4661594051270894, + "learning_rate": 5.635241391292384e-06, + "loss": 0.1161041259765625, + "step": 36685 + }, + { + "epoch": 0.31724758108446965, + "grad_norm": 7.121216371584509, + "learning_rate": 5.635144020964904e-06, + "loss": 0.18662490844726562, + "step": 36690 + }, + { + "epoch": 0.31729081460601294, + "grad_norm": 19.650024329324186, + "learning_rate": 5.6350466384843735e-06, + "loss": 0.36224365234375, + "step": 36695 + }, + { + "epoch": 0.31733404812755617, + "grad_norm": 15.62762676113056, + "learning_rate": 5.634949243851241e-06, + "loss": 0.1425537109375, + "step": 36700 + }, + { + "epoch": 0.31737728164909945, + "grad_norm": 4.530437968341385, + "learning_rate": 5.634851837065956e-06, + "loss": 0.12965087890625, + "step": 36705 + }, + { + "epoch": 0.31742051517064274, + "grad_norm": 11.492791905710199, + "learning_rate": 5.634754418128965e-06, + "loss": 0.11256103515625, + "step": 36710 + }, + { + "epoch": 0.31746374869218597, + "grad_norm": 9.233240949138612, + "learning_rate": 5.6346569870407215e-06, + "loss": 0.1558990478515625, + "step": 36715 + }, + { + "epoch": 0.31750698221372925, + "grad_norm": 20.169105929479628, + "learning_rate": 5.634559543801672e-06, + "loss": 0.22905426025390624, + "step": 36720 + }, + { + "epoch": 0.3175502157352725, + "grad_norm": 25.873260760783786, + "learning_rate": 5.634462088412268e-06, + "loss": 0.17174854278564453, + "step": 36725 + }, + { + "epoch": 0.31759344925681576, + "grad_norm": 59.511239045765585, + "learning_rate": 5.634364620872956e-06, + "loss": 0.15613632202148436, + "step": 36730 + }, + { + "epoch": 0.31763668277835905, + "grad_norm": 24.98088862713765, + "learning_rate": 5.634267141184188e-06, + "loss": 0.1267780303955078, + "step": 36735 + }, + { + "epoch": 0.3176799162999023, + "grad_norm": 5.3182865463173155, + "learning_rate": 5.6341696493464124e-06, + "loss": 0.21318817138671875, + "step": 36740 + }, + { + "epoch": 0.31772314982144556, + "grad_norm": 25.885580742887306, + "learning_rate": 5.634072145360079e-06, + "loss": 0.353033447265625, + "step": 36745 + }, + { + "epoch": 0.31776638334298885, + "grad_norm": 5.155725450176457, + "learning_rate": 5.633974629225638e-06, + "loss": 0.12541465759277343, + "step": 36750 + }, + { + "epoch": 0.3178096168645321, + "grad_norm": 7.142083771118982, + "learning_rate": 5.633877100943538e-06, + "loss": 0.1433746337890625, + "step": 36755 + }, + { + "epoch": 0.31785285038607536, + "grad_norm": 2.1262306381881597, + "learning_rate": 5.63377956051423e-06, + "loss": 0.36841659545898436, + "step": 36760 + }, + { + "epoch": 0.3178960839076186, + "grad_norm": 1.1033009997324612, + "learning_rate": 5.633682007938163e-06, + "loss": 0.5308364868164063, + "step": 36765 + }, + { + "epoch": 0.31793931742916187, + "grad_norm": 10.869408397891235, + "learning_rate": 5.633584443215788e-06, + "loss": 0.293896484375, + "step": 36770 + }, + { + "epoch": 0.31798255095070516, + "grad_norm": 9.131879162803989, + "learning_rate": 5.6334868663475545e-06, + "loss": 0.0884796142578125, + "step": 36775 + }, + { + "epoch": 0.3180257844722484, + "grad_norm": 4.467223514868299, + "learning_rate": 5.6333892773339115e-06, + "loss": 0.1447540283203125, + "step": 36780 + }, + { + "epoch": 0.31806901799379167, + "grad_norm": 11.613929914261695, + "learning_rate": 5.63329167617531e-06, + "loss": 0.18264312744140626, + "step": 36785 + }, + { + "epoch": 0.31811225151533495, + "grad_norm": 25.555403566434716, + "learning_rate": 5.6331940628721995e-06, + "loss": 0.27218055725097656, + "step": 36790 + }, + { + "epoch": 0.3181554850368782, + "grad_norm": 15.303165493263272, + "learning_rate": 5.633096437425032e-06, + "loss": 0.15397682189941406, + "step": 36795 + }, + { + "epoch": 0.31819871855842147, + "grad_norm": 11.134537403562357, + "learning_rate": 5.632998799834255e-06, + "loss": 0.10951461791992187, + "step": 36800 + }, + { + "epoch": 0.3182419520799647, + "grad_norm": 8.852692080752362, + "learning_rate": 5.632901150100322e-06, + "loss": 0.2345367431640625, + "step": 36805 + }, + { + "epoch": 0.318285185601508, + "grad_norm": 19.079152156058246, + "learning_rate": 5.63280348822368e-06, + "loss": 0.33469467163085936, + "step": 36810 + }, + { + "epoch": 0.31832841912305126, + "grad_norm": 40.35381489377063, + "learning_rate": 5.63270581420478e-06, + "loss": 0.283538818359375, + "step": 36815 + }, + { + "epoch": 0.3183716526445945, + "grad_norm": 27.603859794200332, + "learning_rate": 5.632608128044076e-06, + "loss": 0.356903076171875, + "step": 36820 + }, + { + "epoch": 0.3184148861661378, + "grad_norm": 3.203444776383236, + "learning_rate": 5.632510429742014e-06, + "loss": 0.12334785461425782, + "step": 36825 + }, + { + "epoch": 0.31845811968768106, + "grad_norm": 4.253573677068963, + "learning_rate": 5.632412719299046e-06, + "loss": 0.24838495254516602, + "step": 36830 + }, + { + "epoch": 0.3185013532092243, + "grad_norm": 6.589335979884317, + "learning_rate": 5.632314996715625e-06, + "loss": 0.22909469604492189, + "step": 36835 + }, + { + "epoch": 0.3185445867307676, + "grad_norm": 1.6344578106405352, + "learning_rate": 5.6322172619922e-06, + "loss": 0.158599853515625, + "step": 36840 + }, + { + "epoch": 0.3185878202523108, + "grad_norm": 21.600549800799318, + "learning_rate": 5.63211951512922e-06, + "loss": 0.309381103515625, + "step": 36845 + }, + { + "epoch": 0.3186310537738541, + "grad_norm": 0.7644131648307001, + "learning_rate": 5.632021756127138e-06, + "loss": 0.12185592651367187, + "step": 36850 + }, + { + "epoch": 0.3186742872953974, + "grad_norm": 2.7623614022920955, + "learning_rate": 5.6319239849864054e-06, + "loss": 0.14165267944335938, + "step": 36855 + }, + { + "epoch": 0.3187175208169406, + "grad_norm": 1.691653939923438, + "learning_rate": 5.6318262017074695e-06, + "loss": 0.1630859375, + "step": 36860 + }, + { + "epoch": 0.3187607543384839, + "grad_norm": 4.676151215113081, + "learning_rate": 5.631728406290785e-06, + "loss": 0.08607101440429688, + "step": 36865 + }, + { + "epoch": 0.31880398786002717, + "grad_norm": 24.951604643316834, + "learning_rate": 5.6316305987368024e-06, + "loss": 0.2847023010253906, + "step": 36870 + }, + { + "epoch": 0.3188472213815704, + "grad_norm": 14.8444227063645, + "learning_rate": 5.631532779045971e-06, + "loss": 0.16378173828125, + "step": 36875 + }, + { + "epoch": 0.3188904549031137, + "grad_norm": 1.8840529433232187, + "learning_rate": 5.6314349472187435e-06, + "loss": 0.1668701171875, + "step": 36880 + }, + { + "epoch": 0.31893368842465697, + "grad_norm": 21.587605144712324, + "learning_rate": 5.63133710325557e-06, + "loss": 0.19289703369140626, + "step": 36885 + }, + { + "epoch": 0.3189769219462002, + "grad_norm": 7.9405050377729935, + "learning_rate": 5.631239247156903e-06, + "loss": 0.30325927734375, + "step": 36890 + }, + { + "epoch": 0.3190201554677435, + "grad_norm": 10.36628868777588, + "learning_rate": 5.631141378923192e-06, + "loss": 0.09338912963867188, + "step": 36895 + }, + { + "epoch": 0.3190633889892867, + "grad_norm": 2.3755658903269077, + "learning_rate": 5.631043498554891e-06, + "loss": 0.1418212890625, + "step": 36900 + }, + { + "epoch": 0.31910662251083, + "grad_norm": 19.572518105170744, + "learning_rate": 5.6309456060524485e-06, + "loss": 0.2714805603027344, + "step": 36905 + }, + { + "epoch": 0.3191498560323733, + "grad_norm": 11.63383774067822, + "learning_rate": 5.630847701416318e-06, + "loss": 0.08260269165039062, + "step": 36910 + }, + { + "epoch": 0.3191930895539165, + "grad_norm": 4.717672921469144, + "learning_rate": 5.6307497846469505e-06, + "loss": 0.090966796875, + "step": 36915 + }, + { + "epoch": 0.3192363230754598, + "grad_norm": 1.8001464339366322, + "learning_rate": 5.630651855744797e-06, + "loss": 0.32580108642578126, + "step": 36920 + }, + { + "epoch": 0.3192795565970031, + "grad_norm": 1.3716365406617632, + "learning_rate": 5.630553914710309e-06, + "loss": 0.330340576171875, + "step": 36925 + }, + { + "epoch": 0.3193227901185463, + "grad_norm": 29.813325413128926, + "learning_rate": 5.6304559615439395e-06, + "loss": 0.16233444213867188, + "step": 36930 + }, + { + "epoch": 0.3193660236400896, + "grad_norm": 14.51782522412921, + "learning_rate": 5.63035799624614e-06, + "loss": 0.1075439453125, + "step": 36935 + }, + { + "epoch": 0.3194092571616328, + "grad_norm": 0.983036617546968, + "learning_rate": 5.630260018817362e-06, + "loss": 0.058428955078125, + "step": 36940 + }, + { + "epoch": 0.3194524906831761, + "grad_norm": 6.1643526263587605, + "learning_rate": 5.630162029258056e-06, + "loss": 0.35060958862304686, + "step": 36945 + }, + { + "epoch": 0.3194957242047194, + "grad_norm": 1.0021904092939988, + "learning_rate": 5.630064027568676e-06, + "loss": 0.2297882080078125, + "step": 36950 + }, + { + "epoch": 0.3195389577262626, + "grad_norm": 6.630891989268332, + "learning_rate": 5.629966013749672e-06, + "loss": 0.09178466796875, + "step": 36955 + }, + { + "epoch": 0.3195821912478059, + "grad_norm": 4.674322696624779, + "learning_rate": 5.629867987801499e-06, + "loss": 0.279241943359375, + "step": 36960 + }, + { + "epoch": 0.3196254247693492, + "grad_norm": 0.8227584108494539, + "learning_rate": 5.629769949724606e-06, + "loss": 0.35871734619140627, + "step": 36965 + }, + { + "epoch": 0.3196686582908924, + "grad_norm": 0.5448761297495306, + "learning_rate": 5.629671899519446e-06, + "loss": 0.25109710693359377, + "step": 36970 + }, + { + "epoch": 0.3197118918124357, + "grad_norm": 1.1828786564812785, + "learning_rate": 5.629573837186472e-06, + "loss": 0.101654052734375, + "step": 36975 + }, + { + "epoch": 0.31975512533397893, + "grad_norm": 17.540543557646128, + "learning_rate": 5.629475762726137e-06, + "loss": 0.2330780029296875, + "step": 36980 + }, + { + "epoch": 0.3197983588555222, + "grad_norm": 6.850133228576783, + "learning_rate": 5.62937767613889e-06, + "loss": 0.5451377868652344, + "step": 36985 + }, + { + "epoch": 0.3198415923770655, + "grad_norm": 0.2838517035598962, + "learning_rate": 5.629279577425187e-06, + "loss": 0.1762847900390625, + "step": 36990 + }, + { + "epoch": 0.3198848258986087, + "grad_norm": 0.7151843894065918, + "learning_rate": 5.629181466585478e-06, + "loss": 0.10033035278320312, + "step": 36995 + }, + { + "epoch": 0.319928059420152, + "grad_norm": 6.030095083799543, + "learning_rate": 5.629083343620217e-06, + "loss": 0.07364501953125, + "step": 37000 + }, + { + "epoch": 0.3199712929416953, + "grad_norm": 3.0323301283886264, + "learning_rate": 5.628985208529857e-06, + "loss": 0.1390625, + "step": 37005 + }, + { + "epoch": 0.3200145264632385, + "grad_norm": 0.49993709421472715, + "learning_rate": 5.628887061314848e-06, + "loss": 0.0735992431640625, + "step": 37010 + }, + { + "epoch": 0.3200577599847818, + "grad_norm": 23.506497259488718, + "learning_rate": 5.628788901975644e-06, + "loss": 0.24877166748046875, + "step": 37015 + }, + { + "epoch": 0.32010099350632504, + "grad_norm": 18.053558512932458, + "learning_rate": 5.628690730512699e-06, + "loss": 0.24826087951660156, + "step": 37020 + }, + { + "epoch": 0.3201442270278683, + "grad_norm": 7.162325142816544, + "learning_rate": 5.628592546926463e-06, + "loss": 0.1236083984375, + "step": 37025 + }, + { + "epoch": 0.3201874605494116, + "grad_norm": 8.1548347640601, + "learning_rate": 5.628494351217392e-06, + "loss": 0.0752960205078125, + "step": 37030 + }, + { + "epoch": 0.32023069407095484, + "grad_norm": 5.466974795547418, + "learning_rate": 5.6283961433859364e-06, + "loss": 0.20205078125, + "step": 37035 + }, + { + "epoch": 0.3202739275924981, + "grad_norm": 63.624485054746096, + "learning_rate": 5.62829792343255e-06, + "loss": 0.25340576171875, + "step": 37040 + }, + { + "epoch": 0.3203171611140414, + "grad_norm": 6.721517021769432, + "learning_rate": 5.628199691357686e-06, + "loss": 0.1582122802734375, + "step": 37045 + }, + { + "epoch": 0.32036039463558463, + "grad_norm": 3.1151573263016292, + "learning_rate": 5.628101447161798e-06, + "loss": 0.196099853515625, + "step": 37050 + }, + { + "epoch": 0.3204036281571279, + "grad_norm": 10.825571069430422, + "learning_rate": 5.628003190845337e-06, + "loss": 0.3881683349609375, + "step": 37055 + }, + { + "epoch": 0.3204468616786712, + "grad_norm": 0.9168146481767577, + "learning_rate": 5.627904922408758e-06, + "loss": 0.204290771484375, + "step": 37060 + }, + { + "epoch": 0.32049009520021443, + "grad_norm": 25.13647939599445, + "learning_rate": 5.6278066418525145e-06, + "loss": 0.43499755859375, + "step": 37065 + }, + { + "epoch": 0.3205333287217577, + "grad_norm": 7.082850651901783, + "learning_rate": 5.627708349177058e-06, + "loss": 0.14564552307128906, + "step": 37070 + }, + { + "epoch": 0.32057656224330094, + "grad_norm": 17.08236189644997, + "learning_rate": 5.627610044382844e-06, + "loss": 0.36529541015625, + "step": 37075 + }, + { + "epoch": 0.32061979576484423, + "grad_norm": 2.1922301748496036, + "learning_rate": 5.627511727470323e-06, + "loss": 0.030698394775390624, + "step": 37080 + }, + { + "epoch": 0.3206630292863875, + "grad_norm": 5.726385178536389, + "learning_rate": 5.627413398439952e-06, + "loss": 0.22580718994140625, + "step": 37085 + }, + { + "epoch": 0.32070626280793074, + "grad_norm": 4.097894334202689, + "learning_rate": 5.627315057292182e-06, + "loss": 0.20719757080078124, + "step": 37090 + }, + { + "epoch": 0.320749496329474, + "grad_norm": 10.821116099350968, + "learning_rate": 5.627216704027466e-06, + "loss": 0.21012725830078124, + "step": 37095 + }, + { + "epoch": 0.3207927298510173, + "grad_norm": 22.649115859381656, + "learning_rate": 5.627118338646259e-06, + "loss": 0.15503692626953125, + "step": 37100 + }, + { + "epoch": 0.32083596337256054, + "grad_norm": 17.76285039098355, + "learning_rate": 5.627019961149015e-06, + "loss": 0.210003662109375, + "step": 37105 + }, + { + "epoch": 0.3208791968941038, + "grad_norm": 7.8485128867081135, + "learning_rate": 5.626921571536187e-06, + "loss": 0.2378173828125, + "step": 37110 + }, + { + "epoch": 0.32092243041564705, + "grad_norm": 5.496271423492937, + "learning_rate": 5.6268231698082276e-06, + "loss": 0.107720947265625, + "step": 37115 + }, + { + "epoch": 0.32096566393719034, + "grad_norm": 10.813879192373664, + "learning_rate": 5.626724755965594e-06, + "loss": 0.34269256591796876, + "step": 37120 + }, + { + "epoch": 0.3210088974587336, + "grad_norm": 3.058253327056786, + "learning_rate": 5.626626330008736e-06, + "loss": 0.36908111572265623, + "step": 37125 + }, + { + "epoch": 0.32105213098027685, + "grad_norm": 1.586544578085161, + "learning_rate": 5.62652789193811e-06, + "loss": 0.25142593383789064, + "step": 37130 + }, + { + "epoch": 0.32109536450182014, + "grad_norm": 1.6601617117217287, + "learning_rate": 5.62642944175417e-06, + "loss": 0.3154327392578125, + "step": 37135 + }, + { + "epoch": 0.3211385980233634, + "grad_norm": 4.230477248062447, + "learning_rate": 5.6263309794573695e-06, + "loss": 0.037237548828125, + "step": 37140 + }, + { + "epoch": 0.32118183154490665, + "grad_norm": 5.768460301267615, + "learning_rate": 5.6262325050481615e-06, + "loss": 0.2551849365234375, + "step": 37145 + }, + { + "epoch": 0.32122506506644993, + "grad_norm": 14.220400188420985, + "learning_rate": 5.6261340185270025e-06, + "loss": 0.07138137817382813, + "step": 37150 + }, + { + "epoch": 0.32126829858799316, + "grad_norm": 7.503764928886755, + "learning_rate": 5.626035519894345e-06, + "loss": 0.1155914306640625, + "step": 37155 + }, + { + "epoch": 0.32131153210953645, + "grad_norm": 13.671169137458303, + "learning_rate": 5.625937009150644e-06, + "loss": 0.213568115234375, + "step": 37160 + }, + { + "epoch": 0.32135476563107973, + "grad_norm": 1.4544511364920483, + "learning_rate": 5.625838486296353e-06, + "loss": 0.2517662048339844, + "step": 37165 + }, + { + "epoch": 0.32139799915262296, + "grad_norm": 7.614937390098748, + "learning_rate": 5.625739951331927e-06, + "loss": 0.100103759765625, + "step": 37170 + }, + { + "epoch": 0.32144123267416624, + "grad_norm": 1.3224307101413106, + "learning_rate": 5.6256414042578206e-06, + "loss": 0.08468399047851563, + "step": 37175 + }, + { + "epoch": 0.32148446619570953, + "grad_norm": 27.069028395606225, + "learning_rate": 5.625542845074487e-06, + "loss": 0.2826568603515625, + "step": 37180 + }, + { + "epoch": 0.32152769971725276, + "grad_norm": 4.439266350265442, + "learning_rate": 5.625444273782383e-06, + "loss": 0.288092041015625, + "step": 37185 + }, + { + "epoch": 0.32157093323879604, + "grad_norm": 1.2315470790570253, + "learning_rate": 5.625345690381962e-06, + "loss": 0.154864501953125, + "step": 37190 + }, + { + "epoch": 0.32161416676033927, + "grad_norm": 2.2048183381469615, + "learning_rate": 5.625247094873678e-06, + "loss": 0.20987281799316407, + "step": 37195 + }, + { + "epoch": 0.32165740028188256, + "grad_norm": 3.2233579767422693, + "learning_rate": 5.625148487257987e-06, + "loss": 0.1316131591796875, + "step": 37200 + }, + { + "epoch": 0.32170063380342584, + "grad_norm": 14.522547654143757, + "learning_rate": 5.6250498675353435e-06, + "loss": 0.10253238677978516, + "step": 37205 + }, + { + "epoch": 0.32174386732496907, + "grad_norm": 20.649893101453586, + "learning_rate": 5.624951235706201e-06, + "loss": 0.24760360717773439, + "step": 37210 + }, + { + "epoch": 0.32178710084651235, + "grad_norm": 13.791348934052925, + "learning_rate": 5.624852591771016e-06, + "loss": 0.076580810546875, + "step": 37215 + }, + { + "epoch": 0.32183033436805564, + "grad_norm": 4.513962416629074, + "learning_rate": 5.6247539357302426e-06, + "loss": 0.06124114990234375, + "step": 37220 + }, + { + "epoch": 0.32187356788959887, + "grad_norm": 8.815108308227936, + "learning_rate": 5.624655267584336e-06, + "loss": 0.216754150390625, + "step": 37225 + }, + { + "epoch": 0.32191680141114215, + "grad_norm": 1.9887193592830739, + "learning_rate": 5.62455658733375e-06, + "loss": 0.04993743896484375, + "step": 37230 + }, + { + "epoch": 0.3219600349326854, + "grad_norm": 49.74323948413851, + "learning_rate": 5.624457894978943e-06, + "loss": 0.14323348999023439, + "step": 37235 + }, + { + "epoch": 0.32200326845422866, + "grad_norm": 3.130825241621033, + "learning_rate": 5.6243591905203665e-06, + "loss": 0.1172332763671875, + "step": 37240 + }, + { + "epoch": 0.32204650197577195, + "grad_norm": 24.554431504397563, + "learning_rate": 5.6242604739584785e-06, + "loss": 0.6036376953125, + "step": 37245 + }, + { + "epoch": 0.3220897354973152, + "grad_norm": 26.344165688892762, + "learning_rate": 5.624161745293732e-06, + "loss": 0.2160614013671875, + "step": 37250 + }, + { + "epoch": 0.32213296901885846, + "grad_norm": 6.005614401768338, + "learning_rate": 5.624063004526584e-06, + "loss": 0.14528045654296876, + "step": 37255 + }, + { + "epoch": 0.32217620254040175, + "grad_norm": 39.24389837083869, + "learning_rate": 5.623964251657489e-06, + "loss": 0.064947509765625, + "step": 37260 + }, + { + "epoch": 0.322219436061945, + "grad_norm": 12.252570458977928, + "learning_rate": 5.623865486686903e-06, + "loss": 0.1555816650390625, + "step": 37265 + }, + { + "epoch": 0.32226266958348826, + "grad_norm": 7.7565242893529565, + "learning_rate": 5.623766709615281e-06, + "loss": 0.3020050048828125, + "step": 37270 + }, + { + "epoch": 0.32230590310503154, + "grad_norm": 20.890918183492182, + "learning_rate": 5.623667920443079e-06, + "loss": 0.12254257202148437, + "step": 37275 + }, + { + "epoch": 0.3223491366265748, + "grad_norm": 15.127587490633722, + "learning_rate": 5.623569119170752e-06, + "loss": 0.038043212890625, + "step": 37280 + }, + { + "epoch": 0.32239237014811806, + "grad_norm": 16.677997664993526, + "learning_rate": 5.623470305798756e-06, + "loss": 0.21441650390625, + "step": 37285 + }, + { + "epoch": 0.3224356036696613, + "grad_norm": 0.18967123787981413, + "learning_rate": 5.623371480327548e-06, + "loss": 0.4420166015625, + "step": 37290 + }, + { + "epoch": 0.32247883719120457, + "grad_norm": 17.937293008637432, + "learning_rate": 5.623272642757582e-06, + "loss": 0.43361053466796873, + "step": 37295 + }, + { + "epoch": 0.32252207071274785, + "grad_norm": 2.380646334786009, + "learning_rate": 5.623173793089314e-06, + "loss": 0.0865478515625, + "step": 37300 + }, + { + "epoch": 0.3225653042342911, + "grad_norm": 0.8520401250909803, + "learning_rate": 5.6230749313232e-06, + "loss": 0.32022705078125, + "step": 37305 + }, + { + "epoch": 0.32260853775583437, + "grad_norm": 2.6506954543247288, + "learning_rate": 5.622976057459696e-06, + "loss": 0.05918188095092773, + "step": 37310 + }, + { + "epoch": 0.32265177127737765, + "grad_norm": 43.10321558766843, + "learning_rate": 5.6228771714992584e-06, + "loss": 0.458685302734375, + "step": 37315 + }, + { + "epoch": 0.3226950047989209, + "grad_norm": 5.956459195496378, + "learning_rate": 5.6227782734423435e-06, + "loss": 0.35469207763671873, + "step": 37320 + }, + { + "epoch": 0.32273823832046417, + "grad_norm": 29.37001794168585, + "learning_rate": 5.622679363289407e-06, + "loss": 0.1777435302734375, + "step": 37325 + }, + { + "epoch": 0.3227814718420074, + "grad_norm": 4.050382299635953, + "learning_rate": 5.622580441040905e-06, + "loss": 0.405255126953125, + "step": 37330 + }, + { + "epoch": 0.3228247053635507, + "grad_norm": 4.0042027181882585, + "learning_rate": 5.622481506697293e-06, + "loss": 0.24608840942382812, + "step": 37335 + }, + { + "epoch": 0.32286793888509396, + "grad_norm": 0.9604911790760061, + "learning_rate": 5.6223825602590295e-06, + "loss": 0.0486175537109375, + "step": 37340 + }, + { + "epoch": 0.3229111724066372, + "grad_norm": 1.2078034542988865, + "learning_rate": 5.6222836017265676e-06, + "loss": 0.08001708984375, + "step": 37345 + }, + { + "epoch": 0.3229544059281805, + "grad_norm": 0.7227655647261398, + "learning_rate": 5.622184631100366e-06, + "loss": 0.08999786376953126, + "step": 37350 + }, + { + "epoch": 0.32299763944972376, + "grad_norm": 5.261403278646845, + "learning_rate": 5.6220856483808814e-06, + "loss": 0.193951416015625, + "step": 37355 + }, + { + "epoch": 0.323040872971267, + "grad_norm": 21.151286035016376, + "learning_rate": 5.621986653568569e-06, + "loss": 0.19678955078125, + "step": 37360 + }, + { + "epoch": 0.3230841064928103, + "grad_norm": 13.708642913095833, + "learning_rate": 5.621887646663885e-06, + "loss": 0.24400634765625, + "step": 37365 + }, + { + "epoch": 0.3231273400143535, + "grad_norm": 12.23987812529214, + "learning_rate": 5.621788627667288e-06, + "loss": 0.19259185791015626, + "step": 37370 + }, + { + "epoch": 0.3231705735358968, + "grad_norm": 19.230456438127085, + "learning_rate": 5.621689596579233e-06, + "loss": 0.464593505859375, + "step": 37375 + }, + { + "epoch": 0.3232138070574401, + "grad_norm": 4.345070429443906, + "learning_rate": 5.621590553400177e-06, + "loss": 0.4001220703125, + "step": 37380 + }, + { + "epoch": 0.3232570405789833, + "grad_norm": 0.1477072580576217, + "learning_rate": 5.6214914981305775e-06, + "loss": 0.5555755615234375, + "step": 37385 + }, + { + "epoch": 0.3233002741005266, + "grad_norm": 13.167001463722636, + "learning_rate": 5.6213924307708906e-06, + "loss": 0.20269927978515626, + "step": 37390 + }, + { + "epoch": 0.32334350762206987, + "grad_norm": 7.9613664006794576, + "learning_rate": 5.6212933513215735e-06, + "loss": 0.0721811294555664, + "step": 37395 + }, + { + "epoch": 0.3233867411436131, + "grad_norm": 5.431215053439904, + "learning_rate": 5.621194259783083e-06, + "loss": 0.0388336181640625, + "step": 37400 + }, + { + "epoch": 0.3234299746651564, + "grad_norm": 9.826768977107184, + "learning_rate": 5.621095156155877e-06, + "loss": 0.3612518310546875, + "step": 37405 + }, + { + "epoch": 0.3234732081866996, + "grad_norm": 0.5528147629471026, + "learning_rate": 5.620996040440411e-06, + "loss": 0.0414031982421875, + "step": 37410 + }, + { + "epoch": 0.3235164417082429, + "grad_norm": 26.235122518132084, + "learning_rate": 5.620896912637143e-06, + "loss": 0.2106781005859375, + "step": 37415 + }, + { + "epoch": 0.3235596752297862, + "grad_norm": 1.573701769912241, + "learning_rate": 5.6207977727465295e-06, + "loss": 0.0773529052734375, + "step": 37420 + }, + { + "epoch": 0.3236029087513294, + "grad_norm": 1.8972548436641563, + "learning_rate": 5.620698620769029e-06, + "loss": 0.1374176025390625, + "step": 37425 + }, + { + "epoch": 0.3236461422728727, + "grad_norm": 0.857558987106072, + "learning_rate": 5.620599456705097e-06, + "loss": 0.1270050048828125, + "step": 37430 + }, + { + "epoch": 0.323689375794416, + "grad_norm": 9.881337001236504, + "learning_rate": 5.620500280555193e-06, + "loss": 0.40493927001953123, + "step": 37435 + }, + { + "epoch": 0.3237326093159592, + "grad_norm": 66.93108347085128, + "learning_rate": 5.6204010923197725e-06, + "loss": 0.542669677734375, + "step": 37440 + }, + { + "epoch": 0.3237758428375025, + "grad_norm": 39.120345916620586, + "learning_rate": 5.620301891999294e-06, + "loss": 0.150482177734375, + "step": 37445 + }, + { + "epoch": 0.3238190763590458, + "grad_norm": 7.113556954866094, + "learning_rate": 5.620202679594215e-06, + "loss": 0.1087371826171875, + "step": 37450 + }, + { + "epoch": 0.323862309880589, + "grad_norm": 1.3810068543249465, + "learning_rate": 5.620103455104992e-06, + "loss": 0.1147430419921875, + "step": 37455 + }, + { + "epoch": 0.3239055434021323, + "grad_norm": 4.966714034582978, + "learning_rate": 5.620004218532084e-06, + "loss": 0.0931488037109375, + "step": 37460 + }, + { + "epoch": 0.3239487769236755, + "grad_norm": 10.523251527796218, + "learning_rate": 5.619904969875948e-06, + "loss": 0.0693115234375, + "step": 37465 + }, + { + "epoch": 0.3239920104452188, + "grad_norm": 1.8961541134037654, + "learning_rate": 5.619805709137041e-06, + "loss": 0.2456806182861328, + "step": 37470 + }, + { + "epoch": 0.3240352439667621, + "grad_norm": 15.27723564179116, + "learning_rate": 5.619706436315823e-06, + "loss": 0.30880889892578123, + "step": 37475 + }, + { + "epoch": 0.3240784774883053, + "grad_norm": 20.175467389708235, + "learning_rate": 5.6196071514127496e-06, + "loss": 0.09768142700195312, + "step": 37480 + }, + { + "epoch": 0.3241217110098486, + "grad_norm": 0.40135313396765987, + "learning_rate": 5.61950785442828e-06, + "loss": 0.07455978393554688, + "step": 37485 + }, + { + "epoch": 0.3241649445313919, + "grad_norm": 23.09813425248671, + "learning_rate": 5.6194085453628705e-06, + "loss": 0.3204620361328125, + "step": 37490 + }, + { + "epoch": 0.3242081780529351, + "grad_norm": 7.7356360543725415, + "learning_rate": 5.619309224216981e-06, + "loss": 0.5575942993164062, + "step": 37495 + }, + { + "epoch": 0.3242514115744784, + "grad_norm": 1.4090350125349351, + "learning_rate": 5.619209890991068e-06, + "loss": 0.3174957275390625, + "step": 37500 + }, + { + "epoch": 0.3242946450960216, + "grad_norm": 3.433282584553204, + "learning_rate": 5.619110545685592e-06, + "loss": 0.040335464477539065, + "step": 37505 + }, + { + "epoch": 0.3243378786175649, + "grad_norm": 26.050265990620264, + "learning_rate": 5.6190111883010095e-06, + "loss": 0.314654541015625, + "step": 37510 + }, + { + "epoch": 0.3243811121391082, + "grad_norm": 0.7752547261003798, + "learning_rate": 5.618911818837777e-06, + "loss": 0.14626235961914064, + "step": 37515 + }, + { + "epoch": 0.3244243456606514, + "grad_norm": 3.4812562651208236, + "learning_rate": 5.618812437296356e-06, + "loss": 0.1289154052734375, + "step": 37520 + }, + { + "epoch": 0.3244675791821947, + "grad_norm": 3.5551892525468096, + "learning_rate": 5.618713043677203e-06, + "loss": 0.5362045288085937, + "step": 37525 + }, + { + "epoch": 0.324510812703738, + "grad_norm": 0.18964676601151612, + "learning_rate": 5.618613637980777e-06, + "loss": 0.05323028564453125, + "step": 37530 + }, + { + "epoch": 0.3245540462252812, + "grad_norm": 30.286318901828267, + "learning_rate": 5.618514220207536e-06, + "loss": 0.28941650390625, + "step": 37535 + }, + { + "epoch": 0.3245972797468245, + "grad_norm": 7.765644667901948, + "learning_rate": 5.618414790357939e-06, + "loss": 0.1314361572265625, + "step": 37540 + }, + { + "epoch": 0.32464051326836774, + "grad_norm": 3.121678984413102, + "learning_rate": 5.6183153484324445e-06, + "loss": 0.0374969482421875, + "step": 37545 + }, + { + "epoch": 0.324683746789911, + "grad_norm": 14.12023787797309, + "learning_rate": 5.618215894431511e-06, + "loss": 0.0993194580078125, + "step": 37550 + }, + { + "epoch": 0.3247269803114543, + "grad_norm": 11.370432343729693, + "learning_rate": 5.618116428355597e-06, + "loss": 0.12915573120117188, + "step": 37555 + }, + { + "epoch": 0.32477021383299753, + "grad_norm": 8.57467897080112, + "learning_rate": 5.618016950205162e-06, + "loss": 0.142041015625, + "step": 37560 + }, + { + "epoch": 0.3248134473545408, + "grad_norm": 3.3555675761141144, + "learning_rate": 5.6179174599806625e-06, + "loss": 0.15925064086914062, + "step": 37565 + }, + { + "epoch": 0.3248566808760841, + "grad_norm": 5.749319315004916, + "learning_rate": 5.617817957682561e-06, + "loss": 0.272308349609375, + "step": 37570 + }, + { + "epoch": 0.32489991439762733, + "grad_norm": 4.6707393330510305, + "learning_rate": 5.617718443311312e-06, + "loss": 0.3099212646484375, + "step": 37575 + }, + { + "epoch": 0.3249431479191706, + "grad_norm": 38.43450995211305, + "learning_rate": 5.6176189168673785e-06, + "loss": 0.0857940673828125, + "step": 37580 + }, + { + "epoch": 0.32498638144071385, + "grad_norm": 3.4914246155518627, + "learning_rate": 5.617519378351218e-06, + "loss": 0.03741741180419922, + "step": 37585 + }, + { + "epoch": 0.32502961496225713, + "grad_norm": 4.001214302674825, + "learning_rate": 5.617419827763289e-06, + "loss": 0.20240478515625, + "step": 37590 + }, + { + "epoch": 0.3250728484838004, + "grad_norm": 28.552335528541168, + "learning_rate": 5.617320265104051e-06, + "loss": 0.20914764404296876, + "step": 37595 + }, + { + "epoch": 0.32511608200534364, + "grad_norm": 1.406977953018703, + "learning_rate": 5.617220690373963e-06, + "loss": 0.08170166015625, + "step": 37600 + }, + { + "epoch": 0.3251593155268869, + "grad_norm": 43.452336571922324, + "learning_rate": 5.617121103573485e-06, + "loss": 0.366943359375, + "step": 37605 + }, + { + "epoch": 0.3252025490484302, + "grad_norm": 20.352297881781283, + "learning_rate": 5.617021504703075e-06, + "loss": 0.6944969177246094, + "step": 37610 + }, + { + "epoch": 0.32524578256997344, + "grad_norm": 21.198306422700256, + "learning_rate": 5.616921893763194e-06, + "loss": 0.5196090698242187, + "step": 37615 + }, + { + "epoch": 0.3252890160915167, + "grad_norm": 19.362642167065825, + "learning_rate": 5.6168222707543e-06, + "loss": 0.211444091796875, + "step": 37620 + }, + { + "epoch": 0.32533224961306, + "grad_norm": 28.33893728823563, + "learning_rate": 5.616722635676853e-06, + "loss": 0.21119918823242187, + "step": 37625 + }, + { + "epoch": 0.32537548313460324, + "grad_norm": 52.19543659760939, + "learning_rate": 5.616622988531311e-06, + "loss": 0.2222442626953125, + "step": 37630 + }, + { + "epoch": 0.3254187166561465, + "grad_norm": 23.675582384596748, + "learning_rate": 5.616523329318137e-06, + "loss": 0.22119522094726562, + "step": 37635 + }, + { + "epoch": 0.32546195017768975, + "grad_norm": 31.397751186289202, + "learning_rate": 5.616423658037789e-06, + "loss": 0.612689208984375, + "step": 37640 + }, + { + "epoch": 0.32550518369923304, + "grad_norm": 1.6183373729245616, + "learning_rate": 5.616323974690725e-06, + "loss": 0.6133386611938476, + "step": 37645 + }, + { + "epoch": 0.3255484172207763, + "grad_norm": 5.878341080380693, + "learning_rate": 5.616224279277406e-06, + "loss": 0.2246551513671875, + "step": 37650 + }, + { + "epoch": 0.32559165074231955, + "grad_norm": 2.838040564869151, + "learning_rate": 5.616124571798291e-06, + "loss": 0.11103363037109375, + "step": 37655 + }, + { + "epoch": 0.32563488426386283, + "grad_norm": 4.351187985969005, + "learning_rate": 5.616024852253842e-06, + "loss": 0.26490478515625, + "step": 37660 + }, + { + "epoch": 0.3256781177854061, + "grad_norm": 19.33497460097848, + "learning_rate": 5.615925120644517e-06, + "loss": 0.26226806640625, + "step": 37665 + }, + { + "epoch": 0.32572135130694935, + "grad_norm": 38.16436868224441, + "learning_rate": 5.615825376970776e-06, + "loss": 0.45013427734375, + "step": 37670 + }, + { + "epoch": 0.32576458482849263, + "grad_norm": 12.332146661373327, + "learning_rate": 5.615725621233081e-06, + "loss": 0.170440673828125, + "step": 37675 + }, + { + "epoch": 0.32580781835003586, + "grad_norm": 4.863917047655223, + "learning_rate": 5.61562585343189e-06, + "loss": 0.203302001953125, + "step": 37680 + }, + { + "epoch": 0.32585105187157914, + "grad_norm": 26.890131048633126, + "learning_rate": 5.615526073567663e-06, + "loss": 0.19800796508789062, + "step": 37685 + }, + { + "epoch": 0.32589428539312243, + "grad_norm": 6.315145364278203, + "learning_rate": 5.6154262816408605e-06, + "loss": 0.2595458984375, + "step": 37690 + }, + { + "epoch": 0.32593751891466566, + "grad_norm": 3.8736815631072643, + "learning_rate": 5.615326477651945e-06, + "loss": 0.3333984375, + "step": 37695 + }, + { + "epoch": 0.32598075243620894, + "grad_norm": 3.838911432546424, + "learning_rate": 5.615226661601373e-06, + "loss": 0.261883544921875, + "step": 37700 + }, + { + "epoch": 0.3260239859577522, + "grad_norm": 36.65960553667265, + "learning_rate": 5.615126833489607e-06, + "loss": 0.17848663330078124, + "step": 37705 + }, + { + "epoch": 0.32606721947929546, + "grad_norm": 24.844133513143586, + "learning_rate": 5.615026993317108e-06, + "loss": 0.1850067138671875, + "step": 37710 + }, + { + "epoch": 0.32611045300083874, + "grad_norm": 5.8514574141295155, + "learning_rate": 5.614927141084335e-06, + "loss": 0.09763031005859375, + "step": 37715 + }, + { + "epoch": 0.32615368652238197, + "grad_norm": 0.2678208633980899, + "learning_rate": 5.614827276791749e-06, + "loss": 0.03686676025390625, + "step": 37720 + }, + { + "epoch": 0.32619692004392525, + "grad_norm": 30.396335605744646, + "learning_rate": 5.614727400439811e-06, + "loss": 0.2667816162109375, + "step": 37725 + }, + { + "epoch": 0.32624015356546854, + "grad_norm": 5.658361934101215, + "learning_rate": 5.6146275120289815e-06, + "loss": 0.06754150390625, + "step": 37730 + }, + { + "epoch": 0.32628338708701177, + "grad_norm": 2.7211195846211016, + "learning_rate": 5.614527611559721e-06, + "loss": 0.0559906005859375, + "step": 37735 + }, + { + "epoch": 0.32632662060855505, + "grad_norm": 0.8880698299565748, + "learning_rate": 5.6144276990324886e-06, + "loss": 0.23241958618164063, + "step": 37740 + }, + { + "epoch": 0.32636985413009834, + "grad_norm": 4.6027237713850795, + "learning_rate": 5.614327774447748e-06, + "loss": 0.0743072509765625, + "step": 37745 + }, + { + "epoch": 0.32641308765164156, + "grad_norm": 39.06238572753609, + "learning_rate": 5.614227837805959e-06, + "loss": 0.1067474365234375, + "step": 37750 + }, + { + "epoch": 0.32645632117318485, + "grad_norm": 1.1962450469180181, + "learning_rate": 5.614127889107582e-06, + "loss": 0.045688629150390625, + "step": 37755 + }, + { + "epoch": 0.3264995546947281, + "grad_norm": 29.86143926036908, + "learning_rate": 5.614027928353078e-06, + "loss": 0.1770050048828125, + "step": 37760 + }, + { + "epoch": 0.32654278821627136, + "grad_norm": 19.439573604915896, + "learning_rate": 5.613927955542908e-06, + "loss": 0.19559326171875, + "step": 37765 + }, + { + "epoch": 0.32658602173781465, + "grad_norm": 13.268822653725067, + "learning_rate": 5.613827970677533e-06, + "loss": 0.6003631591796875, + "step": 37770 + }, + { + "epoch": 0.3266292552593579, + "grad_norm": 1.4093126325227212, + "learning_rate": 5.613727973757414e-06, + "loss": 0.14278945922851563, + "step": 37775 + }, + { + "epoch": 0.32667248878090116, + "grad_norm": 4.165609214143688, + "learning_rate": 5.6136279647830135e-06, + "loss": 0.1238983154296875, + "step": 37780 + }, + { + "epoch": 0.32671572230244444, + "grad_norm": 1.402347390344944, + "learning_rate": 5.613527943754792e-06, + "loss": 0.27465362548828126, + "step": 37785 + }, + { + "epoch": 0.3267589558239877, + "grad_norm": 2.4970399497320184, + "learning_rate": 5.61342791067321e-06, + "loss": 0.06112823486328125, + "step": 37790 + }, + { + "epoch": 0.32680218934553096, + "grad_norm": 3.700953556414398, + "learning_rate": 5.613327865538729e-06, + "loss": 0.10537261962890625, + "step": 37795 + }, + { + "epoch": 0.32684542286707424, + "grad_norm": 2.186864835927223, + "learning_rate": 5.613227808351812e-06, + "loss": 0.08505706787109375, + "step": 37800 + }, + { + "epoch": 0.32688865638861747, + "grad_norm": 9.028250280636414, + "learning_rate": 5.613127739112918e-06, + "loss": 0.5192619323730469, + "step": 37805 + }, + { + "epoch": 0.32693188991016076, + "grad_norm": 3.7719651433310393, + "learning_rate": 5.61302765782251e-06, + "loss": 0.0544647216796875, + "step": 37810 + }, + { + "epoch": 0.326975123431704, + "grad_norm": 37.49363276951319, + "learning_rate": 5.61292756448105e-06, + "loss": 0.313104248046875, + "step": 37815 + }, + { + "epoch": 0.32701835695324727, + "grad_norm": 8.805176236931267, + "learning_rate": 5.612827459088999e-06, + "loss": 0.05970001220703125, + "step": 37820 + }, + { + "epoch": 0.32706159047479055, + "grad_norm": 14.14319899220817, + "learning_rate": 5.612727341646818e-06, + "loss": 0.24501266479492187, + "step": 37825 + }, + { + "epoch": 0.3271048239963338, + "grad_norm": 5.494363220350285, + "learning_rate": 5.612627212154969e-06, + "loss": 0.12558135986328126, + "step": 37830 + }, + { + "epoch": 0.32714805751787707, + "grad_norm": 5.476868508092531, + "learning_rate": 5.6125270706139144e-06, + "loss": 0.13477783203125, + "step": 37835 + }, + { + "epoch": 0.32719129103942035, + "grad_norm": 33.634925716528656, + "learning_rate": 5.612426917024117e-06, + "loss": 0.14063720703125, + "step": 37840 + }, + { + "epoch": 0.3272345245609636, + "grad_norm": 0.6185941436473318, + "learning_rate": 5.612326751386036e-06, + "loss": 0.126953125, + "step": 37845 + }, + { + "epoch": 0.32727775808250686, + "grad_norm": 36.513327617766656, + "learning_rate": 5.612226573700135e-06, + "loss": 0.5211181640625, + "step": 37850 + }, + { + "epoch": 0.3273209916040501, + "grad_norm": 19.94161460449619, + "learning_rate": 5.612126383966877e-06, + "loss": 0.123974609375, + "step": 37855 + }, + { + "epoch": 0.3273642251255934, + "grad_norm": 22.360511821770466, + "learning_rate": 5.612026182186722e-06, + "loss": 0.1098388671875, + "step": 37860 + }, + { + "epoch": 0.32740745864713666, + "grad_norm": 4.264160815508514, + "learning_rate": 5.611925968360133e-06, + "loss": 0.19434432983398436, + "step": 37865 + }, + { + "epoch": 0.3274506921686799, + "grad_norm": 11.575474421589822, + "learning_rate": 5.611825742487572e-06, + "loss": 0.176068115234375, + "step": 37870 + }, + { + "epoch": 0.3274939256902232, + "grad_norm": 23.38840436237618, + "learning_rate": 5.611725504569503e-06, + "loss": 0.21048393249511718, + "step": 37875 + }, + { + "epoch": 0.32753715921176646, + "grad_norm": 34.163782360300964, + "learning_rate": 5.611625254606385e-06, + "loss": 0.3269775390625, + "step": 37880 + }, + { + "epoch": 0.3275803927333097, + "grad_norm": 1.496926315544175, + "learning_rate": 5.611524992598683e-06, + "loss": 0.07734184265136719, + "step": 37885 + }, + { + "epoch": 0.327623626254853, + "grad_norm": 20.07365701337901, + "learning_rate": 5.611424718546858e-06, + "loss": 0.1415252685546875, + "step": 37890 + }, + { + "epoch": 0.3276668597763962, + "grad_norm": 19.39430425443837, + "learning_rate": 5.611324432451373e-06, + "loss": 0.21524200439453126, + "step": 37895 + }, + { + "epoch": 0.3277100932979395, + "grad_norm": 29.85924039314617, + "learning_rate": 5.611224134312691e-06, + "loss": 0.333477783203125, + "step": 37900 + }, + { + "epoch": 0.32775332681948277, + "grad_norm": 21.886450488951574, + "learning_rate": 5.611123824131274e-06, + "loss": 0.22770538330078124, + "step": 37905 + }, + { + "epoch": 0.327796560341026, + "grad_norm": 3.3322600498355053, + "learning_rate": 5.6110235019075845e-06, + "loss": 0.070025634765625, + "step": 37910 + }, + { + "epoch": 0.3278397938625693, + "grad_norm": 2.3651604865844336, + "learning_rate": 5.6109231676420855e-06, + "loss": 0.02249908447265625, + "step": 37915 + }, + { + "epoch": 0.32788302738411257, + "grad_norm": 18.187956638446337, + "learning_rate": 5.610822821335239e-06, + "loss": 0.056463623046875, + "step": 37920 + }, + { + "epoch": 0.3279262609056558, + "grad_norm": 0.8160300922845001, + "learning_rate": 5.610722462987508e-06, + "loss": 0.155889892578125, + "step": 37925 + }, + { + "epoch": 0.3279694944271991, + "grad_norm": 8.641447157447741, + "learning_rate": 5.610622092599357e-06, + "loss": 0.1099884033203125, + "step": 37930 + }, + { + "epoch": 0.3280127279487423, + "grad_norm": 0.7183657623522692, + "learning_rate": 5.610521710171247e-06, + "loss": 0.0771148681640625, + "step": 37935 + }, + { + "epoch": 0.3280559614702856, + "grad_norm": 6.08353336832198, + "learning_rate": 5.610421315703641e-06, + "loss": 0.07274665832519531, + "step": 37940 + }, + { + "epoch": 0.3280991949918289, + "grad_norm": 5.612844994968761, + "learning_rate": 5.610320909197003e-06, + "loss": 0.112164306640625, + "step": 37945 + }, + { + "epoch": 0.3281424285133721, + "grad_norm": 28.397378052973693, + "learning_rate": 5.610220490651796e-06, + "loss": 0.24570541381835936, + "step": 37950 + }, + { + "epoch": 0.3281856620349154, + "grad_norm": 48.25345673816122, + "learning_rate": 5.610120060068482e-06, + "loss": 0.196282958984375, + "step": 37955 + }, + { + "epoch": 0.3282288955564587, + "grad_norm": 16.219464205121973, + "learning_rate": 5.610019617447526e-06, + "loss": 0.12397184371948242, + "step": 37960 + }, + { + "epoch": 0.3282721290780019, + "grad_norm": 0.9015148744340301, + "learning_rate": 5.609919162789388e-06, + "loss": 0.10326461791992188, + "step": 37965 + }, + { + "epoch": 0.3283153625995452, + "grad_norm": 26.223293120483763, + "learning_rate": 5.609818696094536e-06, + "loss": 0.256951904296875, + "step": 37970 + }, + { + "epoch": 0.3283585961210884, + "grad_norm": 9.447119393024689, + "learning_rate": 5.60971821736343e-06, + "loss": 0.09398193359375, + "step": 37975 + }, + { + "epoch": 0.3284018296426317, + "grad_norm": 6.069111400329955, + "learning_rate": 5.609617726596533e-06, + "loss": 0.5559722900390625, + "step": 37980 + }, + { + "epoch": 0.328445063164175, + "grad_norm": 2.2426253153815825, + "learning_rate": 5.609517223794311e-06, + "loss": 0.0354522705078125, + "step": 37985 + }, + { + "epoch": 0.3284882966857182, + "grad_norm": 3.05565396905186, + "learning_rate": 5.609416708957225e-06, + "loss": 0.04483108520507813, + "step": 37990 + }, + { + "epoch": 0.3285315302072615, + "grad_norm": 20.423779498446645, + "learning_rate": 5.60931618208574e-06, + "loss": 0.147625732421875, + "step": 37995 + }, + { + "epoch": 0.3285747637288048, + "grad_norm": 10.234315931624227, + "learning_rate": 5.6092156431803195e-06, + "loss": 0.1276763916015625, + "step": 38000 + }, + { + "epoch": 0.328617997250348, + "grad_norm": 24.27801906768439, + "learning_rate": 5.609115092241427e-06, + "loss": 0.2525520324707031, + "step": 38005 + }, + { + "epoch": 0.3286612307718913, + "grad_norm": 45.003376801452276, + "learning_rate": 5.609014529269526e-06, + "loss": 0.4483001708984375, + "step": 38010 + }, + { + "epoch": 0.3287044642934346, + "grad_norm": 11.387973301540423, + "learning_rate": 5.608913954265082e-06, + "loss": 0.14645614624023437, + "step": 38015 + }, + { + "epoch": 0.3287476978149778, + "grad_norm": 0.6524681305444504, + "learning_rate": 5.608813367228556e-06, + "loss": 0.29562911987304685, + "step": 38020 + }, + { + "epoch": 0.3287909313365211, + "grad_norm": 8.417172085143244, + "learning_rate": 5.6087127681604136e-06, + "loss": 0.0958709716796875, + "step": 38025 + }, + { + "epoch": 0.3288341648580643, + "grad_norm": 1.8107558634493008, + "learning_rate": 5.6086121570611184e-06, + "loss": 0.1254364013671875, + "step": 38030 + }, + { + "epoch": 0.3288773983796076, + "grad_norm": 14.995190156747963, + "learning_rate": 5.608511533931134e-06, + "loss": 0.07496871948242187, + "step": 38035 + }, + { + "epoch": 0.3289206319011509, + "grad_norm": 0.7596559981456072, + "learning_rate": 5.608410898770925e-06, + "loss": 0.2003509521484375, + "step": 38040 + }, + { + "epoch": 0.3289638654226941, + "grad_norm": 4.207865708941754, + "learning_rate": 5.608310251580956e-06, + "loss": 0.4789581298828125, + "step": 38045 + }, + { + "epoch": 0.3290070989442374, + "grad_norm": 0.7120858398534542, + "learning_rate": 5.608209592361691e-06, + "loss": 0.1657470703125, + "step": 38050 + }, + { + "epoch": 0.3290503324657807, + "grad_norm": 10.562959691982181, + "learning_rate": 5.608108921113592e-06, + "loss": 0.20445947647094725, + "step": 38055 + }, + { + "epoch": 0.3290935659873239, + "grad_norm": 4.093798930421912, + "learning_rate": 5.6080082378371265e-06, + "loss": 0.11235809326171875, + "step": 38060 + }, + { + "epoch": 0.3291367995088672, + "grad_norm": 6.521531671748213, + "learning_rate": 5.607907542532757e-06, + "loss": 0.11488304138183594, + "step": 38065 + }, + { + "epoch": 0.32918003303041043, + "grad_norm": 1.656692885324622, + "learning_rate": 5.607806835200949e-06, + "loss": 0.089801025390625, + "step": 38070 + }, + { + "epoch": 0.3292232665519537, + "grad_norm": 5.291803191241972, + "learning_rate": 5.607706115842166e-06, + "loss": 0.2637847900390625, + "step": 38075 + }, + { + "epoch": 0.329266500073497, + "grad_norm": 2.6033358469496326, + "learning_rate": 5.607605384456872e-06, + "loss": 0.051100540161132815, + "step": 38080 + }, + { + "epoch": 0.32930973359504023, + "grad_norm": 26.116429157778963, + "learning_rate": 5.607504641045533e-06, + "loss": 0.3741546630859375, + "step": 38085 + }, + { + "epoch": 0.3293529671165835, + "grad_norm": 2.7363023352477827, + "learning_rate": 5.607403885608614e-06, + "loss": 0.049716758728027347, + "step": 38090 + }, + { + "epoch": 0.3293962006381268, + "grad_norm": 2.1405011845698554, + "learning_rate": 5.607303118146577e-06, + "loss": 0.048204803466796876, + "step": 38095 + }, + { + "epoch": 0.32943943415967003, + "grad_norm": 23.193488407486985, + "learning_rate": 5.6072023386598896e-06, + "loss": 0.15914154052734375, + "step": 38100 + }, + { + "epoch": 0.3294826676812133, + "grad_norm": 1.256997146897094, + "learning_rate": 5.607101547149014e-06, + "loss": 0.05874710083007813, + "step": 38105 + }, + { + "epoch": 0.32952590120275654, + "grad_norm": 16.997469995446544, + "learning_rate": 5.607000743614418e-06, + "loss": 0.490826416015625, + "step": 38110 + }, + { + "epoch": 0.32956913472429983, + "grad_norm": 2.536542052192595, + "learning_rate": 5.606899928056564e-06, + "loss": 0.08173828125, + "step": 38115 + }, + { + "epoch": 0.3296123682458431, + "grad_norm": 0.5422481475251945, + "learning_rate": 5.606799100475918e-06, + "loss": 0.1930908203125, + "step": 38120 + }, + { + "epoch": 0.32965560176738634, + "grad_norm": 0.7975993844185755, + "learning_rate": 5.6066982608729455e-06, + "loss": 0.06838760375976563, + "step": 38125 + }, + { + "epoch": 0.3296988352889296, + "grad_norm": 16.92134761671956, + "learning_rate": 5.60659740924811e-06, + "loss": 0.09665145874023437, + "step": 38130 + }, + { + "epoch": 0.3297420688104729, + "grad_norm": 11.113142685526107, + "learning_rate": 5.606496545601878e-06, + "loss": 0.163067626953125, + "step": 38135 + }, + { + "epoch": 0.32978530233201614, + "grad_norm": 1.7030894177719074, + "learning_rate": 5.606395669934715e-06, + "loss": 0.0126129150390625, + "step": 38140 + }, + { + "epoch": 0.3298285358535594, + "grad_norm": 75.86787920530816, + "learning_rate": 5.6062947822470845e-06, + "loss": 0.36470794677734375, + "step": 38145 + }, + { + "epoch": 0.32987176937510265, + "grad_norm": 6.40181828407473, + "learning_rate": 5.6061938825394535e-06, + "loss": 0.2074127197265625, + "step": 38150 + }, + { + "epoch": 0.32991500289664594, + "grad_norm": 6.997173219905954, + "learning_rate": 5.606092970812286e-06, + "loss": 0.3090728759765625, + "step": 38155 + }, + { + "epoch": 0.3299582364181892, + "grad_norm": 3.3491408705250625, + "learning_rate": 5.605992047066047e-06, + "loss": 0.10818061828613282, + "step": 38160 + }, + { + "epoch": 0.33000146993973245, + "grad_norm": 1.522536794760129, + "learning_rate": 5.6058911113012045e-06, + "loss": 0.12070770263671875, + "step": 38165 + }, + { + "epoch": 0.33004470346127573, + "grad_norm": 5.003320087297679, + "learning_rate": 5.605790163518222e-06, + "loss": 0.16214828491210936, + "step": 38170 + }, + { + "epoch": 0.330087936982819, + "grad_norm": 5.160809786873963, + "learning_rate": 5.605689203717566e-06, + "loss": 0.061932373046875, + "step": 38175 + }, + { + "epoch": 0.33013117050436225, + "grad_norm": 1.246503031424561, + "learning_rate": 5.605588231899701e-06, + "loss": 0.0829803466796875, + "step": 38180 + }, + { + "epoch": 0.33017440402590553, + "grad_norm": 3.4472656014438603, + "learning_rate": 5.605487248065094e-06, + "loss": 0.12843704223632812, + "step": 38185 + }, + { + "epoch": 0.3302176375474488, + "grad_norm": 25.906123396372646, + "learning_rate": 5.605386252214208e-06, + "loss": 0.24125289916992188, + "step": 38190 + }, + { + "epoch": 0.33026087106899205, + "grad_norm": 1.5703142371014143, + "learning_rate": 5.605285244347514e-06, + "loss": 0.049755287170410153, + "step": 38195 + }, + { + "epoch": 0.33030410459053533, + "grad_norm": 9.73540031207226, + "learning_rate": 5.605184224465472e-06, + "loss": 0.1035430908203125, + "step": 38200 + }, + { + "epoch": 0.33034733811207856, + "grad_norm": 0.625786520275332, + "learning_rate": 5.605083192568552e-06, + "loss": 0.09929065704345703, + "step": 38205 + }, + { + "epoch": 0.33039057163362184, + "grad_norm": 26.392425284175456, + "learning_rate": 5.6049821486572185e-06, + "loss": 0.12117166519165039, + "step": 38210 + }, + { + "epoch": 0.33043380515516513, + "grad_norm": 17.223566174645413, + "learning_rate": 5.604881092731936e-06, + "loss": 0.36929664611816404, + "step": 38215 + }, + { + "epoch": 0.33047703867670836, + "grad_norm": 2.5142073934762217, + "learning_rate": 5.604780024793174e-06, + "loss": 0.08039093017578125, + "step": 38220 + }, + { + "epoch": 0.33052027219825164, + "grad_norm": 4.363548245664671, + "learning_rate": 5.604678944841396e-06, + "loss": 0.11904182434082031, + "step": 38225 + }, + { + "epoch": 0.3305635057197949, + "grad_norm": 46.54928248002371, + "learning_rate": 5.604577852877068e-06, + "loss": 0.046643829345703124, + "step": 38230 + }, + { + "epoch": 0.33060673924133815, + "grad_norm": 1.094922074112609, + "learning_rate": 5.604476748900659e-06, + "loss": 0.1191925048828125, + "step": 38235 + }, + { + "epoch": 0.33064997276288144, + "grad_norm": 28.62811240650272, + "learning_rate": 5.60437563291263e-06, + "loss": 0.2037200927734375, + "step": 38240 + }, + { + "epoch": 0.33069320628442467, + "grad_norm": 8.389190865720073, + "learning_rate": 5.6042745049134544e-06, + "loss": 0.34393796920776365, + "step": 38245 + }, + { + "epoch": 0.33073643980596795, + "grad_norm": 14.025543145034773, + "learning_rate": 5.604173364903593e-06, + "loss": 0.1658926010131836, + "step": 38250 + }, + { + "epoch": 0.33077967332751124, + "grad_norm": 19.80617960484152, + "learning_rate": 5.604072212883514e-06, + "loss": 0.13936538696289064, + "step": 38255 + }, + { + "epoch": 0.33082290684905447, + "grad_norm": 36.89738540354014, + "learning_rate": 5.603971048853685e-06, + "loss": 0.35893096923828127, + "step": 38260 + }, + { + "epoch": 0.33086614037059775, + "grad_norm": 13.97352254957545, + "learning_rate": 5.603869872814571e-06, + "loss": 0.23505706787109376, + "step": 38265 + }, + { + "epoch": 0.33090937389214103, + "grad_norm": 8.37470258082947, + "learning_rate": 5.603768684766639e-06, + "loss": 0.20770263671875, + "step": 38270 + }, + { + "epoch": 0.33095260741368426, + "grad_norm": 2.8005936676279437, + "learning_rate": 5.603667484710357e-06, + "loss": 0.1596038818359375, + "step": 38275 + }, + { + "epoch": 0.33099584093522755, + "grad_norm": 1.3269183817638044, + "learning_rate": 5.60356627264619e-06, + "loss": 0.1489105224609375, + "step": 38280 + }, + { + "epoch": 0.3310390744567708, + "grad_norm": 2.593896048889864, + "learning_rate": 5.603465048574605e-06, + "loss": 0.161846923828125, + "step": 38285 + }, + { + "epoch": 0.33108230797831406, + "grad_norm": 3.7272860025696883, + "learning_rate": 5.60336381249607e-06, + "loss": 0.171990966796875, + "step": 38290 + }, + { + "epoch": 0.33112554149985735, + "grad_norm": 5.568867709252485, + "learning_rate": 5.60326256441105e-06, + "loss": 0.2724884033203125, + "step": 38295 + }, + { + "epoch": 0.3311687750214006, + "grad_norm": 18.13805735832987, + "learning_rate": 5.603161304320014e-06, + "loss": 0.25483551025390627, + "step": 38300 + }, + { + "epoch": 0.33121200854294386, + "grad_norm": 1.9055956903260178, + "learning_rate": 5.603060032223428e-06, + "loss": 0.0940460205078125, + "step": 38305 + }, + { + "epoch": 0.33125524206448714, + "grad_norm": 31.166646441522307, + "learning_rate": 5.602958748121759e-06, + "loss": 0.23806610107421874, + "step": 38310 + }, + { + "epoch": 0.33129847558603037, + "grad_norm": 30.464050498470417, + "learning_rate": 5.602857452015474e-06, + "loss": 0.3272418975830078, + "step": 38315 + }, + { + "epoch": 0.33134170910757366, + "grad_norm": 10.414099057090727, + "learning_rate": 5.602756143905041e-06, + "loss": 0.24360599517822265, + "step": 38320 + }, + { + "epoch": 0.3313849426291169, + "grad_norm": 79.68942545691324, + "learning_rate": 5.602654823790926e-06, + "loss": 0.1801727294921875, + "step": 38325 + }, + { + "epoch": 0.33142817615066017, + "grad_norm": 7.506952076209827, + "learning_rate": 5.602553491673598e-06, + "loss": 0.0925201416015625, + "step": 38330 + }, + { + "epoch": 0.33147140967220345, + "grad_norm": 3.6642993764891973, + "learning_rate": 5.602452147553522e-06, + "loss": 0.1900970458984375, + "step": 38335 + }, + { + "epoch": 0.3315146431937467, + "grad_norm": 24.03140274983541, + "learning_rate": 5.602350791431167e-06, + "loss": 0.16298370361328124, + "step": 38340 + }, + { + "epoch": 0.33155787671528997, + "grad_norm": 25.178937115462535, + "learning_rate": 5.602249423307e-06, + "loss": 0.38961639404296877, + "step": 38345 + }, + { + "epoch": 0.33160111023683325, + "grad_norm": 2.525046895622453, + "learning_rate": 5.6021480431814895e-06, + "loss": 0.07570953369140625, + "step": 38350 + }, + { + "epoch": 0.3316443437583765, + "grad_norm": 7.44534203406766, + "learning_rate": 5.602046651055102e-06, + "loss": 0.33398590087890623, + "step": 38355 + }, + { + "epoch": 0.33168757727991977, + "grad_norm": 7.682470548330904, + "learning_rate": 5.601945246928304e-06, + "loss": 0.0465423583984375, + "step": 38360 + }, + { + "epoch": 0.33173081080146305, + "grad_norm": 4.616622657144741, + "learning_rate": 5.601843830801565e-06, + "loss": 0.160400390625, + "step": 38365 + }, + { + "epoch": 0.3317740443230063, + "grad_norm": 4.648723153687763, + "learning_rate": 5.601742402675353e-06, + "loss": 0.07884063720703124, + "step": 38370 + }, + { + "epoch": 0.33181727784454956, + "grad_norm": 1.0431663100125632, + "learning_rate": 5.601640962550134e-06, + "loss": 0.19973907470703126, + "step": 38375 + }, + { + "epoch": 0.3318605113660928, + "grad_norm": 26.888230956833766, + "learning_rate": 5.601539510426377e-06, + "loss": 0.1183746337890625, + "step": 38380 + }, + { + "epoch": 0.3319037448876361, + "grad_norm": 34.1688039527553, + "learning_rate": 5.6014380463045494e-06, + "loss": 0.4886932373046875, + "step": 38385 + }, + { + "epoch": 0.33194697840917936, + "grad_norm": 58.7182192727584, + "learning_rate": 5.60133657018512e-06, + "loss": 0.18527641296386718, + "step": 38390 + }, + { + "epoch": 0.3319902119307226, + "grad_norm": 1.479367688088509, + "learning_rate": 5.601235082068556e-06, + "loss": 0.1419036865234375, + "step": 38395 + }, + { + "epoch": 0.3320334454522659, + "grad_norm": 45.254403192823226, + "learning_rate": 5.6011335819553255e-06, + "loss": 0.2336090087890625, + "step": 38400 + }, + { + "epoch": 0.33207667897380916, + "grad_norm": 1.3025691720682877, + "learning_rate": 5.601032069845897e-06, + "loss": 0.52103271484375, + "step": 38405 + }, + { + "epoch": 0.3321199124953524, + "grad_norm": 17.312187440686447, + "learning_rate": 5.600930545740738e-06, + "loss": 0.10012760162353515, + "step": 38410 + }, + { + "epoch": 0.33216314601689567, + "grad_norm": 15.443560419168158, + "learning_rate": 5.600829009640318e-06, + "loss": 0.11622848510742187, + "step": 38415 + }, + { + "epoch": 0.3322063795384389, + "grad_norm": 44.41191616024868, + "learning_rate": 5.600727461545104e-06, + "loss": 0.3395538330078125, + "step": 38420 + }, + { + "epoch": 0.3322496130599822, + "grad_norm": 9.538598434324113, + "learning_rate": 5.600625901455565e-06, + "loss": 0.06323699951171875, + "step": 38425 + }, + { + "epoch": 0.33229284658152547, + "grad_norm": 0.14327829847940818, + "learning_rate": 5.600524329372168e-06, + "loss": 0.19117889404296876, + "step": 38430 + }, + { + "epoch": 0.3323360801030687, + "grad_norm": 30.624837327460817, + "learning_rate": 5.600422745295384e-06, + "loss": 0.21286487579345703, + "step": 38435 + }, + { + "epoch": 0.332379313624612, + "grad_norm": 21.296288933312322, + "learning_rate": 5.600321149225678e-06, + "loss": 0.32816925048828127, + "step": 38440 + }, + { + "epoch": 0.33242254714615527, + "grad_norm": 0.6767825910948475, + "learning_rate": 5.600219541163522e-06, + "loss": 0.28645763397216795, + "step": 38445 + }, + { + "epoch": 0.3324657806676985, + "grad_norm": 2.5578633104471065, + "learning_rate": 5.600117921109384e-06, + "loss": 0.06978836059570312, + "step": 38450 + }, + { + "epoch": 0.3325090141892418, + "grad_norm": 1.117448186074832, + "learning_rate": 5.600016289063731e-06, + "loss": 0.18756103515625, + "step": 38455 + }, + { + "epoch": 0.332552247710785, + "grad_norm": 21.317374091307922, + "learning_rate": 5.599914645027032e-06, + "loss": 0.1900959014892578, + "step": 38460 + }, + { + "epoch": 0.3325954812323283, + "grad_norm": 39.548322361997016, + "learning_rate": 5.5998129889997575e-06, + "loss": 0.312261962890625, + "step": 38465 + }, + { + "epoch": 0.3326387147538716, + "grad_norm": 18.687501541412956, + "learning_rate": 5.599711320982375e-06, + "loss": 0.06821212768554688, + "step": 38470 + }, + { + "epoch": 0.3326819482754148, + "grad_norm": 32.2413825350389, + "learning_rate": 5.599609640975353e-06, + "loss": 0.3309906005859375, + "step": 38475 + }, + { + "epoch": 0.3327251817969581, + "grad_norm": 9.182485462114887, + "learning_rate": 5.5995079489791616e-06, + "loss": 0.06485481262207031, + "step": 38480 + }, + { + "epoch": 0.3327684153185014, + "grad_norm": 4.052374553123705, + "learning_rate": 5.59940624499427e-06, + "loss": 0.07924346923828125, + "step": 38485 + }, + { + "epoch": 0.3328116488400446, + "grad_norm": 21.1815399552894, + "learning_rate": 5.599304529021145e-06, + "loss": 0.18089599609375, + "step": 38490 + }, + { + "epoch": 0.3328548823615879, + "grad_norm": 14.83915674790512, + "learning_rate": 5.599202801060257e-06, + "loss": 0.2208892822265625, + "step": 38495 + }, + { + "epoch": 0.3328981158831311, + "grad_norm": 9.444122274171937, + "learning_rate": 5.5991010611120775e-06, + "loss": 0.21294097900390624, + "step": 38500 + }, + { + "epoch": 0.3329413494046744, + "grad_norm": 6.70529723954426, + "learning_rate": 5.598999309177072e-06, + "loss": 0.21156463623046876, + "step": 38505 + }, + { + "epoch": 0.3329845829262177, + "grad_norm": 7.985832266935573, + "learning_rate": 5.598897545255711e-06, + "loss": 0.39378662109375, + "step": 38510 + }, + { + "epoch": 0.3330278164477609, + "grad_norm": 11.691982979917745, + "learning_rate": 5.598795769348465e-06, + "loss": 0.35438232421875, + "step": 38515 + }, + { + "epoch": 0.3330710499693042, + "grad_norm": 1.326277017181044, + "learning_rate": 5.598693981455802e-06, + "loss": 0.13685531616210939, + "step": 38520 + }, + { + "epoch": 0.3331142834908475, + "grad_norm": 7.278539385474268, + "learning_rate": 5.598592181578193e-06, + "loss": 0.11130867004394532, + "step": 38525 + }, + { + "epoch": 0.3331575170123907, + "grad_norm": 6.632511063752761, + "learning_rate": 5.598490369716105e-06, + "loss": 0.1572662353515625, + "step": 38530 + }, + { + "epoch": 0.333200750533934, + "grad_norm": 21.135002105560684, + "learning_rate": 5.59838854587001e-06, + "loss": 0.0897003173828125, + "step": 38535 + }, + { + "epoch": 0.3332439840554773, + "grad_norm": 5.947344363007835, + "learning_rate": 5.598286710040376e-06, + "loss": 0.061602783203125, + "step": 38540 + }, + { + "epoch": 0.3332872175770205, + "grad_norm": 13.914978168246035, + "learning_rate": 5.598184862227674e-06, + "loss": 0.13011932373046875, + "step": 38545 + }, + { + "epoch": 0.3333304510985638, + "grad_norm": 6.904078823631049, + "learning_rate": 5.598083002432373e-06, + "loss": 0.18043212890625, + "step": 38550 + }, + { + "epoch": 0.333373684620107, + "grad_norm": 1.3127014302054434, + "learning_rate": 5.597981130654942e-06, + "loss": 0.0871246337890625, + "step": 38555 + }, + { + "epoch": 0.3334169181416503, + "grad_norm": 11.138779008852548, + "learning_rate": 5.597879246895852e-06, + "loss": 0.07795238494873047, + "step": 38560 + }, + { + "epoch": 0.3334601516631936, + "grad_norm": 60.6065135647808, + "learning_rate": 5.5977773511555725e-06, + "loss": 0.37574462890625, + "step": 38565 + }, + { + "epoch": 0.3335033851847368, + "grad_norm": 2.6426288401373377, + "learning_rate": 5.597675443434573e-06, + "loss": 0.04073944091796875, + "step": 38570 + }, + { + "epoch": 0.3335466187062801, + "grad_norm": 5.6372871503975, + "learning_rate": 5.597573523733325e-06, + "loss": 0.11834516525268554, + "step": 38575 + }, + { + "epoch": 0.3335898522278234, + "grad_norm": 4.855118953128878, + "learning_rate": 5.597471592052296e-06, + "loss": 0.21749191284179686, + "step": 38580 + }, + { + "epoch": 0.3336330857493666, + "grad_norm": 3.039122944672827, + "learning_rate": 5.5973696483919586e-06, + "loss": 0.4734977722167969, + "step": 38585 + }, + { + "epoch": 0.3336763192709099, + "grad_norm": 39.37454654863667, + "learning_rate": 5.597267692752781e-06, + "loss": 0.2378387451171875, + "step": 38590 + }, + { + "epoch": 0.33371955279245313, + "grad_norm": 4.855644983972508, + "learning_rate": 5.597165725135235e-06, + "loss": 0.2326873779296875, + "step": 38595 + }, + { + "epoch": 0.3337627863139964, + "grad_norm": 4.353323431549696, + "learning_rate": 5.59706374553979e-06, + "loss": 0.224969482421875, + "step": 38600 + }, + { + "epoch": 0.3338060198355397, + "grad_norm": 0.25626554992590783, + "learning_rate": 5.5969617539669165e-06, + "loss": 0.12534637451171876, + "step": 38605 + }, + { + "epoch": 0.33384925335708293, + "grad_norm": 6.399929775670022, + "learning_rate": 5.596859750417085e-06, + "loss": 0.1400846481323242, + "step": 38610 + }, + { + "epoch": 0.3338924868786262, + "grad_norm": 3.2616119928783474, + "learning_rate": 5.596757734890766e-06, + "loss": 0.132757568359375, + "step": 38615 + }, + { + "epoch": 0.3339357204001695, + "grad_norm": 36.66735031534822, + "learning_rate": 5.596655707388429e-06, + "loss": 0.2263408660888672, + "step": 38620 + }, + { + "epoch": 0.33397895392171273, + "grad_norm": 51.420509726972384, + "learning_rate": 5.596553667910546e-06, + "loss": 0.1976184844970703, + "step": 38625 + }, + { + "epoch": 0.334022187443256, + "grad_norm": 6.460959468883038, + "learning_rate": 5.596451616457586e-06, + "loss": 0.09296875, + "step": 38630 + }, + { + "epoch": 0.33406542096479924, + "grad_norm": 1.74527907423086, + "learning_rate": 5.596349553030022e-06, + "loss": 0.3911773681640625, + "step": 38635 + }, + { + "epoch": 0.3341086544863425, + "grad_norm": 4.50325140513256, + "learning_rate": 5.5962474776283215e-06, + "loss": 0.1156646728515625, + "step": 38640 + }, + { + "epoch": 0.3341518880078858, + "grad_norm": 4.596782148635876, + "learning_rate": 5.596145390252959e-06, + "loss": 0.08562164306640625, + "step": 38645 + }, + { + "epoch": 0.33419512152942904, + "grad_norm": 8.293972170080545, + "learning_rate": 5.596043290904402e-06, + "loss": 0.22874908447265624, + "step": 38650 + }, + { + "epoch": 0.3342383550509723, + "grad_norm": 3.1884206215325293, + "learning_rate": 5.5959411795831225e-06, + "loss": 0.3980224609375, + "step": 38655 + }, + { + "epoch": 0.3342815885725156, + "grad_norm": 18.846032855443116, + "learning_rate": 5.595839056289592e-06, + "loss": 0.13489303588867188, + "step": 38660 + }, + { + "epoch": 0.33432482209405884, + "grad_norm": 26.07378378194812, + "learning_rate": 5.595736921024282e-06, + "loss": 0.1745147705078125, + "step": 38665 + }, + { + "epoch": 0.3343680556156021, + "grad_norm": 22.82393671558354, + "learning_rate": 5.595634773787662e-06, + "loss": 0.22628555297851563, + "step": 38670 + }, + { + "epoch": 0.33441128913714535, + "grad_norm": 0.09241460163355086, + "learning_rate": 5.595532614580204e-06, + "loss": 0.19213504791259767, + "step": 38675 + }, + { + "epoch": 0.33445452265868864, + "grad_norm": 16.155051949212748, + "learning_rate": 5.595430443402378e-06, + "loss": 0.440069580078125, + "step": 38680 + }, + { + "epoch": 0.3344977561802319, + "grad_norm": 6.926396834303928, + "learning_rate": 5.595328260254658e-06, + "loss": 0.2227691650390625, + "step": 38685 + }, + { + "epoch": 0.33454098970177515, + "grad_norm": 9.509317590316977, + "learning_rate": 5.595226065137512e-06, + "loss": 0.2951568603515625, + "step": 38690 + }, + { + "epoch": 0.33458422322331843, + "grad_norm": 25.3451556757118, + "learning_rate": 5.595123858051413e-06, + "loss": 0.3104156494140625, + "step": 38695 + }, + { + "epoch": 0.3346274567448617, + "grad_norm": 1.806526631526786, + "learning_rate": 5.595021638996832e-06, + "loss": 0.3644828796386719, + "step": 38700 + }, + { + "epoch": 0.33467069026640495, + "grad_norm": 0.8854379365882084, + "learning_rate": 5.594919407974241e-06, + "loss": 0.30990028381347656, + "step": 38705 + }, + { + "epoch": 0.33471392378794823, + "grad_norm": 1.0827301729945111, + "learning_rate": 5.59481716498411e-06, + "loss": 0.0995330810546875, + "step": 38710 + }, + { + "epoch": 0.33475715730949146, + "grad_norm": 8.084750837103531, + "learning_rate": 5.594714910026913e-06, + "loss": 0.11448516845703124, + "step": 38715 + }, + { + "epoch": 0.33480039083103474, + "grad_norm": 34.32187732313767, + "learning_rate": 5.594612643103119e-06, + "loss": 0.44413604736328127, + "step": 38720 + }, + { + "epoch": 0.33484362435257803, + "grad_norm": 3.7924481300318633, + "learning_rate": 5.5945103642132e-06, + "loss": 0.0792236328125, + "step": 38725 + }, + { + "epoch": 0.33488685787412126, + "grad_norm": 8.245293038289198, + "learning_rate": 5.59440807335763e-06, + "loss": 0.15243377685546874, + "step": 38730 + }, + { + "epoch": 0.33493009139566454, + "grad_norm": 40.37470727515944, + "learning_rate": 5.594305770536879e-06, + "loss": 0.4286468505859375, + "step": 38735 + }, + { + "epoch": 0.3349733249172078, + "grad_norm": 7.9790006011592975, + "learning_rate": 5.594203455751418e-06, + "loss": 0.15291938781738282, + "step": 38740 + }, + { + "epoch": 0.33501655843875106, + "grad_norm": 3.5956125649668, + "learning_rate": 5.594101129001721e-06, + "loss": 0.022078227996826173, + "step": 38745 + }, + { + "epoch": 0.33505979196029434, + "grad_norm": 0.8318893348585072, + "learning_rate": 5.593998790288258e-06, + "loss": 0.29990234375, + "step": 38750 + }, + { + "epoch": 0.3351030254818376, + "grad_norm": 6.01650791575793, + "learning_rate": 5.593896439611502e-06, + "loss": 0.2156646728515625, + "step": 38755 + }, + { + "epoch": 0.33514625900338085, + "grad_norm": 5.9598037405913065, + "learning_rate": 5.593794076971925e-06, + "loss": 0.0686981201171875, + "step": 38760 + }, + { + "epoch": 0.33518949252492414, + "grad_norm": 14.530739883474274, + "learning_rate": 5.593691702369999e-06, + "loss": 0.31765403747558596, + "step": 38765 + }, + { + "epoch": 0.33523272604646737, + "grad_norm": 1.970991155481272, + "learning_rate": 5.593589315806196e-06, + "loss": 0.13528709411621093, + "step": 38770 + }, + { + "epoch": 0.33527595956801065, + "grad_norm": 1.0431275541636895, + "learning_rate": 5.5934869172809885e-06, + "loss": 0.1916015625, + "step": 38775 + }, + { + "epoch": 0.33531919308955394, + "grad_norm": 6.569647918036614, + "learning_rate": 5.593384506794847e-06, + "loss": 0.05855255126953125, + "step": 38780 + }, + { + "epoch": 0.33536242661109716, + "grad_norm": 0.12752058493937457, + "learning_rate": 5.5932820843482475e-06, + "loss": 0.04552650451660156, + "step": 38785 + }, + { + "epoch": 0.33540566013264045, + "grad_norm": 12.384255649591974, + "learning_rate": 5.5931796499416595e-06, + "loss": 0.16180496215820311, + "step": 38790 + }, + { + "epoch": 0.33544889365418373, + "grad_norm": 74.87765360932364, + "learning_rate": 5.593077203575555e-06, + "loss": 0.47051467895507815, + "step": 38795 + }, + { + "epoch": 0.33549212717572696, + "grad_norm": 27.68994342630397, + "learning_rate": 5.592974745250409e-06, + "loss": 0.08906402587890624, + "step": 38800 + }, + { + "epoch": 0.33553536069727025, + "grad_norm": 8.128778569140788, + "learning_rate": 5.592872274966691e-06, + "loss": 0.13731231689453124, + "step": 38805 + }, + { + "epoch": 0.3355785942188135, + "grad_norm": 4.92044245240729, + "learning_rate": 5.592769792724877e-06, + "loss": 0.28147430419921876, + "step": 38810 + }, + { + "epoch": 0.33562182774035676, + "grad_norm": 24.20511167385198, + "learning_rate": 5.592667298525436e-06, + "loss": 0.19701995849609374, + "step": 38815 + }, + { + "epoch": 0.33566506126190004, + "grad_norm": 9.265824714743603, + "learning_rate": 5.592564792368844e-06, + "loss": 0.17382965087890626, + "step": 38820 + }, + { + "epoch": 0.3357082947834433, + "grad_norm": 6.313081863958737, + "learning_rate": 5.592462274255572e-06, + "loss": 0.27777786254882814, + "step": 38825 + }, + { + "epoch": 0.33575152830498656, + "grad_norm": 7.247633242658407, + "learning_rate": 5.592359744186092e-06, + "loss": 0.3016014099121094, + "step": 38830 + }, + { + "epoch": 0.33579476182652984, + "grad_norm": 12.02489996546834, + "learning_rate": 5.592257202160879e-06, + "loss": 0.18936920166015625, + "step": 38835 + }, + { + "epoch": 0.33583799534807307, + "grad_norm": 0.3968824262225669, + "learning_rate": 5.592154648180405e-06, + "loss": 0.11562271118164062, + "step": 38840 + }, + { + "epoch": 0.33588122886961636, + "grad_norm": 0.5940252663148824, + "learning_rate": 5.592052082245142e-06, + "loss": 0.09776153564453124, + "step": 38845 + }, + { + "epoch": 0.3359244623911596, + "grad_norm": 6.026737746672777, + "learning_rate": 5.591949504355564e-06, + "loss": 0.3493377685546875, + "step": 38850 + }, + { + "epoch": 0.33596769591270287, + "grad_norm": 17.88978646136788, + "learning_rate": 5.591846914512144e-06, + "loss": 0.06319656372070312, + "step": 38855 + }, + { + "epoch": 0.33601092943424615, + "grad_norm": 0.5295035889641082, + "learning_rate": 5.591744312715355e-06, + "loss": 0.109014892578125, + "step": 38860 + }, + { + "epoch": 0.3360541629557894, + "grad_norm": 19.187076439643278, + "learning_rate": 5.591641698965671e-06, + "loss": 0.1744609832763672, + "step": 38865 + }, + { + "epoch": 0.33609739647733267, + "grad_norm": 2.8704424565664035, + "learning_rate": 5.591539073263563e-06, + "loss": 0.2770660400390625, + "step": 38870 + }, + { + "epoch": 0.33614062999887595, + "grad_norm": 6.19580082787114, + "learning_rate": 5.591436435609507e-06, + "loss": 0.05477752685546875, + "step": 38875 + }, + { + "epoch": 0.3361838635204192, + "grad_norm": 0.7262594193223132, + "learning_rate": 5.591333786003975e-06, + "loss": 0.15220794677734376, + "step": 38880 + }, + { + "epoch": 0.33622709704196246, + "grad_norm": 18.587807634840935, + "learning_rate": 5.59123112444744e-06, + "loss": 0.4241767883300781, + "step": 38885 + }, + { + "epoch": 0.3362703305635057, + "grad_norm": 19.247491636144698, + "learning_rate": 5.591128450940376e-06, + "loss": 0.07494964599609374, + "step": 38890 + }, + { + "epoch": 0.336313564085049, + "grad_norm": 4.451420761449283, + "learning_rate": 5.591025765483257e-06, + "loss": 0.2704833984375, + "step": 38895 + }, + { + "epoch": 0.33635679760659226, + "grad_norm": 26.896943971567048, + "learning_rate": 5.590923068076556e-06, + "loss": 0.2140625, + "step": 38900 + }, + { + "epoch": 0.3364000311281355, + "grad_norm": 40.924111744283245, + "learning_rate": 5.5908203587207465e-06, + "loss": 0.14511566162109374, + "step": 38905 + }, + { + "epoch": 0.3364432646496788, + "grad_norm": 0.12723821715826636, + "learning_rate": 5.590717637416302e-06, + "loss": 0.0375518798828125, + "step": 38910 + }, + { + "epoch": 0.33648649817122206, + "grad_norm": 16.917894626882514, + "learning_rate": 5.590614904163699e-06, + "loss": 0.43442306518554685, + "step": 38915 + }, + { + "epoch": 0.3365297316927653, + "grad_norm": 23.948979191434375, + "learning_rate": 5.590512158963406e-06, + "loss": 0.14287757873535156, + "step": 38920 + }, + { + "epoch": 0.3365729652143086, + "grad_norm": 2.504045770287656, + "learning_rate": 5.5904094018159015e-06, + "loss": 0.04703521728515625, + "step": 38925 + }, + { + "epoch": 0.33661619873585186, + "grad_norm": 9.082225624326673, + "learning_rate": 5.590306632721658e-06, + "loss": 0.1503753662109375, + "step": 38930 + }, + { + "epoch": 0.3366594322573951, + "grad_norm": 22.968237866978654, + "learning_rate": 5.590203851681148e-06, + "loss": 0.23300323486328126, + "step": 38935 + }, + { + "epoch": 0.33670266577893837, + "grad_norm": 18.863475083890737, + "learning_rate": 5.590101058694847e-06, + "loss": 0.242724609375, + "step": 38940 + }, + { + "epoch": 0.3367458993004816, + "grad_norm": 38.14616885996851, + "learning_rate": 5.589998253763229e-06, + "loss": 0.31429100036621094, + "step": 38945 + }, + { + "epoch": 0.3367891328220249, + "grad_norm": 3.1245011643442946, + "learning_rate": 5.589895436886768e-06, + "loss": 0.2663482666015625, + "step": 38950 + }, + { + "epoch": 0.33683236634356817, + "grad_norm": 9.11638548438506, + "learning_rate": 5.589792608065939e-06, + "loss": 0.2225931167602539, + "step": 38955 + }, + { + "epoch": 0.3368755998651114, + "grad_norm": 4.99290805015899, + "learning_rate": 5.589689767301214e-06, + "loss": 0.13152503967285156, + "step": 38960 + }, + { + "epoch": 0.3369188333866547, + "grad_norm": 58.42995460892253, + "learning_rate": 5.589586914593068e-06, + "loss": 0.15989990234375, + "step": 38965 + }, + { + "epoch": 0.33696206690819797, + "grad_norm": 6.579366358932836, + "learning_rate": 5.589484049941978e-06, + "loss": 0.1149169921875, + "step": 38970 + }, + { + "epoch": 0.3370053004297412, + "grad_norm": 5.032234760905874, + "learning_rate": 5.5893811733484156e-06, + "loss": 0.23477783203125, + "step": 38975 + }, + { + "epoch": 0.3370485339512845, + "grad_norm": 12.019819156021674, + "learning_rate": 5.589278284812855e-06, + "loss": 0.3436859130859375, + "step": 38980 + }, + { + "epoch": 0.3370917674728277, + "grad_norm": 5.843348107133733, + "learning_rate": 5.5891753843357735e-06, + "loss": 0.11724853515625, + "step": 38985 + }, + { + "epoch": 0.337135000994371, + "grad_norm": 10.409238411119043, + "learning_rate": 5.589072471917643e-06, + "loss": 0.10977249145507813, + "step": 38990 + }, + { + "epoch": 0.3371782345159143, + "grad_norm": 0.961817303159787, + "learning_rate": 5.588969547558938e-06, + "loss": 0.10978546142578124, + "step": 38995 + }, + { + "epoch": 0.3372214680374575, + "grad_norm": 0.11679727773515769, + "learning_rate": 5.588866611260136e-06, + "loss": 0.11211471557617188, + "step": 39000 + }, + { + "epoch": 0.3372647015590008, + "grad_norm": 12.972862504157053, + "learning_rate": 5.588763663021708e-06, + "loss": 0.3537628173828125, + "step": 39005 + }, + { + "epoch": 0.3373079350805441, + "grad_norm": 6.83733525678001, + "learning_rate": 5.588660702844133e-06, + "loss": 0.33043212890625, + "step": 39010 + }, + { + "epoch": 0.3373511686020873, + "grad_norm": 6.562121192836974, + "learning_rate": 5.588557730727882e-06, + "loss": 0.03593730926513672, + "step": 39015 + }, + { + "epoch": 0.3373944021236306, + "grad_norm": 0.8183104007134755, + "learning_rate": 5.588454746673432e-06, + "loss": 0.203125, + "step": 39020 + }, + { + "epoch": 0.3374376356451738, + "grad_norm": 2.691171244833585, + "learning_rate": 5.588351750681257e-06, + "loss": 0.34832305908203126, + "step": 39025 + }, + { + "epoch": 0.3374808691667171, + "grad_norm": 1.6155974010696825, + "learning_rate": 5.588248742751833e-06, + "loss": 0.11386566162109375, + "step": 39030 + }, + { + "epoch": 0.3375241026882604, + "grad_norm": 3.9145373974084468, + "learning_rate": 5.588145722885634e-06, + "loss": 0.07005767822265625, + "step": 39035 + }, + { + "epoch": 0.3375673362098036, + "grad_norm": 1.2571219817204153, + "learning_rate": 5.588042691083136e-06, + "loss": 0.133953857421875, + "step": 39040 + }, + { + "epoch": 0.3376105697313469, + "grad_norm": 8.321758197198363, + "learning_rate": 5.587939647344813e-06, + "loss": 0.139727783203125, + "step": 39045 + }, + { + "epoch": 0.3376538032528902, + "grad_norm": 2.249425246848257, + "learning_rate": 5.587836591671143e-06, + "loss": 0.10113525390625, + "step": 39050 + }, + { + "epoch": 0.3376970367744334, + "grad_norm": 7.005230781394162, + "learning_rate": 5.587733524062597e-06, + "loss": 0.1928863525390625, + "step": 39055 + }, + { + "epoch": 0.3377402702959767, + "grad_norm": 74.90519552316783, + "learning_rate": 5.5876304445196534e-06, + "loss": 0.5722648620605468, + "step": 39060 + }, + { + "epoch": 0.3377835038175199, + "grad_norm": 36.2496883333039, + "learning_rate": 5.587527353042788e-06, + "loss": 0.46313323974609377, + "step": 39065 + }, + { + "epoch": 0.3378267373390632, + "grad_norm": 6.487519253748086, + "learning_rate": 5.587424249632475e-06, + "loss": 0.2728710174560547, + "step": 39070 + }, + { + "epoch": 0.3378699708606065, + "grad_norm": 3.6589853516166424, + "learning_rate": 5.587321134289188e-06, + "loss": 0.2002349853515625, + "step": 39075 + }, + { + "epoch": 0.3379132043821497, + "grad_norm": 1.1129582930376407, + "learning_rate": 5.587218007013406e-06, + "loss": 0.1161773681640625, + "step": 39080 + }, + { + "epoch": 0.337956437903693, + "grad_norm": 13.755056939734896, + "learning_rate": 5.587114867805602e-06, + "loss": 0.11051025390625, + "step": 39085 + }, + { + "epoch": 0.3379996714252363, + "grad_norm": 18.584734662208895, + "learning_rate": 5.587011716666254e-06, + "loss": 0.07772674560546874, + "step": 39090 + }, + { + "epoch": 0.3380429049467795, + "grad_norm": 1.219033354436764, + "learning_rate": 5.586908553595837e-06, + "loss": 0.1608367919921875, + "step": 39095 + }, + { + "epoch": 0.3380861384683228, + "grad_norm": 5.507075157366722, + "learning_rate": 5.586805378594825e-06, + "loss": 0.28812255859375, + "step": 39100 + }, + { + "epoch": 0.3381293719898661, + "grad_norm": 5.849615493434502, + "learning_rate": 5.5867021916636954e-06, + "loss": 0.19501953125, + "step": 39105 + }, + { + "epoch": 0.3381726055114093, + "grad_norm": 1.666894159519057, + "learning_rate": 5.586598992802925e-06, + "loss": 0.14294891357421874, + "step": 39110 + }, + { + "epoch": 0.3382158390329526, + "grad_norm": 9.283142892508776, + "learning_rate": 5.5864957820129875e-06, + "loss": 0.3341705322265625, + "step": 39115 + }, + { + "epoch": 0.33825907255449583, + "grad_norm": 3.302564186033169, + "learning_rate": 5.586392559294361e-06, + "loss": 0.17882976531982422, + "step": 39120 + }, + { + "epoch": 0.3383023060760391, + "grad_norm": 5.799785629568706, + "learning_rate": 5.5862893246475194e-06, + "loss": 0.09722690582275391, + "step": 39125 + }, + { + "epoch": 0.3383455395975824, + "grad_norm": 11.954957088038508, + "learning_rate": 5.58618607807294e-06, + "loss": 0.235491943359375, + "step": 39130 + }, + { + "epoch": 0.33838877311912563, + "grad_norm": 8.375045716999084, + "learning_rate": 5.5860828195710995e-06, + "loss": 0.2035594940185547, + "step": 39135 + }, + { + "epoch": 0.3384320066406689, + "grad_norm": 1.0203129822395147, + "learning_rate": 5.585979549142473e-06, + "loss": 0.17363433837890624, + "step": 39140 + }, + { + "epoch": 0.3384752401622122, + "grad_norm": 3.036214052506606, + "learning_rate": 5.585876266787538e-06, + "loss": 0.17621078491210937, + "step": 39145 + }, + { + "epoch": 0.3385184736837554, + "grad_norm": 13.357768402239357, + "learning_rate": 5.58577297250677e-06, + "loss": 0.34703598022460935, + "step": 39150 + }, + { + "epoch": 0.3385617072052987, + "grad_norm": 15.141883322091992, + "learning_rate": 5.5856696663006455e-06, + "loss": 0.26330413818359377, + "step": 39155 + }, + { + "epoch": 0.33860494072684194, + "grad_norm": 2.48712151379179, + "learning_rate": 5.585566348169641e-06, + "loss": 0.11305084228515624, + "step": 39160 + }, + { + "epoch": 0.3386481742483852, + "grad_norm": 45.26663742970875, + "learning_rate": 5.585463018114233e-06, + "loss": 0.21888427734375, + "step": 39165 + }, + { + "epoch": 0.3386914077699285, + "grad_norm": 5.293583685329531, + "learning_rate": 5.585359676134897e-06, + "loss": 0.2195068359375, + "step": 39170 + }, + { + "epoch": 0.33873464129147174, + "grad_norm": 10.41170721697537, + "learning_rate": 5.585256322232112e-06, + "loss": 0.12551460266113282, + "step": 39175 + }, + { + "epoch": 0.338777874813015, + "grad_norm": 6.736578807109876, + "learning_rate": 5.585152956406353e-06, + "loss": 0.4619659423828125, + "step": 39180 + }, + { + "epoch": 0.3388211083345583, + "grad_norm": 10.508813082631235, + "learning_rate": 5.585049578658097e-06, + "loss": 0.11919898986816406, + "step": 39185 + }, + { + "epoch": 0.33886434185610154, + "grad_norm": 0.9709510252260757, + "learning_rate": 5.58494618898782e-06, + "loss": 0.15824432373046876, + "step": 39190 + }, + { + "epoch": 0.3389075753776448, + "grad_norm": 8.552788734779087, + "learning_rate": 5.584842787396e-06, + "loss": 0.14878311157226562, + "step": 39195 + }, + { + "epoch": 0.33895080889918805, + "grad_norm": 30.549221124557707, + "learning_rate": 5.584739373883114e-06, + "loss": 0.10511703491210937, + "step": 39200 + }, + { + "epoch": 0.33899404242073133, + "grad_norm": 6.157089746605243, + "learning_rate": 5.5846359484496375e-06, + "loss": 0.062221527099609375, + "step": 39205 + }, + { + "epoch": 0.3390372759422746, + "grad_norm": 18.83730248796578, + "learning_rate": 5.584532511096049e-06, + "loss": 0.16837921142578124, + "step": 39210 + }, + { + "epoch": 0.33908050946381785, + "grad_norm": 5.230289067916854, + "learning_rate": 5.5844290618228244e-06, + "loss": 0.1141754150390625, + "step": 39215 + }, + { + "epoch": 0.33912374298536113, + "grad_norm": 3.418833958072013, + "learning_rate": 5.584325600630442e-06, + "loss": 0.21103057861328126, + "step": 39220 + }, + { + "epoch": 0.3391669765069044, + "grad_norm": 23.610393481220363, + "learning_rate": 5.584222127519378e-06, + "loss": 0.212060546875, + "step": 39225 + }, + { + "epoch": 0.33921021002844765, + "grad_norm": 11.487172943434215, + "learning_rate": 5.58411864249011e-06, + "loss": 0.1665191650390625, + "step": 39230 + }, + { + "epoch": 0.33925344354999093, + "grad_norm": 1.0825478477594423, + "learning_rate": 5.5840151455431146e-06, + "loss": 0.5079750061035156, + "step": 39235 + }, + { + "epoch": 0.33929667707153416, + "grad_norm": 0.7473038635155165, + "learning_rate": 5.58391163667887e-06, + "loss": 0.14243431091308595, + "step": 39240 + }, + { + "epoch": 0.33933991059307744, + "grad_norm": 4.93815828550098, + "learning_rate": 5.583808115897854e-06, + "loss": 0.10415267944335938, + "step": 39245 + }, + { + "epoch": 0.3393831441146207, + "grad_norm": 15.257038624946384, + "learning_rate": 5.5837045832005425e-06, + "loss": 0.4683441162109375, + "step": 39250 + }, + { + "epoch": 0.33942637763616396, + "grad_norm": 9.94746617814544, + "learning_rate": 5.583601038587414e-06, + "loss": 0.32679290771484376, + "step": 39255 + }, + { + "epoch": 0.33946961115770724, + "grad_norm": 23.467935754334352, + "learning_rate": 5.583497482058946e-06, + "loss": 0.40980224609375, + "step": 39260 + }, + { + "epoch": 0.3395128446792505, + "grad_norm": 10.803573803651021, + "learning_rate": 5.583393913615615e-06, + "loss": 0.10158767700195312, + "step": 39265 + }, + { + "epoch": 0.33955607820079375, + "grad_norm": 5.7946086471644875, + "learning_rate": 5.5832903332579e-06, + "loss": 0.176995849609375, + "step": 39270 + }, + { + "epoch": 0.33959931172233704, + "grad_norm": 2.4699524678229854, + "learning_rate": 5.583186740986279e-06, + "loss": 0.03810043334960937, + "step": 39275 + }, + { + "epoch": 0.3396425452438803, + "grad_norm": 1.4947887772822226, + "learning_rate": 5.583083136801228e-06, + "loss": 0.12328872680664063, + "step": 39280 + }, + { + "epoch": 0.33968577876542355, + "grad_norm": 3.117239815464888, + "learning_rate": 5.582979520703226e-06, + "loss": 0.10001335144042969, + "step": 39285 + }, + { + "epoch": 0.33972901228696684, + "grad_norm": 33.14219071453262, + "learning_rate": 5.582875892692751e-06, + "loss": 0.21597137451171874, + "step": 39290 + }, + { + "epoch": 0.33977224580851006, + "grad_norm": 1.2420169398641012, + "learning_rate": 5.582772252770281e-06, + "loss": 0.084271240234375, + "step": 39295 + }, + { + "epoch": 0.33981547933005335, + "grad_norm": 0.7055656819246562, + "learning_rate": 5.582668600936293e-06, + "loss": 0.22363433837890626, + "step": 39300 + }, + { + "epoch": 0.33985871285159663, + "grad_norm": 2.72572657508497, + "learning_rate": 5.582564937191266e-06, + "loss": 0.1772705078125, + "step": 39305 + }, + { + "epoch": 0.33990194637313986, + "grad_norm": 8.033320836800714, + "learning_rate": 5.5824612615356775e-06, + "loss": 0.12204513549804688, + "step": 39310 + }, + { + "epoch": 0.33994517989468315, + "grad_norm": 15.400673203490383, + "learning_rate": 5.582357573970005e-06, + "loss": 0.1044778823852539, + "step": 39315 + }, + { + "epoch": 0.33998841341622643, + "grad_norm": 6.109172595151102, + "learning_rate": 5.582253874494729e-06, + "loss": 0.15660629272460938, + "step": 39320 + }, + { + "epoch": 0.34003164693776966, + "grad_norm": 20.36823585586223, + "learning_rate": 5.5821501631103255e-06, + "loss": 0.4984130859375, + "step": 39325 + }, + { + "epoch": 0.34007488045931294, + "grad_norm": 4.632364025827549, + "learning_rate": 5.582046439817274e-06, + "loss": 0.059247589111328124, + "step": 39330 + }, + { + "epoch": 0.3401181139808562, + "grad_norm": 8.816589217957693, + "learning_rate": 5.581942704616052e-06, + "loss": 0.2874420166015625, + "step": 39335 + }, + { + "epoch": 0.34016134750239946, + "grad_norm": 3.5252331160504986, + "learning_rate": 5.581838957507138e-06, + "loss": 0.06708831787109375, + "step": 39340 + }, + { + "epoch": 0.34020458102394274, + "grad_norm": 6.197137032799022, + "learning_rate": 5.581735198491012e-06, + "loss": 0.06541595458984376, + "step": 39345 + }, + { + "epoch": 0.34024781454548597, + "grad_norm": 17.47418218157359, + "learning_rate": 5.5816314275681506e-06, + "loss": 0.0921478271484375, + "step": 39350 + }, + { + "epoch": 0.34029104806702926, + "grad_norm": 25.690957713146457, + "learning_rate": 5.581527644739033e-06, + "loss": 0.12690811157226561, + "step": 39355 + }, + { + "epoch": 0.34033428158857254, + "grad_norm": 2.783073493182998, + "learning_rate": 5.581423850004139e-06, + "loss": 0.132598876953125, + "step": 39360 + }, + { + "epoch": 0.34037751511011577, + "grad_norm": 18.96339565548106, + "learning_rate": 5.5813200433639466e-06, + "loss": 0.2615486145019531, + "step": 39365 + }, + { + "epoch": 0.34042074863165905, + "grad_norm": 6.619874273943353, + "learning_rate": 5.581216224818932e-06, + "loss": 0.554815673828125, + "step": 39370 + }, + { + "epoch": 0.3404639821532023, + "grad_norm": 2.366804439483656, + "learning_rate": 5.581112394369578e-06, + "loss": 0.06797637939453124, + "step": 39375 + }, + { + "epoch": 0.34050721567474557, + "grad_norm": 14.57148731939594, + "learning_rate": 5.581008552016361e-06, + "loss": 0.15496826171875, + "step": 39380 + }, + { + "epoch": 0.34055044919628885, + "grad_norm": 3.265871410782355, + "learning_rate": 5.580904697759761e-06, + "loss": 0.1054412841796875, + "step": 39385 + }, + { + "epoch": 0.3405936827178321, + "grad_norm": 25.483720368478036, + "learning_rate": 5.580800831600257e-06, + "loss": 0.3944305419921875, + "step": 39390 + }, + { + "epoch": 0.34063691623937536, + "grad_norm": 7.210158782811769, + "learning_rate": 5.580696953538327e-06, + "loss": 0.09812774658203124, + "step": 39395 + }, + { + "epoch": 0.34068014976091865, + "grad_norm": 6.311685654460721, + "learning_rate": 5.580593063574451e-06, + "loss": 0.210992431640625, + "step": 39400 + }, + { + "epoch": 0.3407233832824619, + "grad_norm": 7.38619715256654, + "learning_rate": 5.580489161709107e-06, + "loss": 0.10878143310546876, + "step": 39405 + }, + { + "epoch": 0.34076661680400516, + "grad_norm": 32.159425931943474, + "learning_rate": 5.580385247942776e-06, + "loss": 0.644970703125, + "step": 39410 + }, + { + "epoch": 0.3408098503255484, + "grad_norm": 7.6253783879350525, + "learning_rate": 5.580281322275937e-06, + "loss": 0.0857177734375, + "step": 39415 + }, + { + "epoch": 0.3408530838470917, + "grad_norm": 14.947019923844886, + "learning_rate": 5.580177384709066e-06, + "loss": 0.12778549194335936, + "step": 39420 + }, + { + "epoch": 0.34089631736863496, + "grad_norm": 50.42885529857966, + "learning_rate": 5.580073435242647e-06, + "loss": 0.448797607421875, + "step": 39425 + }, + { + "epoch": 0.3409395508901782, + "grad_norm": 6.637341599779554, + "learning_rate": 5.579969473877157e-06, + "loss": 0.13455734252929688, + "step": 39430 + }, + { + "epoch": 0.3409827844117215, + "grad_norm": 10.969617065211654, + "learning_rate": 5.579865500613074e-06, + "loss": 0.2954437255859375, + "step": 39435 + }, + { + "epoch": 0.34102601793326476, + "grad_norm": 6.35577816237078, + "learning_rate": 5.579761515450881e-06, + "loss": 0.28125, + "step": 39440 + }, + { + "epoch": 0.341069251454808, + "grad_norm": 0.6616206398872869, + "learning_rate": 5.579657518391057e-06, + "loss": 0.08776016235351562, + "step": 39445 + }, + { + "epoch": 0.34111248497635127, + "grad_norm": 0.3148185750609605, + "learning_rate": 5.579553509434078e-06, + "loss": 0.038037109375, + "step": 39450 + }, + { + "epoch": 0.3411557184978945, + "grad_norm": 30.59058468503109, + "learning_rate": 5.579449488580427e-06, + "loss": 0.4466880798339844, + "step": 39455 + }, + { + "epoch": 0.3411989520194378, + "grad_norm": 1.7789850834694492, + "learning_rate": 5.579345455830583e-06, + "loss": 0.06448745727539062, + "step": 39460 + }, + { + "epoch": 0.34124218554098107, + "grad_norm": 25.320341798319383, + "learning_rate": 5.579241411185026e-06, + "loss": 0.3578277587890625, + "step": 39465 + }, + { + "epoch": 0.3412854190625243, + "grad_norm": 0.8278357063109144, + "learning_rate": 5.579137354644235e-06, + "loss": 0.238006591796875, + "step": 39470 + }, + { + "epoch": 0.3413286525840676, + "grad_norm": 2.9347967589905735, + "learning_rate": 5.57903328620869e-06, + "loss": 0.36219024658203125, + "step": 39475 + }, + { + "epoch": 0.34137188610561087, + "grad_norm": 2.0201519612294487, + "learning_rate": 5.578929205878873e-06, + "loss": 0.10355644226074219, + "step": 39480 + }, + { + "epoch": 0.3414151196271541, + "grad_norm": 2.791866002313071, + "learning_rate": 5.57882511365526e-06, + "loss": 0.1808074951171875, + "step": 39485 + }, + { + "epoch": 0.3414583531486974, + "grad_norm": 43.88163324494068, + "learning_rate": 5.578721009538336e-06, + "loss": 0.3568572998046875, + "step": 39490 + }, + { + "epoch": 0.34150158667024066, + "grad_norm": 1.9716863886164193, + "learning_rate": 5.578616893528577e-06, + "loss": 0.36788177490234375, + "step": 39495 + }, + { + "epoch": 0.3415448201917839, + "grad_norm": 27.304695561840727, + "learning_rate": 5.578512765626465e-06, + "loss": 0.128399658203125, + "step": 39500 + }, + { + "epoch": 0.3415880537133272, + "grad_norm": 21.912245900319345, + "learning_rate": 5.57840862583248e-06, + "loss": 0.13150100708007811, + "step": 39505 + }, + { + "epoch": 0.3416312872348704, + "grad_norm": 26.685546928422074, + "learning_rate": 5.578304474147103e-06, + "loss": 0.10449981689453125, + "step": 39510 + }, + { + "epoch": 0.3416745207564137, + "grad_norm": 2.5888025010591287, + "learning_rate": 5.578200310570812e-06, + "loss": 0.20458984375, + "step": 39515 + }, + { + "epoch": 0.341717754277957, + "grad_norm": 26.573923340695234, + "learning_rate": 5.57809613510409e-06, + "loss": 0.31207275390625, + "step": 39520 + }, + { + "epoch": 0.3417609877995002, + "grad_norm": 5.474028627142167, + "learning_rate": 5.577991947747416e-06, + "loss": 0.2110748291015625, + "step": 39525 + }, + { + "epoch": 0.3418042213210435, + "grad_norm": 0.4792016822422337, + "learning_rate": 5.577887748501272e-06, + "loss": 0.32146148681640624, + "step": 39530 + }, + { + "epoch": 0.3418474548425868, + "grad_norm": 23.61897964846635, + "learning_rate": 5.577783537366137e-06, + "loss": 0.1196807861328125, + "step": 39535 + }, + { + "epoch": 0.34189068836413, + "grad_norm": 7.642098833014208, + "learning_rate": 5.577679314342491e-06, + "loss": 0.300067138671875, + "step": 39540 + }, + { + "epoch": 0.3419339218856733, + "grad_norm": 4.007308259072301, + "learning_rate": 5.577575079430817e-06, + "loss": 0.0908447265625, + "step": 39545 + }, + { + "epoch": 0.3419771554072165, + "grad_norm": 0.9768444545463106, + "learning_rate": 5.577470832631594e-06, + "loss": 0.24942054748535156, + "step": 39550 + }, + { + "epoch": 0.3420203889287598, + "grad_norm": 1.3267667897016489, + "learning_rate": 5.577366573945304e-06, + "loss": 0.370611572265625, + "step": 39555 + }, + { + "epoch": 0.3420636224503031, + "grad_norm": 2.7491994317827966, + "learning_rate": 5.577262303372426e-06, + "loss": 0.35213584899902345, + "step": 39560 + }, + { + "epoch": 0.3421068559718463, + "grad_norm": 23.67834373177465, + "learning_rate": 5.577158020913442e-06, + "loss": 0.16895370483398436, + "step": 39565 + }, + { + "epoch": 0.3421500894933896, + "grad_norm": 22.223055958269672, + "learning_rate": 5.577053726568834e-06, + "loss": 0.21045303344726562, + "step": 39570 + }, + { + "epoch": 0.3421933230149329, + "grad_norm": 9.701484966123846, + "learning_rate": 5.576949420339081e-06, + "loss": 0.1272754669189453, + "step": 39575 + }, + { + "epoch": 0.3422365565364761, + "grad_norm": 12.055343786910377, + "learning_rate": 5.576845102224666e-06, + "loss": 0.14468464851379395, + "step": 39580 + }, + { + "epoch": 0.3422797900580194, + "grad_norm": 4.779179401032781, + "learning_rate": 5.5767407722260684e-06, + "loss": 0.29741973876953126, + "step": 39585 + }, + { + "epoch": 0.3423230235795626, + "grad_norm": 1.1959699651837603, + "learning_rate": 5.5766364303437695e-06, + "loss": 0.030889892578125, + "step": 39590 + }, + { + "epoch": 0.3423662571011059, + "grad_norm": 2.062029994588209, + "learning_rate": 5.576532076578251e-06, + "loss": 0.1360992431640625, + "step": 39595 + }, + { + "epoch": 0.3424094906226492, + "grad_norm": 11.738278664361996, + "learning_rate": 5.576427710929995e-06, + "loss": 0.16116943359375, + "step": 39600 + }, + { + "epoch": 0.3424527241441924, + "grad_norm": 5.65261362717791, + "learning_rate": 5.576323333399482e-06, + "loss": 0.09991531372070313, + "step": 39605 + }, + { + "epoch": 0.3424959576657357, + "grad_norm": 2.369848573212097, + "learning_rate": 5.576218943987194e-06, + "loss": 0.4870147705078125, + "step": 39610 + }, + { + "epoch": 0.342539191187279, + "grad_norm": 12.25621904065792, + "learning_rate": 5.57611454269361e-06, + "loss": 0.09585418701171874, + "step": 39615 + }, + { + "epoch": 0.3425824247088222, + "grad_norm": 115.1175203307774, + "learning_rate": 5.576010129519215e-06, + "loss": 0.41444091796875, + "step": 39620 + }, + { + "epoch": 0.3426256582303655, + "grad_norm": 35.86661558827953, + "learning_rate": 5.575905704464488e-06, + "loss": 0.4092254638671875, + "step": 39625 + }, + { + "epoch": 0.34266889175190873, + "grad_norm": 0.4863660178650895, + "learning_rate": 5.575801267529913e-06, + "loss": 0.2341094970703125, + "step": 39630 + }, + { + "epoch": 0.342712125273452, + "grad_norm": 17.225116920950427, + "learning_rate": 5.575696818715969e-06, + "loss": 0.18798828125, + "step": 39635 + }, + { + "epoch": 0.3427553587949953, + "grad_norm": 1.8231811974353882, + "learning_rate": 5.5755923580231384e-06, + "loss": 0.09254989624023438, + "step": 39640 + }, + { + "epoch": 0.34279859231653853, + "grad_norm": 40.81176357377374, + "learning_rate": 5.575487885451904e-06, + "loss": 0.182098388671875, + "step": 39645 + }, + { + "epoch": 0.3428418258380818, + "grad_norm": 16.426443218809162, + "learning_rate": 5.575383401002747e-06, + "loss": 0.14018707275390624, + "step": 39650 + }, + { + "epoch": 0.3428850593596251, + "grad_norm": 15.097743462196643, + "learning_rate": 5.5752789046761495e-06, + "loss": 0.16885833740234374, + "step": 39655 + }, + { + "epoch": 0.34292829288116833, + "grad_norm": 3.0263368290526613, + "learning_rate": 5.575174396472593e-06, + "loss": 0.08926963806152344, + "step": 39660 + }, + { + "epoch": 0.3429715264027116, + "grad_norm": 18.968903538606106, + "learning_rate": 5.575069876392559e-06, + "loss": 0.20161590576171876, + "step": 39665 + }, + { + "epoch": 0.3430147599242549, + "grad_norm": 13.765116983229944, + "learning_rate": 5.5749653444365325e-06, + "loss": 0.082159423828125, + "step": 39670 + }, + { + "epoch": 0.3430579934457981, + "grad_norm": 3.9302161092360617, + "learning_rate": 5.5748608006049915e-06, + "loss": 0.26551666259765627, + "step": 39675 + }, + { + "epoch": 0.3431012269673414, + "grad_norm": 31.639956213787457, + "learning_rate": 5.5747562448984195e-06, + "loss": 0.36536865234375, + "step": 39680 + }, + { + "epoch": 0.34314446048888464, + "grad_norm": 17.605641524868066, + "learning_rate": 5.5746516773173e-06, + "loss": 0.263385009765625, + "step": 39685 + }, + { + "epoch": 0.3431876940104279, + "grad_norm": 0.8371477092150298, + "learning_rate": 5.574547097862115e-06, + "loss": 0.16993560791015624, + "step": 39690 + }, + { + "epoch": 0.3432309275319712, + "grad_norm": 0.8763063937937942, + "learning_rate": 5.574442506533346e-06, + "loss": 0.04871826171875, + "step": 39695 + }, + { + "epoch": 0.34327416105351444, + "grad_norm": 34.8769885533571, + "learning_rate": 5.574337903331476e-06, + "loss": 0.40032958984375, + "step": 39700 + }, + { + "epoch": 0.3433173945750577, + "grad_norm": 8.451491253720869, + "learning_rate": 5.574233288256986e-06, + "loss": 0.1175445556640625, + "step": 39705 + }, + { + "epoch": 0.343360628096601, + "grad_norm": 4.253278650393212, + "learning_rate": 5.574128661310361e-06, + "loss": 0.19094390869140626, + "step": 39710 + }, + { + "epoch": 0.34340386161814423, + "grad_norm": 1.4820362005261092, + "learning_rate": 5.574024022492081e-06, + "loss": 0.23867416381835938, + "step": 39715 + }, + { + "epoch": 0.3434470951396875, + "grad_norm": 28.358880969166314, + "learning_rate": 5.57391937180263e-06, + "loss": 0.396575927734375, + "step": 39720 + }, + { + "epoch": 0.34349032866123075, + "grad_norm": 5.90700264046569, + "learning_rate": 5.57381470924249e-06, + "loss": 0.21166152954101564, + "step": 39725 + }, + { + "epoch": 0.34353356218277403, + "grad_norm": 1.142700791561249, + "learning_rate": 5.573710034812144e-06, + "loss": 0.089501953125, + "step": 39730 + }, + { + "epoch": 0.3435767957043173, + "grad_norm": 4.533609065458505, + "learning_rate": 5.573605348512075e-06, + "loss": 0.2835723876953125, + "step": 39735 + }, + { + "epoch": 0.34362002922586055, + "grad_norm": 6.526352615807778, + "learning_rate": 5.573500650342765e-06, + "loss": 0.06731491088867188, + "step": 39740 + }, + { + "epoch": 0.34366326274740383, + "grad_norm": 4.324466710159785, + "learning_rate": 5.573395940304698e-06, + "loss": 0.1297454833984375, + "step": 39745 + }, + { + "epoch": 0.3437064962689471, + "grad_norm": 0.8874542073174322, + "learning_rate": 5.573291218398356e-06, + "loss": 0.07892532348632812, + "step": 39750 + }, + { + "epoch": 0.34374972979049034, + "grad_norm": 1.0547836229705716, + "learning_rate": 5.573186484624222e-06, + "loss": 0.13214664459228515, + "step": 39755 + }, + { + "epoch": 0.34379296331203363, + "grad_norm": 1.849875558206195, + "learning_rate": 5.57308173898278e-06, + "loss": 0.10016937255859375, + "step": 39760 + }, + { + "epoch": 0.34383619683357686, + "grad_norm": 24.170593137746142, + "learning_rate": 5.572976981474512e-06, + "loss": 0.188336181640625, + "step": 39765 + }, + { + "epoch": 0.34387943035512014, + "grad_norm": 8.970601206413752, + "learning_rate": 5.572872212099902e-06, + "loss": 0.48704681396484373, + "step": 39770 + }, + { + "epoch": 0.3439226638766634, + "grad_norm": 0.5033986603844864, + "learning_rate": 5.572767430859432e-06, + "loss": 0.34390106201171877, + "step": 39775 + }, + { + "epoch": 0.34396589739820665, + "grad_norm": 22.48211374446377, + "learning_rate": 5.572662637753586e-06, + "loss": 0.17872314453125, + "step": 39780 + }, + { + "epoch": 0.34400913091974994, + "grad_norm": 50.44222647566031, + "learning_rate": 5.572557832782848e-06, + "loss": 0.07413253784179688, + "step": 39785 + }, + { + "epoch": 0.3440523644412932, + "grad_norm": 12.27166659817345, + "learning_rate": 5.572453015947699e-06, + "loss": 0.09898529052734376, + "step": 39790 + }, + { + "epoch": 0.34409559796283645, + "grad_norm": 40.265246012686276, + "learning_rate": 5.572348187248626e-06, + "loss": 0.29994659423828124, + "step": 39795 + }, + { + "epoch": 0.34413883148437974, + "grad_norm": 21.124673685340802, + "learning_rate": 5.5722433466861095e-06, + "loss": 0.1779388427734375, + "step": 39800 + }, + { + "epoch": 0.34418206500592297, + "grad_norm": 2.143533778787777, + "learning_rate": 5.572138494260633e-06, + "loss": 0.1678924560546875, + "step": 39805 + }, + { + "epoch": 0.34422529852746625, + "grad_norm": 32.13972807688078, + "learning_rate": 5.572033629972684e-06, + "loss": 0.15457305908203126, + "step": 39810 + }, + { + "epoch": 0.34426853204900953, + "grad_norm": 71.45318331893118, + "learning_rate": 5.5719287538227404e-06, + "loss": 0.2767303466796875, + "step": 39815 + }, + { + "epoch": 0.34431176557055276, + "grad_norm": 0.7962373903290207, + "learning_rate": 5.571823865811289e-06, + "loss": 0.2378082275390625, + "step": 39820 + }, + { + "epoch": 0.34435499909209605, + "grad_norm": 0.7897826991193834, + "learning_rate": 5.5717189659388145e-06, + "loss": 0.14013214111328126, + "step": 39825 + }, + { + "epoch": 0.34439823261363933, + "grad_norm": 7.251021578568026, + "learning_rate": 5.571614054205797e-06, + "loss": 0.37003173828125, + "step": 39830 + }, + { + "epoch": 0.34444146613518256, + "grad_norm": 0.18389859839724895, + "learning_rate": 5.571509130612725e-06, + "loss": 0.5024734497070312, + "step": 39835 + }, + { + "epoch": 0.34448469965672585, + "grad_norm": 25.356822249265186, + "learning_rate": 5.571404195160079e-06, + "loss": 0.29895362854003904, + "step": 39840 + }, + { + "epoch": 0.34452793317826913, + "grad_norm": 2.285926174387116, + "learning_rate": 5.571299247848345e-06, + "loss": 0.2198272705078125, + "step": 39845 + }, + { + "epoch": 0.34457116669981236, + "grad_norm": 0.6240450126780186, + "learning_rate": 5.5711942886780056e-06, + "loss": 0.22982711791992189, + "step": 39850 + }, + { + "epoch": 0.34461440022135564, + "grad_norm": 7.9780711521501715, + "learning_rate": 5.571089317649545e-06, + "loss": 0.5849945068359375, + "step": 39855 + }, + { + "epoch": 0.3446576337428989, + "grad_norm": 5.198513319841628, + "learning_rate": 5.5709843347634476e-06, + "loss": 0.3324615478515625, + "step": 39860 + }, + { + "epoch": 0.34470086726444216, + "grad_norm": 3.9180287096779884, + "learning_rate": 5.570879340020197e-06, + "loss": 0.2768646240234375, + "step": 39865 + }, + { + "epoch": 0.34474410078598544, + "grad_norm": 2.5820455002480935, + "learning_rate": 5.570774333420279e-06, + "loss": 0.43453283309936525, + "step": 39870 + }, + { + "epoch": 0.34478733430752867, + "grad_norm": 0.9311352295669028, + "learning_rate": 5.570669314964177e-06, + "loss": 0.4816070556640625, + "step": 39875 + }, + { + "epoch": 0.34483056782907195, + "grad_norm": 1.058292384803323, + "learning_rate": 5.5705642846523756e-06, + "loss": 0.134912109375, + "step": 39880 + }, + { + "epoch": 0.34487380135061524, + "grad_norm": 1.9876652782744562, + "learning_rate": 5.570459242485358e-06, + "loss": 0.334405517578125, + "step": 39885 + }, + { + "epoch": 0.34491703487215847, + "grad_norm": 18.02300039136403, + "learning_rate": 5.570354188463609e-06, + "loss": 0.191998291015625, + "step": 39890 + }, + { + "epoch": 0.34496026839370175, + "grad_norm": 16.59506486441118, + "learning_rate": 5.570249122587616e-06, + "loss": 0.07336082458496093, + "step": 39895 + }, + { + "epoch": 0.345003501915245, + "grad_norm": 0.040781814902261446, + "learning_rate": 5.57014404485786e-06, + "loss": 0.175616455078125, + "step": 39900 + }, + { + "epoch": 0.34504673543678827, + "grad_norm": 1.3206452638768804, + "learning_rate": 5.570038955274826e-06, + "loss": 0.0810638427734375, + "step": 39905 + }, + { + "epoch": 0.34508996895833155, + "grad_norm": 8.386887363971585, + "learning_rate": 5.569933853838999e-06, + "loss": 0.18994140625, + "step": 39910 + }, + { + "epoch": 0.3451332024798748, + "grad_norm": 2.864305561961725, + "learning_rate": 5.5698287405508654e-06, + "loss": 0.04351806640625, + "step": 39915 + }, + { + "epoch": 0.34517643600141806, + "grad_norm": 3.0483071474550365, + "learning_rate": 5.569723615410908e-06, + "loss": 0.27862396240234377, + "step": 39920 + }, + { + "epoch": 0.34521966952296135, + "grad_norm": 3.853878880275241, + "learning_rate": 5.569618478419613e-06, + "loss": 0.0442626953125, + "step": 39925 + }, + { + "epoch": 0.3452629030445046, + "grad_norm": 1.6183277648455148, + "learning_rate": 5.569513329577465e-06, + "loss": 0.267138671875, + "step": 39930 + }, + { + "epoch": 0.34530613656604786, + "grad_norm": 6.605102362896709, + "learning_rate": 5.5694081688849465e-06, + "loss": 0.1183542251586914, + "step": 39935 + }, + { + "epoch": 0.3453493700875911, + "grad_norm": 6.993183376353334, + "learning_rate": 5.5693029963425465e-06, + "loss": 0.096240234375, + "step": 39940 + }, + { + "epoch": 0.3453926036091344, + "grad_norm": 35.21103533790344, + "learning_rate": 5.569197811950747e-06, + "loss": 0.20530319213867188, + "step": 39945 + }, + { + "epoch": 0.34543583713067766, + "grad_norm": 1.9441121413688116, + "learning_rate": 5.569092615710035e-06, + "loss": 0.2131927490234375, + "step": 39950 + }, + { + "epoch": 0.3454790706522209, + "grad_norm": 28.55964573095053, + "learning_rate": 5.568987407620895e-06, + "loss": 0.334027099609375, + "step": 39955 + }, + { + "epoch": 0.34552230417376417, + "grad_norm": 34.561959368978286, + "learning_rate": 5.568882187683811e-06, + "loss": 0.35762557983398435, + "step": 39960 + }, + { + "epoch": 0.34556553769530746, + "grad_norm": 35.07150607760492, + "learning_rate": 5.568776955899271e-06, + "loss": 0.18266448974609376, + "step": 39965 + }, + { + "epoch": 0.3456087712168507, + "grad_norm": 16.20364032294851, + "learning_rate": 5.568671712267757e-06, + "loss": 0.09557037353515625, + "step": 39970 + }, + { + "epoch": 0.34565200473839397, + "grad_norm": 17.3495597595283, + "learning_rate": 5.568566456789757e-06, + "loss": 0.19049034118652344, + "step": 39975 + }, + { + "epoch": 0.3456952382599372, + "grad_norm": 2.1320214125860577, + "learning_rate": 5.568461189465755e-06, + "loss": 0.08824920654296875, + "step": 39980 + }, + { + "epoch": 0.3457384717814805, + "grad_norm": 0.20545687470723986, + "learning_rate": 5.568355910296237e-06, + "loss": 0.160638427734375, + "step": 39985 + }, + { + "epoch": 0.34578170530302377, + "grad_norm": 38.99589552146025, + "learning_rate": 5.568250619281689e-06, + "loss": 0.4490928649902344, + "step": 39990 + }, + { + "epoch": 0.345824938824567, + "grad_norm": 8.701437972626621, + "learning_rate": 5.5681453164225955e-06, + "loss": 0.3404022216796875, + "step": 39995 + }, + { + "epoch": 0.3458681723461103, + "grad_norm": 0.25307710163309416, + "learning_rate": 5.568040001719443e-06, + "loss": 0.41152191162109375, + "step": 40000 + }, + { + "epoch": 0.3458681723461103, + "eval_loss": 0.13926450908184052, + "eval_margin": 0.1340373158454895, + "eval_mean_neg": 0.014631230384111404, + "eval_mean_pos": 0.700334906578064, + "eval_runtime": 19.772, + "eval_samples_per_second": 11.683, + "eval_steps_per_second": 5.867, + "step": 40000 + }, + { + "epoch": 0.34591140586765357, + "grad_norm": 24.628908147675318, + "learning_rate": 5.567934675172716e-06, + "loss": 0.2661376953125, + "step": 40005 + }, + { + "epoch": 0.3459546393891968, + "grad_norm": 4.298891119212175, + "learning_rate": 5.567829336782902e-06, + "loss": 0.074407958984375, + "step": 40010 + }, + { + "epoch": 0.3459978729107401, + "grad_norm": 34.409739196734044, + "learning_rate": 5.567723986550486e-06, + "loss": 0.296929931640625, + "step": 40015 + }, + { + "epoch": 0.3460411064322833, + "grad_norm": 6.031818695593964, + "learning_rate": 5.567618624475954e-06, + "loss": 0.1298980712890625, + "step": 40020 + }, + { + "epoch": 0.3460843399538266, + "grad_norm": 9.784552183027163, + "learning_rate": 5.567513250559792e-06, + "loss": 0.2040283203125, + "step": 40025 + }, + { + "epoch": 0.3461275734753699, + "grad_norm": 4.598336367180972, + "learning_rate": 5.567407864802486e-06, + "loss": 0.04920616149902344, + "step": 40030 + }, + { + "epoch": 0.3461708069969131, + "grad_norm": 4.762323006817539, + "learning_rate": 5.567302467204521e-06, + "loss": 0.20866928100585938, + "step": 40035 + }, + { + "epoch": 0.3462140405184564, + "grad_norm": 2.5005556311193238, + "learning_rate": 5.567197057766384e-06, + "loss": 0.07350616455078125, + "step": 40040 + }, + { + "epoch": 0.3462572740399997, + "grad_norm": 6.442446355043783, + "learning_rate": 5.567091636488561e-06, + "loss": 0.1177001953125, + "step": 40045 + }, + { + "epoch": 0.3463005075615429, + "grad_norm": 1.3435400182086106, + "learning_rate": 5.5669862033715386e-06, + "loss": 0.18997802734375, + "step": 40050 + }, + { + "epoch": 0.3463437410830862, + "grad_norm": 11.103354722689472, + "learning_rate": 5.5668807584158024e-06, + "loss": 0.2904296875, + "step": 40055 + }, + { + "epoch": 0.34638697460462947, + "grad_norm": 21.11869991718726, + "learning_rate": 5.566775301621839e-06, + "loss": 0.33564453125, + "step": 40060 + }, + { + "epoch": 0.3464302081261727, + "grad_norm": 17.996589259015224, + "learning_rate": 5.566669832990134e-06, + "loss": 0.2964996337890625, + "step": 40065 + }, + { + "epoch": 0.346473441647716, + "grad_norm": 2.198383262089791, + "learning_rate": 5.566564352521175e-06, + "loss": 0.2990447998046875, + "step": 40070 + }, + { + "epoch": 0.3465166751692592, + "grad_norm": 17.613338346807314, + "learning_rate": 5.566458860215448e-06, + "loss": 0.0961456298828125, + "step": 40075 + }, + { + "epoch": 0.3465599086908025, + "grad_norm": 5.810039270104754, + "learning_rate": 5.56635335607344e-06, + "loss": 0.164501953125, + "step": 40080 + }, + { + "epoch": 0.3466031422123458, + "grad_norm": 28.37851755631543, + "learning_rate": 5.566247840095637e-06, + "loss": 0.25864105224609374, + "step": 40085 + }, + { + "epoch": 0.346646375733889, + "grad_norm": 16.221495167228078, + "learning_rate": 5.5661423122825254e-06, + "loss": 0.11043853759765625, + "step": 40090 + }, + { + "epoch": 0.3466896092554323, + "grad_norm": 7.8524896884397855, + "learning_rate": 5.566036772634592e-06, + "loss": 0.218408203125, + "step": 40095 + }, + { + "epoch": 0.3467328427769756, + "grad_norm": 10.901318517069301, + "learning_rate": 5.5659312211523245e-06, + "loss": 0.34616546630859374, + "step": 40100 + }, + { + "epoch": 0.3467760762985188, + "grad_norm": 15.066291845874108, + "learning_rate": 5.565825657836207e-06, + "loss": 0.206634521484375, + "step": 40105 + }, + { + "epoch": 0.3468193098200621, + "grad_norm": 2.814597918821739, + "learning_rate": 5.565720082686731e-06, + "loss": 0.1941558837890625, + "step": 40110 + }, + { + "epoch": 0.3468625433416053, + "grad_norm": 23.87791918193535, + "learning_rate": 5.565614495704379e-06, + "loss": 0.23372116088867187, + "step": 40115 + }, + { + "epoch": 0.3469057768631486, + "grad_norm": 0.3109490679381371, + "learning_rate": 5.56550889688964e-06, + "loss": 0.061236572265625, + "step": 40120 + }, + { + "epoch": 0.3469490103846919, + "grad_norm": 24.52725193229744, + "learning_rate": 5.5654032862430014e-06, + "loss": 0.2449493408203125, + "step": 40125 + }, + { + "epoch": 0.3469922439062351, + "grad_norm": 17.85793287365388, + "learning_rate": 5.565297663764948e-06, + "loss": 0.1345001220703125, + "step": 40130 + }, + { + "epoch": 0.3470354774277784, + "grad_norm": 5.597830488526257, + "learning_rate": 5.56519202945597e-06, + "loss": 0.131304931640625, + "step": 40135 + }, + { + "epoch": 0.3470787109493217, + "grad_norm": 5.4239163807769035, + "learning_rate": 5.565086383316552e-06, + "loss": 0.08018646240234376, + "step": 40140 + }, + { + "epoch": 0.3471219444708649, + "grad_norm": 0.5731196460187776, + "learning_rate": 5.564980725347182e-06, + "loss": 0.036346435546875, + "step": 40145 + }, + { + "epoch": 0.3471651779924082, + "grad_norm": 1.9941849847351956, + "learning_rate": 5.5648750555483485e-06, + "loss": 0.1477752685546875, + "step": 40150 + }, + { + "epoch": 0.34720841151395143, + "grad_norm": 0.9610709164392383, + "learning_rate": 5.564769373920538e-06, + "loss": 0.313494873046875, + "step": 40155 + }, + { + "epoch": 0.3472516450354947, + "grad_norm": 0.8080130181116991, + "learning_rate": 5.5646636804642365e-06, + "loss": 0.04485664367675781, + "step": 40160 + }, + { + "epoch": 0.347294878557038, + "grad_norm": 4.870895572374685, + "learning_rate": 5.564557975179933e-06, + "loss": 0.36123085021972656, + "step": 40165 + }, + { + "epoch": 0.34733811207858123, + "grad_norm": 0.12471362396777967, + "learning_rate": 5.564452258068116e-06, + "loss": 0.2518760681152344, + "step": 40170 + }, + { + "epoch": 0.3473813456001245, + "grad_norm": 26.452847304025113, + "learning_rate": 5.564346529129271e-06, + "loss": 0.3335762023925781, + "step": 40175 + }, + { + "epoch": 0.3474245791216678, + "grad_norm": 4.671526807952252, + "learning_rate": 5.564240788363886e-06, + "loss": 0.0836822509765625, + "step": 40180 + }, + { + "epoch": 0.347467812643211, + "grad_norm": 4.072616973471304, + "learning_rate": 5.564135035772449e-06, + "loss": 0.42549991607666016, + "step": 40185 + }, + { + "epoch": 0.3475110461647543, + "grad_norm": 0.42592209529562386, + "learning_rate": 5.564029271355449e-06, + "loss": 0.22471466064453124, + "step": 40190 + }, + { + "epoch": 0.34755427968629754, + "grad_norm": 14.205183045160137, + "learning_rate": 5.5639234951133715e-06, + "loss": 0.1278076171875, + "step": 40195 + }, + { + "epoch": 0.3475975132078408, + "grad_norm": 8.474024268097375, + "learning_rate": 5.563817707046706e-06, + "loss": 0.41796112060546875, + "step": 40200 + }, + { + "epoch": 0.3476407467293841, + "grad_norm": 22.79569934224162, + "learning_rate": 5.5637119071559395e-06, + "loss": 0.1432649612426758, + "step": 40205 + }, + { + "epoch": 0.34768398025092734, + "grad_norm": 0.775107318530544, + "learning_rate": 5.56360609544156e-06, + "loss": 0.1392730712890625, + "step": 40210 + }, + { + "epoch": 0.3477272137724706, + "grad_norm": 4.717572802209072, + "learning_rate": 5.563500271904056e-06, + "loss": 0.11571502685546875, + "step": 40215 + }, + { + "epoch": 0.3477704472940139, + "grad_norm": 1.1307295492042388, + "learning_rate": 5.563394436543915e-06, + "loss": 0.5053375244140625, + "step": 40220 + }, + { + "epoch": 0.34781368081555714, + "grad_norm": 1.7646313793721158, + "learning_rate": 5.563288589361625e-06, + "loss": 0.15216522216796874, + "step": 40225 + }, + { + "epoch": 0.3478569143371004, + "grad_norm": 18.757812063139163, + "learning_rate": 5.563182730357676e-06, + "loss": 0.09865646362304688, + "step": 40230 + }, + { + "epoch": 0.3479001478586437, + "grad_norm": 0.14867178997620328, + "learning_rate": 5.5630768595325535e-06, + "loss": 0.04371337890625, + "step": 40235 + }, + { + "epoch": 0.34794338138018693, + "grad_norm": 46.89338230661769, + "learning_rate": 5.5629709768867475e-06, + "loss": 0.2754180908203125, + "step": 40240 + }, + { + "epoch": 0.3479866149017302, + "grad_norm": 3.9739249294808894, + "learning_rate": 5.562865082420746e-06, + "loss": 0.027471923828125, + "step": 40245 + }, + { + "epoch": 0.34802984842327345, + "grad_norm": 4.718777535682361, + "learning_rate": 5.562759176135036e-06, + "loss": 0.0999786376953125, + "step": 40250 + }, + { + "epoch": 0.34807308194481673, + "grad_norm": 0.1979305732383882, + "learning_rate": 5.562653258030108e-06, + "loss": 0.0939788818359375, + "step": 40255 + }, + { + "epoch": 0.34811631546636, + "grad_norm": 26.49462929519146, + "learning_rate": 5.5625473281064494e-06, + "loss": 0.32035064697265625, + "step": 40260 + }, + { + "epoch": 0.34815954898790324, + "grad_norm": 9.46924079412947, + "learning_rate": 5.56244138636455e-06, + "loss": 0.2400787353515625, + "step": 40265 + }, + { + "epoch": 0.34820278250944653, + "grad_norm": 45.5675823886827, + "learning_rate": 5.5623354328048955e-06, + "loss": 0.600799560546875, + "step": 40270 + }, + { + "epoch": 0.3482460160309898, + "grad_norm": 3.1999537725211997, + "learning_rate": 5.562229467427978e-06, + "loss": 0.6275177001953125, + "step": 40275 + }, + { + "epoch": 0.34828924955253304, + "grad_norm": 16.81263475653594, + "learning_rate": 5.562123490234284e-06, + "loss": 0.10284767150878907, + "step": 40280 + }, + { + "epoch": 0.3483324830740763, + "grad_norm": 17.892036193894963, + "learning_rate": 5.562017501224302e-06, + "loss": 0.18568000793457032, + "step": 40285 + }, + { + "epoch": 0.34837571659561956, + "grad_norm": 1.6707349705420902, + "learning_rate": 5.561911500398523e-06, + "loss": 0.15856781005859374, + "step": 40290 + }, + { + "epoch": 0.34841895011716284, + "grad_norm": 17.42022638836178, + "learning_rate": 5.561805487757435e-06, + "loss": 0.109442138671875, + "step": 40295 + }, + { + "epoch": 0.3484621836387061, + "grad_norm": 5.4709350113556665, + "learning_rate": 5.561699463301525e-06, + "loss": 0.0441986083984375, + "step": 40300 + }, + { + "epoch": 0.34850541716024935, + "grad_norm": 1.9237952279669133, + "learning_rate": 5.561593427031283e-06, + "loss": 0.221185302734375, + "step": 40305 + }, + { + "epoch": 0.34854865068179264, + "grad_norm": 17.07358129063664, + "learning_rate": 5.5614873789471994e-06, + "loss": 0.1618072509765625, + "step": 40310 + }, + { + "epoch": 0.3485918842033359, + "grad_norm": 4.823618883869524, + "learning_rate": 5.561381319049763e-06, + "loss": 0.16778182983398438, + "step": 40315 + }, + { + "epoch": 0.34863511772487915, + "grad_norm": 10.359808041377722, + "learning_rate": 5.561275247339461e-06, + "loss": 0.1711334228515625, + "step": 40320 + }, + { + "epoch": 0.34867835124642244, + "grad_norm": 2.496006940542368, + "learning_rate": 5.561169163816784e-06, + "loss": 0.18582649230957032, + "step": 40325 + }, + { + "epoch": 0.34872158476796566, + "grad_norm": 0.8211608153123618, + "learning_rate": 5.561063068482223e-06, + "loss": 0.03565702438354492, + "step": 40330 + }, + { + "epoch": 0.34876481828950895, + "grad_norm": 35.52172144178006, + "learning_rate": 5.5609569613362634e-06, + "loss": 0.24008712768554688, + "step": 40335 + }, + { + "epoch": 0.34880805181105223, + "grad_norm": 20.6803305200565, + "learning_rate": 5.560850842379397e-06, + "loss": 0.5964523315429687, + "step": 40340 + }, + { + "epoch": 0.34885128533259546, + "grad_norm": 21.186551075629765, + "learning_rate": 5.560744711612113e-06, + "loss": 0.25029296875, + "step": 40345 + }, + { + "epoch": 0.34889451885413875, + "grad_norm": 16.081416384995613, + "learning_rate": 5.5606385690349005e-06, + "loss": 0.10486874580383301, + "step": 40350 + }, + { + "epoch": 0.34893775237568203, + "grad_norm": 2.877107055635678, + "learning_rate": 5.560532414648249e-06, + "loss": 0.16836128234863282, + "step": 40355 + }, + { + "epoch": 0.34898098589722526, + "grad_norm": 6.619933952487609, + "learning_rate": 5.560426248452649e-06, + "loss": 0.274224853515625, + "step": 40360 + }, + { + "epoch": 0.34902421941876854, + "grad_norm": 10.037326488096026, + "learning_rate": 5.560320070448589e-06, + "loss": 0.18928985595703124, + "step": 40365 + }, + { + "epoch": 0.3490674529403118, + "grad_norm": 10.294904106828824, + "learning_rate": 5.560213880636559e-06, + "loss": 0.18353729248046874, + "step": 40370 + }, + { + "epoch": 0.34911068646185506, + "grad_norm": 1.10614156109384, + "learning_rate": 5.560107679017049e-06, + "loss": 0.2142608642578125, + "step": 40375 + }, + { + "epoch": 0.34915391998339834, + "grad_norm": 18.40900276336842, + "learning_rate": 5.560001465590548e-06, + "loss": 0.2270599365234375, + "step": 40380 + }, + { + "epoch": 0.34919715350494157, + "grad_norm": 12.189760430181543, + "learning_rate": 5.5598952403575474e-06, + "loss": 0.16021728515625, + "step": 40385 + }, + { + "epoch": 0.34924038702648486, + "grad_norm": 3.9601078334329842, + "learning_rate": 5.559789003318535e-06, + "loss": 0.3201396942138672, + "step": 40390 + }, + { + "epoch": 0.34928362054802814, + "grad_norm": 52.34139237132152, + "learning_rate": 5.559682754474002e-06, + "loss": 0.32667236328125, + "step": 40395 + }, + { + "epoch": 0.34932685406957137, + "grad_norm": 35.98440022375821, + "learning_rate": 5.55957649382444e-06, + "loss": 0.12439079284667968, + "step": 40400 + }, + { + "epoch": 0.34937008759111465, + "grad_norm": 28.63892047613772, + "learning_rate": 5.559470221370336e-06, + "loss": 0.11917572021484375, + "step": 40405 + }, + { + "epoch": 0.34941332111265794, + "grad_norm": 29.24750426463084, + "learning_rate": 5.559363937112182e-06, + "loss": 0.367987060546875, + "step": 40410 + }, + { + "epoch": 0.34945655463420117, + "grad_norm": 13.144049368375272, + "learning_rate": 5.5592576410504665e-06, + "loss": 0.19654388427734376, + "step": 40415 + }, + { + "epoch": 0.34949978815574445, + "grad_norm": 0.2650848276043051, + "learning_rate": 5.559151333185682e-06, + "loss": 0.211334228515625, + "step": 40420 + }, + { + "epoch": 0.3495430216772877, + "grad_norm": 31.765620066423445, + "learning_rate": 5.5590450135183174e-06, + "loss": 0.5062088966369629, + "step": 40425 + }, + { + "epoch": 0.34958625519883096, + "grad_norm": 0.734315115340287, + "learning_rate": 5.558938682048864e-06, + "loss": 0.245880126953125, + "step": 40430 + }, + { + "epoch": 0.34962948872037425, + "grad_norm": 1.9807461458972218, + "learning_rate": 5.558832338777811e-06, + "loss": 0.14913864135742189, + "step": 40435 + }, + { + "epoch": 0.3496727222419175, + "grad_norm": 13.839527627781742, + "learning_rate": 5.5587259837056485e-06, + "loss": 0.45056686401367185, + "step": 40440 + }, + { + "epoch": 0.34971595576346076, + "grad_norm": 9.872116951308273, + "learning_rate": 5.558619616832869e-06, + "loss": 0.24330673217773438, + "step": 40445 + }, + { + "epoch": 0.34975918928500405, + "grad_norm": 6.291707195747628, + "learning_rate": 5.5585132381599615e-06, + "loss": 0.115087890625, + "step": 40450 + }, + { + "epoch": 0.3498024228065473, + "grad_norm": 1.2255327893159647, + "learning_rate": 5.558406847687416e-06, + "loss": 0.2375, + "step": 40455 + }, + { + "epoch": 0.34984565632809056, + "grad_norm": 28.48076694949617, + "learning_rate": 5.558300445415726e-06, + "loss": 0.3888721466064453, + "step": 40460 + }, + { + "epoch": 0.3498888898496338, + "grad_norm": 1.9360275049402291, + "learning_rate": 5.55819403134538e-06, + "loss": 0.25501708984375, + "step": 40465 + }, + { + "epoch": 0.3499321233711771, + "grad_norm": 52.44500969258209, + "learning_rate": 5.5580876054768686e-06, + "loss": 0.22618865966796875, + "step": 40470 + }, + { + "epoch": 0.34997535689272036, + "grad_norm": 9.993636349553466, + "learning_rate": 5.557981167810683e-06, + "loss": 0.14321670532226563, + "step": 40475 + }, + { + "epoch": 0.3500185904142636, + "grad_norm": 23.535461329843788, + "learning_rate": 5.5578747183473154e-06, + "loss": 0.1911163330078125, + "step": 40480 + }, + { + "epoch": 0.35006182393580687, + "grad_norm": 4.110117973230297, + "learning_rate": 5.5577682570872545e-06, + "loss": 0.31171531677246095, + "step": 40485 + }, + { + "epoch": 0.35010505745735016, + "grad_norm": 13.75548151326752, + "learning_rate": 5.5576617840309925e-06, + "loss": 0.09700164794921876, + "step": 40490 + }, + { + "epoch": 0.3501482909788934, + "grad_norm": 8.449656105481278, + "learning_rate": 5.557555299179022e-06, + "loss": 0.4331146240234375, + "step": 40495 + }, + { + "epoch": 0.35019152450043667, + "grad_norm": 5.673317316671431, + "learning_rate": 5.55744880253183e-06, + "loss": 0.20674285888671876, + "step": 40500 + }, + { + "epoch": 0.3502347580219799, + "grad_norm": 20.509630514332464, + "learning_rate": 5.557342294089912e-06, + "loss": 0.294873046875, + "step": 40505 + }, + { + "epoch": 0.3502779915435232, + "grad_norm": 1.8135609078625547, + "learning_rate": 5.557235773853756e-06, + "loss": 0.227325439453125, + "step": 40510 + }, + { + "epoch": 0.35032122506506647, + "grad_norm": 3.3503478013482115, + "learning_rate": 5.557129241823855e-06, + "loss": 0.146466064453125, + "step": 40515 + }, + { + "epoch": 0.3503644585866097, + "grad_norm": 4.39155691474321, + "learning_rate": 5.5570226980007e-06, + "loss": 0.24251632690429686, + "step": 40520 + }, + { + "epoch": 0.350407692108153, + "grad_norm": 0.6454797737907831, + "learning_rate": 5.556916142384783e-06, + "loss": 0.2501434326171875, + "step": 40525 + }, + { + "epoch": 0.35045092562969626, + "grad_norm": 0.7343145561381423, + "learning_rate": 5.556809574976593e-06, + "loss": 0.11570053100585938, + "step": 40530 + }, + { + "epoch": 0.3504941591512395, + "grad_norm": 0.942549832257533, + "learning_rate": 5.556702995776625e-06, + "loss": 0.09221420288085938, + "step": 40535 + }, + { + "epoch": 0.3505373926727828, + "grad_norm": 4.484038889623902, + "learning_rate": 5.556596404785367e-06, + "loss": 0.04519596099853516, + "step": 40540 + }, + { + "epoch": 0.350580626194326, + "grad_norm": 0.05649141128608836, + "learning_rate": 5.556489802003313e-06, + "loss": 0.10394172668457032, + "step": 40545 + }, + { + "epoch": 0.3506238597158693, + "grad_norm": 42.99332094865235, + "learning_rate": 5.556383187430954e-06, + "loss": 0.3781494140625, + "step": 40550 + }, + { + "epoch": 0.3506670932374126, + "grad_norm": 1.0505824186215091, + "learning_rate": 5.556276561068782e-06, + "loss": 0.1956695556640625, + "step": 40555 + }, + { + "epoch": 0.3507103267589558, + "grad_norm": 29.79720989490254, + "learning_rate": 5.556169922917287e-06, + "loss": 0.11726264953613282, + "step": 40560 + }, + { + "epoch": 0.3507535602804991, + "grad_norm": 3.842715318509261, + "learning_rate": 5.556063272976963e-06, + "loss": 0.22607574462890626, + "step": 40565 + }, + { + "epoch": 0.3507967938020424, + "grad_norm": 13.107980581841257, + "learning_rate": 5.555956611248302e-06, + "loss": 0.08178863525390626, + "step": 40570 + }, + { + "epoch": 0.3508400273235856, + "grad_norm": 0.9539442492468635, + "learning_rate": 5.5558499377317946e-06, + "loss": 0.08640518188476562, + "step": 40575 + }, + { + "epoch": 0.3508832608451289, + "grad_norm": 4.8073507659034105, + "learning_rate": 5.555743252427932e-06, + "loss": 0.18896865844726562, + "step": 40580 + }, + { + "epoch": 0.35092649436667217, + "grad_norm": 3.299018233397554, + "learning_rate": 5.555636555337208e-06, + "loss": 0.1043701171875, + "step": 40585 + }, + { + "epoch": 0.3509697278882154, + "grad_norm": 4.8759594677509925, + "learning_rate": 5.555529846460115e-06, + "loss": 0.360382080078125, + "step": 40590 + }, + { + "epoch": 0.3510129614097587, + "grad_norm": 11.651293587901092, + "learning_rate": 5.555423125797142e-06, + "loss": 0.27130851745605467, + "step": 40595 + }, + { + "epoch": 0.3510561949313019, + "grad_norm": 32.81132050143377, + "learning_rate": 5.555316393348786e-06, + "loss": 0.14430770874023438, + "step": 40600 + }, + { + "epoch": 0.3510994284528452, + "grad_norm": 1.7683110325919167, + "learning_rate": 5.555209649115535e-06, + "loss": 0.1316436767578125, + "step": 40605 + }, + { + "epoch": 0.3511426619743885, + "grad_norm": 40.19495545611485, + "learning_rate": 5.555102893097882e-06, + "loss": 0.41085968017578123, + "step": 40610 + }, + { + "epoch": 0.3511858954959317, + "grad_norm": 11.21081768874985, + "learning_rate": 5.554996125296322e-06, + "loss": 0.13754653930664062, + "step": 40615 + }, + { + "epoch": 0.351229129017475, + "grad_norm": 7.7485859232437155, + "learning_rate": 5.554889345711345e-06, + "loss": 0.058393096923828124, + "step": 40620 + }, + { + "epoch": 0.3512723625390183, + "grad_norm": 8.142210887974352, + "learning_rate": 5.554782554343443e-06, + "loss": 0.1773895263671875, + "step": 40625 + }, + { + "epoch": 0.3513155960605615, + "grad_norm": 0.652961540103946, + "learning_rate": 5.554675751193112e-06, + "loss": 0.5166458129882813, + "step": 40630 + }, + { + "epoch": 0.3513588295821048, + "grad_norm": 14.855074117777349, + "learning_rate": 5.5545689362608405e-06, + "loss": 0.507806396484375, + "step": 40635 + }, + { + "epoch": 0.351402063103648, + "grad_norm": 3.595571645325042, + "learning_rate": 5.554462109547123e-06, + "loss": 0.2030303955078125, + "step": 40640 + }, + { + "epoch": 0.3514452966251913, + "grad_norm": 24.877216771547907, + "learning_rate": 5.5543552710524514e-06, + "loss": 0.28594970703125, + "step": 40645 + }, + { + "epoch": 0.3514885301467346, + "grad_norm": 6.583692158530597, + "learning_rate": 5.55424842077732e-06, + "loss": 0.07199554443359375, + "step": 40650 + }, + { + "epoch": 0.3515317636682778, + "grad_norm": 38.069119216182166, + "learning_rate": 5.55414155872222e-06, + "loss": 0.14495010375976564, + "step": 40655 + }, + { + "epoch": 0.3515749971898211, + "grad_norm": 28.728757740826612, + "learning_rate": 5.5540346848876445e-06, + "loss": 0.5819305419921875, + "step": 40660 + }, + { + "epoch": 0.3516182307113644, + "grad_norm": 8.498810696110953, + "learning_rate": 5.553927799274087e-06, + "loss": 0.270794677734375, + "step": 40665 + }, + { + "epoch": 0.3516614642329076, + "grad_norm": 1.0057232569292611, + "learning_rate": 5.553820901882041e-06, + "loss": 0.1610240936279297, + "step": 40670 + }, + { + "epoch": 0.3517046977544509, + "grad_norm": 6.014977767879911, + "learning_rate": 5.553713992711998e-06, + "loss": 0.056762313842773436, + "step": 40675 + }, + { + "epoch": 0.35174793127599413, + "grad_norm": 10.005775921701556, + "learning_rate": 5.553607071764452e-06, + "loss": 0.06252288818359375, + "step": 40680 + }, + { + "epoch": 0.3517911647975374, + "grad_norm": 0.6497790031435369, + "learning_rate": 5.553500139039895e-06, + "loss": 0.07548980712890625, + "step": 40685 + }, + { + "epoch": 0.3518343983190807, + "grad_norm": 9.266303579156457, + "learning_rate": 5.553393194538822e-06, + "loss": 0.22041854858398438, + "step": 40690 + }, + { + "epoch": 0.35187763184062393, + "grad_norm": 9.241195044996724, + "learning_rate": 5.553286238261725e-06, + "loss": 0.073370361328125, + "step": 40695 + }, + { + "epoch": 0.3519208653621672, + "grad_norm": 12.725830967660713, + "learning_rate": 5.553179270209098e-06, + "loss": 0.06697845458984375, + "step": 40700 + }, + { + "epoch": 0.3519640988837105, + "grad_norm": 4.099338013265039, + "learning_rate": 5.553072290381432e-06, + "loss": 0.07822265625, + "step": 40705 + }, + { + "epoch": 0.3520073324052537, + "grad_norm": 3.1128435725446995, + "learning_rate": 5.552965298779223e-06, + "loss": 0.174969482421875, + "step": 40710 + }, + { + "epoch": 0.352050565926797, + "grad_norm": 17.500527622291596, + "learning_rate": 5.552858295402964e-06, + "loss": 0.11612453460693359, + "step": 40715 + }, + { + "epoch": 0.35209379944834024, + "grad_norm": 2.0729239634610646, + "learning_rate": 5.552751280253148e-06, + "loss": 0.01784515380859375, + "step": 40720 + }, + { + "epoch": 0.3521370329698835, + "grad_norm": 3.016052583447483, + "learning_rate": 5.552644253330269e-06, + "loss": 0.1095855712890625, + "step": 40725 + }, + { + "epoch": 0.3521802664914268, + "grad_norm": 3.271846160254535, + "learning_rate": 5.55253721463482e-06, + "loss": 0.1679473876953125, + "step": 40730 + }, + { + "epoch": 0.35222350001297004, + "grad_norm": 45.56948219365736, + "learning_rate": 5.552430164167295e-06, + "loss": 0.464373779296875, + "step": 40735 + }, + { + "epoch": 0.3522667335345133, + "grad_norm": 11.20533857256237, + "learning_rate": 5.552323101928187e-06, + "loss": 0.22249832153320312, + "step": 40740 + }, + { + "epoch": 0.3523099670560566, + "grad_norm": 2.6502732350348306, + "learning_rate": 5.552216027917991e-06, + "loss": 0.1081268310546875, + "step": 40745 + }, + { + "epoch": 0.35235320057759983, + "grad_norm": 7.132372051009153, + "learning_rate": 5.5521089421372e-06, + "loss": 0.6479522705078125, + "step": 40750 + }, + { + "epoch": 0.3523964340991431, + "grad_norm": 5.872142101965706, + "learning_rate": 5.552001844586308e-06, + "loss": 0.20717735290527345, + "step": 40755 + }, + { + "epoch": 0.35243966762068635, + "grad_norm": 0.5423427072818674, + "learning_rate": 5.551894735265809e-06, + "loss": 0.11887626647949219, + "step": 40760 + }, + { + "epoch": 0.35248290114222963, + "grad_norm": 4.047258309306702, + "learning_rate": 5.551787614176197e-06, + "loss": 0.16392669677734376, + "step": 40765 + }, + { + "epoch": 0.3525261346637729, + "grad_norm": 48.40879803307279, + "learning_rate": 5.551680481317966e-06, + "loss": 0.31388168334960936, + "step": 40770 + }, + { + "epoch": 0.35256936818531615, + "grad_norm": 10.353830786433045, + "learning_rate": 5.551573336691611e-06, + "loss": 0.439398193359375, + "step": 40775 + }, + { + "epoch": 0.35261260170685943, + "grad_norm": 15.567299990407085, + "learning_rate": 5.5514661802976235e-06, + "loss": 0.09472389221191406, + "step": 40780 + }, + { + "epoch": 0.3526558352284027, + "grad_norm": 19.53988820636809, + "learning_rate": 5.551359012136501e-06, + "loss": 0.3557373046875, + "step": 40785 + }, + { + "epoch": 0.35269906874994594, + "grad_norm": 1.8231007477665409, + "learning_rate": 5.5512518322087355e-06, + "loss": 0.13693695068359374, + "step": 40790 + }, + { + "epoch": 0.3527423022714892, + "grad_norm": 24.728220554834763, + "learning_rate": 5.551144640514822e-06, + "loss": 0.24263916015625, + "step": 40795 + }, + { + "epoch": 0.3527855357930325, + "grad_norm": 44.87112631987885, + "learning_rate": 5.5510374370552545e-06, + "loss": 0.295806884765625, + "step": 40800 + }, + { + "epoch": 0.35282876931457574, + "grad_norm": 8.66499557887344, + "learning_rate": 5.550930221830528e-06, + "loss": 0.052973175048828126, + "step": 40805 + }, + { + "epoch": 0.352872002836119, + "grad_norm": 1.2297698284125265, + "learning_rate": 5.550822994841137e-06, + "loss": 0.044272232055664065, + "step": 40810 + }, + { + "epoch": 0.35291523635766225, + "grad_norm": 22.039009251005844, + "learning_rate": 5.550715756087576e-06, + "loss": 0.47951278686523435, + "step": 40815 + }, + { + "epoch": 0.35295846987920554, + "grad_norm": 4.43004896226267, + "learning_rate": 5.550608505570338e-06, + "loss": 0.1589599609375, + "step": 40820 + }, + { + "epoch": 0.3530017034007488, + "grad_norm": 0.5695866949862185, + "learning_rate": 5.55050124328992e-06, + "loss": 0.07472381591796876, + "step": 40825 + }, + { + "epoch": 0.35304493692229205, + "grad_norm": 4.545851014788443, + "learning_rate": 5.550393969246815e-06, + "loss": 0.14502601623535155, + "step": 40830 + }, + { + "epoch": 0.35308817044383534, + "grad_norm": 0.8924717184769514, + "learning_rate": 5.5502866834415194e-06, + "loss": 0.07847518920898437, + "step": 40835 + }, + { + "epoch": 0.3531314039653786, + "grad_norm": 22.261908064312482, + "learning_rate": 5.550179385874526e-06, + "loss": 0.174371337890625, + "step": 40840 + }, + { + "epoch": 0.35317463748692185, + "grad_norm": 38.483379654688335, + "learning_rate": 5.5500720765463315e-06, + "loss": 0.297918701171875, + "step": 40845 + }, + { + "epoch": 0.35321787100846513, + "grad_norm": 15.588375932956327, + "learning_rate": 5.5499647554574295e-06, + "loss": 0.44568710327148436, + "step": 40850 + }, + { + "epoch": 0.35326110453000836, + "grad_norm": 0.41147607856307034, + "learning_rate": 5.549857422608315e-06, + "loss": 0.10856170654296875, + "step": 40855 + }, + { + "epoch": 0.35330433805155165, + "grad_norm": 10.189088069378252, + "learning_rate": 5.5497500779994835e-06, + "loss": 0.07714462280273438, + "step": 40860 + }, + { + "epoch": 0.35334757157309493, + "grad_norm": 14.754115420639227, + "learning_rate": 5.5496427216314305e-06, + "loss": 0.27177581787109373, + "step": 40865 + }, + { + "epoch": 0.35339080509463816, + "grad_norm": 16.279727395787322, + "learning_rate": 5.54953535350465e-06, + "loss": 0.14545135498046874, + "step": 40870 + }, + { + "epoch": 0.35343403861618145, + "grad_norm": 4.330739155581509, + "learning_rate": 5.549427973619637e-06, + "loss": 0.08175048828125, + "step": 40875 + }, + { + "epoch": 0.35347727213772473, + "grad_norm": 8.385847164747627, + "learning_rate": 5.5493205819768894e-06, + "loss": 0.06907501220703124, + "step": 40880 + }, + { + "epoch": 0.35352050565926796, + "grad_norm": 8.800724857546639, + "learning_rate": 5.549213178576899e-06, + "loss": 0.2626983642578125, + "step": 40885 + }, + { + "epoch": 0.35356373918081124, + "grad_norm": 8.472653345333681, + "learning_rate": 5.549105763420163e-06, + "loss": 0.29154052734375, + "step": 40890 + }, + { + "epoch": 0.35360697270235447, + "grad_norm": 8.633368503992672, + "learning_rate": 5.548998336507177e-06, + "loss": 0.53419189453125, + "step": 40895 + }, + { + "epoch": 0.35365020622389776, + "grad_norm": 43.23450218154411, + "learning_rate": 5.548890897838435e-06, + "loss": 0.5340423583984375, + "step": 40900 + }, + { + "epoch": 0.35369343974544104, + "grad_norm": 1.1749527615628381, + "learning_rate": 5.5487834474144345e-06, + "loss": 0.14507904052734374, + "step": 40905 + }, + { + "epoch": 0.35373667326698427, + "grad_norm": 0.5368391565687791, + "learning_rate": 5.5486759852356695e-06, + "loss": 0.049456787109375, + "step": 40910 + }, + { + "epoch": 0.35377990678852755, + "grad_norm": 5.9629090083146625, + "learning_rate": 5.548568511302635e-06, + "loss": 0.25874176025390627, + "step": 40915 + }, + { + "epoch": 0.35382314031007084, + "grad_norm": 1.5263894048777358, + "learning_rate": 5.548461025615829e-06, + "loss": 0.19429473876953124, + "step": 40920 + }, + { + "epoch": 0.35386637383161407, + "grad_norm": 10.95248556303549, + "learning_rate": 5.548353528175745e-06, + "loss": 0.22181472778320313, + "step": 40925 + }, + { + "epoch": 0.35390960735315735, + "grad_norm": 13.155799165924662, + "learning_rate": 5.548246018982881e-06, + "loss": 0.20506362915039061, + "step": 40930 + }, + { + "epoch": 0.3539528408747006, + "grad_norm": 7.668945676716776, + "learning_rate": 5.548138498037731e-06, + "loss": 0.09486083984375, + "step": 40935 + }, + { + "epoch": 0.35399607439624386, + "grad_norm": 27.400974262551795, + "learning_rate": 5.548030965340791e-06, + "loss": 0.28822784423828124, + "step": 40940 + }, + { + "epoch": 0.35403930791778715, + "grad_norm": 20.983575447311896, + "learning_rate": 5.547923420892557e-06, + "loss": 0.25064239501953123, + "step": 40945 + }, + { + "epoch": 0.3540825414393304, + "grad_norm": 9.564890202687264, + "learning_rate": 5.5478158646935264e-06, + "loss": 0.10361328125, + "step": 40950 + }, + { + "epoch": 0.35412577496087366, + "grad_norm": 13.901140215107091, + "learning_rate": 5.547708296744193e-06, + "loss": 0.10595550537109374, + "step": 40955 + }, + { + "epoch": 0.35416900848241695, + "grad_norm": 1.292747058263668, + "learning_rate": 5.5476007170450545e-06, + "loss": 0.2823333740234375, + "step": 40960 + }, + { + "epoch": 0.3542122420039602, + "grad_norm": 12.45396673035936, + "learning_rate": 5.547493125596607e-06, + "loss": 0.1982147216796875, + "step": 40965 + }, + { + "epoch": 0.35425547552550346, + "grad_norm": 6.315347174596962, + "learning_rate": 5.547385522399347e-06, + "loss": 0.24220199584960939, + "step": 40970 + }, + { + "epoch": 0.35429870904704674, + "grad_norm": 0.6441787019729657, + "learning_rate": 5.547277907453768e-06, + "loss": 0.3708648681640625, + "step": 40975 + }, + { + "epoch": 0.35434194256859, + "grad_norm": 11.607559752495769, + "learning_rate": 5.54717028076037e-06, + "loss": 0.15309295654296876, + "step": 40980 + }, + { + "epoch": 0.35438517609013326, + "grad_norm": 8.552789010282956, + "learning_rate": 5.547062642319647e-06, + "loss": 0.20751800537109374, + "step": 40985 + }, + { + "epoch": 0.3544284096116765, + "grad_norm": 2.4570607033492546, + "learning_rate": 5.546954992132097e-06, + "loss": 0.07690811157226562, + "step": 40990 + }, + { + "epoch": 0.35447164313321977, + "grad_norm": 2.057934249434966, + "learning_rate": 5.5468473301982145e-06, + "loss": 0.04816169738769531, + "step": 40995 + }, + { + "epoch": 0.35451487665476306, + "grad_norm": 18.698834321091947, + "learning_rate": 5.546739656518498e-06, + "loss": 0.3623329162597656, + "step": 41000 + }, + { + "epoch": 0.3545581101763063, + "grad_norm": 0.6066263950400874, + "learning_rate": 5.5466319710934426e-06, + "loss": 0.02254180908203125, + "step": 41005 + }, + { + "epoch": 0.35460134369784957, + "grad_norm": 3.803363297408112, + "learning_rate": 5.546524273923546e-06, + "loss": 0.25079879760742185, + "step": 41010 + }, + { + "epoch": 0.35464457721939285, + "grad_norm": 0.4802633660254009, + "learning_rate": 5.5464165650093045e-06, + "loss": 0.34403076171875, + "step": 41015 + }, + { + "epoch": 0.3546878107409361, + "grad_norm": 11.413218659982958, + "learning_rate": 5.546308844351214e-06, + "loss": 0.12576751708984374, + "step": 41020 + }, + { + "epoch": 0.35473104426247937, + "grad_norm": 12.906158297199097, + "learning_rate": 5.546201111949773e-06, + "loss": 0.358416748046875, + "step": 41025 + }, + { + "epoch": 0.3547742777840226, + "grad_norm": 18.584652416698106, + "learning_rate": 5.546093367805477e-06, + "loss": 0.0557098388671875, + "step": 41030 + }, + { + "epoch": 0.3548175113055659, + "grad_norm": 4.37174415470841, + "learning_rate": 5.5459856119188236e-06, + "loss": 0.08416748046875, + "step": 41035 + }, + { + "epoch": 0.35486074482710916, + "grad_norm": 3.295952520274206, + "learning_rate": 5.54587784429031e-06, + "loss": 0.14066314697265625, + "step": 41040 + }, + { + "epoch": 0.3549039783486524, + "grad_norm": 0.09880338263538584, + "learning_rate": 5.545770064920433e-06, + "loss": 0.07994804382324219, + "step": 41045 + }, + { + "epoch": 0.3549472118701957, + "grad_norm": 9.040512717199805, + "learning_rate": 5.545662273809688e-06, + "loss": 0.2038421630859375, + "step": 41050 + }, + { + "epoch": 0.35499044539173896, + "grad_norm": 12.904460442157783, + "learning_rate": 5.545554470958575e-06, + "loss": 0.196923828125, + "step": 41055 + }, + { + "epoch": 0.3550336789132822, + "grad_norm": 5.9516200471644, + "learning_rate": 5.545446656367588e-06, + "loss": 0.0539398193359375, + "step": 41060 + }, + { + "epoch": 0.3550769124348255, + "grad_norm": 36.530878012472606, + "learning_rate": 5.545338830037227e-06, + "loss": 0.29894561767578126, + "step": 41065 + }, + { + "epoch": 0.3551201459563687, + "grad_norm": 4.401847962690854, + "learning_rate": 5.545230991967988e-06, + "loss": 0.0435302734375, + "step": 41070 + }, + { + "epoch": 0.355163379477912, + "grad_norm": 5.709239117071277, + "learning_rate": 5.545123142160369e-06, + "loss": 0.02161407470703125, + "step": 41075 + }, + { + "epoch": 0.3552066129994553, + "grad_norm": 4.725826227917074, + "learning_rate": 5.545015280614866e-06, + "loss": 0.07628421783447266, + "step": 41080 + }, + { + "epoch": 0.3552498465209985, + "grad_norm": 5.626929899245852, + "learning_rate": 5.544907407331978e-06, + "loss": 0.24808082580566407, + "step": 41085 + }, + { + "epoch": 0.3552930800425418, + "grad_norm": 32.016646727724385, + "learning_rate": 5.544799522312202e-06, + "loss": 0.1246246337890625, + "step": 41090 + }, + { + "epoch": 0.35533631356408507, + "grad_norm": 6.422369268827486, + "learning_rate": 5.544691625556036e-06, + "loss": 0.06727676391601563, + "step": 41095 + }, + { + "epoch": 0.3553795470856283, + "grad_norm": 23.219411310523615, + "learning_rate": 5.544583717063976e-06, + "loss": 0.047557830810546875, + "step": 41100 + }, + { + "epoch": 0.3554227806071716, + "grad_norm": 16.15142850284398, + "learning_rate": 5.544475796836521e-06, + "loss": 0.07501106262207032, + "step": 41105 + }, + { + "epoch": 0.3554660141287148, + "grad_norm": 0.7355492616844204, + "learning_rate": 5.544367864874169e-06, + "loss": 0.18697357177734375, + "step": 41110 + }, + { + "epoch": 0.3555092476502581, + "grad_norm": 5.085360471108633, + "learning_rate": 5.544259921177417e-06, + "loss": 0.06400146484375, + "step": 41115 + }, + { + "epoch": 0.3555524811718014, + "grad_norm": 2.9951265341906037, + "learning_rate": 5.544151965746762e-06, + "loss": 0.2661376953125, + "step": 41120 + }, + { + "epoch": 0.3555957146933446, + "grad_norm": 2.2045441865229347, + "learning_rate": 5.544043998582705e-06, + "loss": 0.09918212890625, + "step": 41125 + }, + { + "epoch": 0.3556389482148879, + "grad_norm": 6.3344805726771565, + "learning_rate": 5.54393601968574e-06, + "loss": 0.20361576080322266, + "step": 41130 + }, + { + "epoch": 0.3556821817364312, + "grad_norm": 0.5564444288701527, + "learning_rate": 5.543828029056368e-06, + "loss": 0.35373382568359374, + "step": 41135 + }, + { + "epoch": 0.3557254152579744, + "grad_norm": 9.070347314416264, + "learning_rate": 5.543720026695085e-06, + "loss": 0.463330078125, + "step": 41140 + }, + { + "epoch": 0.3557686487795177, + "grad_norm": 6.062115409751216, + "learning_rate": 5.543612012602391e-06, + "loss": 0.06766738891601562, + "step": 41145 + }, + { + "epoch": 0.355811882301061, + "grad_norm": 37.23898425147459, + "learning_rate": 5.543503986778783e-06, + "loss": 0.2201690673828125, + "step": 41150 + }, + { + "epoch": 0.3558551158226042, + "grad_norm": 7.1452631570900165, + "learning_rate": 5.543395949224758e-06, + "loss": 0.057978057861328126, + "step": 41155 + }, + { + "epoch": 0.3558983493441475, + "grad_norm": 16.385193862909663, + "learning_rate": 5.5432878999408175e-06, + "loss": 0.11508026123046874, + "step": 41160 + }, + { + "epoch": 0.3559415828656907, + "grad_norm": 7.591688115330764, + "learning_rate": 5.543179838927457e-06, + "loss": 0.2917633056640625, + "step": 41165 + }, + { + "epoch": 0.355984816387234, + "grad_norm": 7.88268837738556, + "learning_rate": 5.543071766185176e-06, + "loss": 0.08732757568359376, + "step": 41170 + }, + { + "epoch": 0.3560280499087773, + "grad_norm": 7.651541941551274, + "learning_rate": 5.542963681714472e-06, + "loss": 0.4599311828613281, + "step": 41175 + }, + { + "epoch": 0.3560712834303205, + "grad_norm": 1.7500765112667296, + "learning_rate": 5.542855585515846e-06, + "loss": 0.19881591796875, + "step": 41180 + }, + { + "epoch": 0.3561145169518638, + "grad_norm": 8.464964740806149, + "learning_rate": 5.542747477589792e-06, + "loss": 0.269122314453125, + "step": 41185 + }, + { + "epoch": 0.3561577504734071, + "grad_norm": 27.994988654627743, + "learning_rate": 5.542639357936814e-06, + "loss": 0.11653900146484375, + "step": 41190 + }, + { + "epoch": 0.3562009839949503, + "grad_norm": 1.1247451734572467, + "learning_rate": 5.542531226557406e-06, + "loss": 0.23193588256835937, + "step": 41195 + }, + { + "epoch": 0.3562442175164936, + "grad_norm": 11.404585219243982, + "learning_rate": 5.54242308345207e-06, + "loss": 0.1814300537109375, + "step": 41200 + }, + { + "epoch": 0.35628745103803683, + "grad_norm": 1.0061296274872902, + "learning_rate": 5.542314928621302e-06, + "loss": 0.32773284912109374, + "step": 41205 + }, + { + "epoch": 0.3563306845595801, + "grad_norm": 26.750492764160278, + "learning_rate": 5.542206762065603e-06, + "loss": 0.3598045349121094, + "step": 41210 + }, + { + "epoch": 0.3563739180811234, + "grad_norm": 21.71332049278171, + "learning_rate": 5.542098583785472e-06, + "loss": 0.21741600036621095, + "step": 41215 + }, + { + "epoch": 0.3564171516026666, + "grad_norm": 12.016456160438484, + "learning_rate": 5.541990393781406e-06, + "loss": 0.10773544311523438, + "step": 41220 + }, + { + "epoch": 0.3564603851242099, + "grad_norm": 14.52332622815077, + "learning_rate": 5.541882192053905e-06, + "loss": 0.0614501953125, + "step": 41225 + }, + { + "epoch": 0.3565036186457532, + "grad_norm": 5.698585986351648, + "learning_rate": 5.541773978603468e-06, + "loss": 0.15044097900390624, + "step": 41230 + }, + { + "epoch": 0.3565468521672964, + "grad_norm": 12.123514762599084, + "learning_rate": 5.541665753430594e-06, + "loss": 0.32720947265625, + "step": 41235 + }, + { + "epoch": 0.3565900856888397, + "grad_norm": 23.754934764639874, + "learning_rate": 5.5415575165357814e-06, + "loss": 0.4344451904296875, + "step": 41240 + }, + { + "epoch": 0.35663331921038294, + "grad_norm": 1.2642387498049992, + "learning_rate": 5.541449267919531e-06, + "loss": 0.07392120361328125, + "step": 41245 + }, + { + "epoch": 0.3566765527319262, + "grad_norm": 7.020955136508019, + "learning_rate": 5.541341007582341e-06, + "loss": 0.49367828369140626, + "step": 41250 + }, + { + "epoch": 0.3567197862534695, + "grad_norm": 2.756011150399427, + "learning_rate": 5.541232735524711e-06, + "loss": 0.20762786865234376, + "step": 41255 + }, + { + "epoch": 0.35676301977501274, + "grad_norm": 26.66120069291145, + "learning_rate": 5.54112445174714e-06, + "loss": 0.34780426025390626, + "step": 41260 + }, + { + "epoch": 0.356806253296556, + "grad_norm": 1.1865286341100405, + "learning_rate": 5.541016156250127e-06, + "loss": 0.22490386962890624, + "step": 41265 + }, + { + "epoch": 0.3568494868180993, + "grad_norm": 5.839313177774661, + "learning_rate": 5.5409078490341724e-06, + "loss": 0.2800689697265625, + "step": 41270 + }, + { + "epoch": 0.35689272033964253, + "grad_norm": 29.028475795474648, + "learning_rate": 5.5407995300997756e-06, + "loss": 0.220147705078125, + "step": 41275 + }, + { + "epoch": 0.3569359538611858, + "grad_norm": 2.7848846856793004, + "learning_rate": 5.540691199447435e-06, + "loss": 0.15401535034179686, + "step": 41280 + }, + { + "epoch": 0.35697918738272905, + "grad_norm": 38.60229887309941, + "learning_rate": 5.540582857077652e-06, + "loss": 0.34819869995117186, + "step": 41285 + }, + { + "epoch": 0.35702242090427233, + "grad_norm": 4.56366596244436, + "learning_rate": 5.540474502990925e-06, + "loss": 0.1326263427734375, + "step": 41290 + }, + { + "epoch": 0.3570656544258156, + "grad_norm": 20.46084554001562, + "learning_rate": 5.540366137187755e-06, + "loss": 0.395928955078125, + "step": 41295 + }, + { + "epoch": 0.35710888794735884, + "grad_norm": 25.98503789618083, + "learning_rate": 5.540257759668639e-06, + "loss": 0.4427909851074219, + "step": 41300 + }, + { + "epoch": 0.35715212146890213, + "grad_norm": 0.15902399957443164, + "learning_rate": 5.5401493704340805e-06, + "loss": 0.08037872314453125, + "step": 41305 + }, + { + "epoch": 0.3571953549904454, + "grad_norm": 15.62084245176451, + "learning_rate": 5.540040969484577e-06, + "loss": 0.10053329467773438, + "step": 41310 + }, + { + "epoch": 0.35723858851198864, + "grad_norm": 7.713033202112247, + "learning_rate": 5.5399325568206284e-06, + "loss": 0.242877197265625, + "step": 41315 + }, + { + "epoch": 0.3572818220335319, + "grad_norm": 26.83517193158832, + "learning_rate": 5.539824132442735e-06, + "loss": 0.17342529296875, + "step": 41320 + }, + { + "epoch": 0.3573250555550752, + "grad_norm": 2.3010075480630343, + "learning_rate": 5.539715696351398e-06, + "loss": 0.16719207763671876, + "step": 41325 + }, + { + "epoch": 0.35736828907661844, + "grad_norm": 0.292710010758715, + "learning_rate": 5.539607248547116e-06, + "loss": 0.08769721984863281, + "step": 41330 + }, + { + "epoch": 0.3574115225981617, + "grad_norm": 7.793469783557028, + "learning_rate": 5.539498789030391e-06, + "loss": 0.6282257080078125, + "step": 41335 + }, + { + "epoch": 0.35745475611970495, + "grad_norm": 4.781940672063838, + "learning_rate": 5.539390317801721e-06, + "loss": 0.06023101806640625, + "step": 41340 + }, + { + "epoch": 0.35749798964124824, + "grad_norm": 0.38389899537295075, + "learning_rate": 5.5392818348616074e-06, + "loss": 0.07199764251708984, + "step": 41345 + }, + { + "epoch": 0.3575412231627915, + "grad_norm": 48.00853412336559, + "learning_rate": 5.539173340210549e-06, + "loss": 0.2846527099609375, + "step": 41350 + }, + { + "epoch": 0.35758445668433475, + "grad_norm": 36.024169004599905, + "learning_rate": 5.53906483384905e-06, + "loss": 0.787890625, + "step": 41355 + }, + { + "epoch": 0.35762769020587803, + "grad_norm": 1.859609650331293, + "learning_rate": 5.538956315777607e-06, + "loss": 0.09658660888671874, + "step": 41360 + }, + { + "epoch": 0.3576709237274213, + "grad_norm": 10.819791067798015, + "learning_rate": 5.538847785996722e-06, + "loss": 0.185406494140625, + "step": 41365 + }, + { + "epoch": 0.35771415724896455, + "grad_norm": 12.138000114446838, + "learning_rate": 5.5387392445068955e-06, + "loss": 0.08235149383544922, + "step": 41370 + }, + { + "epoch": 0.35775739077050783, + "grad_norm": 5.836846698636566, + "learning_rate": 5.538630691308628e-06, + "loss": 0.17996826171875, + "step": 41375 + }, + { + "epoch": 0.35780062429205106, + "grad_norm": 1.400988159274997, + "learning_rate": 5.538522126402419e-06, + "loss": 0.17997207641601562, + "step": 41380 + }, + { + "epoch": 0.35784385781359435, + "grad_norm": 32.25252062334365, + "learning_rate": 5.538413549788772e-06, + "loss": 0.4608604431152344, + "step": 41385 + }, + { + "epoch": 0.35788709133513763, + "grad_norm": 21.75303188547268, + "learning_rate": 5.5383049614681836e-06, + "loss": 0.1139678955078125, + "step": 41390 + }, + { + "epoch": 0.35793032485668086, + "grad_norm": 5.424608947305274, + "learning_rate": 5.53819636144116e-06, + "loss": 0.07130603790283203, + "step": 41395 + }, + { + "epoch": 0.35797355837822414, + "grad_norm": 2.122998742576118, + "learning_rate": 5.5380877497081965e-06, + "loss": 0.24229011535644532, + "step": 41400 + }, + { + "epoch": 0.35801679189976743, + "grad_norm": 10.40656917548105, + "learning_rate": 5.537979126269798e-06, + "loss": 0.2233306884765625, + "step": 41405 + }, + { + "epoch": 0.35806002542131066, + "grad_norm": 16.238793875733617, + "learning_rate": 5.5378704911264635e-06, + "loss": 0.1863006591796875, + "step": 41410 + }, + { + "epoch": 0.35810325894285394, + "grad_norm": 12.735274709046264, + "learning_rate": 5.537761844278696e-06, + "loss": 0.1919921875, + "step": 41415 + }, + { + "epoch": 0.35814649246439717, + "grad_norm": 1.4336609454323497, + "learning_rate": 5.537653185726993e-06, + "loss": 0.162481689453125, + "step": 41420 + }, + { + "epoch": 0.35818972598594045, + "grad_norm": 30.44841966967212, + "learning_rate": 5.537544515471858e-06, + "loss": 0.274884033203125, + "step": 41425 + }, + { + "epoch": 0.35823295950748374, + "grad_norm": 6.562201959573349, + "learning_rate": 5.537435833513793e-06, + "loss": 0.11297988891601562, + "step": 41430 + }, + { + "epoch": 0.35827619302902697, + "grad_norm": 0.7024001751074553, + "learning_rate": 5.537327139853298e-06, + "loss": 0.06507644653320313, + "step": 41435 + }, + { + "epoch": 0.35831942655057025, + "grad_norm": 0.11832044939316395, + "learning_rate": 5.537218434490873e-06, + "loss": 0.17537689208984375, + "step": 41440 + }, + { + "epoch": 0.35836266007211354, + "grad_norm": 45.99287919634268, + "learning_rate": 5.537109717427022e-06, + "loss": 0.1292572021484375, + "step": 41445 + }, + { + "epoch": 0.35840589359365677, + "grad_norm": 4.8324105198873575, + "learning_rate": 5.537000988662245e-06, + "loss": 0.21908111572265626, + "step": 41450 + }, + { + "epoch": 0.35844912711520005, + "grad_norm": 0.08895632959177702, + "learning_rate": 5.536892248197044e-06, + "loss": 0.3252998352050781, + "step": 41455 + }, + { + "epoch": 0.3584923606367433, + "grad_norm": 14.285094113155308, + "learning_rate": 5.53678349603192e-06, + "loss": 0.5949188232421875, + "step": 41460 + }, + { + "epoch": 0.35853559415828656, + "grad_norm": 50.09089890038171, + "learning_rate": 5.536674732167374e-06, + "loss": 0.42202606201171877, + "step": 41465 + }, + { + "epoch": 0.35857882767982985, + "grad_norm": 42.02196380610391, + "learning_rate": 5.5365659566039085e-06, + "loss": 0.34867401123046876, + "step": 41470 + }, + { + "epoch": 0.3586220612013731, + "grad_norm": 0.12077046629087079, + "learning_rate": 5.536457169342025e-06, + "loss": 0.09181365966796876, + "step": 41475 + }, + { + "epoch": 0.35866529472291636, + "grad_norm": 0.13198019763705784, + "learning_rate": 5.536348370382226e-06, + "loss": 0.20578041076660156, + "step": 41480 + }, + { + "epoch": 0.35870852824445965, + "grad_norm": 0.9644622709054006, + "learning_rate": 5.536239559725011e-06, + "loss": 0.16733551025390625, + "step": 41485 + }, + { + "epoch": 0.3587517617660029, + "grad_norm": 57.22381503171543, + "learning_rate": 5.536130737370885e-06, + "loss": 0.17650527954101564, + "step": 41490 + }, + { + "epoch": 0.35879499528754616, + "grad_norm": 2.9113602733134534, + "learning_rate": 5.536021903320346e-06, + "loss": 0.13355178833007814, + "step": 41495 + }, + { + "epoch": 0.3588382288090894, + "grad_norm": 5.362202618963046, + "learning_rate": 5.5359130575739e-06, + "loss": 0.09982833862304688, + "step": 41500 + }, + { + "epoch": 0.3588814623306327, + "grad_norm": 13.538558392698661, + "learning_rate": 5.535804200132045e-06, + "loss": 0.486083984375, + "step": 41505 + }, + { + "epoch": 0.35892469585217596, + "grad_norm": 0.7552317334696886, + "learning_rate": 5.535695330995287e-06, + "loss": 0.12654571533203124, + "step": 41510 + }, + { + "epoch": 0.3589679293737192, + "grad_norm": 0.4391575662372293, + "learning_rate": 5.535586450164125e-06, + "loss": 0.07757568359375, + "step": 41515 + }, + { + "epoch": 0.35901116289526247, + "grad_norm": 3.418241198842024, + "learning_rate": 5.535477557639063e-06, + "loss": 0.2802490234375, + "step": 41520 + }, + { + "epoch": 0.35905439641680575, + "grad_norm": 10.439618993217497, + "learning_rate": 5.535368653420603e-06, + "loss": 0.18344535827636718, + "step": 41525 + }, + { + "epoch": 0.359097629938349, + "grad_norm": 5.640577984509456, + "learning_rate": 5.535259737509246e-06, + "loss": 0.14129638671875, + "step": 41530 + }, + { + "epoch": 0.35914086345989227, + "grad_norm": 20.382919150850338, + "learning_rate": 5.535150809905495e-06, + "loss": 0.11278228759765625, + "step": 41535 + }, + { + "epoch": 0.35918409698143555, + "grad_norm": 23.531383499532325, + "learning_rate": 5.535041870609853e-06, + "loss": 0.3121337890625, + "step": 41540 + }, + { + "epoch": 0.3592273305029788, + "grad_norm": 6.500605013518276, + "learning_rate": 5.5349329196228214e-06, + "loss": 0.08450469970703126, + "step": 41545 + }, + { + "epoch": 0.35927056402452207, + "grad_norm": 15.130300596555136, + "learning_rate": 5.534823956944903e-06, + "loss": 0.17981719970703125, + "step": 41550 + }, + { + "epoch": 0.3593137975460653, + "grad_norm": 11.135566385638048, + "learning_rate": 5.534714982576602e-06, + "loss": 0.043556594848632814, + "step": 41555 + }, + { + "epoch": 0.3593570310676086, + "grad_norm": 14.615955646798342, + "learning_rate": 5.534605996518417e-06, + "loss": 0.4085205078125, + "step": 41560 + }, + { + "epoch": 0.35940026458915186, + "grad_norm": 16.85865292453554, + "learning_rate": 5.534496998770855e-06, + "loss": 0.34105682373046875, + "step": 41565 + }, + { + "epoch": 0.3594434981106951, + "grad_norm": 3.600579643274234, + "learning_rate": 5.534387989334416e-06, + "loss": 0.0710113525390625, + "step": 41570 + }, + { + "epoch": 0.3594867316322384, + "grad_norm": 0.07528754009207782, + "learning_rate": 5.534278968209604e-06, + "loss": 0.225762939453125, + "step": 41575 + }, + { + "epoch": 0.35952996515378166, + "grad_norm": 14.306604887387032, + "learning_rate": 5.5341699353969215e-06, + "loss": 0.09381351470947266, + "step": 41580 + }, + { + "epoch": 0.3595731986753249, + "grad_norm": 0.9014477041859457, + "learning_rate": 5.534060890896871e-06, + "loss": 0.20340576171875, + "step": 41585 + }, + { + "epoch": 0.3596164321968682, + "grad_norm": 10.021689644244228, + "learning_rate": 5.533951834709955e-06, + "loss": 0.03030853271484375, + "step": 41590 + }, + { + "epoch": 0.3596596657184114, + "grad_norm": 14.928983019484976, + "learning_rate": 5.533842766836678e-06, + "loss": 0.31781005859375, + "step": 41595 + }, + { + "epoch": 0.3597028992399547, + "grad_norm": 0.42950543384204076, + "learning_rate": 5.533733687277541e-06, + "loss": 0.08034515380859375, + "step": 41600 + }, + { + "epoch": 0.35974613276149797, + "grad_norm": 12.051853059016159, + "learning_rate": 5.533624596033048e-06, + "loss": 0.09052734375, + "step": 41605 + }, + { + "epoch": 0.3597893662830412, + "grad_norm": 5.8799562551008115, + "learning_rate": 5.533515493103704e-06, + "loss": 0.058827972412109374, + "step": 41610 + }, + { + "epoch": 0.3598325998045845, + "grad_norm": 7.567653339486701, + "learning_rate": 5.533406378490009e-06, + "loss": 0.11956863403320313, + "step": 41615 + }, + { + "epoch": 0.35987583332612777, + "grad_norm": 1.7407625640056676, + "learning_rate": 5.533297252192468e-06, + "loss": 0.0759317398071289, + "step": 41620 + }, + { + "epoch": 0.359919066847671, + "grad_norm": 0.07949652255130911, + "learning_rate": 5.5331881142115834e-06, + "loss": 0.18333015441894532, + "step": 41625 + }, + { + "epoch": 0.3599623003692143, + "grad_norm": 9.789561580012132, + "learning_rate": 5.53307896454786e-06, + "loss": 0.07870330810546874, + "step": 41630 + }, + { + "epoch": 0.3600055338907575, + "grad_norm": 3.807242596237677, + "learning_rate": 5.532969803201799e-06, + "loss": 0.118463134765625, + "step": 41635 + }, + { + "epoch": 0.3600487674123008, + "grad_norm": 27.528161109900363, + "learning_rate": 5.5328606301739066e-06, + "loss": 0.19146728515625, + "step": 41640 + }, + { + "epoch": 0.3600920009338441, + "grad_norm": 1.791015323552771, + "learning_rate": 5.532751445464683e-06, + "loss": 0.06951522827148438, + "step": 41645 + }, + { + "epoch": 0.3601352344553873, + "grad_norm": 11.556947166831012, + "learning_rate": 5.532642249074635e-06, + "loss": 0.1183197021484375, + "step": 41650 + }, + { + "epoch": 0.3601784679769306, + "grad_norm": 28.771640106385718, + "learning_rate": 5.532533041004265e-06, + "loss": 0.15925674438476561, + "step": 41655 + }, + { + "epoch": 0.3602217014984739, + "grad_norm": 15.167948610863954, + "learning_rate": 5.532423821254075e-06, + "loss": 0.09896011352539062, + "step": 41660 + }, + { + "epoch": 0.3602649350200171, + "grad_norm": 18.333730655132666, + "learning_rate": 5.532314589824571e-06, + "loss": 0.9062713623046875, + "step": 41665 + }, + { + "epoch": 0.3603081685415604, + "grad_norm": 45.5287195756047, + "learning_rate": 5.532205346716255e-06, + "loss": 0.1003936767578125, + "step": 41670 + }, + { + "epoch": 0.3603514020631036, + "grad_norm": 6.063329960455354, + "learning_rate": 5.532096091929633e-06, + "loss": 0.2446807861328125, + "step": 41675 + }, + { + "epoch": 0.3603946355846469, + "grad_norm": 8.514773322397742, + "learning_rate": 5.531986825465206e-06, + "loss": 0.119580078125, + "step": 41680 + }, + { + "epoch": 0.3604378691061902, + "grad_norm": 28.71689199323436, + "learning_rate": 5.531877547323481e-06, + "loss": 0.169647216796875, + "step": 41685 + }, + { + "epoch": 0.3604811026277334, + "grad_norm": 0.9910275734776564, + "learning_rate": 5.53176825750496e-06, + "loss": 0.16895675659179688, + "step": 41690 + }, + { + "epoch": 0.3605243361492767, + "grad_norm": 8.268034418682358, + "learning_rate": 5.531658956010147e-06, + "loss": 0.13833885192871093, + "step": 41695 + }, + { + "epoch": 0.36056756967082, + "grad_norm": 0.9627972983849469, + "learning_rate": 5.531549642839547e-06, + "loss": 0.057100677490234376, + "step": 41700 + }, + { + "epoch": 0.3606108031923632, + "grad_norm": 7.259165654064277, + "learning_rate": 5.531440317993663e-06, + "loss": 0.11079864501953125, + "step": 41705 + }, + { + "epoch": 0.3606540367139065, + "grad_norm": 2.496710778122082, + "learning_rate": 5.5313309814730005e-06, + "loss": 0.30025405883789064, + "step": 41710 + }, + { + "epoch": 0.3606972702354498, + "grad_norm": 3.288305484864664, + "learning_rate": 5.531221633278064e-06, + "loss": 0.0643798828125, + "step": 41715 + }, + { + "epoch": 0.360740503756993, + "grad_norm": 0.3254019342623508, + "learning_rate": 5.531112273409356e-06, + "loss": 0.1712158203125, + "step": 41720 + }, + { + "epoch": 0.3607837372785363, + "grad_norm": 4.677899762294543, + "learning_rate": 5.531002901867382e-06, + "loss": 0.11975269317626953, + "step": 41725 + }, + { + "epoch": 0.3608269708000795, + "grad_norm": 1.9682225342685207, + "learning_rate": 5.530893518652647e-06, + "loss": 0.09810562133789062, + "step": 41730 + }, + { + "epoch": 0.3608702043216228, + "grad_norm": 2.6505236980534046, + "learning_rate": 5.530784123765654e-06, + "loss": 0.148199462890625, + "step": 41735 + }, + { + "epoch": 0.3609134378431661, + "grad_norm": 21.566561712690586, + "learning_rate": 5.530674717206908e-06, + "loss": 0.37196044921875, + "step": 41740 + }, + { + "epoch": 0.3609566713647093, + "grad_norm": 42.00717671392255, + "learning_rate": 5.530565298976915e-06, + "loss": 0.27712554931640626, + "step": 41745 + }, + { + "epoch": 0.3609999048862526, + "grad_norm": 25.879931426379965, + "learning_rate": 5.530455869076178e-06, + "loss": 0.355340576171875, + "step": 41750 + }, + { + "epoch": 0.3610431384077959, + "grad_norm": 4.779280643625326, + "learning_rate": 5.530346427505201e-06, + "loss": 0.11787567138671876, + "step": 41755 + }, + { + "epoch": 0.3610863719293391, + "grad_norm": 41.082865543226085, + "learning_rate": 5.530236974264491e-06, + "loss": 0.29431686401367185, + "step": 41760 + }, + { + "epoch": 0.3611296054508824, + "grad_norm": 0.6153477735696843, + "learning_rate": 5.5301275093545506e-06, + "loss": 0.04887237548828125, + "step": 41765 + }, + { + "epoch": 0.36117283897242564, + "grad_norm": 12.103141290451688, + "learning_rate": 5.530018032775887e-06, + "loss": 0.12297744750976562, + "step": 41770 + }, + { + "epoch": 0.3612160724939689, + "grad_norm": 20.587777149272984, + "learning_rate": 5.529908544529003e-06, + "loss": 0.082586669921875, + "step": 41775 + }, + { + "epoch": 0.3612593060155122, + "grad_norm": 1.439529923524378, + "learning_rate": 5.529799044614404e-06, + "loss": 0.2879364013671875, + "step": 41780 + }, + { + "epoch": 0.36130253953705543, + "grad_norm": 3.2450254535052463, + "learning_rate": 5.529689533032596e-06, + "loss": 0.235552978515625, + "step": 41785 + }, + { + "epoch": 0.3613457730585987, + "grad_norm": 36.0324573616481, + "learning_rate": 5.529580009784083e-06, + "loss": 0.37855377197265627, + "step": 41790 + }, + { + "epoch": 0.361389006580142, + "grad_norm": 9.735230283642169, + "learning_rate": 5.529470474869371e-06, + "loss": 0.40445938110351565, + "step": 41795 + }, + { + "epoch": 0.36143224010168523, + "grad_norm": 11.97848726062262, + "learning_rate": 5.529360928288965e-06, + "loss": 0.15746231079101564, + "step": 41800 + }, + { + "epoch": 0.3614754736232285, + "grad_norm": 14.548042913830706, + "learning_rate": 5.529251370043368e-06, + "loss": 0.11293487548828125, + "step": 41805 + }, + { + "epoch": 0.36151870714477174, + "grad_norm": 27.969601799144368, + "learning_rate": 5.529141800133089e-06, + "loss": 0.09772758483886719, + "step": 41810 + }, + { + "epoch": 0.36156194066631503, + "grad_norm": 51.4988813516701, + "learning_rate": 5.5290322185586315e-06, + "loss": 0.5354354858398438, + "step": 41815 + }, + { + "epoch": 0.3616051741878583, + "grad_norm": 25.048372435530474, + "learning_rate": 5.5289226253205e-06, + "loss": 0.36417388916015625, + "step": 41820 + }, + { + "epoch": 0.36164840770940154, + "grad_norm": 1.239404912463008, + "learning_rate": 5.528813020419201e-06, + "loss": 0.3573486328125, + "step": 41825 + }, + { + "epoch": 0.3616916412309448, + "grad_norm": 12.749080106948744, + "learning_rate": 5.5287034038552405e-06, + "loss": 0.20229949951171874, + "step": 41830 + }, + { + "epoch": 0.3617348747524881, + "grad_norm": 7.771489972899884, + "learning_rate": 5.5285937756291226e-06, + "loss": 0.08028411865234375, + "step": 41835 + }, + { + "epoch": 0.36177810827403134, + "grad_norm": 2.712025743540737, + "learning_rate": 5.528484135741354e-06, + "loss": 0.2381072998046875, + "step": 41840 + }, + { + "epoch": 0.3618213417955746, + "grad_norm": 0.1349392359955097, + "learning_rate": 5.52837448419244e-06, + "loss": 0.17858505249023438, + "step": 41845 + }, + { + "epoch": 0.36186457531711785, + "grad_norm": 71.72540314040786, + "learning_rate": 5.528264820982886e-06, + "loss": 0.867578125, + "step": 41850 + }, + { + "epoch": 0.36190780883866114, + "grad_norm": 14.70339257006119, + "learning_rate": 5.528155146113198e-06, + "loss": 0.1785747528076172, + "step": 41855 + }, + { + "epoch": 0.3619510423602044, + "grad_norm": 2.731053681147775, + "learning_rate": 5.528045459583882e-06, + "loss": 0.365985107421875, + "step": 41860 + }, + { + "epoch": 0.36199427588174765, + "grad_norm": 5.201255116390585, + "learning_rate": 5.527935761395444e-06, + "loss": 0.07698974609375, + "step": 41865 + }, + { + "epoch": 0.36203750940329094, + "grad_norm": 14.578994470971326, + "learning_rate": 5.527826051548389e-06, + "loss": 0.04752655029296875, + "step": 41870 + }, + { + "epoch": 0.3620807429248342, + "grad_norm": 30.52897067144294, + "learning_rate": 5.5277163300432234e-06, + "loss": 0.14033737182617187, + "step": 41875 + }, + { + "epoch": 0.36212397644637745, + "grad_norm": 3.1427676764033863, + "learning_rate": 5.527606596880455e-06, + "loss": 0.095050048828125, + "step": 41880 + }, + { + "epoch": 0.36216720996792073, + "grad_norm": 7.551720132140465, + "learning_rate": 5.5274968520605875e-06, + "loss": 0.08194122314453126, + "step": 41885 + }, + { + "epoch": 0.362210443489464, + "grad_norm": 38.286343610269974, + "learning_rate": 5.527387095584127e-06, + "loss": 0.2968658447265625, + "step": 41890 + }, + { + "epoch": 0.36225367701100725, + "grad_norm": 1.8009055993038612, + "learning_rate": 5.527277327451582e-06, + "loss": 0.15225830078125, + "step": 41895 + }, + { + "epoch": 0.36229691053255053, + "grad_norm": 17.344938793300496, + "learning_rate": 5.527167547663456e-06, + "loss": 0.20734405517578125, + "step": 41900 + }, + { + "epoch": 0.36234014405409376, + "grad_norm": 2.5883766077118264, + "learning_rate": 5.527057756220257e-06, + "loss": 0.1270599365234375, + "step": 41905 + }, + { + "epoch": 0.36238337757563704, + "grad_norm": 2.076798714911825, + "learning_rate": 5.526947953122491e-06, + "loss": 0.0453857421875, + "step": 41910 + }, + { + "epoch": 0.36242661109718033, + "grad_norm": 13.387763005152665, + "learning_rate": 5.526838138370664e-06, + "loss": 0.21695098876953126, + "step": 41915 + }, + { + "epoch": 0.36246984461872356, + "grad_norm": 0.6895226602598244, + "learning_rate": 5.526728311965284e-06, + "loss": 0.065374755859375, + "step": 41920 + }, + { + "epoch": 0.36251307814026684, + "grad_norm": 19.680232998592828, + "learning_rate": 5.526618473906856e-06, + "loss": 0.29658050537109376, + "step": 41925 + }, + { + "epoch": 0.3625563116618101, + "grad_norm": 6.402861047419217, + "learning_rate": 5.526508624195885e-06, + "loss": 0.08457565307617188, + "step": 41930 + }, + { + "epoch": 0.36259954518335336, + "grad_norm": 33.79792067029085, + "learning_rate": 5.526398762832881e-06, + "loss": 0.4615509033203125, + "step": 41935 + }, + { + "epoch": 0.36264277870489664, + "grad_norm": 3.245548695291569, + "learning_rate": 5.526288889818349e-06, + "loss": 0.3608154296875, + "step": 41940 + }, + { + "epoch": 0.36268601222643987, + "grad_norm": 30.155216715279394, + "learning_rate": 5.526179005152796e-06, + "loss": 0.488519287109375, + "step": 41945 + }, + { + "epoch": 0.36272924574798315, + "grad_norm": 1.0615509220408952, + "learning_rate": 5.5260691088367285e-06, + "loss": 0.13202590942382814, + "step": 41950 + }, + { + "epoch": 0.36277247926952644, + "grad_norm": 10.324100362444526, + "learning_rate": 5.525959200870654e-06, + "loss": 0.5220947265625, + "step": 41955 + }, + { + "epoch": 0.36281571279106967, + "grad_norm": 0.71508323044603, + "learning_rate": 5.525849281255077e-06, + "loss": 0.1891998291015625, + "step": 41960 + }, + { + "epoch": 0.36285894631261295, + "grad_norm": 10.617047609605635, + "learning_rate": 5.525739349990508e-06, + "loss": 0.2508125305175781, + "step": 41965 + }, + { + "epoch": 0.36290217983415624, + "grad_norm": 32.041273637250875, + "learning_rate": 5.525629407077452e-06, + "loss": 0.3125297546386719, + "step": 41970 + }, + { + "epoch": 0.36294541335569946, + "grad_norm": 20.832714952153935, + "learning_rate": 5.525519452516416e-06, + "loss": 0.12406158447265625, + "step": 41975 + }, + { + "epoch": 0.36298864687724275, + "grad_norm": 1.0910507396929194, + "learning_rate": 5.525409486307908e-06, + "loss": 0.3261253356933594, + "step": 41980 + }, + { + "epoch": 0.363031880398786, + "grad_norm": 30.33854991552917, + "learning_rate": 5.525299508452435e-06, + "loss": 0.540203857421875, + "step": 41985 + }, + { + "epoch": 0.36307511392032926, + "grad_norm": 39.343011193673, + "learning_rate": 5.525189518950502e-06, + "loss": 0.2104278564453125, + "step": 41990 + }, + { + "epoch": 0.36311834744187255, + "grad_norm": 13.605568814766748, + "learning_rate": 5.525079517802619e-06, + "loss": 0.10609893798828125, + "step": 41995 + }, + { + "epoch": 0.3631615809634158, + "grad_norm": 2.197421674554241, + "learning_rate": 5.524969505009292e-06, + "loss": 0.0492156982421875, + "step": 42000 + }, + { + "epoch": 0.36320481448495906, + "grad_norm": 17.272908442214018, + "learning_rate": 5.52485948057103e-06, + "loss": 0.32801055908203125, + "step": 42005 + }, + { + "epoch": 0.36324804800650234, + "grad_norm": 0.8371082959706072, + "learning_rate": 5.524749444488338e-06, + "loss": 0.09290924072265624, + "step": 42010 + }, + { + "epoch": 0.3632912815280456, + "grad_norm": 6.937032209927106, + "learning_rate": 5.5246393967617255e-06, + "loss": 0.094427490234375, + "step": 42015 + }, + { + "epoch": 0.36333451504958886, + "grad_norm": 3.100902241080226, + "learning_rate": 5.524529337391699e-06, + "loss": 0.097149658203125, + "step": 42020 + }, + { + "epoch": 0.3633777485711321, + "grad_norm": 12.68883390878693, + "learning_rate": 5.524419266378765e-06, + "loss": 0.11279296875, + "step": 42025 + }, + { + "epoch": 0.36342098209267537, + "grad_norm": 12.449782244629363, + "learning_rate": 5.524309183723434e-06, + "loss": 0.27582550048828125, + "step": 42030 + }, + { + "epoch": 0.36346421561421866, + "grad_norm": 6.822580333831165, + "learning_rate": 5.524199089426211e-06, + "loss": 0.16044921875, + "step": 42035 + }, + { + "epoch": 0.3635074491357619, + "grad_norm": 6.065052655768193, + "learning_rate": 5.524088983487605e-06, + "loss": 0.2588836669921875, + "step": 42040 + }, + { + "epoch": 0.36355068265730517, + "grad_norm": 31.95914779692188, + "learning_rate": 5.523978865908125e-06, + "loss": 0.21272125244140624, + "step": 42045 + }, + { + "epoch": 0.36359391617884845, + "grad_norm": 20.592880289995794, + "learning_rate": 5.523868736688277e-06, + "loss": 0.20057373046875, + "step": 42050 + }, + { + "epoch": 0.3636371497003917, + "grad_norm": 12.736832458979013, + "learning_rate": 5.523758595828568e-06, + "loss": 0.10020523071289063, + "step": 42055 + }, + { + "epoch": 0.36368038322193497, + "grad_norm": 26.13841101529014, + "learning_rate": 5.523648443329508e-06, + "loss": 0.1976654052734375, + "step": 42060 + }, + { + "epoch": 0.36372361674347825, + "grad_norm": 30.169204115938676, + "learning_rate": 5.523538279191604e-06, + "loss": 0.40569610595703126, + "step": 42065 + }, + { + "epoch": 0.3637668502650215, + "grad_norm": 1.439821622813904, + "learning_rate": 5.523428103415366e-06, + "loss": 0.02943000793457031, + "step": 42070 + }, + { + "epoch": 0.36381008378656476, + "grad_norm": 2.535026730467665, + "learning_rate": 5.5233179160013e-06, + "loss": 0.1689117431640625, + "step": 42075 + }, + { + "epoch": 0.363853317308108, + "grad_norm": 6.858271855089975, + "learning_rate": 5.523207716949915e-06, + "loss": 0.280810546875, + "step": 42080 + }, + { + "epoch": 0.3638965508296513, + "grad_norm": 1.5128926144723118, + "learning_rate": 5.523097506261718e-06, + "loss": 0.336187744140625, + "step": 42085 + }, + { + "epoch": 0.36393978435119456, + "grad_norm": 1.5832671047699043, + "learning_rate": 5.522987283937218e-06, + "loss": 0.43590240478515624, + "step": 42090 + }, + { + "epoch": 0.3639830178727378, + "grad_norm": 3.0891151174286278, + "learning_rate": 5.522877049976924e-06, + "loss": 0.0574432373046875, + "step": 42095 + }, + { + "epoch": 0.3640262513942811, + "grad_norm": 18.46685140193319, + "learning_rate": 5.5227668043813446e-06, + "loss": 0.2953214645385742, + "step": 42100 + }, + { + "epoch": 0.36406948491582436, + "grad_norm": 4.107206362630515, + "learning_rate": 5.522656547150987e-06, + "loss": 0.1473480224609375, + "step": 42105 + }, + { + "epoch": 0.3641127184373676, + "grad_norm": 40.39589125968738, + "learning_rate": 5.52254627828636e-06, + "loss": 0.4369842529296875, + "step": 42110 + }, + { + "epoch": 0.3641559519589109, + "grad_norm": 11.537960928347902, + "learning_rate": 5.522435997787973e-06, + "loss": 0.12614021301269532, + "step": 42115 + }, + { + "epoch": 0.3641991854804541, + "grad_norm": 42.93176896181281, + "learning_rate": 5.5223257056563345e-06, + "loss": 0.29909515380859375, + "step": 42120 + }, + { + "epoch": 0.3642424190019974, + "grad_norm": 1.161978654398881, + "learning_rate": 5.522215401891952e-06, + "loss": 0.10465087890625, + "step": 42125 + }, + { + "epoch": 0.36428565252354067, + "grad_norm": 12.189799782500735, + "learning_rate": 5.522105086495335e-06, + "loss": 0.10077056884765626, + "step": 42130 + }, + { + "epoch": 0.3643288860450839, + "grad_norm": 26.507317018471724, + "learning_rate": 5.521994759466992e-06, + "loss": 0.33709716796875, + "step": 42135 + }, + { + "epoch": 0.3643721195666272, + "grad_norm": 3.3787424916440307, + "learning_rate": 5.521884420807432e-06, + "loss": 0.1716094970703125, + "step": 42140 + }, + { + "epoch": 0.36441535308817047, + "grad_norm": 7.888438351590131, + "learning_rate": 5.521774070517164e-06, + "loss": 0.11993408203125, + "step": 42145 + }, + { + "epoch": 0.3644585866097137, + "grad_norm": 17.926123458433334, + "learning_rate": 5.5216637085966975e-06, + "loss": 0.548046875, + "step": 42150 + }, + { + "epoch": 0.364501820131257, + "grad_norm": 3.6617276623178996, + "learning_rate": 5.521553335046541e-06, + "loss": 0.06541748046875, + "step": 42155 + }, + { + "epoch": 0.3645450536528002, + "grad_norm": 7.940455096262474, + "learning_rate": 5.5214429498672025e-06, + "loss": 0.3062713623046875, + "step": 42160 + }, + { + "epoch": 0.3645882871743435, + "grad_norm": 0.5118200495801896, + "learning_rate": 5.521332553059192e-06, + "loss": 0.026151275634765624, + "step": 42165 + }, + { + "epoch": 0.3646315206958868, + "grad_norm": 11.821077079742869, + "learning_rate": 5.521222144623019e-06, + "loss": 0.160504150390625, + "step": 42170 + }, + { + "epoch": 0.36467475421743, + "grad_norm": 4.6338071707400985, + "learning_rate": 5.521111724559191e-06, + "loss": 0.023028564453125, + "step": 42175 + }, + { + "epoch": 0.3647179877389733, + "grad_norm": 1.8045869528911493, + "learning_rate": 5.521001292868219e-06, + "loss": 0.14857978820800782, + "step": 42180 + }, + { + "epoch": 0.3647612212605166, + "grad_norm": 25.714152705166402, + "learning_rate": 5.520890849550613e-06, + "loss": 0.10284423828125, + "step": 42185 + }, + { + "epoch": 0.3648044547820598, + "grad_norm": 17.331582994935424, + "learning_rate": 5.52078039460688e-06, + "loss": 0.16806507110595703, + "step": 42190 + }, + { + "epoch": 0.3648476883036031, + "grad_norm": 23.301907827945023, + "learning_rate": 5.520669928037531e-06, + "loss": 0.21432247161865234, + "step": 42195 + }, + { + "epoch": 0.3648909218251463, + "grad_norm": 11.892451972909093, + "learning_rate": 5.520559449843075e-06, + "loss": 0.2319580078125, + "step": 42200 + }, + { + "epoch": 0.3649341553466896, + "grad_norm": 14.127521452782398, + "learning_rate": 5.520448960024021e-06, + "loss": 0.16195297241210938, + "step": 42205 + }, + { + "epoch": 0.3649773888682329, + "grad_norm": 3.7159516954471212, + "learning_rate": 5.52033845858088e-06, + "loss": 0.070703125, + "step": 42210 + }, + { + "epoch": 0.3650206223897761, + "grad_norm": 3.1585096148228042, + "learning_rate": 5.52022794551416e-06, + "loss": 0.2575859069824219, + "step": 42215 + }, + { + "epoch": 0.3650638559113194, + "grad_norm": 3.173725222899546, + "learning_rate": 5.520117420824371e-06, + "loss": 0.04730224609375, + "step": 42220 + }, + { + "epoch": 0.3651070894328627, + "grad_norm": 1.4013596523584242, + "learning_rate": 5.520006884512024e-06, + "loss": 0.250408935546875, + "step": 42225 + }, + { + "epoch": 0.3651503229544059, + "grad_norm": 11.971738812059689, + "learning_rate": 5.519896336577628e-06, + "loss": 0.19522705078125, + "step": 42230 + }, + { + "epoch": 0.3651935564759492, + "grad_norm": 4.891613810613451, + "learning_rate": 5.519785777021693e-06, + "loss": 0.23323326110839843, + "step": 42235 + }, + { + "epoch": 0.36523678999749243, + "grad_norm": 13.26318774623621, + "learning_rate": 5.519675205844728e-06, + "loss": 0.3106487274169922, + "step": 42240 + }, + { + "epoch": 0.3652800235190357, + "grad_norm": 16.00919196773932, + "learning_rate": 5.519564623047243e-06, + "loss": 0.285552978515625, + "step": 42245 + }, + { + "epoch": 0.365323257040579, + "grad_norm": 24.61818984246488, + "learning_rate": 5.51945402862975e-06, + "loss": 0.27358551025390626, + "step": 42250 + }, + { + "epoch": 0.3653664905621222, + "grad_norm": 6.428748518002303, + "learning_rate": 5.519343422592756e-06, + "loss": 0.0748260498046875, + "step": 42255 + }, + { + "epoch": 0.3654097240836655, + "grad_norm": 1.3621418617444918, + "learning_rate": 5.519232804936775e-06, + "loss": 0.09124755859375, + "step": 42260 + }, + { + "epoch": 0.3654529576052088, + "grad_norm": 16.474930062322894, + "learning_rate": 5.519122175662313e-06, + "loss": 0.09266128540039062, + "step": 42265 + }, + { + "epoch": 0.365496191126752, + "grad_norm": 1.0524847699057445, + "learning_rate": 5.519011534769883e-06, + "loss": 0.42860107421875, + "step": 42270 + }, + { + "epoch": 0.3655394246482953, + "grad_norm": 0.10060426563438833, + "learning_rate": 5.518900882259994e-06, + "loss": 0.1226308822631836, + "step": 42275 + }, + { + "epoch": 0.3655826581698386, + "grad_norm": 3.076429145206404, + "learning_rate": 5.518790218133156e-06, + "loss": 0.0686309814453125, + "step": 42280 + }, + { + "epoch": 0.3656258916913818, + "grad_norm": 4.485216489394292, + "learning_rate": 5.5186795423898815e-06, + "loss": 0.2796657562255859, + "step": 42285 + }, + { + "epoch": 0.3656691252129251, + "grad_norm": 3.9247220466357566, + "learning_rate": 5.518568855030679e-06, + "loss": 0.1945556640625, + "step": 42290 + }, + { + "epoch": 0.36571235873446833, + "grad_norm": 18.155052794168537, + "learning_rate": 5.51845815605606e-06, + "loss": 0.11480960845947266, + "step": 42295 + }, + { + "epoch": 0.3657555922560116, + "grad_norm": 2.794758894886772, + "learning_rate": 5.518347445466533e-06, + "loss": 0.371063232421875, + "step": 42300 + }, + { + "epoch": 0.3657988257775549, + "grad_norm": 8.606355407182189, + "learning_rate": 5.518236723262611e-06, + "loss": 0.1205657958984375, + "step": 42305 + }, + { + "epoch": 0.36584205929909813, + "grad_norm": 10.678982687076143, + "learning_rate": 5.518125989444804e-06, + "loss": 0.06268310546875, + "step": 42310 + }, + { + "epoch": 0.3658852928206414, + "grad_norm": 16.163354022561208, + "learning_rate": 5.518015244013622e-06, + "loss": 0.17406845092773438, + "step": 42315 + }, + { + "epoch": 0.3659285263421847, + "grad_norm": 2.3543275041518963, + "learning_rate": 5.517904486969576e-06, + "loss": 0.10579833984375, + "step": 42320 + }, + { + "epoch": 0.36597175986372793, + "grad_norm": 0.28142095006226514, + "learning_rate": 5.517793718313179e-06, + "loss": 0.24773712158203126, + "step": 42325 + }, + { + "epoch": 0.3660149933852712, + "grad_norm": 3.0665355030441983, + "learning_rate": 5.517682938044937e-06, + "loss": 0.380242919921875, + "step": 42330 + }, + { + "epoch": 0.36605822690681444, + "grad_norm": 35.409409689806935, + "learning_rate": 5.517572146165365e-06, + "loss": 0.2594482421875, + "step": 42335 + }, + { + "epoch": 0.36610146042835773, + "grad_norm": 15.304236993981144, + "learning_rate": 5.517461342674973e-06, + "loss": 0.20280914306640624, + "step": 42340 + }, + { + "epoch": 0.366144693949901, + "grad_norm": 7.052307973689256, + "learning_rate": 5.517350527574271e-06, + "loss": 0.209820556640625, + "step": 42345 + }, + { + "epoch": 0.36618792747144424, + "grad_norm": 7.202138686305275, + "learning_rate": 5.517239700863772e-06, + "loss": 0.03815460205078125, + "step": 42350 + }, + { + "epoch": 0.3662311609929875, + "grad_norm": 8.561901367419358, + "learning_rate": 5.517128862543985e-06, + "loss": 0.11427726745605468, + "step": 42355 + }, + { + "epoch": 0.3662743945145308, + "grad_norm": 2.4491509324746943, + "learning_rate": 5.517018012615422e-06, + "loss": 0.2841316223144531, + "step": 42360 + }, + { + "epoch": 0.36631762803607404, + "grad_norm": 3.3005727494378982, + "learning_rate": 5.516907151078595e-06, + "loss": 0.2113311767578125, + "step": 42365 + }, + { + "epoch": 0.3663608615576173, + "grad_norm": 27.577618832439374, + "learning_rate": 5.516796277934014e-06, + "loss": 0.2198577880859375, + "step": 42370 + }, + { + "epoch": 0.36640409507916055, + "grad_norm": 0.5114844174550257, + "learning_rate": 5.516685393182191e-06, + "loss": 0.14366455078125, + "step": 42375 + }, + { + "epoch": 0.36644732860070384, + "grad_norm": 18.671631742052995, + "learning_rate": 5.5165744968236375e-06, + "loss": 0.2626064300537109, + "step": 42380 + }, + { + "epoch": 0.3664905621222471, + "grad_norm": 19.041722250289553, + "learning_rate": 5.516463588858865e-06, + "loss": 0.2126077175140381, + "step": 42385 + }, + { + "epoch": 0.36653379564379035, + "grad_norm": 31.36351324290025, + "learning_rate": 5.516352669288385e-06, + "loss": 0.244482421875, + "step": 42390 + }, + { + "epoch": 0.36657702916533363, + "grad_norm": 16.29214583490606, + "learning_rate": 5.516241738112708e-06, + "loss": 0.3953987121582031, + "step": 42395 + }, + { + "epoch": 0.3666202626868769, + "grad_norm": 20.459518112370773, + "learning_rate": 5.5161307953323465e-06, + "loss": 0.1188232421875, + "step": 42400 + }, + { + "epoch": 0.36666349620842015, + "grad_norm": 15.456331097842742, + "learning_rate": 5.516019840947813e-06, + "loss": 0.3129791259765625, + "step": 42405 + }, + { + "epoch": 0.36670672972996343, + "grad_norm": 15.691565994144984, + "learning_rate": 5.515908874959617e-06, + "loss": 0.380950927734375, + "step": 42410 + }, + { + "epoch": 0.36674996325150666, + "grad_norm": 3.6382023206296332, + "learning_rate": 5.515797897368273e-06, + "loss": 0.5365966796875, + "step": 42415 + }, + { + "epoch": 0.36679319677304995, + "grad_norm": 1.9540798222510716, + "learning_rate": 5.5156869081742905e-06, + "loss": 0.06018638610839844, + "step": 42420 + }, + { + "epoch": 0.36683643029459323, + "grad_norm": 25.33543295714028, + "learning_rate": 5.5155759073781826e-06, + "loss": 0.20211143493652345, + "step": 42425 + }, + { + "epoch": 0.36687966381613646, + "grad_norm": 9.32618614008981, + "learning_rate": 5.515464894980461e-06, + "loss": 0.15268630981445314, + "step": 42430 + }, + { + "epoch": 0.36692289733767974, + "grad_norm": 7.140017077252273, + "learning_rate": 5.515353870981638e-06, + "loss": 0.3093994140625, + "step": 42435 + }, + { + "epoch": 0.366966130859223, + "grad_norm": 9.456980376516748, + "learning_rate": 5.515242835382224e-06, + "loss": 0.067724609375, + "step": 42440 + }, + { + "epoch": 0.36700936438076626, + "grad_norm": 35.09821747185279, + "learning_rate": 5.515131788182733e-06, + "loss": 0.103924560546875, + "step": 42445 + }, + { + "epoch": 0.36705259790230954, + "grad_norm": 6.488618771507872, + "learning_rate": 5.515020729383676e-06, + "loss": 0.24036865234375, + "step": 42450 + }, + { + "epoch": 0.3670958314238528, + "grad_norm": 5.343467668924222, + "learning_rate": 5.514909658985566e-06, + "loss": 0.0769500732421875, + "step": 42455 + }, + { + "epoch": 0.36713906494539605, + "grad_norm": 4.237647508624633, + "learning_rate": 5.514798576988915e-06, + "loss": 0.0657318115234375, + "step": 42460 + }, + { + "epoch": 0.36718229846693934, + "grad_norm": 7.939094582587382, + "learning_rate": 5.514687483394235e-06, + "loss": 0.22223968505859376, + "step": 42465 + }, + { + "epoch": 0.36722553198848257, + "grad_norm": 1.0323418381910423, + "learning_rate": 5.514576378202039e-06, + "loss": 0.042718505859375, + "step": 42470 + }, + { + "epoch": 0.36726876551002585, + "grad_norm": 2.77532271888152, + "learning_rate": 5.514465261412838e-06, + "loss": 0.32865180969238283, + "step": 42475 + }, + { + "epoch": 0.36731199903156914, + "grad_norm": 18.41944054718262, + "learning_rate": 5.514354133027146e-06, + "loss": 0.0994873046875, + "step": 42480 + }, + { + "epoch": 0.36735523255311237, + "grad_norm": 17.026563720854238, + "learning_rate": 5.514242993045475e-06, + "loss": 0.21943283081054688, + "step": 42485 + }, + { + "epoch": 0.36739846607465565, + "grad_norm": 23.722300871446798, + "learning_rate": 5.514131841468337e-06, + "loss": 0.31326904296875, + "step": 42490 + }, + { + "epoch": 0.36744169959619893, + "grad_norm": 3.5115743239963275, + "learning_rate": 5.514020678296245e-06, + "loss": 0.146160888671875, + "step": 42495 + }, + { + "epoch": 0.36748493311774216, + "grad_norm": 7.7055781119324624, + "learning_rate": 5.5139095035297125e-06, + "loss": 0.12667999267578126, + "step": 42500 + }, + { + "epoch": 0.36752816663928545, + "grad_norm": 46.29255185684108, + "learning_rate": 5.513798317169251e-06, + "loss": 0.539862060546875, + "step": 42505 + }, + { + "epoch": 0.3675714001608287, + "grad_norm": 31.064442538662345, + "learning_rate": 5.513687119215374e-06, + "loss": 0.1491180419921875, + "step": 42510 + }, + { + "epoch": 0.36761463368237196, + "grad_norm": 0.6777679817361393, + "learning_rate": 5.513575909668594e-06, + "loss": 0.19002532958984375, + "step": 42515 + }, + { + "epoch": 0.36765786720391525, + "grad_norm": 21.088877229145485, + "learning_rate": 5.513464688529424e-06, + "loss": 0.525103759765625, + "step": 42520 + }, + { + "epoch": 0.3677011007254585, + "grad_norm": 0.5673260172283205, + "learning_rate": 5.513353455798377e-06, + "loss": 0.16951370239257812, + "step": 42525 + }, + { + "epoch": 0.36774433424700176, + "grad_norm": 1.6438527124371585, + "learning_rate": 5.5132422114759655e-06, + "loss": 0.2506126403808594, + "step": 42530 + }, + { + "epoch": 0.36778756776854504, + "grad_norm": 16.830783261234334, + "learning_rate": 5.513130955562704e-06, + "loss": 0.3315155029296875, + "step": 42535 + }, + { + "epoch": 0.36783080129008827, + "grad_norm": 1.5091968248706555, + "learning_rate": 5.513019688059103e-06, + "loss": 0.023089599609375, + "step": 42540 + }, + { + "epoch": 0.36787403481163156, + "grad_norm": 7.530773371997278, + "learning_rate": 5.51290840896568e-06, + "loss": 0.130096435546875, + "step": 42545 + }, + { + "epoch": 0.3679172683331748, + "grad_norm": 30.57628886133942, + "learning_rate": 5.512797118282944e-06, + "loss": 0.28295745849609377, + "step": 42550 + }, + { + "epoch": 0.36796050185471807, + "grad_norm": 5.180549109537284, + "learning_rate": 5.512685816011408e-06, + "loss": 0.24593887329101563, + "step": 42555 + }, + { + "epoch": 0.36800373537626135, + "grad_norm": 1.8987299458888696, + "learning_rate": 5.512574502151589e-06, + "loss": 0.1506622314453125, + "step": 42560 + }, + { + "epoch": 0.3680469688978046, + "grad_norm": 4.286502101036086, + "learning_rate": 5.512463176703998e-06, + "loss": 0.252703857421875, + "step": 42565 + }, + { + "epoch": 0.36809020241934787, + "grad_norm": 3.9974975817363227, + "learning_rate": 5.512351839669149e-06, + "loss": 0.0978057861328125, + "step": 42570 + }, + { + "epoch": 0.36813343594089115, + "grad_norm": 20.557188252281005, + "learning_rate": 5.512240491047555e-06, + "loss": 0.3617584228515625, + "step": 42575 + }, + { + "epoch": 0.3681766694624344, + "grad_norm": 0.39004984052059316, + "learning_rate": 5.5121291308397295e-06, + "loss": 0.10633392333984375, + "step": 42580 + }, + { + "epoch": 0.36821990298397766, + "grad_norm": 10.847683385901185, + "learning_rate": 5.512017759046187e-06, + "loss": 0.058916473388671876, + "step": 42585 + }, + { + "epoch": 0.3682631365055209, + "grad_norm": 62.931724935141006, + "learning_rate": 5.51190637566744e-06, + "loss": 0.20839614868164064, + "step": 42590 + }, + { + "epoch": 0.3683063700270642, + "grad_norm": 3.6482904028274334, + "learning_rate": 5.511794980704003e-06, + "loss": 0.4329536437988281, + "step": 42595 + }, + { + "epoch": 0.36834960354860746, + "grad_norm": 19.923428569854277, + "learning_rate": 5.51168357415639e-06, + "loss": 0.0943267822265625, + "step": 42600 + }, + { + "epoch": 0.3683928370701507, + "grad_norm": 8.936057721995676, + "learning_rate": 5.511572156025115e-06, + "loss": 0.38150634765625, + "step": 42605 + }, + { + "epoch": 0.368436070591694, + "grad_norm": 7.752398380060742, + "learning_rate": 5.511460726310689e-06, + "loss": 0.221044921875, + "step": 42610 + }, + { + "epoch": 0.36847930411323726, + "grad_norm": 0.16186770025491695, + "learning_rate": 5.511349285013629e-06, + "loss": 0.0433685302734375, + "step": 42615 + }, + { + "epoch": 0.3685225376347805, + "grad_norm": 1.719535830099673, + "learning_rate": 5.5112378321344486e-06, + "loss": 0.381781005859375, + "step": 42620 + }, + { + "epoch": 0.3685657711563238, + "grad_norm": 1.0022744104483807, + "learning_rate": 5.511126367673661e-06, + "loss": 0.1627471923828125, + "step": 42625 + }, + { + "epoch": 0.36860900467786706, + "grad_norm": 7.4345445979155285, + "learning_rate": 5.51101489163178e-06, + "loss": 0.1537017822265625, + "step": 42630 + }, + { + "epoch": 0.3686522381994103, + "grad_norm": 4.733710243527496, + "learning_rate": 5.51090340400932e-06, + "loss": 0.07135162353515626, + "step": 42635 + }, + { + "epoch": 0.36869547172095357, + "grad_norm": 17.92961859729797, + "learning_rate": 5.510791904806796e-06, + "loss": 0.5755752563476563, + "step": 42640 + }, + { + "epoch": 0.3687387052424968, + "grad_norm": 5.850895623933088, + "learning_rate": 5.510680394024722e-06, + "loss": 0.4106756210327148, + "step": 42645 + }, + { + "epoch": 0.3687819387640401, + "grad_norm": 17.40049780924936, + "learning_rate": 5.510568871663611e-06, + "loss": 0.35730514526367185, + "step": 42650 + }, + { + "epoch": 0.36882517228558337, + "grad_norm": 3.00857996486137, + "learning_rate": 5.510457337723977e-06, + "loss": 0.080181884765625, + "step": 42655 + }, + { + "epoch": 0.3688684058071266, + "grad_norm": 12.356593454131726, + "learning_rate": 5.510345792206338e-06, + "loss": 0.05477294921875, + "step": 42660 + }, + { + "epoch": 0.3689116393286699, + "grad_norm": 0.3622719651443987, + "learning_rate": 5.510234235111204e-06, + "loss": 0.08792266845703126, + "step": 42665 + }, + { + "epoch": 0.36895487285021317, + "grad_norm": 8.13953097408313, + "learning_rate": 5.510122666439094e-06, + "loss": 0.06943511962890625, + "step": 42670 + }, + { + "epoch": 0.3689981063717564, + "grad_norm": 1.5860283788683331, + "learning_rate": 5.510011086190518e-06, + "loss": 0.0879486083984375, + "step": 42675 + }, + { + "epoch": 0.3690413398932997, + "grad_norm": 18.163526857571856, + "learning_rate": 5.509899494365993e-06, + "loss": 0.09272537231445313, + "step": 42680 + }, + { + "epoch": 0.3690845734148429, + "grad_norm": 27.0105590212827, + "learning_rate": 5.509787890966034e-06, + "loss": 0.2232666015625, + "step": 42685 + }, + { + "epoch": 0.3691278069363862, + "grad_norm": 1.0047654610494106, + "learning_rate": 5.509676275991155e-06, + "loss": 0.08752593994140626, + "step": 42690 + }, + { + "epoch": 0.3691710404579295, + "grad_norm": 12.688766187896967, + "learning_rate": 5.50956464944187e-06, + "loss": 0.06307373046875, + "step": 42695 + }, + { + "epoch": 0.3692142739794727, + "grad_norm": 24.340231879759678, + "learning_rate": 5.509453011318695e-06, + "loss": 0.133123779296875, + "step": 42700 + }, + { + "epoch": 0.369257507501016, + "grad_norm": 12.491574798445138, + "learning_rate": 5.509341361622145e-06, + "loss": 0.24963226318359374, + "step": 42705 + }, + { + "epoch": 0.3693007410225593, + "grad_norm": 73.31112085346393, + "learning_rate": 5.509229700352733e-06, + "loss": 0.36886138916015626, + "step": 42710 + }, + { + "epoch": 0.3693439745441025, + "grad_norm": 2.3315436148193234, + "learning_rate": 5.509118027510977e-06, + "loss": 0.0468780517578125, + "step": 42715 + }, + { + "epoch": 0.3693872080656458, + "grad_norm": 1.847776253144992, + "learning_rate": 5.509006343097389e-06, + "loss": 0.183038330078125, + "step": 42720 + }, + { + "epoch": 0.369430441587189, + "grad_norm": 61.672550123924275, + "learning_rate": 5.508894647112486e-06, + "loss": 0.299200439453125, + "step": 42725 + }, + { + "epoch": 0.3694736751087323, + "grad_norm": 30.46074600805107, + "learning_rate": 5.5087829395567824e-06, + "loss": 0.273486328125, + "step": 42730 + }, + { + "epoch": 0.3695169086302756, + "grad_norm": 2.0343361181071415, + "learning_rate": 5.508671220430794e-06, + "loss": 0.1690277099609375, + "step": 42735 + }, + { + "epoch": 0.3695601421518188, + "grad_norm": 5.694897639484896, + "learning_rate": 5.508559489735035e-06, + "loss": 0.2733649253845215, + "step": 42740 + }, + { + "epoch": 0.3696033756733621, + "grad_norm": 34.1035516942765, + "learning_rate": 5.508447747470021e-06, + "loss": 0.163897705078125, + "step": 42745 + }, + { + "epoch": 0.3696466091949054, + "grad_norm": 29.420107740428552, + "learning_rate": 5.508335993636268e-06, + "loss": 0.07233982086181641, + "step": 42750 + }, + { + "epoch": 0.3696898427164486, + "grad_norm": 2.987612555187108, + "learning_rate": 5.508224228234292e-06, + "loss": 0.1335742950439453, + "step": 42755 + }, + { + "epoch": 0.3697330762379919, + "grad_norm": 0.7748746619486995, + "learning_rate": 5.5081124512646065e-06, + "loss": 0.1852294921875, + "step": 42760 + }, + { + "epoch": 0.3697763097595351, + "grad_norm": 5.456157689928639, + "learning_rate": 5.508000662727728e-06, + "loss": 0.082275390625, + "step": 42765 + }, + { + "epoch": 0.3698195432810784, + "grad_norm": 5.539518422797297, + "learning_rate": 5.5078888626241725e-06, + "loss": 0.09237632751464844, + "step": 42770 + }, + { + "epoch": 0.3698627768026217, + "grad_norm": 3.0968086427055037, + "learning_rate": 5.507777050954455e-06, + "loss": 0.2410888671875, + "step": 42775 + }, + { + "epoch": 0.3699060103241649, + "grad_norm": 16.212731484022594, + "learning_rate": 5.507665227719091e-06, + "loss": 0.07888946533203126, + "step": 42780 + }, + { + "epoch": 0.3699492438457082, + "grad_norm": 10.34509732563279, + "learning_rate": 5.507553392918598e-06, + "loss": 0.275048828125, + "step": 42785 + }, + { + "epoch": 0.3699924773672515, + "grad_norm": 3.291816090355942, + "learning_rate": 5.507441546553488e-06, + "loss": 0.10812721252441407, + "step": 42790 + }, + { + "epoch": 0.3700357108887947, + "grad_norm": 1.3442139113458822, + "learning_rate": 5.507329688624281e-06, + "loss": 0.09119415283203125, + "step": 42795 + }, + { + "epoch": 0.370078944410338, + "grad_norm": 20.736409863761548, + "learning_rate": 5.50721781913149e-06, + "loss": 0.05887908935546875, + "step": 42800 + }, + { + "epoch": 0.3701221779318813, + "grad_norm": 10.47263924549922, + "learning_rate": 5.507105938075633e-06, + "loss": 0.355645751953125, + "step": 42805 + }, + { + "epoch": 0.3701654114534245, + "grad_norm": 6.983293196437513, + "learning_rate": 5.506994045457224e-06, + "loss": 0.20819091796875, + "step": 42810 + }, + { + "epoch": 0.3702086449749678, + "grad_norm": 0.7212224448239557, + "learning_rate": 5.50688214127678e-06, + "loss": 0.06326141357421874, + "step": 42815 + }, + { + "epoch": 0.37025187849651103, + "grad_norm": 67.55263085257064, + "learning_rate": 5.506770225534817e-06, + "loss": 0.5121414184570312, + "step": 42820 + }, + { + "epoch": 0.3702951120180543, + "grad_norm": 1.325446241825966, + "learning_rate": 5.506658298231851e-06, + "loss": 0.30469970703125, + "step": 42825 + }, + { + "epoch": 0.3703383455395976, + "grad_norm": 6.7451644152558226, + "learning_rate": 5.506546359368399e-06, + "loss": 0.5371192932128906, + "step": 42830 + }, + { + "epoch": 0.37038157906114083, + "grad_norm": 0.3241788494593442, + "learning_rate": 5.506434408944977e-06, + "loss": 0.09964447021484375, + "step": 42835 + }, + { + "epoch": 0.3704248125826841, + "grad_norm": 19.745118383071897, + "learning_rate": 5.5063224469621e-06, + "loss": 0.09878997802734375, + "step": 42840 + }, + { + "epoch": 0.3704680461042274, + "grad_norm": 35.68794898834445, + "learning_rate": 5.506210473420287e-06, + "loss": 0.1599639892578125, + "step": 42845 + }, + { + "epoch": 0.37051127962577063, + "grad_norm": 8.873828250881644, + "learning_rate": 5.5060984883200515e-06, + "loss": 0.1035888671875, + "step": 42850 + }, + { + "epoch": 0.3705545131473139, + "grad_norm": 34.12205452645336, + "learning_rate": 5.505986491661911e-06, + "loss": 0.340167236328125, + "step": 42855 + }, + { + "epoch": 0.37059774666885714, + "grad_norm": 4.292901924457606, + "learning_rate": 5.505874483446383e-06, + "loss": 0.14662399291992187, + "step": 42860 + }, + { + "epoch": 0.3706409801904004, + "grad_norm": 34.455277885375864, + "learning_rate": 5.505762463673983e-06, + "loss": 0.392327880859375, + "step": 42865 + }, + { + "epoch": 0.3706842137119437, + "grad_norm": 2.676959600354356, + "learning_rate": 5.505650432345228e-06, + "loss": 0.03364715576171875, + "step": 42870 + }, + { + "epoch": 0.37072744723348694, + "grad_norm": 34.8179529255898, + "learning_rate": 5.505538389460634e-06, + "loss": 0.27325439453125, + "step": 42875 + }, + { + "epoch": 0.3707706807550302, + "grad_norm": 1.692251884789156, + "learning_rate": 5.505426335020719e-06, + "loss": 0.25717887878417967, + "step": 42880 + }, + { + "epoch": 0.3708139142765735, + "grad_norm": 2.6328430679362627, + "learning_rate": 5.5053142690259984e-06, + "loss": 0.1792360305786133, + "step": 42885 + }, + { + "epoch": 0.37085714779811674, + "grad_norm": 5.107652192196084, + "learning_rate": 5.505202191476991e-06, + "loss": 0.16331024169921876, + "step": 42890 + }, + { + "epoch": 0.37090038131966, + "grad_norm": 5.082596282141805, + "learning_rate": 5.5050901023742114e-06, + "loss": 0.11247272491455078, + "step": 42895 + }, + { + "epoch": 0.37094361484120325, + "grad_norm": 45.78114125377722, + "learning_rate": 5.504978001718179e-06, + "loss": 0.30823593139648436, + "step": 42900 + }, + { + "epoch": 0.37098684836274654, + "grad_norm": 5.389861075285743, + "learning_rate": 5.504865889509407e-06, + "loss": 0.1114898681640625, + "step": 42905 + }, + { + "epoch": 0.3710300818842898, + "grad_norm": 5.7518146476800345, + "learning_rate": 5.504753765748417e-06, + "loss": 0.1703765869140625, + "step": 42910 + }, + { + "epoch": 0.37107331540583305, + "grad_norm": 28.013086123532453, + "learning_rate": 5.5046416304357224e-06, + "loss": 0.115802001953125, + "step": 42915 + }, + { + "epoch": 0.37111654892737633, + "grad_norm": 1.0150819187498996, + "learning_rate": 5.504529483571844e-06, + "loss": 0.062007904052734375, + "step": 42920 + }, + { + "epoch": 0.3711597824489196, + "grad_norm": 37.224559823649244, + "learning_rate": 5.504417325157294e-06, + "loss": 0.2444122314453125, + "step": 42925 + }, + { + "epoch": 0.37120301597046285, + "grad_norm": 16.31390646369234, + "learning_rate": 5.504305155192595e-06, + "loss": 0.0408538818359375, + "step": 42930 + }, + { + "epoch": 0.37124624949200613, + "grad_norm": 43.215099164514214, + "learning_rate": 5.504192973678261e-06, + "loss": 0.304327392578125, + "step": 42935 + }, + { + "epoch": 0.37128948301354936, + "grad_norm": 0.4917083237865728, + "learning_rate": 5.504080780614811e-06, + "loss": 0.03588180541992188, + "step": 42940 + }, + { + "epoch": 0.37133271653509264, + "grad_norm": 0.5522241752345105, + "learning_rate": 5.50396857600276e-06, + "loss": 0.22119674682617188, + "step": 42945 + }, + { + "epoch": 0.37137595005663593, + "grad_norm": 4.4219165451191715, + "learning_rate": 5.503856359842628e-06, + "loss": 0.055096435546875, + "step": 42950 + }, + { + "epoch": 0.37141918357817916, + "grad_norm": 6.539045029830479, + "learning_rate": 5.503744132134932e-06, + "loss": 0.13789520263671876, + "step": 42955 + }, + { + "epoch": 0.37146241709972244, + "grad_norm": 10.394238716273948, + "learning_rate": 5.50363189288019e-06, + "loss": 0.11530303955078125, + "step": 42960 + }, + { + "epoch": 0.3715056506212657, + "grad_norm": 7.464916553345798, + "learning_rate": 5.503519642078918e-06, + "loss": 0.43016891479492186, + "step": 42965 + }, + { + "epoch": 0.37154888414280896, + "grad_norm": 1.3675510605938024, + "learning_rate": 5.503407379731635e-06, + "loss": 0.17077865600585937, + "step": 42970 + }, + { + "epoch": 0.37159211766435224, + "grad_norm": 8.023304679458148, + "learning_rate": 5.503295105838858e-06, + "loss": 0.06818580627441406, + "step": 42975 + }, + { + "epoch": 0.37163535118589547, + "grad_norm": 6.759676384479623, + "learning_rate": 5.503182820401106e-06, + "loss": 0.094927978515625, + "step": 42980 + }, + { + "epoch": 0.37167858470743875, + "grad_norm": 31.547523384226935, + "learning_rate": 5.503070523418895e-06, + "loss": 0.295703125, + "step": 42985 + }, + { + "epoch": 0.37172181822898204, + "grad_norm": 4.751787185556274, + "learning_rate": 5.502958214892744e-06, + "loss": 0.1470489501953125, + "step": 42990 + }, + { + "epoch": 0.37176505175052527, + "grad_norm": 6.593053413868408, + "learning_rate": 5.502845894823171e-06, + "loss": 0.27083663940429686, + "step": 42995 + }, + { + "epoch": 0.37180828527206855, + "grad_norm": 22.391292091984077, + "learning_rate": 5.502733563210694e-06, + "loss": 0.10113067626953125, + "step": 43000 + }, + { + "epoch": 0.37185151879361183, + "grad_norm": 32.29562332675997, + "learning_rate": 5.502621220055831e-06, + "loss": 0.5207443237304688, + "step": 43005 + }, + { + "epoch": 0.37189475231515506, + "grad_norm": 2.3684169214295148, + "learning_rate": 5.5025088653591e-06, + "loss": 0.09587554931640625, + "step": 43010 + }, + { + "epoch": 0.37193798583669835, + "grad_norm": 0.45428650889155114, + "learning_rate": 5.50239649912102e-06, + "loss": 0.08645477294921874, + "step": 43015 + }, + { + "epoch": 0.37198121935824163, + "grad_norm": 0.40103643399284217, + "learning_rate": 5.502284121342107e-06, + "loss": 0.22245407104492188, + "step": 43020 + }, + { + "epoch": 0.37202445287978486, + "grad_norm": 16.053954959215567, + "learning_rate": 5.5021717320228826e-06, + "loss": 0.09709930419921875, + "step": 43025 + }, + { + "epoch": 0.37206768640132815, + "grad_norm": 0.3896132005078445, + "learning_rate": 5.502059331163862e-06, + "loss": 0.123419189453125, + "step": 43030 + }, + { + "epoch": 0.3721109199228714, + "grad_norm": 17.570873286577623, + "learning_rate": 5.501946918765564e-06, + "loss": 0.408612060546875, + "step": 43035 + }, + { + "epoch": 0.37215415344441466, + "grad_norm": 39.50310038984484, + "learning_rate": 5.50183449482851e-06, + "loss": 0.19945297241210938, + "step": 43040 + }, + { + "epoch": 0.37219738696595794, + "grad_norm": 5.093263807122418, + "learning_rate": 5.501722059353215e-06, + "loss": 0.043485260009765624, + "step": 43045 + }, + { + "epoch": 0.3722406204875012, + "grad_norm": 2.5779787062474764, + "learning_rate": 5.501609612340199e-06, + "loss": 0.05144500732421875, + "step": 43050 + }, + { + "epoch": 0.37228385400904446, + "grad_norm": 33.222404293950405, + "learning_rate": 5.501497153789982e-06, + "loss": 0.09705581665039062, + "step": 43055 + }, + { + "epoch": 0.37232708753058774, + "grad_norm": 43.709922365650705, + "learning_rate": 5.501384683703079e-06, + "loss": 0.5199966430664062, + "step": 43060 + }, + { + "epoch": 0.37237032105213097, + "grad_norm": 1.1765999560951976, + "learning_rate": 5.5012722020800115e-06, + "loss": 0.17896041870117188, + "step": 43065 + }, + { + "epoch": 0.37241355457367425, + "grad_norm": 22.547079557137128, + "learning_rate": 5.501159708921297e-06, + "loss": 0.5588088989257812, + "step": 43070 + }, + { + "epoch": 0.3724567880952175, + "grad_norm": 0.33241675444669494, + "learning_rate": 5.501047204227456e-06, + "loss": 0.1129180908203125, + "step": 43075 + }, + { + "epoch": 0.37250002161676077, + "grad_norm": 4.573991564391443, + "learning_rate": 5.500934687999007e-06, + "loss": 0.13103504180908204, + "step": 43080 + }, + { + "epoch": 0.37254325513830405, + "grad_norm": 21.324929712896292, + "learning_rate": 5.500822160236468e-06, + "loss": 0.1996063232421875, + "step": 43085 + }, + { + "epoch": 0.3725864886598473, + "grad_norm": 9.20510483596853, + "learning_rate": 5.500709620940357e-06, + "loss": 0.05034942626953125, + "step": 43090 + }, + { + "epoch": 0.37262972218139057, + "grad_norm": 36.7500139659845, + "learning_rate": 5.500597070111194e-06, + "loss": 0.13835601806640624, + "step": 43095 + }, + { + "epoch": 0.37267295570293385, + "grad_norm": 4.393947485904721, + "learning_rate": 5.5004845077494996e-06, + "loss": 0.2413116455078125, + "step": 43100 + }, + { + "epoch": 0.3727161892244771, + "grad_norm": 0.7822313829474611, + "learning_rate": 5.500371933855791e-06, + "loss": 0.6645668029785157, + "step": 43105 + }, + { + "epoch": 0.37275942274602036, + "grad_norm": 3.2420444870694527, + "learning_rate": 5.5002593484305875e-06, + "loss": 0.04719066619873047, + "step": 43110 + }, + { + "epoch": 0.3728026562675636, + "grad_norm": 31.572977872591135, + "learning_rate": 5.500146751474409e-06, + "loss": 0.21887874603271484, + "step": 43115 + }, + { + "epoch": 0.3728458897891069, + "grad_norm": 5.92025788730128, + "learning_rate": 5.500034142987775e-06, + "loss": 0.44608612060546876, + "step": 43120 + }, + { + "epoch": 0.37288912331065016, + "grad_norm": 17.87276447449358, + "learning_rate": 5.499921522971204e-06, + "loss": 0.23276290893554688, + "step": 43125 + }, + { + "epoch": 0.3729323568321934, + "grad_norm": 2.9690586447027316, + "learning_rate": 5.499808891425217e-06, + "loss": 0.1956714630126953, + "step": 43130 + }, + { + "epoch": 0.3729755903537367, + "grad_norm": 8.7392763042239, + "learning_rate": 5.499696248350331e-06, + "loss": 0.20537185668945312, + "step": 43135 + }, + { + "epoch": 0.37301882387527996, + "grad_norm": 1.3305116558683594, + "learning_rate": 5.499583593747067e-06, + "loss": 0.1279449462890625, + "step": 43140 + }, + { + "epoch": 0.3730620573968232, + "grad_norm": 1.1313558887962885, + "learning_rate": 5.499470927615944e-06, + "loss": 0.033863067626953125, + "step": 43145 + }, + { + "epoch": 0.3731052909183665, + "grad_norm": 14.853199585304917, + "learning_rate": 5.4993582499574825e-06, + "loss": 0.112286376953125, + "step": 43150 + }, + { + "epoch": 0.3731485244399097, + "grad_norm": 16.148193747646744, + "learning_rate": 5.4992455607722026e-06, + "loss": 0.2343780517578125, + "step": 43155 + }, + { + "epoch": 0.373191757961453, + "grad_norm": 0.8224779308218023, + "learning_rate": 5.499132860060621e-06, + "loss": 0.1520263671875, + "step": 43160 + }, + { + "epoch": 0.37323499148299627, + "grad_norm": 2.8986848831758696, + "learning_rate": 5.4990201478232615e-06, + "loss": 0.0822357177734375, + "step": 43165 + }, + { + "epoch": 0.3732782250045395, + "grad_norm": 0.32660831076936525, + "learning_rate": 5.498907424060641e-06, + "loss": 0.114984130859375, + "step": 43170 + }, + { + "epoch": 0.3733214585260828, + "grad_norm": 0.6925227102074247, + "learning_rate": 5.498794688773279e-06, + "loss": 0.40823936462402344, + "step": 43175 + }, + { + "epoch": 0.37336469204762607, + "grad_norm": 0.09196508497872084, + "learning_rate": 5.498681941961698e-06, + "loss": 0.0774566650390625, + "step": 43180 + }, + { + "epoch": 0.3734079255691693, + "grad_norm": 0.6819057698441852, + "learning_rate": 5.498569183626416e-06, + "loss": 0.0267425537109375, + "step": 43185 + }, + { + "epoch": 0.3734511590907126, + "grad_norm": 3.2542751451979415, + "learning_rate": 5.498456413767954e-06, + "loss": 0.21422348022460938, + "step": 43190 + }, + { + "epoch": 0.37349439261225587, + "grad_norm": 5.583128501646165, + "learning_rate": 5.4983436323868326e-06, + "loss": 0.2822357177734375, + "step": 43195 + }, + { + "epoch": 0.3735376261337991, + "grad_norm": 3.4504877878623694, + "learning_rate": 5.49823083948357e-06, + "loss": 0.09040679931640624, + "step": 43200 + }, + { + "epoch": 0.3735808596553424, + "grad_norm": 81.44183528583832, + "learning_rate": 5.498118035058688e-06, + "loss": 0.09502487182617188, + "step": 43205 + }, + { + "epoch": 0.3736240931768856, + "grad_norm": 2.2620009960302023, + "learning_rate": 5.498005219112707e-06, + "loss": 0.22794342041015625, + "step": 43210 + }, + { + "epoch": 0.3736673266984289, + "grad_norm": 0.29000879604916185, + "learning_rate": 5.497892391646145e-06, + "loss": 0.13491973876953126, + "step": 43215 + }, + { + "epoch": 0.3737105602199722, + "grad_norm": 8.653290614392159, + "learning_rate": 5.497779552659526e-06, + "loss": 0.2867435455322266, + "step": 43220 + }, + { + "epoch": 0.3737537937415154, + "grad_norm": 23.584441450405386, + "learning_rate": 5.497666702153367e-06, + "loss": 0.11980819702148438, + "step": 43225 + }, + { + "epoch": 0.3737970272630587, + "grad_norm": 17.158293115612338, + "learning_rate": 5.497553840128191e-06, + "loss": 0.11853485107421875, + "step": 43230 + }, + { + "epoch": 0.373840260784602, + "grad_norm": 0.9660083135075385, + "learning_rate": 5.497440966584516e-06, + "loss": 0.17178497314453126, + "step": 43235 + }, + { + "epoch": 0.3738834943061452, + "grad_norm": 0.18389193912023627, + "learning_rate": 5.4973280815228645e-06, + "loss": 0.3138394355773926, + "step": 43240 + }, + { + "epoch": 0.3739267278276885, + "grad_norm": 2.5603422908294453, + "learning_rate": 5.497215184943757e-06, + "loss": 0.160174560546875, + "step": 43245 + }, + { + "epoch": 0.3739699613492317, + "grad_norm": 3.875275259030302, + "learning_rate": 5.497102276847714e-06, + "loss": 0.23961563110351564, + "step": 43250 + }, + { + "epoch": 0.374013194870775, + "grad_norm": 2.726360600539499, + "learning_rate": 5.496989357235254e-06, + "loss": 0.4451549530029297, + "step": 43255 + }, + { + "epoch": 0.3740564283923183, + "grad_norm": 7.0243086893716224, + "learning_rate": 5.496876426106901e-06, + "loss": 0.2820037841796875, + "step": 43260 + }, + { + "epoch": 0.3740996619138615, + "grad_norm": 2.573272344364317, + "learning_rate": 5.496763483463175e-06, + "loss": 0.3237892150878906, + "step": 43265 + }, + { + "epoch": 0.3741428954354048, + "grad_norm": 9.280195740606864, + "learning_rate": 5.496650529304596e-06, + "loss": 0.28920745849609375, + "step": 43270 + }, + { + "epoch": 0.3741861289569481, + "grad_norm": 12.650174527921665, + "learning_rate": 5.496537563631686e-06, + "loss": 0.37185134887695315, + "step": 43275 + }, + { + "epoch": 0.3742293624784913, + "grad_norm": 20.913290200670613, + "learning_rate": 5.496424586444965e-06, + "loss": 0.1376953125, + "step": 43280 + }, + { + "epoch": 0.3742725960000346, + "grad_norm": 9.1143241193292, + "learning_rate": 5.4963115977449534e-06, + "loss": 0.19807357788085939, + "step": 43285 + }, + { + "epoch": 0.3743158295215778, + "grad_norm": 23.531002311413488, + "learning_rate": 5.496198597532174e-06, + "loss": 0.26064071655273435, + "step": 43290 + }, + { + "epoch": 0.3743590630431211, + "grad_norm": 36.609213785140646, + "learning_rate": 5.496085585807147e-06, + "loss": 0.4064434051513672, + "step": 43295 + }, + { + "epoch": 0.3744022965646644, + "grad_norm": 0.9922674673130926, + "learning_rate": 5.495972562570394e-06, + "loss": 0.0726409912109375, + "step": 43300 + }, + { + "epoch": 0.3744455300862076, + "grad_norm": 35.799571139554196, + "learning_rate": 5.495859527822436e-06, + "loss": 0.1611175537109375, + "step": 43305 + }, + { + "epoch": 0.3744887636077509, + "grad_norm": 39.69089641816607, + "learning_rate": 5.495746481563795e-06, + "loss": 0.62213134765625, + "step": 43310 + }, + { + "epoch": 0.3745319971292942, + "grad_norm": 2.197534420086273, + "learning_rate": 5.495633423794991e-06, + "loss": 0.16108665466308594, + "step": 43315 + }, + { + "epoch": 0.3745752306508374, + "grad_norm": 10.467494648809291, + "learning_rate": 5.495520354516547e-06, + "loss": 0.054993820190429685, + "step": 43320 + }, + { + "epoch": 0.3746184641723807, + "grad_norm": 4.755363151206429, + "learning_rate": 5.495407273728983e-06, + "loss": 0.0250213623046875, + "step": 43325 + }, + { + "epoch": 0.37466169769392393, + "grad_norm": 0.6451360474677644, + "learning_rate": 5.495294181432822e-06, + "loss": 0.10507659912109375, + "step": 43330 + }, + { + "epoch": 0.3747049312154672, + "grad_norm": 24.825779849810115, + "learning_rate": 5.495181077628583e-06, + "loss": 0.2909149169921875, + "step": 43335 + }, + { + "epoch": 0.3747481647370105, + "grad_norm": 0.3673211173209336, + "learning_rate": 5.49506796231679e-06, + "loss": 0.10428524017333984, + "step": 43340 + }, + { + "epoch": 0.37479139825855373, + "grad_norm": 11.400393690734102, + "learning_rate": 5.494954835497965e-06, + "loss": 0.0942962646484375, + "step": 43345 + }, + { + "epoch": 0.374834631780097, + "grad_norm": 0.19629118359053144, + "learning_rate": 5.494841697172628e-06, + "loss": 0.037725830078125, + "step": 43350 + }, + { + "epoch": 0.3748778653016403, + "grad_norm": 13.240751608795902, + "learning_rate": 5.494728547341302e-06, + "loss": 0.07894020080566407, + "step": 43355 + }, + { + "epoch": 0.37492109882318353, + "grad_norm": 2.849562826082907, + "learning_rate": 5.4946153860045075e-06, + "loss": 0.12680397033691407, + "step": 43360 + }, + { + "epoch": 0.3749643323447268, + "grad_norm": 22.685999295848873, + "learning_rate": 5.494502213162768e-06, + "loss": 0.10062484741210938, + "step": 43365 + }, + { + "epoch": 0.3750075658662701, + "grad_norm": 1.7475536633927773, + "learning_rate": 5.494389028816605e-06, + "loss": 0.11414794921875, + "step": 43370 + }, + { + "epoch": 0.3750507993878133, + "grad_norm": 4.530918681965543, + "learning_rate": 5.49427583296654e-06, + "loss": 0.37105560302734375, + "step": 43375 + }, + { + "epoch": 0.3750940329093566, + "grad_norm": 5.2024489188038645, + "learning_rate": 5.494162625613094e-06, + "loss": 0.1177703857421875, + "step": 43380 + }, + { + "epoch": 0.37513726643089984, + "grad_norm": 1.141406531320194, + "learning_rate": 5.494049406756792e-06, + "loss": 0.10079345703125, + "step": 43385 + }, + { + "epoch": 0.3751804999524431, + "grad_norm": 19.737772706549237, + "learning_rate": 5.493936176398154e-06, + "loss": 0.09179840087890626, + "step": 43390 + }, + { + "epoch": 0.3752237334739864, + "grad_norm": 25.831433982287958, + "learning_rate": 5.493822934537703e-06, + "loss": 0.16909637451171874, + "step": 43395 + }, + { + "epoch": 0.37526696699552964, + "grad_norm": 7.89529631669344, + "learning_rate": 5.49370968117596e-06, + "loss": 0.247100830078125, + "step": 43400 + }, + { + "epoch": 0.3753102005170729, + "grad_norm": 0.3044044369432907, + "learning_rate": 5.493596416313449e-06, + "loss": 0.04246864318847656, + "step": 43405 + }, + { + "epoch": 0.3753534340386162, + "grad_norm": 28.49055823091018, + "learning_rate": 5.49348313995069e-06, + "loss": 0.2331573486328125, + "step": 43410 + }, + { + "epoch": 0.37539666756015944, + "grad_norm": 23.123841725309934, + "learning_rate": 5.493369852088209e-06, + "loss": 0.10566177368164062, + "step": 43415 + }, + { + "epoch": 0.3754399010817027, + "grad_norm": 13.787104046035392, + "learning_rate": 5.493256552726527e-06, + "loss": 0.14039764404296876, + "step": 43420 + }, + { + "epoch": 0.37548313460324595, + "grad_norm": 2.3249447066377953, + "learning_rate": 5.493143241866165e-06, + "loss": 0.3017425537109375, + "step": 43425 + }, + { + "epoch": 0.37552636812478923, + "grad_norm": 19.680317934685437, + "learning_rate": 5.4930299195076476e-06, + "loss": 0.2975738525390625, + "step": 43430 + }, + { + "epoch": 0.3755696016463325, + "grad_norm": 3.8451853152834494, + "learning_rate": 5.492916585651495e-06, + "loss": 0.20527992248535157, + "step": 43435 + }, + { + "epoch": 0.37561283516787575, + "grad_norm": 11.404594124694592, + "learning_rate": 5.492803240298233e-06, + "loss": 0.4036369323730469, + "step": 43440 + }, + { + "epoch": 0.37565606868941903, + "grad_norm": 28.036679823556206, + "learning_rate": 5.492689883448382e-06, + "loss": 0.4948394775390625, + "step": 43445 + }, + { + "epoch": 0.3756993022109623, + "grad_norm": 0.2458106043567353, + "learning_rate": 5.492576515102466e-06, + "loss": 0.14245147705078126, + "step": 43450 + }, + { + "epoch": 0.37574253573250554, + "grad_norm": 7.3766054238787, + "learning_rate": 5.492463135261006e-06, + "loss": 0.07065277099609375, + "step": 43455 + }, + { + "epoch": 0.37578576925404883, + "grad_norm": 0.4937093371713382, + "learning_rate": 5.492349743924527e-06, + "loss": 0.11446914672851563, + "step": 43460 + }, + { + "epoch": 0.37582900277559206, + "grad_norm": 10.226209619014776, + "learning_rate": 5.4922363410935515e-06, + "loss": 0.19262542724609374, + "step": 43465 + }, + { + "epoch": 0.37587223629713534, + "grad_norm": 9.5021444304025, + "learning_rate": 5.492122926768602e-06, + "loss": 0.104132080078125, + "step": 43470 + }, + { + "epoch": 0.3759154698186786, + "grad_norm": 5.9796194218122185, + "learning_rate": 5.492009500950201e-06, + "loss": 0.34298248291015626, + "step": 43475 + }, + { + "epoch": 0.37595870334022186, + "grad_norm": 1.230473283466772, + "learning_rate": 5.4918960636388734e-06, + "loss": 0.145001220703125, + "step": 43480 + }, + { + "epoch": 0.37600193686176514, + "grad_norm": 16.181934148400376, + "learning_rate": 5.491782614835141e-06, + "loss": 0.0426727294921875, + "step": 43485 + }, + { + "epoch": 0.3760451703833084, + "grad_norm": 10.448723546616089, + "learning_rate": 5.491669154539528e-06, + "loss": 0.28263397216796876, + "step": 43490 + }, + { + "epoch": 0.37608840390485165, + "grad_norm": 3.4300167905134016, + "learning_rate": 5.491555682752557e-06, + "loss": 0.06251220703125, + "step": 43495 + }, + { + "epoch": 0.37613163742639494, + "grad_norm": 0.5324125943614045, + "learning_rate": 5.491442199474749e-06, + "loss": 0.1360931396484375, + "step": 43500 + }, + { + "epoch": 0.37617487094793817, + "grad_norm": 4.6868804967824556, + "learning_rate": 5.491328704706632e-06, + "loss": 0.13839187622070312, + "step": 43505 + }, + { + "epoch": 0.37621810446948145, + "grad_norm": 6.92527402827326, + "learning_rate": 5.491215198448727e-06, + "loss": 0.1266021728515625, + "step": 43510 + }, + { + "epoch": 0.37626133799102474, + "grad_norm": 37.869003675723796, + "learning_rate": 5.4911016807015576e-06, + "loss": 0.16094474792480468, + "step": 43515 + }, + { + "epoch": 0.37630457151256796, + "grad_norm": 5.324875760116542, + "learning_rate": 5.4909881514656465e-06, + "loss": 0.1920684814453125, + "step": 43520 + }, + { + "epoch": 0.37634780503411125, + "grad_norm": 11.674481011498285, + "learning_rate": 5.490874610741518e-06, + "loss": 0.19197235107421876, + "step": 43525 + }, + { + "epoch": 0.37639103855565453, + "grad_norm": 9.698172969141535, + "learning_rate": 5.490761058529697e-06, + "loss": 0.1289093017578125, + "step": 43530 + }, + { + "epoch": 0.37643427207719776, + "grad_norm": 16.082417111434626, + "learning_rate": 5.4906474948307065e-06, + "loss": 0.101068115234375, + "step": 43535 + }, + { + "epoch": 0.37647750559874105, + "grad_norm": 24.88924992244423, + "learning_rate": 5.490533919645069e-06, + "loss": 0.39297332763671877, + "step": 43540 + }, + { + "epoch": 0.37652073912028433, + "grad_norm": 22.949552558125678, + "learning_rate": 5.490420332973309e-06, + "loss": 0.257965087890625, + "step": 43545 + }, + { + "epoch": 0.37656397264182756, + "grad_norm": 9.951098523981877, + "learning_rate": 5.490306734815951e-06, + "loss": 0.1491943359375, + "step": 43550 + }, + { + "epoch": 0.37660720616337084, + "grad_norm": 11.567877754083982, + "learning_rate": 5.490193125173517e-06, + "loss": 0.1061309814453125, + "step": 43555 + }, + { + "epoch": 0.3766504396849141, + "grad_norm": 11.333868918432543, + "learning_rate": 5.490079504046535e-06, + "loss": 0.21719970703125, + "step": 43560 + }, + { + "epoch": 0.37669367320645736, + "grad_norm": 19.50649966922712, + "learning_rate": 5.489965871435524e-06, + "loss": 0.3197540283203125, + "step": 43565 + }, + { + "epoch": 0.37673690672800064, + "grad_norm": 3.8707029901252645, + "learning_rate": 5.489852227341011e-06, + "loss": 0.511578369140625, + "step": 43570 + }, + { + "epoch": 0.37678014024954387, + "grad_norm": 9.35768062311951, + "learning_rate": 5.48973857176352e-06, + "loss": 0.0772237777709961, + "step": 43575 + }, + { + "epoch": 0.37682337377108716, + "grad_norm": 0.5133215904384529, + "learning_rate": 5.489624904703575e-06, + "loss": 0.046262359619140624, + "step": 43580 + }, + { + "epoch": 0.37686660729263044, + "grad_norm": 40.88372904790289, + "learning_rate": 5.489511226161699e-06, + "loss": 0.3969635009765625, + "step": 43585 + }, + { + "epoch": 0.37690984081417367, + "grad_norm": 4.9090295910992765, + "learning_rate": 5.489397536138418e-06, + "loss": 0.24599151611328124, + "step": 43590 + }, + { + "epoch": 0.37695307433571695, + "grad_norm": 6.432790384966777, + "learning_rate": 5.489283834634255e-06, + "loss": 0.1904510498046875, + "step": 43595 + }, + { + "epoch": 0.3769963078572602, + "grad_norm": 3.0235759293072886, + "learning_rate": 5.489170121649735e-06, + "loss": 0.191961669921875, + "step": 43600 + }, + { + "epoch": 0.37703954137880347, + "grad_norm": 31.132779973010432, + "learning_rate": 5.4890563971853814e-06, + "loss": 0.38581047058105467, + "step": 43605 + }, + { + "epoch": 0.37708277490034675, + "grad_norm": 4.159811174069653, + "learning_rate": 5.48894266124172e-06, + "loss": 0.0670440673828125, + "step": 43610 + }, + { + "epoch": 0.37712600842189, + "grad_norm": 0.7433788720166278, + "learning_rate": 5.4888289138192766e-06, + "loss": 0.053479766845703124, + "step": 43615 + }, + { + "epoch": 0.37716924194343326, + "grad_norm": 3.237508868046934, + "learning_rate": 5.488715154918573e-06, + "loss": 0.242291259765625, + "step": 43620 + }, + { + "epoch": 0.37721247546497655, + "grad_norm": 1.9007834706955253, + "learning_rate": 5.488601384540135e-06, + "loss": 0.18122711181640624, + "step": 43625 + }, + { + "epoch": 0.3772557089865198, + "grad_norm": 58.42582529646827, + "learning_rate": 5.488487602684487e-06, + "loss": 0.1720245361328125, + "step": 43630 + }, + { + "epoch": 0.37729894250806306, + "grad_norm": 4.094564092999294, + "learning_rate": 5.488373809352155e-06, + "loss": 0.13092193603515626, + "step": 43635 + }, + { + "epoch": 0.3773421760296063, + "grad_norm": 38.13840200884481, + "learning_rate": 5.488260004543663e-06, + "loss": 0.1240234375, + "step": 43640 + }, + { + "epoch": 0.3773854095511496, + "grad_norm": 4.7784933803767435, + "learning_rate": 5.4881461882595346e-06, + "loss": 0.22529773712158202, + "step": 43645 + }, + { + "epoch": 0.37742864307269286, + "grad_norm": 0.7698114754955435, + "learning_rate": 5.488032360500296e-06, + "loss": 0.1034698486328125, + "step": 43650 + }, + { + "epoch": 0.3774718765942361, + "grad_norm": 28.843259351753474, + "learning_rate": 5.4879185212664725e-06, + "loss": 0.1940032958984375, + "step": 43655 + }, + { + "epoch": 0.3775151101157794, + "grad_norm": 6.344936066932554, + "learning_rate": 5.487804670558588e-06, + "loss": 0.142962646484375, + "step": 43660 + }, + { + "epoch": 0.37755834363732266, + "grad_norm": 4.394657016167086, + "learning_rate": 5.487690808377168e-06, + "loss": 0.053326416015625, + "step": 43665 + }, + { + "epoch": 0.3776015771588659, + "grad_norm": 10.94721736956757, + "learning_rate": 5.4875769347227395e-06, + "loss": 0.1727264404296875, + "step": 43670 + }, + { + "epoch": 0.37764481068040917, + "grad_norm": 15.514844837603052, + "learning_rate": 5.487463049595824e-06, + "loss": 0.1365509033203125, + "step": 43675 + }, + { + "epoch": 0.3776880442019524, + "grad_norm": 4.7652281193532255, + "learning_rate": 5.48734915299695e-06, + "loss": 0.05732231140136719, + "step": 43680 + }, + { + "epoch": 0.3777312777234957, + "grad_norm": 3.132784714017411, + "learning_rate": 5.4872352449266415e-06, + "loss": 0.137384033203125, + "step": 43685 + }, + { + "epoch": 0.37777451124503897, + "grad_norm": 10.456310730861563, + "learning_rate": 5.4871213253854226e-06, + "loss": 0.044469451904296874, + "step": 43690 + }, + { + "epoch": 0.3778177447665822, + "grad_norm": 1.4722759359498157, + "learning_rate": 5.487007394373821e-06, + "loss": 0.27172279357910156, + "step": 43695 + }, + { + "epoch": 0.3778609782881255, + "grad_norm": 39.71532412769557, + "learning_rate": 5.486893451892361e-06, + "loss": 0.28316612243652345, + "step": 43700 + }, + { + "epoch": 0.37790421180966877, + "grad_norm": 18.224209262054412, + "learning_rate": 5.486779497941568e-06, + "loss": 0.4657596588134766, + "step": 43705 + }, + { + "epoch": 0.377947445331212, + "grad_norm": 14.006251133426153, + "learning_rate": 5.486665532521967e-06, + "loss": 0.23515625, + "step": 43710 + }, + { + "epoch": 0.3779906788527553, + "grad_norm": 24.972514843296125, + "learning_rate": 5.486551555634085e-06, + "loss": 0.38018646240234377, + "step": 43715 + }, + { + "epoch": 0.3780339123742985, + "grad_norm": 21.462004436080765, + "learning_rate": 5.486437567278448e-06, + "loss": 0.10430984497070313, + "step": 43720 + }, + { + "epoch": 0.3780771458958418, + "grad_norm": 27.160827341305687, + "learning_rate": 5.486323567455578e-06, + "loss": 0.15954437255859374, + "step": 43725 + }, + { + "epoch": 0.3781203794173851, + "grad_norm": 5.982581540945716, + "learning_rate": 5.486209556166006e-06, + "loss": 0.27202720642089845, + "step": 43730 + }, + { + "epoch": 0.3781636129389283, + "grad_norm": 28.010029442455547, + "learning_rate": 5.486095533410254e-06, + "loss": 0.466094970703125, + "step": 43735 + }, + { + "epoch": 0.3782068464604716, + "grad_norm": 1.148413420423748, + "learning_rate": 5.485981499188848e-06, + "loss": 0.0797821044921875, + "step": 43740 + }, + { + "epoch": 0.3782500799820149, + "grad_norm": 3.004449432605923, + "learning_rate": 5.485867453502317e-06, + "loss": 0.39065399169921877, + "step": 43745 + }, + { + "epoch": 0.3782933135035581, + "grad_norm": 22.07957879532725, + "learning_rate": 5.485753396351184e-06, + "loss": 0.1408447265625, + "step": 43750 + }, + { + "epoch": 0.3783365470251014, + "grad_norm": 11.015434254147014, + "learning_rate": 5.4856393277359764e-06, + "loss": 0.20901641845703126, + "step": 43755 + }, + { + "epoch": 0.3783797805466447, + "grad_norm": 67.03410189811949, + "learning_rate": 5.48552524765722e-06, + "loss": 0.61026611328125, + "step": 43760 + }, + { + "epoch": 0.3784230140681879, + "grad_norm": 3.167921427218215, + "learning_rate": 5.48541115611544e-06, + "loss": 0.15137958526611328, + "step": 43765 + }, + { + "epoch": 0.3784662475897312, + "grad_norm": 13.217226470093301, + "learning_rate": 5.485297053111163e-06, + "loss": 0.40618896484375, + "step": 43770 + }, + { + "epoch": 0.3785094811112744, + "grad_norm": 19.901811495204623, + "learning_rate": 5.485182938644917e-06, + "loss": 0.249493408203125, + "step": 43775 + }, + { + "epoch": 0.3785527146328177, + "grad_norm": 1.1391971067251432, + "learning_rate": 5.485068812717226e-06, + "loss": 0.09355087280273437, + "step": 43780 + }, + { + "epoch": 0.378595948154361, + "grad_norm": 54.76770154096042, + "learning_rate": 5.4849546753286175e-06, + "loss": 0.278875732421875, + "step": 43785 + }, + { + "epoch": 0.3786391816759042, + "grad_norm": 3.3623027587559062, + "learning_rate": 5.484840526479617e-06, + "loss": 0.147076416015625, + "step": 43790 + }, + { + "epoch": 0.3786824151974475, + "grad_norm": 1.8943725683665797, + "learning_rate": 5.484726366170752e-06, + "loss": 0.02373924255371094, + "step": 43795 + }, + { + "epoch": 0.3787256487189908, + "grad_norm": 1.2937491204654943, + "learning_rate": 5.484612194402549e-06, + "loss": 0.1815887451171875, + "step": 43800 + }, + { + "epoch": 0.378768882240534, + "grad_norm": 18.958036429312767, + "learning_rate": 5.4844980111755334e-06, + "loss": 0.4677886962890625, + "step": 43805 + }, + { + "epoch": 0.3788121157620773, + "grad_norm": 1.1256379830749879, + "learning_rate": 5.484383816490233e-06, + "loss": 0.07642822265625, + "step": 43810 + }, + { + "epoch": 0.3788553492836205, + "grad_norm": 5.012219282089206, + "learning_rate": 5.484269610347174e-06, + "loss": 0.05133686065673828, + "step": 43815 + }, + { + "epoch": 0.3788985828051638, + "grad_norm": 59.41802732991578, + "learning_rate": 5.484155392746883e-06, + "loss": 0.4271484375, + "step": 43820 + }, + { + "epoch": 0.3789418163267071, + "grad_norm": 1.9192067359589278, + "learning_rate": 5.484041163689887e-06, + "loss": 0.1732654571533203, + "step": 43825 + }, + { + "epoch": 0.3789850498482503, + "grad_norm": 6.346673702725154, + "learning_rate": 5.483926923176713e-06, + "loss": 0.2676567077636719, + "step": 43830 + }, + { + "epoch": 0.3790282833697936, + "grad_norm": 34.03164499684542, + "learning_rate": 5.483812671207885e-06, + "loss": 0.21812057495117188, + "step": 43835 + }, + { + "epoch": 0.3790715168913369, + "grad_norm": 8.97892143707442, + "learning_rate": 5.4836984077839344e-06, + "loss": 0.265374755859375, + "step": 43840 + }, + { + "epoch": 0.3791147504128801, + "grad_norm": 5.509777631244143, + "learning_rate": 5.483584132905387e-06, + "loss": 0.13665924072265626, + "step": 43845 + }, + { + "epoch": 0.3791579839344234, + "grad_norm": 1.2699863461445844, + "learning_rate": 5.483469846572768e-06, + "loss": 0.164459228515625, + "step": 43850 + }, + { + "epoch": 0.37920121745596663, + "grad_norm": 0.585001418865177, + "learning_rate": 5.483355548786606e-06, + "loss": 0.0973175048828125, + "step": 43855 + }, + { + "epoch": 0.3792444509775099, + "grad_norm": 3.5042917333226904, + "learning_rate": 5.483241239547428e-06, + "loss": 0.1968231201171875, + "step": 43860 + }, + { + "epoch": 0.3792876844990532, + "grad_norm": 0.7297789140320659, + "learning_rate": 5.483126918855759e-06, + "loss": 0.05061149597167969, + "step": 43865 + }, + { + "epoch": 0.37933091802059643, + "grad_norm": 2.387089391427703, + "learning_rate": 5.48301258671213e-06, + "loss": 0.19183349609375, + "step": 43870 + }, + { + "epoch": 0.3793741515421397, + "grad_norm": 20.69205660894346, + "learning_rate": 5.482898243117065e-06, + "loss": 0.15593948364257812, + "step": 43875 + }, + { + "epoch": 0.379417385063683, + "grad_norm": 7.314109080741355, + "learning_rate": 5.482783888071093e-06, + "loss": 0.24255599975585937, + "step": 43880 + }, + { + "epoch": 0.37946061858522623, + "grad_norm": 0.2840062379688594, + "learning_rate": 5.4826695215747406e-06, + "loss": 0.021207427978515624, + "step": 43885 + }, + { + "epoch": 0.3795038521067695, + "grad_norm": 11.582472812067723, + "learning_rate": 5.482555143628538e-06, + "loss": 0.2071929931640625, + "step": 43890 + }, + { + "epoch": 0.37954708562831274, + "grad_norm": 8.287914286692793, + "learning_rate": 5.482440754233008e-06, + "loss": 0.168011474609375, + "step": 43895 + }, + { + "epoch": 0.379590319149856, + "grad_norm": 1.8990330972269678, + "learning_rate": 5.482326353388681e-06, + "loss": 0.11445236206054688, + "step": 43900 + }, + { + "epoch": 0.3796335526713993, + "grad_norm": 0.5224952615174705, + "learning_rate": 5.4822119410960855e-06, + "loss": 0.07910385131835937, + "step": 43905 + }, + { + "epoch": 0.37967678619294254, + "grad_norm": 8.264771231884536, + "learning_rate": 5.482097517355746e-06, + "loss": 0.297900390625, + "step": 43910 + }, + { + "epoch": 0.3797200197144858, + "grad_norm": 31.48308924482875, + "learning_rate": 5.481983082168194e-06, + "loss": 0.71669921875, + "step": 43915 + }, + { + "epoch": 0.3797632532360291, + "grad_norm": 29.95620040594587, + "learning_rate": 5.481868635533954e-06, + "loss": 0.43655548095703123, + "step": 43920 + }, + { + "epoch": 0.37980648675757234, + "grad_norm": 4.767063704039091, + "learning_rate": 5.481754177453556e-06, + "loss": 0.06489181518554688, + "step": 43925 + }, + { + "epoch": 0.3798497202791156, + "grad_norm": 8.539306438337093, + "learning_rate": 5.481639707927527e-06, + "loss": 0.175830078125, + "step": 43930 + }, + { + "epoch": 0.3798929538006589, + "grad_norm": 0.8552798160053563, + "learning_rate": 5.481525226956394e-06, + "loss": 0.11141357421875, + "step": 43935 + }, + { + "epoch": 0.37993618732220213, + "grad_norm": 98.03846334777639, + "learning_rate": 5.481410734540687e-06, + "loss": 0.5094146728515625, + "step": 43940 + }, + { + "epoch": 0.3799794208437454, + "grad_norm": 0.500179308425044, + "learning_rate": 5.481296230680932e-06, + "loss": 0.10242233276367188, + "step": 43945 + }, + { + "epoch": 0.38002265436528865, + "grad_norm": 5.819365675135459, + "learning_rate": 5.481181715377658e-06, + "loss": 0.134527587890625, + "step": 43950 + }, + { + "epoch": 0.38006588788683193, + "grad_norm": 37.387883184348915, + "learning_rate": 5.481067188631394e-06, + "loss": 0.66162109375, + "step": 43955 + }, + { + "epoch": 0.3801091214083752, + "grad_norm": 46.850292607013884, + "learning_rate": 5.4809526504426676e-06, + "loss": 0.1712890625, + "step": 43960 + }, + { + "epoch": 0.38015235492991845, + "grad_norm": 1.1265949933104793, + "learning_rate": 5.480838100812005e-06, + "loss": 0.041187286376953125, + "step": 43965 + }, + { + "epoch": 0.38019558845146173, + "grad_norm": 5.947932882273587, + "learning_rate": 5.480723539739938e-06, + "loss": 0.1969757080078125, + "step": 43970 + }, + { + "epoch": 0.380238821973005, + "grad_norm": 10.581536953676673, + "learning_rate": 5.480608967226993e-06, + "loss": 0.27593994140625, + "step": 43975 + }, + { + "epoch": 0.38028205549454824, + "grad_norm": 6.634011591678558, + "learning_rate": 5.480494383273698e-06, + "loss": 0.14288482666015626, + "step": 43980 + }, + { + "epoch": 0.38032528901609153, + "grad_norm": 4.407114523117595, + "learning_rate": 5.480379787880584e-06, + "loss": 0.2791330337524414, + "step": 43985 + }, + { + "epoch": 0.38036852253763476, + "grad_norm": 26.93629518273842, + "learning_rate": 5.480265181048175e-06, + "loss": 0.11940460205078125, + "step": 43990 + }, + { + "epoch": 0.38041175605917804, + "grad_norm": 7.343832793926656, + "learning_rate": 5.480150562777003e-06, + "loss": 0.11142425537109375, + "step": 43995 + }, + { + "epoch": 0.3804549895807213, + "grad_norm": 0.7731850692129854, + "learning_rate": 5.4800359330675965e-06, + "loss": 0.28409881591796876, + "step": 44000 + }, + { + "epoch": 0.38049822310226455, + "grad_norm": 25.986824273818424, + "learning_rate": 5.479921291920482e-06, + "loss": 0.1731658935546875, + "step": 44005 + }, + { + "epoch": 0.38054145662380784, + "grad_norm": 0.8818111867013648, + "learning_rate": 5.479806639336191e-06, + "loss": 0.340338134765625, + "step": 44010 + }, + { + "epoch": 0.3805846901453511, + "grad_norm": 0.3085841094858339, + "learning_rate": 5.479691975315251e-06, + "loss": 0.03069915771484375, + "step": 44015 + }, + { + "epoch": 0.38062792366689435, + "grad_norm": 9.279736551894189, + "learning_rate": 5.47957729985819e-06, + "loss": 0.0839385986328125, + "step": 44020 + }, + { + "epoch": 0.38067115718843764, + "grad_norm": 5.360576486320866, + "learning_rate": 5.479462612965537e-06, + "loss": 0.03330230712890625, + "step": 44025 + }, + { + "epoch": 0.38071439070998087, + "grad_norm": 3.7381032872968696, + "learning_rate": 5.479347914637822e-06, + "loss": 0.0896484375, + "step": 44030 + }, + { + "epoch": 0.38075762423152415, + "grad_norm": 9.107852483065193, + "learning_rate": 5.479233204875574e-06, + "loss": 0.06980743408203124, + "step": 44035 + }, + { + "epoch": 0.38080085775306743, + "grad_norm": 1.954231771773525, + "learning_rate": 5.479118483679321e-06, + "loss": 0.319537353515625, + "step": 44040 + }, + { + "epoch": 0.38084409127461066, + "grad_norm": 1.160777122535235, + "learning_rate": 5.479003751049593e-06, + "loss": 0.1006103515625, + "step": 44045 + }, + { + "epoch": 0.38088732479615395, + "grad_norm": 2.2620902680878436, + "learning_rate": 5.478889006986918e-06, + "loss": 0.113006591796875, + "step": 44050 + }, + { + "epoch": 0.38093055831769723, + "grad_norm": 26.037087378385596, + "learning_rate": 5.478774251491827e-06, + "loss": 0.0907073974609375, + "step": 44055 + }, + { + "epoch": 0.38097379183924046, + "grad_norm": 16.473357526377427, + "learning_rate": 5.478659484564848e-06, + "loss": 0.15119705200195313, + "step": 44060 + }, + { + "epoch": 0.38101702536078375, + "grad_norm": 6.173399707081099, + "learning_rate": 5.4785447062065095e-06, + "loss": 0.1895751953125, + "step": 44065 + }, + { + "epoch": 0.381060258882327, + "grad_norm": 0.2756804718860803, + "learning_rate": 5.478429916417344e-06, + "loss": 0.1167694091796875, + "step": 44070 + }, + { + "epoch": 0.38110349240387026, + "grad_norm": 3.575846841027654, + "learning_rate": 5.478315115197877e-06, + "loss": 0.054705810546875, + "step": 44075 + }, + { + "epoch": 0.38114672592541354, + "grad_norm": 1.4704349320482415, + "learning_rate": 5.47820030254864e-06, + "loss": 0.20214767456054689, + "step": 44080 + }, + { + "epoch": 0.38118995944695677, + "grad_norm": 1.1499955030839029, + "learning_rate": 5.478085478470162e-06, + "loss": 0.038238525390625, + "step": 44085 + }, + { + "epoch": 0.38123319296850006, + "grad_norm": 17.425833467413398, + "learning_rate": 5.477970642962972e-06, + "loss": 0.1544281005859375, + "step": 44090 + }, + { + "epoch": 0.38127642649004334, + "grad_norm": 54.02824792673214, + "learning_rate": 5.477855796027601e-06, + "loss": 0.25745849609375, + "step": 44095 + }, + { + "epoch": 0.38131966001158657, + "grad_norm": 12.11689946507417, + "learning_rate": 5.477740937664579e-06, + "loss": 0.08437690734863282, + "step": 44100 + }, + { + "epoch": 0.38136289353312985, + "grad_norm": 4.3184205080960085, + "learning_rate": 5.477626067874434e-06, + "loss": 0.46233177185058594, + "step": 44105 + }, + { + "epoch": 0.38140612705467314, + "grad_norm": 11.024822372675189, + "learning_rate": 5.477511186657696e-06, + "loss": 0.06939620971679687, + "step": 44110 + }, + { + "epoch": 0.38144936057621637, + "grad_norm": 2.986373296487096, + "learning_rate": 5.4773962940148965e-06, + "loss": 0.2036163330078125, + "step": 44115 + }, + { + "epoch": 0.38149259409775965, + "grad_norm": 9.118200664750375, + "learning_rate": 5.477281389946563e-06, + "loss": 0.15218887329101563, + "step": 44120 + }, + { + "epoch": 0.3815358276193029, + "grad_norm": 4.376190990049772, + "learning_rate": 5.477166474453227e-06, + "loss": 0.0672454833984375, + "step": 44125 + }, + { + "epoch": 0.38157906114084617, + "grad_norm": 5.468494909386899, + "learning_rate": 5.477051547535418e-06, + "loss": 0.1197418212890625, + "step": 44130 + }, + { + "epoch": 0.38162229466238945, + "grad_norm": 16.314931146315836, + "learning_rate": 5.476936609193667e-06, + "loss": 0.10909423828125, + "step": 44135 + }, + { + "epoch": 0.3816655281839327, + "grad_norm": 4.001832307653622, + "learning_rate": 5.476821659428502e-06, + "loss": 0.35132598876953125, + "step": 44140 + }, + { + "epoch": 0.38170876170547596, + "grad_norm": 25.927145192927544, + "learning_rate": 5.476706698240455e-06, + "loss": 0.25867462158203125, + "step": 44145 + }, + { + "epoch": 0.38175199522701925, + "grad_norm": 2.720713773925191, + "learning_rate": 5.476591725630055e-06, + "loss": 0.108197021484375, + "step": 44150 + }, + { + "epoch": 0.3817952287485625, + "grad_norm": 18.1957094695026, + "learning_rate": 5.4764767415978335e-06, + "loss": 0.15084762573242189, + "step": 44155 + }, + { + "epoch": 0.38183846227010576, + "grad_norm": 7.271502290623601, + "learning_rate": 5.47636174614432e-06, + "loss": 0.22192535400390626, + "step": 44160 + }, + { + "epoch": 0.381881695791649, + "grad_norm": 7.037132641107962, + "learning_rate": 5.476246739270045e-06, + "loss": 0.1304840087890625, + "step": 44165 + }, + { + "epoch": 0.3819249293131923, + "grad_norm": 12.722742558618213, + "learning_rate": 5.476131720975538e-06, + "loss": 0.07802200317382812, + "step": 44170 + }, + { + "epoch": 0.38196816283473556, + "grad_norm": 0.5516728986685568, + "learning_rate": 5.476016691261331e-06, + "loss": 0.25994071960449217, + "step": 44175 + }, + { + "epoch": 0.3820113963562788, + "grad_norm": 3.6601190819246137, + "learning_rate": 5.475901650127954e-06, + "loss": 0.05514373779296875, + "step": 44180 + }, + { + "epoch": 0.38205462987782207, + "grad_norm": 11.613982715229943, + "learning_rate": 5.475786597575937e-06, + "loss": 0.0963623046875, + "step": 44185 + }, + { + "epoch": 0.38209786339936536, + "grad_norm": 0.8561409090630762, + "learning_rate": 5.475671533605811e-06, + "loss": 0.072906494140625, + "step": 44190 + }, + { + "epoch": 0.3821410969209086, + "grad_norm": 5.57854184901463, + "learning_rate": 5.4755564582181075e-06, + "loss": 0.07089157104492187, + "step": 44195 + }, + { + "epoch": 0.38218433044245187, + "grad_norm": 17.790443972980157, + "learning_rate": 5.475441371413355e-06, + "loss": 0.304327392578125, + "step": 44200 + }, + { + "epoch": 0.3822275639639951, + "grad_norm": 19.186592950909134, + "learning_rate": 5.475326273192086e-06, + "loss": 0.13793792724609374, + "step": 44205 + }, + { + "epoch": 0.3822707974855384, + "grad_norm": 2.2521433756724347, + "learning_rate": 5.475211163554832e-06, + "loss": 0.07401123046875, + "step": 44210 + }, + { + "epoch": 0.38231403100708167, + "grad_norm": 25.803532820237482, + "learning_rate": 5.475096042502121e-06, + "loss": 0.109771728515625, + "step": 44215 + }, + { + "epoch": 0.3823572645286249, + "grad_norm": 7.512405109993741, + "learning_rate": 5.474980910034487e-06, + "loss": 0.2866424560546875, + "step": 44220 + }, + { + "epoch": 0.3824004980501682, + "grad_norm": 3.8693448608954757, + "learning_rate": 5.4748657661524596e-06, + "loss": 0.0541717529296875, + "step": 44225 + }, + { + "epoch": 0.38244373157171146, + "grad_norm": 13.160543961852206, + "learning_rate": 5.4747506108565695e-06, + "loss": 0.084881591796875, + "step": 44230 + }, + { + "epoch": 0.3824869650932547, + "grad_norm": 42.26488982942941, + "learning_rate": 5.474635444147349e-06, + "loss": 0.2733968734741211, + "step": 44235 + }, + { + "epoch": 0.382530198614798, + "grad_norm": 10.634988491059925, + "learning_rate": 5.474520266025328e-06, + "loss": 0.274078369140625, + "step": 44240 + }, + { + "epoch": 0.3825734321363412, + "grad_norm": 1.349726622165307, + "learning_rate": 5.474405076491039e-06, + "loss": 0.020528030395507813, + "step": 44245 + }, + { + "epoch": 0.3826166656578845, + "grad_norm": 0.5787354960500498, + "learning_rate": 5.474289875545011e-06, + "loss": 0.06857147216796874, + "step": 44250 + }, + { + "epoch": 0.3826598991794278, + "grad_norm": 13.703594318291051, + "learning_rate": 5.474174663187778e-06, + "loss": 0.1799285888671875, + "step": 44255 + }, + { + "epoch": 0.382703132700971, + "grad_norm": 7.4241082301784305, + "learning_rate": 5.47405943941987e-06, + "loss": 0.09972991943359374, + "step": 44260 + }, + { + "epoch": 0.3827463662225143, + "grad_norm": 6.873513731422722, + "learning_rate": 5.473944204241817e-06, + "loss": 0.1931610107421875, + "step": 44265 + }, + { + "epoch": 0.3827895997440576, + "grad_norm": 10.989063085566753, + "learning_rate": 5.4738289576541524e-06, + "loss": 0.0939666748046875, + "step": 44270 + }, + { + "epoch": 0.3828328332656008, + "grad_norm": 0.9465661067277065, + "learning_rate": 5.473713699657409e-06, + "loss": 0.12060470581054687, + "step": 44275 + }, + { + "epoch": 0.3828760667871441, + "grad_norm": 17.061957548024576, + "learning_rate": 5.473598430252114e-06, + "loss": 0.4340797424316406, + "step": 44280 + }, + { + "epoch": 0.38291930030868737, + "grad_norm": 0.7280197129696424, + "learning_rate": 5.473483149438804e-06, + "loss": 0.08512535095214843, + "step": 44285 + }, + { + "epoch": 0.3829625338302306, + "grad_norm": 0.9959277684090453, + "learning_rate": 5.4733678572180065e-06, + "loss": 0.042686843872070314, + "step": 44290 + }, + { + "epoch": 0.3830057673517739, + "grad_norm": 1.5026544473040946, + "learning_rate": 5.4732525535902565e-06, + "loss": 0.14931640625, + "step": 44295 + }, + { + "epoch": 0.3830490008733171, + "grad_norm": 8.461940145761535, + "learning_rate": 5.473137238556083e-06, + "loss": 0.0861053466796875, + "step": 44300 + }, + { + "epoch": 0.3830922343948604, + "grad_norm": 4.675733506492102, + "learning_rate": 5.47302191211602e-06, + "loss": 0.14906005859375, + "step": 44305 + }, + { + "epoch": 0.3831354679164037, + "grad_norm": 7.638803027330174, + "learning_rate": 5.4729065742705974e-06, + "loss": 0.0441192626953125, + "step": 44310 + }, + { + "epoch": 0.3831787014379469, + "grad_norm": 5.748119317634074, + "learning_rate": 5.472791225020349e-06, + "loss": 0.171875, + "step": 44315 + }, + { + "epoch": 0.3832219349594902, + "grad_norm": 0.5555170441796236, + "learning_rate": 5.472675864365806e-06, + "loss": 0.15772857666015624, + "step": 44320 + }, + { + "epoch": 0.3832651684810335, + "grad_norm": 2.0553277070158025, + "learning_rate": 5.4725604923075e-06, + "loss": 0.1184906005859375, + "step": 44325 + }, + { + "epoch": 0.3833084020025767, + "grad_norm": 5.560968925420995, + "learning_rate": 5.472445108845963e-06, + "loss": 0.1091033935546875, + "step": 44330 + }, + { + "epoch": 0.38335163552412, + "grad_norm": 6.57814908889275, + "learning_rate": 5.472329713981729e-06, + "loss": 0.14611597061157228, + "step": 44335 + }, + { + "epoch": 0.3833948690456632, + "grad_norm": 3.989663302418769, + "learning_rate": 5.472214307715328e-06, + "loss": 0.106341552734375, + "step": 44340 + }, + { + "epoch": 0.3834381025672065, + "grad_norm": 8.575875936466142, + "learning_rate": 5.472098890047293e-06, + "loss": 0.19271697998046874, + "step": 44345 + }, + { + "epoch": 0.3834813360887498, + "grad_norm": 1.0070488011213299, + "learning_rate": 5.471983460978157e-06, + "loss": 0.1700592041015625, + "step": 44350 + }, + { + "epoch": 0.383524569610293, + "grad_norm": 14.059152950057838, + "learning_rate": 5.4718680205084515e-06, + "loss": 0.46512451171875, + "step": 44355 + }, + { + "epoch": 0.3835678031318363, + "grad_norm": 23.800923339441827, + "learning_rate": 5.471752568638709e-06, + "loss": 0.24921188354492188, + "step": 44360 + }, + { + "epoch": 0.3836110366533796, + "grad_norm": 1.0847489583699357, + "learning_rate": 5.471637105369462e-06, + "loss": 0.19837188720703125, + "step": 44365 + }, + { + "epoch": 0.3836542701749228, + "grad_norm": 6.922374304368603, + "learning_rate": 5.4715216307012424e-06, + "loss": 0.10609893798828125, + "step": 44370 + }, + { + "epoch": 0.3836975036964661, + "grad_norm": 3.2343254447473684, + "learning_rate": 5.4714061446345845e-06, + "loss": 0.03549880981445312, + "step": 44375 + }, + { + "epoch": 0.38374073721800933, + "grad_norm": 2.0365688046601202, + "learning_rate": 5.471290647170019e-06, + "loss": 0.114654541015625, + "step": 44380 + }, + { + "epoch": 0.3837839707395526, + "grad_norm": 0.45433732003375266, + "learning_rate": 5.47117513830808e-06, + "loss": 0.14760360717773438, + "step": 44385 + }, + { + "epoch": 0.3838272042610959, + "grad_norm": 0.2274188833650072, + "learning_rate": 5.471059618049299e-06, + "loss": 0.17363433837890624, + "step": 44390 + }, + { + "epoch": 0.38387043778263913, + "grad_norm": 2.6672870399735187, + "learning_rate": 5.4709440863942104e-06, + "loss": 0.06573486328125, + "step": 44395 + }, + { + "epoch": 0.3839136713041824, + "grad_norm": 2.819561661229212, + "learning_rate": 5.470828543343345e-06, + "loss": 0.06735773086547851, + "step": 44400 + }, + { + "epoch": 0.3839569048257257, + "grad_norm": 5.444129123916589, + "learning_rate": 5.470712988897237e-06, + "loss": 0.21210861206054688, + "step": 44405 + }, + { + "epoch": 0.3840001383472689, + "grad_norm": 22.543625580677492, + "learning_rate": 5.4705974230564194e-06, + "loss": 0.3579376220703125, + "step": 44410 + }, + { + "epoch": 0.3840433718688122, + "grad_norm": 2.384265738758016, + "learning_rate": 5.470481845821424e-06, + "loss": 0.17635231018066405, + "step": 44415 + }, + { + "epoch": 0.38408660539035544, + "grad_norm": 0.52129627153228, + "learning_rate": 5.470366257192786e-06, + "loss": 0.4623992919921875, + "step": 44420 + }, + { + "epoch": 0.3841298389118987, + "grad_norm": 4.2446825338150775, + "learning_rate": 5.4702506571710354e-06, + "loss": 0.1501800537109375, + "step": 44425 + }, + { + "epoch": 0.384173072433442, + "grad_norm": 4.227574097597585, + "learning_rate": 5.470135045756708e-06, + "loss": 0.27454833984375, + "step": 44430 + }, + { + "epoch": 0.38421630595498524, + "grad_norm": 3.911975893708599, + "learning_rate": 5.470019422950336e-06, + "loss": 0.282708740234375, + "step": 44435 + }, + { + "epoch": 0.3842595394765285, + "grad_norm": 1.5150233057227898, + "learning_rate": 5.469903788752452e-06, + "loss": 0.18057708740234374, + "step": 44440 + }, + { + "epoch": 0.3843027729980718, + "grad_norm": 23.8564369812352, + "learning_rate": 5.469788143163591e-06, + "loss": 0.10480194091796875, + "step": 44445 + }, + { + "epoch": 0.38434600651961504, + "grad_norm": 12.046061456403864, + "learning_rate": 5.4696724861842846e-06, + "loss": 0.1198822021484375, + "step": 44450 + }, + { + "epoch": 0.3843892400411583, + "grad_norm": 28.355596159013224, + "learning_rate": 5.469556817815066e-06, + "loss": 0.1220693588256836, + "step": 44455 + }, + { + "epoch": 0.38443247356270155, + "grad_norm": 17.022364355591474, + "learning_rate": 5.469441138056472e-06, + "loss": 0.16636123657226562, + "step": 44460 + }, + { + "epoch": 0.38447570708424483, + "grad_norm": 15.425610990872219, + "learning_rate": 5.469325446909033e-06, + "loss": 0.16817626953125, + "step": 44465 + }, + { + "epoch": 0.3845189406057881, + "grad_norm": 2.5416055379273392, + "learning_rate": 5.469209744373281e-06, + "loss": 0.10750503540039062, + "step": 44470 + }, + { + "epoch": 0.38456217412733135, + "grad_norm": 1.8807772404015872, + "learning_rate": 5.469094030449755e-06, + "loss": 0.38666534423828125, + "step": 44475 + }, + { + "epoch": 0.38460540764887463, + "grad_norm": 6.665271636143978, + "learning_rate": 5.468978305138984e-06, + "loss": 0.130029296875, + "step": 44480 + }, + { + "epoch": 0.3846486411704179, + "grad_norm": 4.260366823086772, + "learning_rate": 5.468862568441502e-06, + "loss": 0.2393218994140625, + "step": 44485 + }, + { + "epoch": 0.38469187469196114, + "grad_norm": 20.26658404221925, + "learning_rate": 5.468746820357846e-06, + "loss": 0.08425531387329102, + "step": 44490 + }, + { + "epoch": 0.38473510821350443, + "grad_norm": 9.237952196842189, + "learning_rate": 5.468631060888547e-06, + "loss": 0.08160552978515626, + "step": 44495 + }, + { + "epoch": 0.3847783417350477, + "grad_norm": 0.5536583703220777, + "learning_rate": 5.468515290034139e-06, + "loss": 0.38276824951171873, + "step": 44500 + }, + { + "epoch": 0.38482157525659094, + "grad_norm": 17.636633280875753, + "learning_rate": 5.468399507795158e-06, + "loss": 0.08690338134765625, + "step": 44505 + }, + { + "epoch": 0.3848648087781342, + "grad_norm": 6.462111366585513, + "learning_rate": 5.4682837141721355e-06, + "loss": 0.0789031982421875, + "step": 44510 + }, + { + "epoch": 0.38490804229967746, + "grad_norm": 5.457536019931883, + "learning_rate": 5.468167909165608e-06, + "loss": 0.05978240966796875, + "step": 44515 + }, + { + "epoch": 0.38495127582122074, + "grad_norm": 0.6041683810856875, + "learning_rate": 5.468052092776106e-06, + "loss": 0.28902397155761717, + "step": 44520 + }, + { + "epoch": 0.384994509342764, + "grad_norm": 3.4209061234079425, + "learning_rate": 5.467936265004167e-06, + "loss": 0.07458076477050782, + "step": 44525 + }, + { + "epoch": 0.38503774286430725, + "grad_norm": 13.073730144082521, + "learning_rate": 5.467820425850325e-06, + "loss": 0.13597869873046875, + "step": 44530 + }, + { + "epoch": 0.38508097638585054, + "grad_norm": 39.586732849045056, + "learning_rate": 5.467704575315112e-06, + "loss": 0.4898872375488281, + "step": 44535 + }, + { + "epoch": 0.3851242099073938, + "grad_norm": 0.293598709078842, + "learning_rate": 5.4675887133990635e-06, + "loss": 0.12969913482666015, + "step": 44540 + }, + { + "epoch": 0.38516744342893705, + "grad_norm": 54.67755720018433, + "learning_rate": 5.467472840102713e-06, + "loss": 0.3840015411376953, + "step": 44545 + }, + { + "epoch": 0.38521067695048034, + "grad_norm": 4.704943736489314, + "learning_rate": 5.467356955426598e-06, + "loss": 0.07543487548828125, + "step": 44550 + }, + { + "epoch": 0.38525391047202356, + "grad_norm": 37.04409782873164, + "learning_rate": 5.46724105937125e-06, + "loss": 0.21635208129882813, + "step": 44555 + }, + { + "epoch": 0.38529714399356685, + "grad_norm": 0.1728873173015424, + "learning_rate": 5.467125151937203e-06, + "loss": 0.023582839965820314, + "step": 44560 + }, + { + "epoch": 0.38534037751511013, + "grad_norm": 5.114208169020282, + "learning_rate": 5.467009233124994e-06, + "loss": 0.20474853515625, + "step": 44565 + }, + { + "epoch": 0.38538361103665336, + "grad_norm": 34.09802334527712, + "learning_rate": 5.466893302935157e-06, + "loss": 0.14786300659179688, + "step": 44570 + }, + { + "epoch": 0.38542684455819665, + "grad_norm": 23.38196676844157, + "learning_rate": 5.466777361368224e-06, + "loss": 0.13677825927734374, + "step": 44575 + }, + { + "epoch": 0.38547007807973993, + "grad_norm": 0.6635928047096554, + "learning_rate": 5.466661408424733e-06, + "loss": 0.038861083984375, + "step": 44580 + }, + { + "epoch": 0.38551331160128316, + "grad_norm": 35.89743997381034, + "learning_rate": 5.466545444105219e-06, + "loss": 0.3801158905029297, + "step": 44585 + }, + { + "epoch": 0.38555654512282644, + "grad_norm": 5.739060984457336, + "learning_rate": 5.4664294684102134e-06, + "loss": 0.05715484619140625, + "step": 44590 + }, + { + "epoch": 0.3855997786443697, + "grad_norm": 43.72349591453352, + "learning_rate": 5.466313481340253e-06, + "loss": 0.19977569580078125, + "step": 44595 + }, + { + "epoch": 0.38564301216591296, + "grad_norm": 6.684358034574735, + "learning_rate": 5.466197482895874e-06, + "loss": 0.5167572021484375, + "step": 44600 + }, + { + "epoch": 0.38568624568745624, + "grad_norm": 23.763127309726144, + "learning_rate": 5.466081473077609e-06, + "loss": 0.097015380859375, + "step": 44605 + }, + { + "epoch": 0.38572947920899947, + "grad_norm": 0.13665248413119005, + "learning_rate": 5.465965451885995e-06, + "loss": 0.06096954345703125, + "step": 44610 + }, + { + "epoch": 0.38577271273054276, + "grad_norm": 0.4557887621467741, + "learning_rate": 5.465849419321566e-06, + "loss": 0.06947402954101563, + "step": 44615 + }, + { + "epoch": 0.38581594625208604, + "grad_norm": 12.464743540724923, + "learning_rate": 5.465733375384857e-06, + "loss": 0.2533355712890625, + "step": 44620 + }, + { + "epoch": 0.38585917977362927, + "grad_norm": 20.36555903735328, + "learning_rate": 5.465617320076403e-06, + "loss": 0.27167816162109376, + "step": 44625 + }, + { + "epoch": 0.38590241329517255, + "grad_norm": 2.698543715205454, + "learning_rate": 5.465501253396741e-06, + "loss": 0.2141357421875, + "step": 44630 + }, + { + "epoch": 0.3859456468167158, + "grad_norm": 30.075102974367148, + "learning_rate": 5.465385175346405e-06, + "loss": 0.09979629516601562, + "step": 44635 + }, + { + "epoch": 0.38598888033825907, + "grad_norm": 8.440404381311245, + "learning_rate": 5.465269085925929e-06, + "loss": 0.358892822265625, + "step": 44640 + }, + { + "epoch": 0.38603211385980235, + "grad_norm": 0.7791528338641068, + "learning_rate": 5.465152985135851e-06, + "loss": 0.2189422607421875, + "step": 44645 + }, + { + "epoch": 0.3860753473813456, + "grad_norm": 11.825850390007496, + "learning_rate": 5.465036872976705e-06, + "loss": 0.087548828125, + "step": 44650 + }, + { + "epoch": 0.38611858090288886, + "grad_norm": 46.525808679232505, + "learning_rate": 5.464920749449027e-06, + "loss": 0.76041259765625, + "step": 44655 + }, + { + "epoch": 0.38616181442443215, + "grad_norm": 12.615425174539332, + "learning_rate": 5.464804614553352e-06, + "loss": 0.163458251953125, + "step": 44660 + }, + { + "epoch": 0.3862050479459754, + "grad_norm": 3.833416568262103, + "learning_rate": 5.464688468290217e-06, + "loss": 0.16169281005859376, + "step": 44665 + }, + { + "epoch": 0.38624828146751866, + "grad_norm": 47.30347323178761, + "learning_rate": 5.4645723106601545e-06, + "loss": 0.192291259765625, + "step": 44670 + }, + { + "epoch": 0.38629151498906195, + "grad_norm": 7.757119611117802, + "learning_rate": 5.464456141663704e-06, + "loss": 0.1179351806640625, + "step": 44675 + }, + { + "epoch": 0.3863347485106052, + "grad_norm": 1.5150208732813681, + "learning_rate": 5.464339961301399e-06, + "loss": 0.5342117309570312, + "step": 44680 + }, + { + "epoch": 0.38637798203214846, + "grad_norm": 27.03209767446761, + "learning_rate": 5.464223769573775e-06, + "loss": 0.28345947265625, + "step": 44685 + }, + { + "epoch": 0.3864212155536917, + "grad_norm": 5.9435993917882755, + "learning_rate": 5.464107566481371e-06, + "loss": 0.18877944946289063, + "step": 44690 + }, + { + "epoch": 0.386464449075235, + "grad_norm": 7.212132725856374, + "learning_rate": 5.463991352024718e-06, + "loss": 0.15995407104492188, + "step": 44695 + }, + { + "epoch": 0.38650768259677826, + "grad_norm": 1.320392041182512, + "learning_rate": 5.463875126204355e-06, + "loss": 0.06360092163085937, + "step": 44700 + }, + { + "epoch": 0.3865509161183215, + "grad_norm": 24.8472169791927, + "learning_rate": 5.46375888902082e-06, + "loss": 0.1805328369140625, + "step": 44705 + }, + { + "epoch": 0.38659414963986477, + "grad_norm": 1.9910863564461545, + "learning_rate": 5.4636426404746436e-06, + "loss": 0.1873809814453125, + "step": 44710 + }, + { + "epoch": 0.38663738316140805, + "grad_norm": 21.144541656042595, + "learning_rate": 5.463526380566368e-06, + "loss": 0.259320068359375, + "step": 44715 + }, + { + "epoch": 0.3866806166829513, + "grad_norm": 56.026106074415175, + "learning_rate": 5.4634101092965244e-06, + "loss": 0.8745033264160156, + "step": 44720 + }, + { + "epoch": 0.38672385020449457, + "grad_norm": 24.79567245247091, + "learning_rate": 5.463293826665651e-06, + "loss": 0.3665252685546875, + "step": 44725 + }, + { + "epoch": 0.3867670837260378, + "grad_norm": 3.2400268853332186, + "learning_rate": 5.463177532674285e-06, + "loss": 0.4633796691894531, + "step": 44730 + }, + { + "epoch": 0.3868103172475811, + "grad_norm": 5.079300740636236, + "learning_rate": 5.463061227322961e-06, + "loss": 0.14161605834960939, + "step": 44735 + }, + { + "epoch": 0.38685355076912437, + "grad_norm": 13.932196009755188, + "learning_rate": 5.462944910612217e-06, + "loss": 0.11485214233398437, + "step": 44740 + }, + { + "epoch": 0.3868967842906676, + "grad_norm": 8.906351807356993, + "learning_rate": 5.462828582542589e-06, + "loss": 0.20088958740234375, + "step": 44745 + }, + { + "epoch": 0.3869400178122109, + "grad_norm": 0.7353000649903444, + "learning_rate": 5.462712243114613e-06, + "loss": 0.0963104248046875, + "step": 44750 + }, + { + "epoch": 0.38698325133375416, + "grad_norm": 7.392973415279169, + "learning_rate": 5.462595892328825e-06, + "loss": 0.1502685546875, + "step": 44755 + }, + { + "epoch": 0.3870264848552974, + "grad_norm": 9.618861960032294, + "learning_rate": 5.462479530185763e-06, + "loss": 0.07141075134277344, + "step": 44760 + }, + { + "epoch": 0.3870697183768407, + "grad_norm": 5.59610481398384, + "learning_rate": 5.462363156685962e-06, + "loss": 0.316387939453125, + "step": 44765 + }, + { + "epoch": 0.3871129518983839, + "grad_norm": 6.019207798514456, + "learning_rate": 5.462246771829961e-06, + "loss": 0.14256668090820312, + "step": 44770 + }, + { + "epoch": 0.3871561854199272, + "grad_norm": 30.382318880624208, + "learning_rate": 5.4621303756182955e-06, + "loss": 0.17788238525390626, + "step": 44775 + }, + { + "epoch": 0.3871994189414705, + "grad_norm": 0.46452432911079705, + "learning_rate": 5.4620139680515015e-06, + "loss": 0.33155479431152346, + "step": 44780 + }, + { + "epoch": 0.3872426524630137, + "grad_norm": 37.932566012583116, + "learning_rate": 5.4618975491301175e-06, + "loss": 0.559344482421875, + "step": 44785 + }, + { + "epoch": 0.387285885984557, + "grad_norm": 33.3667455269607, + "learning_rate": 5.461781118854678e-06, + "loss": 0.637847900390625, + "step": 44790 + }, + { + "epoch": 0.3873291195061003, + "grad_norm": 30.939040091337755, + "learning_rate": 5.461664677225723e-06, + "loss": 0.4957547187805176, + "step": 44795 + }, + { + "epoch": 0.3873723530276435, + "grad_norm": 3.651181818396817, + "learning_rate": 5.461548224243787e-06, + "loss": 0.2588958740234375, + "step": 44800 + }, + { + "epoch": 0.3874155865491868, + "grad_norm": 5.907315139325936, + "learning_rate": 5.461431759909409e-06, + "loss": 0.0634185791015625, + "step": 44805 + }, + { + "epoch": 0.38745882007073, + "grad_norm": 2.0455763841137995, + "learning_rate": 5.461315284223124e-06, + "loss": 0.070965576171875, + "step": 44810 + }, + { + "epoch": 0.3875020535922733, + "grad_norm": 11.1700073831079, + "learning_rate": 5.4611987971854715e-06, + "loss": 0.282855224609375, + "step": 44815 + }, + { + "epoch": 0.3875452871138166, + "grad_norm": 8.209876826953867, + "learning_rate": 5.461082298796987e-06, + "loss": 0.14671630859375, + "step": 44820 + }, + { + "epoch": 0.3875885206353598, + "grad_norm": 0.48211055388216373, + "learning_rate": 5.460965789058208e-06, + "loss": 0.204302978515625, + "step": 44825 + }, + { + "epoch": 0.3876317541569031, + "grad_norm": 17.656613168148922, + "learning_rate": 5.4608492679696736e-06, + "loss": 0.13130340576171876, + "step": 44830 + }, + { + "epoch": 0.3876749876784464, + "grad_norm": 2.3273513465976716, + "learning_rate": 5.460732735531919e-06, + "loss": 0.43165283203125, + "step": 44835 + }, + { + "epoch": 0.3877182211999896, + "grad_norm": 6.33412788881548, + "learning_rate": 5.460616191745483e-06, + "loss": 0.10897979736328126, + "step": 44840 + }, + { + "epoch": 0.3877614547215329, + "grad_norm": 10.889616209185222, + "learning_rate": 5.460499636610902e-06, + "loss": 0.4287567138671875, + "step": 44845 + }, + { + "epoch": 0.3878046882430762, + "grad_norm": 30.349180300236945, + "learning_rate": 5.4603830701287135e-06, + "loss": 0.19996795654296876, + "step": 44850 + }, + { + "epoch": 0.3878479217646194, + "grad_norm": 1.0439884769833634, + "learning_rate": 5.460266492299457e-06, + "loss": 0.1944122314453125, + "step": 44855 + }, + { + "epoch": 0.3878911552861627, + "grad_norm": 13.406496498203781, + "learning_rate": 5.460149903123668e-06, + "loss": 0.07603034973144532, + "step": 44860 + }, + { + "epoch": 0.3879343888077059, + "grad_norm": 6.667316099059308, + "learning_rate": 5.460033302601885e-06, + "loss": 0.08579559326171875, + "step": 44865 + }, + { + "epoch": 0.3879776223292492, + "grad_norm": 30.261812087211922, + "learning_rate": 5.459916690734646e-06, + "loss": 0.42413330078125, + "step": 44870 + }, + { + "epoch": 0.3880208558507925, + "grad_norm": 13.757871739639903, + "learning_rate": 5.459800067522489e-06, + "loss": 0.3433349609375, + "step": 44875 + }, + { + "epoch": 0.3880640893723357, + "grad_norm": 3.595987971397167, + "learning_rate": 5.45968343296595e-06, + "loss": 0.095550537109375, + "step": 44880 + }, + { + "epoch": 0.388107322893879, + "grad_norm": 9.137148388457657, + "learning_rate": 5.459566787065571e-06, + "loss": 0.08859710693359375, + "step": 44885 + }, + { + "epoch": 0.3881505564154223, + "grad_norm": 1.6968704222449238, + "learning_rate": 5.459450129821885e-06, + "loss": 0.04304084777832031, + "step": 44890 + }, + { + "epoch": 0.3881937899369655, + "grad_norm": 0.7267360868843582, + "learning_rate": 5.4593334612354325e-06, + "loss": 0.32906532287597656, + "step": 44895 + }, + { + "epoch": 0.3882370234585088, + "grad_norm": 35.843905506076204, + "learning_rate": 5.459216781306752e-06, + "loss": 0.151800537109375, + "step": 44900 + }, + { + "epoch": 0.38828025698005203, + "grad_norm": 0.12438752204385266, + "learning_rate": 5.459100090036382e-06, + "loss": 0.033856201171875, + "step": 44905 + }, + { + "epoch": 0.3883234905015953, + "grad_norm": 30.191521621263025, + "learning_rate": 5.458983387424858e-06, + "loss": 0.60499267578125, + "step": 44910 + }, + { + "epoch": 0.3883667240231386, + "grad_norm": 6.1338479222306495, + "learning_rate": 5.45886667347272e-06, + "loss": 0.12581405639648438, + "step": 44915 + }, + { + "epoch": 0.3884099575446818, + "grad_norm": 17.466571093762507, + "learning_rate": 5.458749948180506e-06, + "loss": 0.06782608032226563, + "step": 44920 + }, + { + "epoch": 0.3884531910662251, + "grad_norm": 1.8687817534139863, + "learning_rate": 5.458633211548756e-06, + "loss": 0.211968994140625, + "step": 44925 + }, + { + "epoch": 0.3884964245877684, + "grad_norm": 2.9689152220042536, + "learning_rate": 5.458516463578006e-06, + "loss": 0.03999786376953125, + "step": 44930 + }, + { + "epoch": 0.3885396581093116, + "grad_norm": 3.21843276580538, + "learning_rate": 5.458399704268795e-06, + "loss": 0.37235107421875, + "step": 44935 + }, + { + "epoch": 0.3885828916308549, + "grad_norm": 20.05179989685526, + "learning_rate": 5.458282933621662e-06, + "loss": 0.2741973876953125, + "step": 44940 + }, + { + "epoch": 0.38862612515239814, + "grad_norm": 4.5670883958028226, + "learning_rate": 5.458166151637146e-06, + "loss": 0.199603271484375, + "step": 44945 + }, + { + "epoch": 0.3886693586739414, + "grad_norm": 5.3355584293825045, + "learning_rate": 5.458049358315785e-06, + "loss": 0.3392444610595703, + "step": 44950 + }, + { + "epoch": 0.3887125921954847, + "grad_norm": 40.609473995470374, + "learning_rate": 5.457932553658116e-06, + "loss": 0.7416259765625, + "step": 44955 + }, + { + "epoch": 0.38875582571702794, + "grad_norm": 2.46985786846335, + "learning_rate": 5.457815737664681e-06, + "loss": 0.1618408203125, + "step": 44960 + }, + { + "epoch": 0.3887990592385712, + "grad_norm": 14.024313986802259, + "learning_rate": 5.457698910336015e-06, + "loss": 0.5108062744140625, + "step": 44965 + }, + { + "epoch": 0.3888422927601145, + "grad_norm": 0.907123152440889, + "learning_rate": 5.45758207167266e-06, + "loss": 0.16597900390625, + "step": 44970 + }, + { + "epoch": 0.38888552628165773, + "grad_norm": 0.8930992526046393, + "learning_rate": 5.457465221675154e-06, + "loss": 0.39395599365234374, + "step": 44975 + }, + { + "epoch": 0.388928759803201, + "grad_norm": 0.9983742341974426, + "learning_rate": 5.4573483603440355e-06, + "loss": 0.05028076171875, + "step": 44980 + }, + { + "epoch": 0.38897199332474425, + "grad_norm": 9.270018145759899, + "learning_rate": 5.4572314876798435e-06, + "loss": 0.13016242980957032, + "step": 44985 + }, + { + "epoch": 0.38901522684628753, + "grad_norm": 2.3110232219441884, + "learning_rate": 5.457114603683116e-06, + "loss": 0.1764801025390625, + "step": 44990 + }, + { + "epoch": 0.3890584603678308, + "grad_norm": 0.5631241104864417, + "learning_rate": 5.456997708354394e-06, + "loss": 0.20912628173828124, + "step": 44995 + }, + { + "epoch": 0.38910169388937405, + "grad_norm": 14.784233689172762, + "learning_rate": 5.456880801694216e-06, + "loss": 0.3895538330078125, + "step": 45000 + }, + { + "epoch": 0.38914492741091733, + "grad_norm": 22.29620204650619, + "learning_rate": 5.456763883703119e-06, + "loss": 0.118109130859375, + "step": 45005 + }, + { + "epoch": 0.3891881609324606, + "grad_norm": 1.2317249951938924, + "learning_rate": 5.456646954381645e-06, + "loss": 0.14289169311523436, + "step": 45010 + }, + { + "epoch": 0.38923139445400384, + "grad_norm": 2.559678365794615, + "learning_rate": 5.4565300137303335e-06, + "loss": 0.13384857177734374, + "step": 45015 + }, + { + "epoch": 0.3892746279755471, + "grad_norm": 8.370226414708272, + "learning_rate": 5.456413061749721e-06, + "loss": 0.07818107604980469, + "step": 45020 + }, + { + "epoch": 0.3893178614970904, + "grad_norm": 2.025587566784332, + "learning_rate": 5.456296098440349e-06, + "loss": 0.121063232421875, + "step": 45025 + }, + { + "epoch": 0.38936109501863364, + "grad_norm": 24.09580824784668, + "learning_rate": 5.456179123802757e-06, + "loss": 0.13427734375, + "step": 45030 + }, + { + "epoch": 0.3894043285401769, + "grad_norm": 7.6275975936614335, + "learning_rate": 5.456062137837484e-06, + "loss": 0.0992828369140625, + "step": 45035 + }, + { + "epoch": 0.38944756206172015, + "grad_norm": 2.042470842111136, + "learning_rate": 5.4559451405450695e-06, + "loss": 0.12803955078125, + "step": 45040 + }, + { + "epoch": 0.38949079558326344, + "grad_norm": 3.309634811947122, + "learning_rate": 5.455828131926052e-06, + "loss": 0.062860107421875, + "step": 45045 + }, + { + "epoch": 0.3895340291048067, + "grad_norm": 12.22101262558532, + "learning_rate": 5.455711111980972e-06, + "loss": 0.3025810241699219, + "step": 45050 + }, + { + "epoch": 0.38957726262634995, + "grad_norm": 30.497849869445005, + "learning_rate": 5.455594080710371e-06, + "loss": 0.13373222351074218, + "step": 45055 + }, + { + "epoch": 0.38962049614789324, + "grad_norm": 6.060247204585349, + "learning_rate": 5.455477038114786e-06, + "loss": 0.20071449279785156, + "step": 45060 + }, + { + "epoch": 0.3896637296694365, + "grad_norm": 7.477650213276588, + "learning_rate": 5.4553599841947574e-06, + "loss": 0.2986602783203125, + "step": 45065 + }, + { + "epoch": 0.38970696319097975, + "grad_norm": 2.4175216899575744, + "learning_rate": 5.455242918950827e-06, + "loss": 0.176849365234375, + "step": 45070 + }, + { + "epoch": 0.38975019671252303, + "grad_norm": 19.52615546736309, + "learning_rate": 5.455125842383532e-06, + "loss": 0.12518081665039063, + "step": 45075 + }, + { + "epoch": 0.38979343023406626, + "grad_norm": 6.745291242994963, + "learning_rate": 5.455008754493414e-06, + "loss": 0.38996105194091796, + "step": 45080 + }, + { + "epoch": 0.38983666375560955, + "grad_norm": 21.254503712881327, + "learning_rate": 5.454891655281012e-06, + "loss": 0.09786376953125, + "step": 45085 + }, + { + "epoch": 0.38987989727715283, + "grad_norm": 2.107247109502836, + "learning_rate": 5.454774544746867e-06, + "loss": 0.111474609375, + "step": 45090 + }, + { + "epoch": 0.38992313079869606, + "grad_norm": 7.178697591116562, + "learning_rate": 5.4546574228915195e-06, + "loss": 0.22262725830078126, + "step": 45095 + }, + { + "epoch": 0.38996636432023934, + "grad_norm": 8.832598983970572, + "learning_rate": 5.454540289715507e-06, + "loss": 0.07530441284179687, + "step": 45100 + }, + { + "epoch": 0.39000959784178263, + "grad_norm": 0.6031122402686471, + "learning_rate": 5.454423145219373e-06, + "loss": 0.11935558319091796, + "step": 45105 + }, + { + "epoch": 0.39005283136332586, + "grad_norm": 3.602881402397209, + "learning_rate": 5.454305989403656e-06, + "loss": 0.0787384033203125, + "step": 45110 + }, + { + "epoch": 0.39009606488486914, + "grad_norm": 3.696874611601929, + "learning_rate": 5.454188822268897e-06, + "loss": 0.19307861328125, + "step": 45115 + }, + { + "epoch": 0.39013929840641237, + "grad_norm": 21.131038013717756, + "learning_rate": 5.454071643815635e-06, + "loss": 0.24930419921875, + "step": 45120 + }, + { + "epoch": 0.39018253192795566, + "grad_norm": 0.8674709734306492, + "learning_rate": 5.4539544540444124e-06, + "loss": 0.092236328125, + "step": 45125 + }, + { + "epoch": 0.39022576544949894, + "grad_norm": 33.646912098611296, + "learning_rate": 5.4538372529557685e-06, + "loss": 0.09300994873046875, + "step": 45130 + }, + { + "epoch": 0.39026899897104217, + "grad_norm": 32.52819415034131, + "learning_rate": 5.4537200405502434e-06, + "loss": 0.527642822265625, + "step": 45135 + }, + { + "epoch": 0.39031223249258545, + "grad_norm": 0.5170155847473085, + "learning_rate": 5.4536028168283786e-06, + "loss": 0.0842529296875, + "step": 45140 + }, + { + "epoch": 0.39035546601412874, + "grad_norm": 7.416808788813987, + "learning_rate": 5.453485581790715e-06, + "loss": 0.13897705078125, + "step": 45145 + }, + { + "epoch": 0.39039869953567197, + "grad_norm": 4.281655503837577, + "learning_rate": 5.453368335437792e-06, + "loss": 0.16443634033203125, + "step": 45150 + }, + { + "epoch": 0.39044193305721525, + "grad_norm": 12.35474063749433, + "learning_rate": 5.453251077770151e-06, + "loss": 0.159649658203125, + "step": 45155 + }, + { + "epoch": 0.3904851665787585, + "grad_norm": 34.53293720303093, + "learning_rate": 5.453133808788334e-06, + "loss": 0.3578521728515625, + "step": 45160 + }, + { + "epoch": 0.39052840010030176, + "grad_norm": 2.4925469776726152, + "learning_rate": 5.453016528492879e-06, + "loss": 0.0596588134765625, + "step": 45165 + }, + { + "epoch": 0.39057163362184505, + "grad_norm": 19.269348542888846, + "learning_rate": 5.45289923688433e-06, + "loss": 0.19599761962890624, + "step": 45170 + }, + { + "epoch": 0.3906148671433883, + "grad_norm": 11.259315128999868, + "learning_rate": 5.452781933963225e-06, + "loss": 0.2768989562988281, + "step": 45175 + }, + { + "epoch": 0.39065810066493156, + "grad_norm": 4.32002687902404, + "learning_rate": 5.452664619730108e-06, + "loss": 0.03251838684082031, + "step": 45180 + }, + { + "epoch": 0.39070133418647485, + "grad_norm": 2.1129274639633295, + "learning_rate": 5.452547294185517e-06, + "loss": 0.191949462890625, + "step": 45185 + }, + { + "epoch": 0.3907445677080181, + "grad_norm": 0.8043413100743012, + "learning_rate": 5.452429957329996e-06, + "loss": 0.306640625, + "step": 45190 + }, + { + "epoch": 0.39078780122956136, + "grad_norm": 19.328241557870307, + "learning_rate": 5.452312609164084e-06, + "loss": 0.12693252563476562, + "step": 45195 + }, + { + "epoch": 0.3908310347511046, + "grad_norm": 5.997107163559422, + "learning_rate": 5.452195249688323e-06, + "loss": 0.195806884765625, + "step": 45200 + }, + { + "epoch": 0.3908742682726479, + "grad_norm": 1.433420066603822, + "learning_rate": 5.452077878903253e-06, + "loss": 0.2567138671875, + "step": 45205 + }, + { + "epoch": 0.39091750179419116, + "grad_norm": 6.4545676829989365, + "learning_rate": 5.451960496809418e-06, + "loss": 0.22807731628417968, + "step": 45210 + }, + { + "epoch": 0.3909607353157344, + "grad_norm": 11.084192907514112, + "learning_rate": 5.451843103407357e-06, + "loss": 0.2445068359375, + "step": 45215 + }, + { + "epoch": 0.39100396883727767, + "grad_norm": 11.512601083401487, + "learning_rate": 5.4517256986976135e-06, + "loss": 0.10962677001953125, + "step": 45220 + }, + { + "epoch": 0.39104720235882096, + "grad_norm": 36.81508905018583, + "learning_rate": 5.451608282680726e-06, + "loss": 0.2650482177734375, + "step": 45225 + }, + { + "epoch": 0.3910904358803642, + "grad_norm": 2.640074028405149, + "learning_rate": 5.451490855357239e-06, + "loss": 0.05066757202148438, + "step": 45230 + }, + { + "epoch": 0.39113366940190747, + "grad_norm": 10.29647771133859, + "learning_rate": 5.451373416727693e-06, + "loss": 0.4037261962890625, + "step": 45235 + }, + { + "epoch": 0.39117690292345075, + "grad_norm": 3.5477226945313225, + "learning_rate": 5.451255966792628e-06, + "loss": 0.41510610580444335, + "step": 45240 + }, + { + "epoch": 0.391220136444994, + "grad_norm": 33.797142781679966, + "learning_rate": 5.451138505552589e-06, + "loss": 0.423199462890625, + "step": 45245 + }, + { + "epoch": 0.39126336996653727, + "grad_norm": 33.20934267821307, + "learning_rate": 5.451021033008114e-06, + "loss": 0.22820758819580078, + "step": 45250 + }, + { + "epoch": 0.3913066034880805, + "grad_norm": 16.55815041774884, + "learning_rate": 5.450903549159748e-06, + "loss": 0.15705795288085939, + "step": 45255 + }, + { + "epoch": 0.3913498370096238, + "grad_norm": 19.729455385743456, + "learning_rate": 5.45078605400803e-06, + "loss": 0.25679645538330076, + "step": 45260 + }, + { + "epoch": 0.39139307053116706, + "grad_norm": 1.6562550611935263, + "learning_rate": 5.4506685475535035e-06, + "loss": 0.046154022216796875, + "step": 45265 + }, + { + "epoch": 0.3914363040527103, + "grad_norm": 1.1287984200769299, + "learning_rate": 5.450551029796711e-06, + "loss": 0.14165802001953126, + "step": 45270 + }, + { + "epoch": 0.3914795375742536, + "grad_norm": 18.85507764142911, + "learning_rate": 5.450433500738194e-06, + "loss": 0.1729717254638672, + "step": 45275 + }, + { + "epoch": 0.39152277109579686, + "grad_norm": 50.36346560862175, + "learning_rate": 5.450315960378493e-06, + "loss": 0.26090106964111326, + "step": 45280 + }, + { + "epoch": 0.3915660046173401, + "grad_norm": 21.515448006251432, + "learning_rate": 5.450198408718152e-06, + "loss": 0.11311416625976563, + "step": 45285 + }, + { + "epoch": 0.3916092381388834, + "grad_norm": 5.997139453279023, + "learning_rate": 5.450080845757712e-06, + "loss": 0.07284088134765625, + "step": 45290 + }, + { + "epoch": 0.3916524716604266, + "grad_norm": 12.766357285916742, + "learning_rate": 5.4499632714977155e-06, + "loss": 0.129638671875, + "step": 45295 + }, + { + "epoch": 0.3916957051819699, + "grad_norm": 16.7088771871116, + "learning_rate": 5.449845685938706e-06, + "loss": 0.06242942810058594, + "step": 45300 + }, + { + "epoch": 0.3917389387035132, + "grad_norm": 7.026237538266415, + "learning_rate": 5.449728089081224e-06, + "loss": 0.238018798828125, + "step": 45305 + }, + { + "epoch": 0.3917821722250564, + "grad_norm": 0.043466387273442916, + "learning_rate": 5.449610480925812e-06, + "loss": 0.19540824890136718, + "step": 45310 + }, + { + "epoch": 0.3918254057465997, + "grad_norm": 10.581637588819333, + "learning_rate": 5.449492861473013e-06, + "loss": 0.0690704345703125, + "step": 45315 + }, + { + "epoch": 0.39186863926814297, + "grad_norm": 1.1042435302711884, + "learning_rate": 5.44937523072337e-06, + "loss": 0.19726638793945311, + "step": 45320 + }, + { + "epoch": 0.3919118727896862, + "grad_norm": 5.888168875019044, + "learning_rate": 5.449257588677425e-06, + "loss": 0.1290313720703125, + "step": 45325 + }, + { + "epoch": 0.3919551063112295, + "grad_norm": 19.50683361438374, + "learning_rate": 5.4491399353357196e-06, + "loss": 0.115997314453125, + "step": 45330 + }, + { + "epoch": 0.3919983398327727, + "grad_norm": 3.859112844096939, + "learning_rate": 5.449022270698798e-06, + "loss": 0.07987442016601562, + "step": 45335 + }, + { + "epoch": 0.392041573354316, + "grad_norm": 14.301575124786709, + "learning_rate": 5.448904594767203e-06, + "loss": 0.072662353515625, + "step": 45340 + }, + { + "epoch": 0.3920848068758593, + "grad_norm": 34.11456177188459, + "learning_rate": 5.4487869075414746e-06, + "loss": 0.40974273681640627, + "step": 45345 + }, + { + "epoch": 0.3921280403974025, + "grad_norm": 23.37879178078739, + "learning_rate": 5.448669209022158e-06, + "loss": 0.12970504760742188, + "step": 45350 + }, + { + "epoch": 0.3921712739189458, + "grad_norm": 16.20918224319134, + "learning_rate": 5.448551499209795e-06, + "loss": 0.22583656311035155, + "step": 45355 + }, + { + "epoch": 0.3922145074404891, + "grad_norm": 7.87188097550546, + "learning_rate": 5.4484337781049285e-06, + "loss": 0.164263916015625, + "step": 45360 + }, + { + "epoch": 0.3922577409620323, + "grad_norm": 10.353868166661286, + "learning_rate": 5.448316045708103e-06, + "loss": 0.1783843994140625, + "step": 45365 + }, + { + "epoch": 0.3923009744835756, + "grad_norm": 2.085014504758457, + "learning_rate": 5.44819830201986e-06, + "loss": 0.08562774658203125, + "step": 45370 + }, + { + "epoch": 0.3923442080051188, + "grad_norm": 7.936590986416514, + "learning_rate": 5.4480805470407425e-06, + "loss": 0.11806259155273438, + "step": 45375 + }, + { + "epoch": 0.3923874415266621, + "grad_norm": 9.326153475202831, + "learning_rate": 5.447962780771293e-06, + "loss": 0.0968170166015625, + "step": 45380 + }, + { + "epoch": 0.3924306750482054, + "grad_norm": 5.398717680199185, + "learning_rate": 5.447845003212057e-06, + "loss": 0.20926437377929688, + "step": 45385 + }, + { + "epoch": 0.3924739085697486, + "grad_norm": 1.13379296875807, + "learning_rate": 5.447727214363575e-06, + "loss": 0.2182586669921875, + "step": 45390 + }, + { + "epoch": 0.3925171420912919, + "grad_norm": 23.38469505242335, + "learning_rate": 5.447609414226391e-06, + "loss": 0.22611541748046876, + "step": 45395 + }, + { + "epoch": 0.3925603756128352, + "grad_norm": 9.072735615945238, + "learning_rate": 5.44749160280105e-06, + "loss": 0.16359481811523438, + "step": 45400 + }, + { + "epoch": 0.3926036091343784, + "grad_norm": 8.945777237980161, + "learning_rate": 5.447373780088093e-06, + "loss": 0.08605690002441406, + "step": 45405 + }, + { + "epoch": 0.3926468426559217, + "grad_norm": 2.704034002185759, + "learning_rate": 5.447255946088065e-06, + "loss": 0.39104156494140624, + "step": 45410 + }, + { + "epoch": 0.392690076177465, + "grad_norm": 2.475081954946596, + "learning_rate": 5.447138100801509e-06, + "loss": 0.13401641845703124, + "step": 45415 + }, + { + "epoch": 0.3927333096990082, + "grad_norm": 2.037525451176817, + "learning_rate": 5.447020244228967e-06, + "loss": 0.0569091796875, + "step": 45420 + }, + { + "epoch": 0.3927765432205515, + "grad_norm": 1.5876219221103445, + "learning_rate": 5.446902376370985e-06, + "loss": 0.10064697265625, + "step": 45425 + }, + { + "epoch": 0.39281977674209473, + "grad_norm": 11.36258550061293, + "learning_rate": 5.446784497228105e-06, + "loss": 0.128131103515625, + "step": 45430 + }, + { + "epoch": 0.392863010263638, + "grad_norm": 6.36847145604479, + "learning_rate": 5.446666606800872e-06, + "loss": 0.13596763610839843, + "step": 45435 + }, + { + "epoch": 0.3929062437851813, + "grad_norm": 1.5557994066043754, + "learning_rate": 5.446548705089828e-06, + "loss": 0.18414306640625, + "step": 45440 + }, + { + "epoch": 0.3929494773067245, + "grad_norm": 49.753810555697534, + "learning_rate": 5.446430792095517e-06, + "loss": 0.09886970520019531, + "step": 45445 + }, + { + "epoch": 0.3929927108282678, + "grad_norm": 1.0110834231862325, + "learning_rate": 5.446312867818485e-06, + "loss": 0.150421142578125, + "step": 45450 + }, + { + "epoch": 0.3930359443498111, + "grad_norm": 5.954923189589145, + "learning_rate": 5.446194932259273e-06, + "loss": 0.07880935668945313, + "step": 45455 + }, + { + "epoch": 0.3930791778713543, + "grad_norm": 0.6377933573590409, + "learning_rate": 5.446076985418425e-06, + "loss": 0.05684585571289062, + "step": 45460 + }, + { + "epoch": 0.3931224113928976, + "grad_norm": 20.144785290832548, + "learning_rate": 5.445959027296488e-06, + "loss": 0.270111083984375, + "step": 45465 + }, + { + "epoch": 0.39316564491444084, + "grad_norm": 3.133906945060741, + "learning_rate": 5.445841057894003e-06, + "loss": 0.21415786743164061, + "step": 45470 + }, + { + "epoch": 0.3932088784359841, + "grad_norm": 0.4040140435524277, + "learning_rate": 5.445723077211516e-06, + "loss": 0.20308837890625, + "step": 45475 + }, + { + "epoch": 0.3932521119575274, + "grad_norm": 1.7019497713480376, + "learning_rate": 5.44560508524957e-06, + "loss": 0.06696128845214844, + "step": 45480 + }, + { + "epoch": 0.39329534547907063, + "grad_norm": 1.6379670896945058, + "learning_rate": 5.445487082008709e-06, + "loss": 0.15839691162109376, + "step": 45485 + }, + { + "epoch": 0.3933385790006139, + "grad_norm": 3.279133676769928, + "learning_rate": 5.445369067489479e-06, + "loss": 0.0696044921875, + "step": 45490 + }, + { + "epoch": 0.3933818125221572, + "grad_norm": 17.088398924591655, + "learning_rate": 5.445251041692422e-06, + "loss": 0.40135498046875, + "step": 45495 + }, + { + "epoch": 0.39342504604370043, + "grad_norm": 4.212851070723344, + "learning_rate": 5.445133004618083e-06, + "loss": 0.10727920532226562, + "step": 45500 + }, + { + "epoch": 0.3934682795652437, + "grad_norm": 0.5500324303644114, + "learning_rate": 5.445014956267007e-06, + "loss": 0.08381195068359375, + "step": 45505 + }, + { + "epoch": 0.39351151308678695, + "grad_norm": 6.116265805119168, + "learning_rate": 5.444896896639738e-06, + "loss": 0.03645477294921875, + "step": 45510 + }, + { + "epoch": 0.39355474660833023, + "grad_norm": 6.580598000730393, + "learning_rate": 5.44477882573682e-06, + "loss": 0.24734954833984374, + "step": 45515 + }, + { + "epoch": 0.3935979801298735, + "grad_norm": 14.378397999495743, + "learning_rate": 5.444660743558799e-06, + "loss": 0.20542478561401367, + "step": 45520 + }, + { + "epoch": 0.39364121365141674, + "grad_norm": 15.302854200933748, + "learning_rate": 5.444542650106218e-06, + "loss": 0.2176025390625, + "step": 45525 + }, + { + "epoch": 0.39368444717296003, + "grad_norm": 13.09227774467083, + "learning_rate": 5.444424545379622e-06, + "loss": 0.0711883544921875, + "step": 45530 + }, + { + "epoch": 0.3937276806945033, + "grad_norm": 12.280028600617017, + "learning_rate": 5.444306429379557e-06, + "loss": 0.10612106323242188, + "step": 45535 + }, + { + "epoch": 0.39377091421604654, + "grad_norm": 0.41896308283283645, + "learning_rate": 5.4441883021065665e-06, + "loss": 0.10836257934570312, + "step": 45540 + }, + { + "epoch": 0.3938141477375898, + "grad_norm": 14.659816517562998, + "learning_rate": 5.444070163561195e-06, + "loss": 0.14233856201171874, + "step": 45545 + }, + { + "epoch": 0.39385738125913305, + "grad_norm": 26.15083646274244, + "learning_rate": 5.443952013743989e-06, + "loss": 0.5689682006835938, + "step": 45550 + }, + { + "epoch": 0.39390061478067634, + "grad_norm": 4.928998355984894, + "learning_rate": 5.443833852655491e-06, + "loss": 0.07900848388671874, + "step": 45555 + }, + { + "epoch": 0.3939438483022196, + "grad_norm": 28.11244207961824, + "learning_rate": 5.4437156802962474e-06, + "loss": 0.10041770935058594, + "step": 45560 + }, + { + "epoch": 0.39398708182376285, + "grad_norm": 9.29843833702891, + "learning_rate": 5.443597496666803e-06, + "loss": 0.1479625701904297, + "step": 45565 + }, + { + "epoch": 0.39403031534530614, + "grad_norm": 5.220507549586121, + "learning_rate": 5.443479301767703e-06, + "loss": 0.10310783386230468, + "step": 45570 + }, + { + "epoch": 0.3940735488668494, + "grad_norm": 24.362899948766618, + "learning_rate": 5.443361095599492e-06, + "loss": 0.4060791015625, + "step": 45575 + }, + { + "epoch": 0.39411678238839265, + "grad_norm": 4.3703553674662015, + "learning_rate": 5.4432428781627164e-06, + "loss": 0.17476654052734375, + "step": 45580 + }, + { + "epoch": 0.39416001590993593, + "grad_norm": 27.951140723780004, + "learning_rate": 5.44312464945792e-06, + "loss": 0.14749927520751954, + "step": 45585 + }, + { + "epoch": 0.3942032494314792, + "grad_norm": 0.28586345651896766, + "learning_rate": 5.443006409485649e-06, + "loss": 0.15276947021484374, + "step": 45590 + }, + { + "epoch": 0.39424648295302245, + "grad_norm": 2.842562074452444, + "learning_rate": 5.442888158246448e-06, + "loss": 0.27756271362304685, + "step": 45595 + }, + { + "epoch": 0.39428971647456573, + "grad_norm": 15.307290757657714, + "learning_rate": 5.442769895740863e-06, + "loss": 0.51080322265625, + "step": 45600 + }, + { + "epoch": 0.39433294999610896, + "grad_norm": 5.571448207167338, + "learning_rate": 5.442651621969439e-06, + "loss": 0.30485191345214846, + "step": 45605 + }, + { + "epoch": 0.39437618351765225, + "grad_norm": 18.878618702740454, + "learning_rate": 5.44253333693272e-06, + "loss": 0.17498779296875, + "step": 45610 + }, + { + "epoch": 0.39441941703919553, + "grad_norm": 5.229354752358557, + "learning_rate": 5.442415040631256e-06, + "loss": 0.112725830078125, + "step": 45615 + }, + { + "epoch": 0.39446265056073876, + "grad_norm": 0.5272444783428872, + "learning_rate": 5.442296733065587e-06, + "loss": 0.04300994873046875, + "step": 45620 + }, + { + "epoch": 0.39450588408228204, + "grad_norm": 22.705916047596244, + "learning_rate": 5.442178414236263e-06, + "loss": 0.3950836181640625, + "step": 45625 + }, + { + "epoch": 0.39454911760382533, + "grad_norm": 7.535888100874517, + "learning_rate": 5.4420600841438275e-06, + "loss": 0.07328338623046875, + "step": 45630 + }, + { + "epoch": 0.39459235112536856, + "grad_norm": 3.6956209373985054, + "learning_rate": 5.441941742788827e-06, + "loss": 0.07356643676757812, + "step": 45635 + }, + { + "epoch": 0.39463558464691184, + "grad_norm": 17.50806232907921, + "learning_rate": 5.441823390171805e-06, + "loss": 0.2763214111328125, + "step": 45640 + }, + { + "epoch": 0.39467881816845507, + "grad_norm": 6.544538330046117, + "learning_rate": 5.4417050262933116e-06, + "loss": 0.0983673095703125, + "step": 45645 + }, + { + "epoch": 0.39472205168999835, + "grad_norm": 18.83007174365412, + "learning_rate": 5.4415866511538895e-06, + "loss": 0.286431884765625, + "step": 45650 + }, + { + "epoch": 0.39476528521154164, + "grad_norm": 0.04038267021525401, + "learning_rate": 5.441468264754084e-06, + "loss": 0.28877696990966795, + "step": 45655 + }, + { + "epoch": 0.39480851873308487, + "grad_norm": 0.338880695118077, + "learning_rate": 5.4413498670944455e-06, + "loss": 0.0986297607421875, + "step": 45660 + }, + { + "epoch": 0.39485175225462815, + "grad_norm": 1.3523608722751363, + "learning_rate": 5.4412314581755155e-06, + "loss": 0.08728179931640626, + "step": 45665 + }, + { + "epoch": 0.39489498577617144, + "grad_norm": 2.2156632418628446, + "learning_rate": 5.441113037997841e-06, + "loss": 0.17081146240234374, + "step": 45670 + }, + { + "epoch": 0.39493821929771467, + "grad_norm": 14.823688409530748, + "learning_rate": 5.440994606561971e-06, + "loss": 0.06885185241699218, + "step": 45675 + }, + { + "epoch": 0.39498145281925795, + "grad_norm": 2.44080209701778, + "learning_rate": 5.4408761638684475e-06, + "loss": 0.06082916259765625, + "step": 45680 + }, + { + "epoch": 0.3950246863408012, + "grad_norm": 44.75520127554571, + "learning_rate": 5.440757709917821e-06, + "loss": 0.26729278564453124, + "step": 45685 + }, + { + "epoch": 0.39506791986234446, + "grad_norm": 7.676470688172489, + "learning_rate": 5.440639244710634e-06, + "loss": 0.16175384521484376, + "step": 45690 + }, + { + "epoch": 0.39511115338388775, + "grad_norm": 2.4231513395411772, + "learning_rate": 5.440520768247433e-06, + "loss": 0.161285400390625, + "step": 45695 + }, + { + "epoch": 0.395154386905431, + "grad_norm": 17.15878096410887, + "learning_rate": 5.440402280528769e-06, + "loss": 0.27567138671875, + "step": 45700 + }, + { + "epoch": 0.39519762042697426, + "grad_norm": 4.829694037430561, + "learning_rate": 5.440283781555183e-06, + "loss": 0.28227081298828127, + "step": 45705 + }, + { + "epoch": 0.39524085394851755, + "grad_norm": 5.292821525400804, + "learning_rate": 5.440165271327226e-06, + "loss": 0.21095046997070313, + "step": 45710 + }, + { + "epoch": 0.3952840874700608, + "grad_norm": 3.0771818477816537, + "learning_rate": 5.440046749845441e-06, + "loss": 0.12867279052734376, + "step": 45715 + }, + { + "epoch": 0.39532732099160406, + "grad_norm": 2.0305840953916765, + "learning_rate": 5.439928217110376e-06, + "loss": 0.084619140625, + "step": 45720 + }, + { + "epoch": 0.3953705545131473, + "grad_norm": 30.474373646712973, + "learning_rate": 5.439809673122577e-06, + "loss": 0.7262321472167969, + "step": 45725 + }, + { + "epoch": 0.39541378803469057, + "grad_norm": 2.6973925524873823, + "learning_rate": 5.439691117882593e-06, + "loss": 0.1397003173828125, + "step": 45730 + }, + { + "epoch": 0.39545702155623386, + "grad_norm": 11.198867759861983, + "learning_rate": 5.439572551390968e-06, + "loss": 0.28264427185058594, + "step": 45735 + }, + { + "epoch": 0.3955002550777771, + "grad_norm": 0.9261372612828788, + "learning_rate": 5.43945397364825e-06, + "loss": 0.06344223022460938, + "step": 45740 + }, + { + "epoch": 0.39554348859932037, + "grad_norm": 4.728716172927978, + "learning_rate": 5.439335384654986e-06, + "loss": 0.22943801879882814, + "step": 45745 + }, + { + "epoch": 0.39558672212086365, + "grad_norm": 1.064902860427808, + "learning_rate": 5.4392167844117235e-06, + "loss": 0.48213043212890627, + "step": 45750 + }, + { + "epoch": 0.3956299556424069, + "grad_norm": 21.614554674502166, + "learning_rate": 5.439098172919008e-06, + "loss": 0.11224365234375, + "step": 45755 + }, + { + "epoch": 0.39567318916395017, + "grad_norm": 0.3996930201062001, + "learning_rate": 5.438979550177387e-06, + "loss": 0.08910369873046875, + "step": 45760 + }, + { + "epoch": 0.39571642268549345, + "grad_norm": 10.477156133579564, + "learning_rate": 5.438860916187408e-06, + "loss": 0.04722518920898437, + "step": 45765 + }, + { + "epoch": 0.3957596562070367, + "grad_norm": 1.0900216172991133, + "learning_rate": 5.438742270949618e-06, + "loss": 0.07656173706054688, + "step": 45770 + }, + { + "epoch": 0.39580288972857997, + "grad_norm": 2.3809814511503, + "learning_rate": 5.438623614464565e-06, + "loss": 0.06407470703125, + "step": 45775 + }, + { + "epoch": 0.3958461232501232, + "grad_norm": 2.9869655143366227, + "learning_rate": 5.438504946732794e-06, + "loss": 0.07433624267578125, + "step": 45780 + }, + { + "epoch": 0.3958893567716665, + "grad_norm": 8.847766825402877, + "learning_rate": 5.438386267754854e-06, + "loss": 0.15001373291015624, + "step": 45785 + }, + { + "epoch": 0.39593259029320976, + "grad_norm": 0.8472966946286724, + "learning_rate": 5.438267577531293e-06, + "loss": 0.058414459228515625, + "step": 45790 + }, + { + "epoch": 0.395975823814753, + "grad_norm": 8.012744000069786, + "learning_rate": 5.438148876062656e-06, + "loss": 0.32334442138671876, + "step": 45795 + }, + { + "epoch": 0.3960190573362963, + "grad_norm": 7.89999840876827, + "learning_rate": 5.438030163349492e-06, + "loss": 0.23076324462890624, + "step": 45800 + }, + { + "epoch": 0.39606229085783956, + "grad_norm": 9.994393580918256, + "learning_rate": 5.43791143939235e-06, + "loss": 0.1921661376953125, + "step": 45805 + }, + { + "epoch": 0.3961055243793828, + "grad_norm": 1.6836470657482832, + "learning_rate": 5.437792704191774e-06, + "loss": 0.17489013671875, + "step": 45810 + }, + { + "epoch": 0.3961487579009261, + "grad_norm": 2.163506950134128, + "learning_rate": 5.4376739577483146e-06, + "loss": 0.1033172607421875, + "step": 45815 + }, + { + "epoch": 0.3961919914224693, + "grad_norm": 31.111689119838182, + "learning_rate": 5.437555200062517e-06, + "loss": 0.12887191772460938, + "step": 45820 + }, + { + "epoch": 0.3962352249440126, + "grad_norm": 0.22161756255425794, + "learning_rate": 5.437436431134931e-06, + "loss": 0.11917037963867187, + "step": 45825 + }, + { + "epoch": 0.39627845846555587, + "grad_norm": 0.7143678505343948, + "learning_rate": 5.437317650966104e-06, + "loss": 0.14989013671875, + "step": 45830 + }, + { + "epoch": 0.3963216919870991, + "grad_norm": 10.392955129325973, + "learning_rate": 5.437198859556582e-06, + "loss": 0.3193756103515625, + "step": 45835 + }, + { + "epoch": 0.3963649255086424, + "grad_norm": 31.815564075767416, + "learning_rate": 5.437080056906916e-06, + "loss": 0.40721435546875, + "step": 45840 + }, + { + "epoch": 0.39640815903018567, + "grad_norm": 0.6600960017745082, + "learning_rate": 5.43696124301765e-06, + "loss": 0.2242462158203125, + "step": 45845 + }, + { + "epoch": 0.3964513925517289, + "grad_norm": 4.636400996349435, + "learning_rate": 5.4368424178893365e-06, + "loss": 0.15807113647460938, + "step": 45850 + }, + { + "epoch": 0.3964946260732722, + "grad_norm": 38.89740942303284, + "learning_rate": 5.43672358152252e-06, + "loss": 0.46407623291015626, + "step": 45855 + }, + { + "epoch": 0.3965378595948154, + "grad_norm": 4.082849234752132, + "learning_rate": 5.436604733917749e-06, + "loss": 0.20607452392578124, + "step": 45860 + }, + { + "epoch": 0.3965810931163587, + "grad_norm": 5.2830115879261585, + "learning_rate": 5.436485875075573e-06, + "loss": 0.04609832763671875, + "step": 45865 + }, + { + "epoch": 0.396624326637902, + "grad_norm": 7.7780307093972425, + "learning_rate": 5.43636700499654e-06, + "loss": 0.052820587158203126, + "step": 45870 + }, + { + "epoch": 0.3966675601594452, + "grad_norm": 2.706523708594068, + "learning_rate": 5.436248123681197e-06, + "loss": 0.18100318908691407, + "step": 45875 + }, + { + "epoch": 0.3967107936809885, + "grad_norm": 17.655799734348708, + "learning_rate": 5.436129231130093e-06, + "loss": 0.31824951171875, + "step": 45880 + }, + { + "epoch": 0.3967540272025318, + "grad_norm": 29.9845060914814, + "learning_rate": 5.436010327343776e-06, + "loss": 0.294830322265625, + "step": 45885 + }, + { + "epoch": 0.396797260724075, + "grad_norm": 7.72286569005724, + "learning_rate": 5.435891412322795e-06, + "loss": 0.19352264404296876, + "step": 45890 + }, + { + "epoch": 0.3968404942456183, + "grad_norm": 9.890418787997937, + "learning_rate": 5.435772486067697e-06, + "loss": 0.14709091186523438, + "step": 45895 + }, + { + "epoch": 0.3968837277671615, + "grad_norm": 5.810989564834476, + "learning_rate": 5.435653548579032e-06, + "loss": 0.363055419921875, + "step": 45900 + }, + { + "epoch": 0.3969269612887048, + "grad_norm": 3.3647931347672717, + "learning_rate": 5.435534599857349e-06, + "loss": 0.36795654296875, + "step": 45905 + }, + { + "epoch": 0.3969701948102481, + "grad_norm": 18.46656080068253, + "learning_rate": 5.435415639903194e-06, + "loss": 0.22645416259765624, + "step": 45910 + }, + { + "epoch": 0.3970134283317913, + "grad_norm": 1.9139194313609456, + "learning_rate": 5.435296668717119e-06, + "loss": 0.43171844482421873, + "step": 45915 + }, + { + "epoch": 0.3970566618533346, + "grad_norm": 11.955643977935912, + "learning_rate": 5.43517768629967e-06, + "loss": 0.21074066162109376, + "step": 45920 + }, + { + "epoch": 0.3970998953748779, + "grad_norm": 2.153255815538391, + "learning_rate": 5.435058692651397e-06, + "loss": 0.12563705444335938, + "step": 45925 + }, + { + "epoch": 0.3971431288964211, + "grad_norm": 3.042964378744833, + "learning_rate": 5.434939687772849e-06, + "loss": 0.11637420654296875, + "step": 45930 + }, + { + "epoch": 0.3971863624179644, + "grad_norm": 2.99687190233438, + "learning_rate": 5.434820671664573e-06, + "loss": 0.088568115234375, + "step": 45935 + }, + { + "epoch": 0.39722959593950763, + "grad_norm": 21.82875096780498, + "learning_rate": 5.434701644327119e-06, + "loss": 0.3176361083984375, + "step": 45940 + }, + { + "epoch": 0.3972728294610509, + "grad_norm": 33.75229235169117, + "learning_rate": 5.434582605761037e-06, + "loss": 0.461859130859375, + "step": 45945 + }, + { + "epoch": 0.3973160629825942, + "grad_norm": 38.1225261377095, + "learning_rate": 5.434463555966876e-06, + "loss": 0.5511859893798828, + "step": 45950 + }, + { + "epoch": 0.3973592965041374, + "grad_norm": 14.136671457177716, + "learning_rate": 5.434344494945183e-06, + "loss": 0.25965576171875, + "step": 45955 + }, + { + "epoch": 0.3974025300256807, + "grad_norm": 22.353010652584203, + "learning_rate": 5.434225422696509e-06, + "loss": 0.20850448608398436, + "step": 45960 + }, + { + "epoch": 0.397445763547224, + "grad_norm": 7.689030276087616, + "learning_rate": 5.434106339221401e-06, + "loss": 0.07767486572265625, + "step": 45965 + }, + { + "epoch": 0.3974889970687672, + "grad_norm": 17.201923467645926, + "learning_rate": 5.433987244520411e-06, + "loss": 0.30447988510131835, + "step": 45970 + }, + { + "epoch": 0.3975322305903105, + "grad_norm": 1.1467744808584037, + "learning_rate": 5.433868138594087e-06, + "loss": 0.07555389404296875, + "step": 45975 + }, + { + "epoch": 0.3975754641118538, + "grad_norm": 1.8423165962948498, + "learning_rate": 5.433749021442977e-06, + "loss": 0.048626708984375, + "step": 45980 + }, + { + "epoch": 0.397618697633397, + "grad_norm": 0.5316489889486791, + "learning_rate": 5.433629893067633e-06, + "loss": 0.051741600036621094, + "step": 45985 + }, + { + "epoch": 0.3976619311549403, + "grad_norm": 1.287094965094084, + "learning_rate": 5.4335107534686024e-06, + "loss": 0.0498687744140625, + "step": 45990 + }, + { + "epoch": 0.39770516467648354, + "grad_norm": 7.57455213011002, + "learning_rate": 5.433391602646435e-06, + "loss": 0.08891887664794922, + "step": 45995 + }, + { + "epoch": 0.3977483981980268, + "grad_norm": 19.362308221903827, + "learning_rate": 5.4332724406016815e-06, + "loss": 0.29253158569335935, + "step": 46000 + }, + { + "epoch": 0.3977916317195701, + "grad_norm": 17.388607643690058, + "learning_rate": 5.43315326733489e-06, + "loss": 0.14778900146484375, + "step": 46005 + }, + { + "epoch": 0.39783486524111333, + "grad_norm": 3.736488069713629, + "learning_rate": 5.43303408284661e-06, + "loss": 0.10618534088134765, + "step": 46010 + }, + { + "epoch": 0.3978780987626566, + "grad_norm": 67.20238895690021, + "learning_rate": 5.4329148871373926e-06, + "loss": 0.20673065185546874, + "step": 46015 + }, + { + "epoch": 0.3979213322841999, + "grad_norm": 0.6697171774090928, + "learning_rate": 5.432795680207785e-06, + "loss": 0.0493408203125, + "step": 46020 + }, + { + "epoch": 0.39796456580574313, + "grad_norm": 3.0613429569275636, + "learning_rate": 5.432676462058341e-06, + "loss": 0.11086959838867187, + "step": 46025 + }, + { + "epoch": 0.3980077993272864, + "grad_norm": 27.413907486964668, + "learning_rate": 5.432557232689607e-06, + "loss": 0.0816314697265625, + "step": 46030 + }, + { + "epoch": 0.39805103284882964, + "grad_norm": 18.89357514399255, + "learning_rate": 5.432437992102134e-06, + "loss": 0.2047271728515625, + "step": 46035 + }, + { + "epoch": 0.39809426637037293, + "grad_norm": 4.262037405176947, + "learning_rate": 5.432318740296472e-06, + "loss": 0.09910888671875, + "step": 46040 + }, + { + "epoch": 0.3981374998919162, + "grad_norm": 19.000758472754722, + "learning_rate": 5.432199477273171e-06, + "loss": 0.571331787109375, + "step": 46045 + }, + { + "epoch": 0.39818073341345944, + "grad_norm": 15.420489486976912, + "learning_rate": 5.4320802030327805e-06, + "loss": 0.269403076171875, + "step": 46050 + }, + { + "epoch": 0.3982239669350027, + "grad_norm": 1.1371541144379922, + "learning_rate": 5.431960917575851e-06, + "loss": 0.0488128662109375, + "step": 46055 + }, + { + "epoch": 0.398267200456546, + "grad_norm": 0.25912833931968837, + "learning_rate": 5.431841620902934e-06, + "loss": 0.09367904663085938, + "step": 46060 + }, + { + "epoch": 0.39831043397808924, + "grad_norm": 4.214222311070237, + "learning_rate": 5.4317223130145766e-06, + "loss": 0.2344390869140625, + "step": 46065 + }, + { + "epoch": 0.3983536674996325, + "grad_norm": 44.56117197434424, + "learning_rate": 5.431602993911332e-06, + "loss": 0.243743896484375, + "step": 46070 + }, + { + "epoch": 0.39839690102117575, + "grad_norm": 37.496433812105415, + "learning_rate": 5.431483663593748e-06, + "loss": 0.4790679931640625, + "step": 46075 + }, + { + "epoch": 0.39844013454271904, + "grad_norm": 0.6517762209408189, + "learning_rate": 5.431364322062377e-06, + "loss": 0.05994911193847656, + "step": 46080 + }, + { + "epoch": 0.3984833680642623, + "grad_norm": 7.570811745757126, + "learning_rate": 5.431244969317768e-06, + "loss": 0.2740234375, + "step": 46085 + }, + { + "epoch": 0.39852660158580555, + "grad_norm": 1.2365064912765042, + "learning_rate": 5.431125605360473e-06, + "loss": 0.18662681579589843, + "step": 46090 + }, + { + "epoch": 0.39856983510734884, + "grad_norm": 21.770353613619424, + "learning_rate": 5.431006230191042e-06, + "loss": 0.10278472900390626, + "step": 46095 + }, + { + "epoch": 0.3986130686288921, + "grad_norm": 45.992210413169175, + "learning_rate": 5.430886843810023e-06, + "loss": 0.2815521240234375, + "step": 46100 + }, + { + "epoch": 0.39865630215043535, + "grad_norm": 8.386402319813266, + "learning_rate": 5.43076744621797e-06, + "loss": 0.224639892578125, + "step": 46105 + }, + { + "epoch": 0.39869953567197863, + "grad_norm": 16.911532958845434, + "learning_rate": 5.430648037415432e-06, + "loss": 0.17242431640625, + "step": 46110 + }, + { + "epoch": 0.39874276919352186, + "grad_norm": 1.0572571369593236, + "learning_rate": 5.4305286174029605e-06, + "loss": 0.1616363525390625, + "step": 46115 + }, + { + "epoch": 0.39878600271506515, + "grad_norm": 2.7300663268044896, + "learning_rate": 5.430409186181106e-06, + "loss": 0.10707168579101563, + "step": 46120 + }, + { + "epoch": 0.39882923623660843, + "grad_norm": 7.013238332132874, + "learning_rate": 5.430289743750418e-06, + "loss": 0.08150787353515625, + "step": 46125 + }, + { + "epoch": 0.39887246975815166, + "grad_norm": 0.6103900174875785, + "learning_rate": 5.430170290111449e-06, + "loss": 0.08063278198242188, + "step": 46130 + }, + { + "epoch": 0.39891570327969494, + "grad_norm": 17.627213592963503, + "learning_rate": 5.430050825264749e-06, + "loss": 0.27130126953125, + "step": 46135 + }, + { + "epoch": 0.39895893680123823, + "grad_norm": 15.161507996925799, + "learning_rate": 5.429931349210869e-06, + "loss": 0.3660743713378906, + "step": 46140 + }, + { + "epoch": 0.39900217032278146, + "grad_norm": 2.0649032026852123, + "learning_rate": 5.4298118619503615e-06, + "loss": 0.2959228515625, + "step": 46145 + }, + { + "epoch": 0.39904540384432474, + "grad_norm": 16.27519893400018, + "learning_rate": 5.429692363483776e-06, + "loss": 0.3509735107421875, + "step": 46150 + }, + { + "epoch": 0.399088637365868, + "grad_norm": 35.32304321051276, + "learning_rate": 5.4295728538116636e-06, + "loss": 0.3581298828125, + "step": 46155 + }, + { + "epoch": 0.39913187088741126, + "grad_norm": 1.6832578731058183, + "learning_rate": 5.429453332934576e-06, + "loss": 0.09610443115234375, + "step": 46160 + }, + { + "epoch": 0.39917510440895454, + "grad_norm": 0.21954680632535054, + "learning_rate": 5.429333800853064e-06, + "loss": 0.10103302001953125, + "step": 46165 + }, + { + "epoch": 0.39921833793049777, + "grad_norm": 1.3949758629994329, + "learning_rate": 5.42921425756768e-06, + "loss": 0.04197998046875, + "step": 46170 + }, + { + "epoch": 0.39926157145204105, + "grad_norm": 1.4912083855574754, + "learning_rate": 5.429094703078974e-06, + "loss": 0.03748397827148438, + "step": 46175 + }, + { + "epoch": 0.39930480497358434, + "grad_norm": 0.9887415648828352, + "learning_rate": 5.428975137387498e-06, + "loss": 0.189996337890625, + "step": 46180 + }, + { + "epoch": 0.39934803849512757, + "grad_norm": 27.361660494868197, + "learning_rate": 5.428855560493804e-06, + "loss": 0.10720329284667969, + "step": 46185 + }, + { + "epoch": 0.39939127201667085, + "grad_norm": 2.6270590218740226, + "learning_rate": 5.428735972398441e-06, + "loss": 0.2372314453125, + "step": 46190 + }, + { + "epoch": 0.39943450553821414, + "grad_norm": 15.868044036699896, + "learning_rate": 5.428616373101964e-06, + "loss": 0.15584716796875, + "step": 46195 + }, + { + "epoch": 0.39947773905975736, + "grad_norm": 22.510984179637287, + "learning_rate": 5.428496762604923e-06, + "loss": 0.2340576171875, + "step": 46200 + }, + { + "epoch": 0.39952097258130065, + "grad_norm": 1.2068170450230262, + "learning_rate": 5.428377140907868e-06, + "loss": 0.09659767150878906, + "step": 46205 + }, + { + "epoch": 0.3995642061028439, + "grad_norm": 5.471607989422295, + "learning_rate": 5.4282575080113526e-06, + "loss": 0.09839935302734375, + "step": 46210 + }, + { + "epoch": 0.39960743962438716, + "grad_norm": 7.61516289771854, + "learning_rate": 5.428137863915929e-06, + "loss": 0.38338623046875, + "step": 46215 + }, + { + "epoch": 0.39965067314593045, + "grad_norm": 2.7392158411017014, + "learning_rate": 5.428018208622147e-06, + "loss": 0.25701370239257815, + "step": 46220 + }, + { + "epoch": 0.3996939066674737, + "grad_norm": 29.460354054424034, + "learning_rate": 5.42789854213056e-06, + "loss": 0.17087364196777344, + "step": 46225 + }, + { + "epoch": 0.39973714018901696, + "grad_norm": 0.6491843060326701, + "learning_rate": 5.4277788644417196e-06, + "loss": 0.12524795532226562, + "step": 46230 + }, + { + "epoch": 0.39978037371056024, + "grad_norm": 12.05701425158566, + "learning_rate": 5.427659175556178e-06, + "loss": 0.14870147705078124, + "step": 46235 + }, + { + "epoch": 0.3998236072321035, + "grad_norm": 1.3386771694770452, + "learning_rate": 5.427539475474486e-06, + "loss": 0.10686798095703125, + "step": 46240 + }, + { + "epoch": 0.39986684075364676, + "grad_norm": 2.7338312211502225, + "learning_rate": 5.427419764197198e-06, + "loss": 0.04244308471679688, + "step": 46245 + }, + { + "epoch": 0.39991007427519, + "grad_norm": 0.4493726493871415, + "learning_rate": 5.427300041724863e-06, + "loss": 0.2202838897705078, + "step": 46250 + }, + { + "epoch": 0.39995330779673327, + "grad_norm": 16.293699787313248, + "learning_rate": 5.427180308058035e-06, + "loss": 0.1363311767578125, + "step": 46255 + }, + { + "epoch": 0.39999654131827656, + "grad_norm": 3.625375119926678, + "learning_rate": 5.427060563197266e-06, + "loss": 0.3534088134765625, + "step": 46260 + }, + { + "epoch": 0.4000397748398198, + "grad_norm": 0.2804929982337846, + "learning_rate": 5.426940807143108e-06, + "loss": 0.301423454284668, + "step": 46265 + }, + { + "epoch": 0.40008300836136307, + "grad_norm": 4.9638837976057495, + "learning_rate": 5.4268210398961135e-06, + "loss": 0.26551971435546873, + "step": 46270 + }, + { + "epoch": 0.40012624188290635, + "grad_norm": 10.730684775276377, + "learning_rate": 5.426701261456835e-06, + "loss": 0.16232452392578126, + "step": 46275 + }, + { + "epoch": 0.4001694754044496, + "grad_norm": 1.0194072459451646, + "learning_rate": 5.426581471825824e-06, + "loss": 0.29117813110351565, + "step": 46280 + }, + { + "epoch": 0.40021270892599287, + "grad_norm": 0.38521555010076586, + "learning_rate": 5.426461671003635e-06, + "loss": 0.09234237670898438, + "step": 46285 + }, + { + "epoch": 0.4002559424475361, + "grad_norm": 21.33827352853395, + "learning_rate": 5.426341858990818e-06, + "loss": 0.16741485595703126, + "step": 46290 + }, + { + "epoch": 0.4002991759690794, + "grad_norm": 5.043870671116529, + "learning_rate": 5.426222035787927e-06, + "loss": 0.244091796875, + "step": 46295 + }, + { + "epoch": 0.40034240949062266, + "grad_norm": 1.5169824425797753, + "learning_rate": 5.426102201395515e-06, + "loss": 0.0352203369140625, + "step": 46300 + }, + { + "epoch": 0.4003856430121659, + "grad_norm": 40.399513265007585, + "learning_rate": 5.425982355814133e-06, + "loss": 0.2563438415527344, + "step": 46305 + }, + { + "epoch": 0.4004288765337092, + "grad_norm": 40.794598079453465, + "learning_rate": 5.425862499044335e-06, + "loss": 0.3467559814453125, + "step": 46310 + }, + { + "epoch": 0.40047211005525246, + "grad_norm": 6.293910017932192, + "learning_rate": 5.425742631086674e-06, + "loss": 0.0358856201171875, + "step": 46315 + }, + { + "epoch": 0.4005153435767957, + "grad_norm": 6.356807243407288, + "learning_rate": 5.425622751941702e-06, + "loss": 0.131109619140625, + "step": 46320 + }, + { + "epoch": 0.400558577098339, + "grad_norm": 1.9918727433881245, + "learning_rate": 5.425502861609972e-06, + "loss": 0.07959990501403809, + "step": 46325 + }, + { + "epoch": 0.40060181061988226, + "grad_norm": 13.797267850126124, + "learning_rate": 5.4253829600920375e-06, + "loss": 0.2010141372680664, + "step": 46330 + }, + { + "epoch": 0.4006450441414255, + "grad_norm": 6.12682005699473, + "learning_rate": 5.425263047388452e-06, + "loss": 0.0391326904296875, + "step": 46335 + }, + { + "epoch": 0.4006882776629688, + "grad_norm": 13.619534761141074, + "learning_rate": 5.425143123499766e-06, + "loss": 0.2107452392578125, + "step": 46340 + }, + { + "epoch": 0.400731511184512, + "grad_norm": 27.951498091078527, + "learning_rate": 5.425023188426535e-06, + "loss": 0.20304412841796876, + "step": 46345 + }, + { + "epoch": 0.4007747447060553, + "grad_norm": 11.082359894950647, + "learning_rate": 5.424903242169311e-06, + "loss": 0.23934173583984375, + "step": 46350 + }, + { + "epoch": 0.40081797822759857, + "grad_norm": 10.262007542007401, + "learning_rate": 5.4247832847286474e-06, + "loss": 0.10382003784179687, + "step": 46355 + }, + { + "epoch": 0.4008612117491418, + "grad_norm": 27.497477582045157, + "learning_rate": 5.424663316105098e-06, + "loss": 0.15228805541992188, + "step": 46360 + }, + { + "epoch": 0.4009044452706851, + "grad_norm": 0.5393353245248748, + "learning_rate": 5.424543336299215e-06, + "loss": 0.08123416900634765, + "step": 46365 + }, + { + "epoch": 0.40094767879222837, + "grad_norm": 0.09733608862136386, + "learning_rate": 5.4244233453115535e-06, + "loss": 0.2044053077697754, + "step": 46370 + }, + { + "epoch": 0.4009909123137716, + "grad_norm": 29.05693707576983, + "learning_rate": 5.424303343142665e-06, + "loss": 0.19250907897949218, + "step": 46375 + }, + { + "epoch": 0.4010341458353149, + "grad_norm": 26.178879020662098, + "learning_rate": 5.424183329793104e-06, + "loss": 0.318304443359375, + "step": 46380 + }, + { + "epoch": 0.4010773793568581, + "grad_norm": 12.75224101454974, + "learning_rate": 5.424063305263423e-06, + "loss": 0.3979248046875, + "step": 46385 + }, + { + "epoch": 0.4011206128784014, + "grad_norm": 25.486459945350745, + "learning_rate": 5.423943269554177e-06, + "loss": 0.2769327163696289, + "step": 46390 + }, + { + "epoch": 0.4011638463999447, + "grad_norm": 7.277082604757436, + "learning_rate": 5.423823222665919e-06, + "loss": 0.0980010986328125, + "step": 46395 + }, + { + "epoch": 0.4012070799214879, + "grad_norm": 14.976653297957746, + "learning_rate": 5.423703164599202e-06, + "loss": 0.1283721923828125, + "step": 46400 + }, + { + "epoch": 0.4012503134430312, + "grad_norm": 1.0784226000382764, + "learning_rate": 5.423583095354579e-06, + "loss": 0.043310546875, + "step": 46405 + }, + { + "epoch": 0.4012935469645745, + "grad_norm": 3.322500592175056, + "learning_rate": 5.423463014932607e-06, + "loss": 0.16479644775390626, + "step": 46410 + }, + { + "epoch": 0.4013367804861177, + "grad_norm": 1.0477353315190094, + "learning_rate": 5.423342923333836e-06, + "loss": 0.01780853271484375, + "step": 46415 + }, + { + "epoch": 0.401380014007661, + "grad_norm": 24.986976197161045, + "learning_rate": 5.4232228205588226e-06, + "loss": 0.20626678466796874, + "step": 46420 + }, + { + "epoch": 0.4014232475292042, + "grad_norm": 4.186639245065592, + "learning_rate": 5.42310270660812e-06, + "loss": 0.217999267578125, + "step": 46425 + }, + { + "epoch": 0.4014664810507475, + "grad_norm": 10.89289679212179, + "learning_rate": 5.422982581482281e-06, + "loss": 0.201654052734375, + "step": 46430 + }, + { + "epoch": 0.4015097145722908, + "grad_norm": 11.003647430541081, + "learning_rate": 5.422862445181861e-06, + "loss": 0.1539947509765625, + "step": 46435 + }, + { + "epoch": 0.401552948093834, + "grad_norm": 18.0484355350781, + "learning_rate": 5.422742297707413e-06, + "loss": 0.3699493408203125, + "step": 46440 + }, + { + "epoch": 0.4015961816153773, + "grad_norm": 27.525007962501743, + "learning_rate": 5.422622139059492e-06, + "loss": 0.35947418212890625, + "step": 46445 + }, + { + "epoch": 0.4016394151369206, + "grad_norm": 1.0149723972234053, + "learning_rate": 5.4225019692386515e-06, + "loss": 0.0286102294921875, + "step": 46450 + }, + { + "epoch": 0.4016826486584638, + "grad_norm": 1.0864317764983478, + "learning_rate": 5.422381788245446e-06, + "loss": 0.0953094482421875, + "step": 46455 + }, + { + "epoch": 0.4017258821800071, + "grad_norm": 29.760441893246774, + "learning_rate": 5.42226159608043e-06, + "loss": 0.16738967895507811, + "step": 46460 + }, + { + "epoch": 0.40176911570155033, + "grad_norm": 13.279942713346216, + "learning_rate": 5.422141392744157e-06, + "loss": 0.164898681640625, + "step": 46465 + }, + { + "epoch": 0.4018123492230936, + "grad_norm": 2.905326608734627, + "learning_rate": 5.422021178237182e-06, + "loss": 0.12596588134765624, + "step": 46470 + }, + { + "epoch": 0.4018555827446369, + "grad_norm": 0.44251118638718556, + "learning_rate": 5.42190095256006e-06, + "loss": 0.41602630615234376, + "step": 46475 + }, + { + "epoch": 0.4018988162661801, + "grad_norm": 0.08231741104972601, + "learning_rate": 5.421780715713344e-06, + "loss": 0.09201507568359375, + "step": 46480 + }, + { + "epoch": 0.4019420497877234, + "grad_norm": 25.54723393617961, + "learning_rate": 5.421660467697589e-06, + "loss": 0.2618206024169922, + "step": 46485 + }, + { + "epoch": 0.4019852833092667, + "grad_norm": 2.836222444302388, + "learning_rate": 5.421540208513352e-06, + "loss": 0.216424560546875, + "step": 46490 + }, + { + "epoch": 0.4020285168308099, + "grad_norm": 5.9723973819289435, + "learning_rate": 5.421419938161183e-06, + "loss": 0.15759601593017578, + "step": 46495 + }, + { + "epoch": 0.4020717503523532, + "grad_norm": 3.249533558548813, + "learning_rate": 5.42129965664164e-06, + "loss": 0.11658782958984375, + "step": 46500 + }, + { + "epoch": 0.4021149838738965, + "grad_norm": 16.86182961414274, + "learning_rate": 5.4211793639552775e-06, + "loss": 0.14256134033203124, + "step": 46505 + }, + { + "epoch": 0.4021582173954397, + "grad_norm": 7.217253855584849, + "learning_rate": 5.42105906010265e-06, + "loss": 0.1616180419921875, + "step": 46510 + }, + { + "epoch": 0.402201450916983, + "grad_norm": 3.814867164553247, + "learning_rate": 5.4209387450843114e-06, + "loss": 0.2330810546875, + "step": 46515 + }, + { + "epoch": 0.40224468443852623, + "grad_norm": 32.957029282888, + "learning_rate": 5.420818418900818e-06, + "loss": 0.20452880859375, + "step": 46520 + }, + { + "epoch": 0.4022879179600695, + "grad_norm": 21.096247901597284, + "learning_rate": 5.420698081552723e-06, + "loss": 0.07206573486328124, + "step": 46525 + }, + { + "epoch": 0.4023311514816128, + "grad_norm": 0.15464738863925634, + "learning_rate": 5.420577733040582e-06, + "loss": 0.2560295104980469, + "step": 46530 + }, + { + "epoch": 0.40237438500315603, + "grad_norm": 3.4803005181551296, + "learning_rate": 5.420457373364951e-06, + "loss": 0.02405853271484375, + "step": 46535 + }, + { + "epoch": 0.4024176185246993, + "grad_norm": 10.424998092060223, + "learning_rate": 5.420337002526385e-06, + "loss": 0.0722747802734375, + "step": 46540 + }, + { + "epoch": 0.4024608520462426, + "grad_norm": 48.57844161740482, + "learning_rate": 5.420216620525438e-06, + "loss": 0.41383819580078124, + "step": 46545 + }, + { + "epoch": 0.40250408556778583, + "grad_norm": 2.307082569307237, + "learning_rate": 5.4200962273626664e-06, + "loss": 0.4246063232421875, + "step": 46550 + }, + { + "epoch": 0.4025473190893291, + "grad_norm": 15.379705142635217, + "learning_rate": 5.4199758230386246e-06, + "loss": 0.1512054443359375, + "step": 46555 + }, + { + "epoch": 0.40259055261087234, + "grad_norm": 0.18815426591032008, + "learning_rate": 5.419855407553868e-06, + "loss": 0.05248870849609375, + "step": 46560 + }, + { + "epoch": 0.4026337861324156, + "grad_norm": 19.455366085670384, + "learning_rate": 5.419734980908951e-06, + "loss": 0.34844970703125, + "step": 46565 + }, + { + "epoch": 0.4026770196539589, + "grad_norm": 1.8066226357179649, + "learning_rate": 5.419614543104431e-06, + "loss": 0.16414947509765626, + "step": 46570 + }, + { + "epoch": 0.40272025317550214, + "grad_norm": 0.4960023738225904, + "learning_rate": 5.419494094140863e-06, + "loss": 0.34815826416015627, + "step": 46575 + }, + { + "epoch": 0.4027634866970454, + "grad_norm": 0.7328465527360514, + "learning_rate": 5.419373634018802e-06, + "loss": 0.2150665283203125, + "step": 46580 + }, + { + "epoch": 0.4028067202185887, + "grad_norm": 9.697721890836409, + "learning_rate": 5.4192531627388025e-06, + "loss": 0.08266143798828125, + "step": 46585 + }, + { + "epoch": 0.40284995374013194, + "grad_norm": 0.1622216532333457, + "learning_rate": 5.419132680301422e-06, + "loss": 0.0539703369140625, + "step": 46590 + }, + { + "epoch": 0.4028931872616752, + "grad_norm": 0.2594809804795666, + "learning_rate": 5.419012186707216e-06, + "loss": 0.07011260986328124, + "step": 46595 + }, + { + "epoch": 0.40293642078321845, + "grad_norm": 29.253074671459462, + "learning_rate": 5.418891681956738e-06, + "loss": 0.33680877685546873, + "step": 46600 + }, + { + "epoch": 0.40297965430476174, + "grad_norm": 7.938236851931368, + "learning_rate": 5.418771166050546e-06, + "loss": 0.323822021484375, + "step": 46605 + }, + { + "epoch": 0.403022887826305, + "grad_norm": 8.238657893380699, + "learning_rate": 5.418650638989196e-06, + "loss": 0.1124420166015625, + "step": 46610 + }, + { + "epoch": 0.40306612134784825, + "grad_norm": 9.320754216423328, + "learning_rate": 5.418530100773241e-06, + "loss": 0.09958343505859375, + "step": 46615 + }, + { + "epoch": 0.40310935486939153, + "grad_norm": 35.448264538929536, + "learning_rate": 5.41840955140324e-06, + "loss": 0.3522987365722656, + "step": 46620 + }, + { + "epoch": 0.4031525883909348, + "grad_norm": 4.765384903791516, + "learning_rate": 5.418288990879748e-06, + "loss": 0.12818145751953125, + "step": 46625 + }, + { + "epoch": 0.40319582191247805, + "grad_norm": 1.9549507636480217, + "learning_rate": 5.418168419203321e-06, + "loss": 0.1106689453125, + "step": 46630 + }, + { + "epoch": 0.40323905543402133, + "grad_norm": 40.44520670170841, + "learning_rate": 5.4180478363745155e-06, + "loss": 0.59044189453125, + "step": 46635 + }, + { + "epoch": 0.40328228895556456, + "grad_norm": 15.316876035069878, + "learning_rate": 5.417927242393885e-06, + "loss": 0.09636993408203125, + "step": 46640 + }, + { + "epoch": 0.40332552247710785, + "grad_norm": 2.7940601621842056, + "learning_rate": 5.417806637261989e-06, + "loss": 0.201611328125, + "step": 46645 + }, + { + "epoch": 0.40336875599865113, + "grad_norm": 16.568139539653075, + "learning_rate": 5.417686020979382e-06, + "loss": 0.158197021484375, + "step": 46650 + }, + { + "epoch": 0.40341198952019436, + "grad_norm": 0.7258310501892766, + "learning_rate": 5.4175653935466214e-06, + "loss": 0.14575958251953125, + "step": 46655 + }, + { + "epoch": 0.40345522304173764, + "grad_norm": 7.026017339371473, + "learning_rate": 5.417444754964263e-06, + "loss": 0.15977630615234376, + "step": 46660 + }, + { + "epoch": 0.4034984565632809, + "grad_norm": 9.776144175938972, + "learning_rate": 5.417324105232862e-06, + "loss": 0.26663970947265625, + "step": 46665 + }, + { + "epoch": 0.40354169008482416, + "grad_norm": 33.952616561912755, + "learning_rate": 5.417203444352976e-06, + "loss": 0.31346282958984373, + "step": 46670 + }, + { + "epoch": 0.40358492360636744, + "grad_norm": 0.8241922632207959, + "learning_rate": 5.417082772325162e-06, + "loss": 0.111016845703125, + "step": 46675 + }, + { + "epoch": 0.40362815712791067, + "grad_norm": 7.562028442806661, + "learning_rate": 5.416962089149976e-06, + "loss": 0.214324951171875, + "step": 46680 + }, + { + "epoch": 0.40367139064945395, + "grad_norm": 19.99516205215316, + "learning_rate": 5.416841394827973e-06, + "loss": 0.1112213134765625, + "step": 46685 + }, + { + "epoch": 0.40371462417099724, + "grad_norm": 13.978079080526605, + "learning_rate": 5.416720689359712e-06, + "loss": 0.0692291259765625, + "step": 46690 + }, + { + "epoch": 0.40375785769254047, + "grad_norm": 3.6790765125075957, + "learning_rate": 5.416599972745748e-06, + "loss": 0.19815139770507811, + "step": 46695 + }, + { + "epoch": 0.40380109121408375, + "grad_norm": 0.19681230662719024, + "learning_rate": 5.4164792449866395e-06, + "loss": 0.04152679443359375, + "step": 46700 + }, + { + "epoch": 0.40384432473562704, + "grad_norm": 2.2018350266862474, + "learning_rate": 5.416358506082942e-06, + "loss": 0.1615020751953125, + "step": 46705 + }, + { + "epoch": 0.40388755825717026, + "grad_norm": 36.8175375430685, + "learning_rate": 5.416237756035212e-06, + "loss": 0.3054718017578125, + "step": 46710 + }, + { + "epoch": 0.40393079177871355, + "grad_norm": 3.6445259916607036, + "learning_rate": 5.4161169948440075e-06, + "loss": 0.15428543090820312, + "step": 46715 + }, + { + "epoch": 0.40397402530025683, + "grad_norm": 24.096357522478, + "learning_rate": 5.415996222509885e-06, + "loss": 0.21138458251953124, + "step": 46720 + }, + { + "epoch": 0.40401725882180006, + "grad_norm": 0.6797223564942741, + "learning_rate": 5.415875439033401e-06, + "loss": 0.4022193908691406, + "step": 46725 + }, + { + "epoch": 0.40406049234334335, + "grad_norm": 5.130061056679608, + "learning_rate": 5.415754644415112e-06, + "loss": 0.06480941772460938, + "step": 46730 + }, + { + "epoch": 0.4041037258648866, + "grad_norm": 3.6548034655789903, + "learning_rate": 5.4156338386555785e-06, + "loss": 0.088067626953125, + "step": 46735 + }, + { + "epoch": 0.40414695938642986, + "grad_norm": 1.2984407242165432, + "learning_rate": 5.415513021755354e-06, + "loss": 0.12079200744628907, + "step": 46740 + }, + { + "epoch": 0.40419019290797314, + "grad_norm": 28.665573982474093, + "learning_rate": 5.415392193714996e-06, + "loss": 0.40689544677734374, + "step": 46745 + }, + { + "epoch": 0.4042334264295164, + "grad_norm": 16.624875378142118, + "learning_rate": 5.415271354535064e-06, + "loss": 0.07353515625, + "step": 46750 + }, + { + "epoch": 0.40427665995105966, + "grad_norm": 1.4616463413382283, + "learning_rate": 5.415150504216113e-06, + "loss": 0.0741363525390625, + "step": 46755 + }, + { + "epoch": 0.40431989347260294, + "grad_norm": 0.8663617437939605, + "learning_rate": 5.415029642758701e-06, + "loss": 0.0226654052734375, + "step": 46760 + }, + { + "epoch": 0.40436312699414617, + "grad_norm": 0.5076329908020502, + "learning_rate": 5.414908770163387e-06, + "loss": 0.527798843383789, + "step": 46765 + }, + { + "epoch": 0.40440636051568946, + "grad_norm": 13.73349468824997, + "learning_rate": 5.414787886430726e-06, + "loss": 0.183587646484375, + "step": 46770 + }, + { + "epoch": 0.4044495940372327, + "grad_norm": 43.54539601950611, + "learning_rate": 5.414666991561277e-06, + "loss": 0.2636932373046875, + "step": 46775 + }, + { + "epoch": 0.40449282755877597, + "grad_norm": 1.0452098482566898, + "learning_rate": 5.414546085555597e-06, + "loss": 0.0920074462890625, + "step": 46780 + }, + { + "epoch": 0.40453606108031925, + "grad_norm": 16.79536913166361, + "learning_rate": 5.414425168414244e-06, + "loss": 0.3194988250732422, + "step": 46785 + }, + { + "epoch": 0.4045792946018625, + "grad_norm": 26.75832907021791, + "learning_rate": 5.414304240137776e-06, + "loss": 0.1923236846923828, + "step": 46790 + }, + { + "epoch": 0.40462252812340577, + "grad_norm": 2.864925573914559, + "learning_rate": 5.414183300726749e-06, + "loss": 0.08297119140625, + "step": 46795 + }, + { + "epoch": 0.40466576164494905, + "grad_norm": 2.0163748306747418, + "learning_rate": 5.414062350181722e-06, + "loss": 0.26929168701171874, + "step": 46800 + }, + { + "epoch": 0.4047089951664923, + "grad_norm": 35.19381260105806, + "learning_rate": 5.413941388503254e-06, + "loss": 0.393853759765625, + "step": 46805 + }, + { + "epoch": 0.40475222868803556, + "grad_norm": 0.10347795929422426, + "learning_rate": 5.413820415691901e-06, + "loss": 0.15150508880615235, + "step": 46810 + }, + { + "epoch": 0.4047954622095788, + "grad_norm": 18.4825347246553, + "learning_rate": 5.413699431748222e-06, + "loss": 0.11048660278320313, + "step": 46815 + }, + { + "epoch": 0.4048386957311221, + "grad_norm": 3.301882937272807, + "learning_rate": 5.413578436672774e-06, + "loss": 0.0869232177734375, + "step": 46820 + }, + { + "epoch": 0.40488192925266536, + "grad_norm": 0.34692533177916324, + "learning_rate": 5.4134574304661145e-06, + "loss": 0.08740997314453125, + "step": 46825 + }, + { + "epoch": 0.4049251627742086, + "grad_norm": 0.5870216698904741, + "learning_rate": 5.413336413128804e-06, + "loss": 0.24383926391601562, + "step": 46830 + }, + { + "epoch": 0.4049683962957519, + "grad_norm": 2.022606962988488, + "learning_rate": 5.413215384661399e-06, + "loss": 0.36318359375, + "step": 46835 + }, + { + "epoch": 0.40501162981729516, + "grad_norm": 12.873492856224619, + "learning_rate": 5.4130943450644575e-06, + "loss": 0.126202392578125, + "step": 46840 + }, + { + "epoch": 0.4050548633388384, + "grad_norm": 6.481415389525959, + "learning_rate": 5.412973294338538e-06, + "loss": 0.32647705078125, + "step": 46845 + }, + { + "epoch": 0.4050980968603817, + "grad_norm": 11.353751020907428, + "learning_rate": 5.4128522324842e-06, + "loss": 0.16556396484375, + "step": 46850 + }, + { + "epoch": 0.4051413303819249, + "grad_norm": 12.158843285839678, + "learning_rate": 5.412731159501999e-06, + "loss": 0.26386566162109376, + "step": 46855 + }, + { + "epoch": 0.4051845639034682, + "grad_norm": 1.0598174327987913, + "learning_rate": 5.412610075392496e-06, + "loss": 0.0553924560546875, + "step": 46860 + }, + { + "epoch": 0.40522779742501147, + "grad_norm": 1.7309237424404829, + "learning_rate": 5.412488980156249e-06, + "loss": 0.19538002014160155, + "step": 46865 + }, + { + "epoch": 0.4052710309465547, + "grad_norm": 2.112265938783345, + "learning_rate": 5.412367873793815e-06, + "loss": 0.069158935546875, + "step": 46870 + }, + { + "epoch": 0.405314264468098, + "grad_norm": 2.110484946415149, + "learning_rate": 5.4122467563057546e-06, + "loss": 0.152435302734375, + "step": 46875 + }, + { + "epoch": 0.40535749798964127, + "grad_norm": 1.1625055331160554, + "learning_rate": 5.412125627692624e-06, + "loss": 0.335076904296875, + "step": 46880 + }, + { + "epoch": 0.4054007315111845, + "grad_norm": 1.656720265032619, + "learning_rate": 5.412004487954985e-06, + "loss": 0.03736305236816406, + "step": 46885 + }, + { + "epoch": 0.4054439650327278, + "grad_norm": 83.19440889843742, + "learning_rate": 5.411883337093393e-06, + "loss": 0.376080322265625, + "step": 46890 + }, + { + "epoch": 0.40548719855427107, + "grad_norm": 0.2519392064101671, + "learning_rate": 5.411762175108408e-06, + "loss": 0.1297975540161133, + "step": 46895 + }, + { + "epoch": 0.4055304320758143, + "grad_norm": 1.5280412537844392, + "learning_rate": 5.411641002000589e-06, + "loss": 0.2974082946777344, + "step": 46900 + }, + { + "epoch": 0.4055736655973576, + "grad_norm": 4.60412127063727, + "learning_rate": 5.411519817770496e-06, + "loss": 0.10074462890625, + "step": 46905 + }, + { + "epoch": 0.4056168991189008, + "grad_norm": 0.6466932180440713, + "learning_rate": 5.411398622418686e-06, + "loss": 0.15884170532226563, + "step": 46910 + }, + { + "epoch": 0.4056601326404441, + "grad_norm": 38.96471254093137, + "learning_rate": 5.4112774159457175e-06, + "loss": 0.2534515380859375, + "step": 46915 + }, + { + "epoch": 0.4057033661619874, + "grad_norm": 0.1558191450255482, + "learning_rate": 5.411156198352152e-06, + "loss": 0.040576171875, + "step": 46920 + }, + { + "epoch": 0.4057465996835306, + "grad_norm": 2.3136839372917035, + "learning_rate": 5.411034969638547e-06, + "loss": 0.064703369140625, + "step": 46925 + }, + { + "epoch": 0.4057898332050739, + "grad_norm": 1.9264577390274649, + "learning_rate": 5.410913729805461e-06, + "loss": 0.09429893493652344, + "step": 46930 + }, + { + "epoch": 0.4058330667266172, + "grad_norm": 0.7811992823860967, + "learning_rate": 5.410792478853455e-06, + "loss": 0.1817718505859375, + "step": 46935 + }, + { + "epoch": 0.4058763002481604, + "grad_norm": 9.103370964124082, + "learning_rate": 5.4106712167830865e-06, + "loss": 0.14377784729003906, + "step": 46940 + }, + { + "epoch": 0.4059195337697037, + "grad_norm": 0.18172342384649198, + "learning_rate": 5.410549943594915e-06, + "loss": 0.49226856231689453, + "step": 46945 + }, + { + "epoch": 0.4059627672912469, + "grad_norm": 38.90396963858381, + "learning_rate": 5.410428659289501e-06, + "loss": 0.1553070068359375, + "step": 46950 + }, + { + "epoch": 0.4060060008127902, + "grad_norm": 3.5724588769296473, + "learning_rate": 5.410307363867403e-06, + "loss": 0.6404953002929688, + "step": 46955 + }, + { + "epoch": 0.4060492343343335, + "grad_norm": 33.07161209875613, + "learning_rate": 5.410186057329179e-06, + "loss": 0.224456787109375, + "step": 46960 + }, + { + "epoch": 0.4060924678558767, + "grad_norm": 13.93481251954223, + "learning_rate": 5.410064739675391e-06, + "loss": 0.16484375, + "step": 46965 + }, + { + "epoch": 0.40613570137742, + "grad_norm": 25.641798938979402, + "learning_rate": 5.409943410906598e-06, + "loss": 0.11002349853515625, + "step": 46970 + }, + { + "epoch": 0.4061789348989633, + "grad_norm": 1.1571729618365982, + "learning_rate": 5.409822071023358e-06, + "loss": 0.13892478942871095, + "step": 46975 + }, + { + "epoch": 0.4062221684205065, + "grad_norm": 5.893952642493627, + "learning_rate": 5.409700720026231e-06, + "loss": 0.0968484878540039, + "step": 46980 + }, + { + "epoch": 0.4062654019420498, + "grad_norm": 0.45419815286160375, + "learning_rate": 5.409579357915778e-06, + "loss": 0.087554931640625, + "step": 46985 + }, + { + "epoch": 0.406308635463593, + "grad_norm": 8.692001350619657, + "learning_rate": 5.409457984692558e-06, + "loss": 0.1411407470703125, + "step": 46990 + }, + { + "epoch": 0.4063518689851363, + "grad_norm": 17.734353456317358, + "learning_rate": 5.409336600357131e-06, + "loss": 0.19351806640625, + "step": 46995 + }, + { + "epoch": 0.4063951025066796, + "grad_norm": 0.23914488189946262, + "learning_rate": 5.409215204910056e-06, + "loss": 0.06713218688964843, + "step": 47000 + }, + { + "epoch": 0.4064383360282228, + "grad_norm": 33.262863161274964, + "learning_rate": 5.409093798351893e-06, + "loss": 0.223974609375, + "step": 47005 + }, + { + "epoch": 0.4064815695497661, + "grad_norm": 60.646278410608396, + "learning_rate": 5.408972380683203e-06, + "loss": 0.3185791015625, + "step": 47010 + }, + { + "epoch": 0.4065248030713094, + "grad_norm": 1.476958676686028, + "learning_rate": 5.4088509519045444e-06, + "loss": 0.0362548828125, + "step": 47015 + }, + { + "epoch": 0.4065680365928526, + "grad_norm": 1.3821546873497494, + "learning_rate": 5.408729512016479e-06, + "loss": 0.08218536376953126, + "step": 47020 + }, + { + "epoch": 0.4066112701143959, + "grad_norm": 27.617073573105063, + "learning_rate": 5.408608061019565e-06, + "loss": 0.13320693969726563, + "step": 47025 + }, + { + "epoch": 0.40665450363593914, + "grad_norm": 27.67502463434984, + "learning_rate": 5.408486598914365e-06, + "loss": 0.3040252685546875, + "step": 47030 + }, + { + "epoch": 0.4066977371574824, + "grad_norm": 9.175410184203802, + "learning_rate": 5.408365125701437e-06, + "loss": 0.33799285888671876, + "step": 47035 + }, + { + "epoch": 0.4067409706790257, + "grad_norm": 0.4775172563975779, + "learning_rate": 5.4082436413813406e-06, + "loss": 0.059637451171875, + "step": 47040 + }, + { + "epoch": 0.40678420420056893, + "grad_norm": 17.75011770446585, + "learning_rate": 5.4081221459546384e-06, + "loss": 0.09751663208007813, + "step": 47045 + }, + { + "epoch": 0.4068274377221122, + "grad_norm": 11.69848882094574, + "learning_rate": 5.4080006394218885e-06, + "loss": 0.2147216796875, + "step": 47050 + }, + { + "epoch": 0.4068706712436555, + "grad_norm": 31.160398703774444, + "learning_rate": 5.407879121783654e-06, + "loss": 0.173016357421875, + "step": 47055 + }, + { + "epoch": 0.40691390476519873, + "grad_norm": 8.308386935768265, + "learning_rate": 5.407757593040493e-06, + "loss": 0.086102294921875, + "step": 47060 + }, + { + "epoch": 0.406957138286742, + "grad_norm": 5.866221720359643, + "learning_rate": 5.407636053192967e-06, + "loss": 0.098529052734375, + "step": 47065 + }, + { + "epoch": 0.4070003718082853, + "grad_norm": 9.793671350917561, + "learning_rate": 5.407514502241635e-06, + "loss": 0.1922119140625, + "step": 47070 + }, + { + "epoch": 0.40704360532982853, + "grad_norm": 1.7900673928035993, + "learning_rate": 5.407392940187061e-06, + "loss": 0.2910675048828125, + "step": 47075 + }, + { + "epoch": 0.4070868388513718, + "grad_norm": 7.2467825659940726, + "learning_rate": 5.407271367029802e-06, + "loss": 0.3473846435546875, + "step": 47080 + }, + { + "epoch": 0.40713007237291504, + "grad_norm": 45.57915646430686, + "learning_rate": 5.40714978277042e-06, + "loss": 0.23738231658935546, + "step": 47085 + }, + { + "epoch": 0.4071733058944583, + "grad_norm": 3.4299162397097045, + "learning_rate": 5.407028187409478e-06, + "loss": 0.13460922241210938, + "step": 47090 + }, + { + "epoch": 0.4072165394160016, + "grad_norm": 2.697515854956884, + "learning_rate": 5.406906580947532e-06, + "loss": 0.245343017578125, + "step": 47095 + }, + { + "epoch": 0.40725977293754484, + "grad_norm": 2.5231582180399417, + "learning_rate": 5.406784963385147e-06, + "loss": 0.2260498046875, + "step": 47100 + }, + { + "epoch": 0.4073030064590881, + "grad_norm": 19.70341696473216, + "learning_rate": 5.406663334722882e-06, + "loss": 0.1533477783203125, + "step": 47105 + }, + { + "epoch": 0.4073462399806314, + "grad_norm": 0.12172702805161864, + "learning_rate": 5.406541694961298e-06, + "loss": 0.12250537872314453, + "step": 47110 + }, + { + "epoch": 0.40738947350217464, + "grad_norm": 0.7963603294984821, + "learning_rate": 5.406420044100957e-06, + "loss": 0.221038818359375, + "step": 47115 + }, + { + "epoch": 0.4074327070237179, + "grad_norm": 6.638652799418068, + "learning_rate": 5.406298382142419e-06, + "loss": 0.5341110229492188, + "step": 47120 + }, + { + "epoch": 0.40747594054526115, + "grad_norm": 1.0661584852090795, + "learning_rate": 5.406176709086246e-06, + "loss": 0.0557220458984375, + "step": 47125 + }, + { + "epoch": 0.40751917406680443, + "grad_norm": 6.749210425190622, + "learning_rate": 5.4060550249329984e-06, + "loss": 0.1409912109375, + "step": 47130 + }, + { + "epoch": 0.4075624075883477, + "grad_norm": 11.008277581749217, + "learning_rate": 5.4059333296832365e-06, + "loss": 0.15027618408203125, + "step": 47135 + }, + { + "epoch": 0.40760564110989095, + "grad_norm": 19.68670025920023, + "learning_rate": 5.405811623337525e-06, + "loss": 0.18846664428710938, + "step": 47140 + }, + { + "epoch": 0.40764887463143423, + "grad_norm": 29.752051413867918, + "learning_rate": 5.405689905896422e-06, + "loss": 0.2673135757446289, + "step": 47145 + }, + { + "epoch": 0.4076921081529775, + "grad_norm": 27.9211181272703, + "learning_rate": 5.405568177360488e-06, + "loss": 0.1781219482421875, + "step": 47150 + }, + { + "epoch": 0.40773534167452075, + "grad_norm": 0.8767816378106146, + "learning_rate": 5.405446437730288e-06, + "loss": 0.09688720703125, + "step": 47155 + }, + { + "epoch": 0.40777857519606403, + "grad_norm": 1.1958466928796068, + "learning_rate": 5.405324687006381e-06, + "loss": 0.1281707763671875, + "step": 47160 + }, + { + "epoch": 0.40782180871760726, + "grad_norm": 5.43829341888938, + "learning_rate": 5.405202925189329e-06, + "loss": 0.2475189208984375, + "step": 47165 + }, + { + "epoch": 0.40786504223915054, + "grad_norm": 3.391108340325165, + "learning_rate": 5.405081152279694e-06, + "loss": 0.04311294555664062, + "step": 47170 + }, + { + "epoch": 0.40790827576069383, + "grad_norm": 1.772905389327602, + "learning_rate": 5.404959368278036e-06, + "loss": 0.08461685180664062, + "step": 47175 + }, + { + "epoch": 0.40795150928223706, + "grad_norm": 0.8849051088120595, + "learning_rate": 5.40483757318492e-06, + "loss": 0.2237548828125, + "step": 47180 + }, + { + "epoch": 0.40799474280378034, + "grad_norm": 52.56139488539261, + "learning_rate": 5.404715767000904e-06, + "loss": 0.27599639892578126, + "step": 47185 + }, + { + "epoch": 0.4080379763253236, + "grad_norm": 14.645637650225051, + "learning_rate": 5.404593949726552e-06, + "loss": 0.1742706298828125, + "step": 47190 + }, + { + "epoch": 0.40808120984686685, + "grad_norm": 3.413186733551852, + "learning_rate": 5.404472121362425e-06, + "loss": 0.030873870849609374, + "step": 47195 + }, + { + "epoch": 0.40812444336841014, + "grad_norm": 8.513377606727905, + "learning_rate": 5.404350281909085e-06, + "loss": 0.11714458465576172, + "step": 47200 + }, + { + "epoch": 0.40816767688995337, + "grad_norm": 29.92088119153848, + "learning_rate": 5.404228431367094e-06, + "loss": 0.193157958984375, + "step": 47205 + }, + { + "epoch": 0.40821091041149665, + "grad_norm": 8.237951322277544, + "learning_rate": 5.404106569737014e-06, + "loss": 0.21845855712890624, + "step": 47210 + }, + { + "epoch": 0.40825414393303994, + "grad_norm": 36.0365977599554, + "learning_rate": 5.4039846970194065e-06, + "loss": 0.2978782653808594, + "step": 47215 + }, + { + "epoch": 0.40829737745458317, + "grad_norm": 5.564307984921338, + "learning_rate": 5.403862813214834e-06, + "loss": 0.14238548278808594, + "step": 47220 + }, + { + "epoch": 0.40834061097612645, + "grad_norm": 2.2773502749444305, + "learning_rate": 5.403740918323859e-06, + "loss": 0.026499176025390626, + "step": 47225 + }, + { + "epoch": 0.40838384449766973, + "grad_norm": 27.076309470207917, + "learning_rate": 5.403619012347044e-06, + "loss": 0.390350341796875, + "step": 47230 + }, + { + "epoch": 0.40842707801921296, + "grad_norm": 7.138022053831748, + "learning_rate": 5.403497095284949e-06, + "loss": 0.1684906005859375, + "step": 47235 + }, + { + "epoch": 0.40847031154075625, + "grad_norm": 12.208062333063836, + "learning_rate": 5.403375167138139e-06, + "loss": 0.05475959777832031, + "step": 47240 + }, + { + "epoch": 0.4085135450622995, + "grad_norm": 15.509086039225519, + "learning_rate": 5.403253227907174e-06, + "loss": 0.0600616455078125, + "step": 47245 + }, + { + "epoch": 0.40855677858384276, + "grad_norm": 30.956454591937103, + "learning_rate": 5.403131277592619e-06, + "loss": 0.29643936157226564, + "step": 47250 + }, + { + "epoch": 0.40860001210538605, + "grad_norm": 8.592283917742614, + "learning_rate": 5.403009316195034e-06, + "loss": 0.1628887176513672, + "step": 47255 + }, + { + "epoch": 0.4086432456269293, + "grad_norm": 2.9847417238182103, + "learning_rate": 5.402887343714981e-06, + "loss": 0.17199935913085937, + "step": 47260 + }, + { + "epoch": 0.40868647914847256, + "grad_norm": 4.511898149847598, + "learning_rate": 5.402765360153026e-06, + "loss": 0.14860305786132813, + "step": 47265 + }, + { + "epoch": 0.40872971267001584, + "grad_norm": 12.15736834531255, + "learning_rate": 5.4026433655097286e-06, + "loss": 0.24230728149414063, + "step": 47270 + }, + { + "epoch": 0.4087729461915591, + "grad_norm": 29.043721935811686, + "learning_rate": 5.402521359785652e-06, + "loss": 0.331597900390625, + "step": 47275 + }, + { + "epoch": 0.40881617971310236, + "grad_norm": 14.869004727854684, + "learning_rate": 5.40239934298136e-06, + "loss": 0.14420547485351562, + "step": 47280 + }, + { + "epoch": 0.40885941323464564, + "grad_norm": 4.578570228546097, + "learning_rate": 5.402277315097415e-06, + "loss": 0.26216583251953124, + "step": 47285 + }, + { + "epoch": 0.40890264675618887, + "grad_norm": 7.159434192027318, + "learning_rate": 5.402155276134378e-06, + "loss": 0.2026595115661621, + "step": 47290 + }, + { + "epoch": 0.40894588027773215, + "grad_norm": 61.64040883910072, + "learning_rate": 5.402033226092812e-06, + "loss": 0.285675048828125, + "step": 47295 + }, + { + "epoch": 0.4089891137992754, + "grad_norm": 15.877047170690409, + "learning_rate": 5.401911164973283e-06, + "loss": 0.17875137329101562, + "step": 47300 + }, + { + "epoch": 0.40903234732081867, + "grad_norm": 29.926476905773526, + "learning_rate": 5.401789092776352e-06, + "loss": 0.60703125, + "step": 47305 + }, + { + "epoch": 0.40907558084236195, + "grad_norm": 4.4086321666925805, + "learning_rate": 5.401667009502581e-06, + "loss": 0.4241912841796875, + "step": 47310 + }, + { + "epoch": 0.4091188143639052, + "grad_norm": 0.27404392080522616, + "learning_rate": 5.401544915152535e-06, + "loss": 0.4280120849609375, + "step": 47315 + }, + { + "epoch": 0.40916204788544847, + "grad_norm": 8.612319147757855, + "learning_rate": 5.401422809726775e-06, + "loss": 0.316754150390625, + "step": 47320 + }, + { + "epoch": 0.40920528140699175, + "grad_norm": 1.7293091787093, + "learning_rate": 5.401300693225866e-06, + "loss": 0.05440216064453125, + "step": 47325 + }, + { + "epoch": 0.409248514928535, + "grad_norm": 9.645367106350323, + "learning_rate": 5.40117856565037e-06, + "loss": 0.18226165771484376, + "step": 47330 + }, + { + "epoch": 0.40929174845007826, + "grad_norm": 6.467372982472913, + "learning_rate": 5.40105642700085e-06, + "loss": 0.297381591796875, + "step": 47335 + }, + { + "epoch": 0.4093349819716215, + "grad_norm": 29.597898720623753, + "learning_rate": 5.400934277277871e-06, + "loss": 0.263934326171875, + "step": 47340 + }, + { + "epoch": 0.4093782154931648, + "grad_norm": 13.545040988980993, + "learning_rate": 5.400812116481994e-06, + "loss": 0.25670166015625, + "step": 47345 + }, + { + "epoch": 0.40942144901470806, + "grad_norm": 10.455294264762353, + "learning_rate": 5.400689944613784e-06, + "loss": 0.15720977783203124, + "step": 47350 + }, + { + "epoch": 0.4094646825362513, + "grad_norm": 13.900204976373258, + "learning_rate": 5.400567761673805e-06, + "loss": 0.1726806640625, + "step": 47355 + }, + { + "epoch": 0.4095079160577946, + "grad_norm": 1.1314843054592592, + "learning_rate": 5.400445567662619e-06, + "loss": 0.209564208984375, + "step": 47360 + }, + { + "epoch": 0.40955114957933786, + "grad_norm": 27.341438158477413, + "learning_rate": 5.400323362580791e-06, + "loss": 0.414044189453125, + "step": 47365 + }, + { + "epoch": 0.4095943831008811, + "grad_norm": 1.4223967849531083, + "learning_rate": 5.400201146428883e-06, + "loss": 0.561553955078125, + "step": 47370 + }, + { + "epoch": 0.40963761662242437, + "grad_norm": 15.4723347848354, + "learning_rate": 5.400078919207459e-06, + "loss": 0.16759414672851564, + "step": 47375 + }, + { + "epoch": 0.4096808501439676, + "grad_norm": 3.74214673447196, + "learning_rate": 5.399956680917084e-06, + "loss": 0.20857200622558594, + "step": 47380 + }, + { + "epoch": 0.4097240836655109, + "grad_norm": 11.454103019540467, + "learning_rate": 5.3998344315583205e-06, + "loss": 0.123529052734375, + "step": 47385 + }, + { + "epoch": 0.40976731718705417, + "grad_norm": 9.591375753370302, + "learning_rate": 5.399712171131732e-06, + "loss": 0.1727466583251953, + "step": 47390 + }, + { + "epoch": 0.4098105507085974, + "grad_norm": 14.995713495377966, + "learning_rate": 5.399589899637883e-06, + "loss": 0.17964706420898438, + "step": 47395 + }, + { + "epoch": 0.4098537842301407, + "grad_norm": 23.21006428797451, + "learning_rate": 5.399467617077339e-06, + "loss": 0.8502113342285156, + "step": 47400 + }, + { + "epoch": 0.40989701775168397, + "grad_norm": 0.497713018951399, + "learning_rate": 5.399345323450661e-06, + "loss": 0.25684127807617185, + "step": 47405 + }, + { + "epoch": 0.4099402512732272, + "grad_norm": 10.97427159445519, + "learning_rate": 5.399223018758415e-06, + "loss": 0.23380889892578124, + "step": 47410 + }, + { + "epoch": 0.4099834847947705, + "grad_norm": 12.962387213431473, + "learning_rate": 5.399100703001164e-06, + "loss": 0.14286956787109376, + "step": 47415 + }, + { + "epoch": 0.4100267183163137, + "grad_norm": 0.8968591490845063, + "learning_rate": 5.3989783761794725e-06, + "loss": 0.1847240447998047, + "step": 47420 + }, + { + "epoch": 0.410069951837857, + "grad_norm": 9.317029413419263, + "learning_rate": 5.398856038293905e-06, + "loss": 0.20111465454101562, + "step": 47425 + }, + { + "epoch": 0.4101131853594003, + "grad_norm": 28.214121479415866, + "learning_rate": 5.3987336893450255e-06, + "loss": 0.1786224365234375, + "step": 47430 + }, + { + "epoch": 0.4101564188809435, + "grad_norm": 11.517481758034378, + "learning_rate": 5.3986113293333974e-06, + "loss": 0.4484466552734375, + "step": 47435 + }, + { + "epoch": 0.4101996524024868, + "grad_norm": 0.4642777063286122, + "learning_rate": 5.398488958259587e-06, + "loss": 0.10297393798828125, + "step": 47440 + }, + { + "epoch": 0.4102428859240301, + "grad_norm": 6.703087867120369, + "learning_rate": 5.398366576124157e-06, + "loss": 0.08568801879882812, + "step": 47445 + }, + { + "epoch": 0.4102861194455733, + "grad_norm": 30.481136361662568, + "learning_rate": 5.398244182927672e-06, + "loss": 0.2982627868652344, + "step": 47450 + }, + { + "epoch": 0.4103293529671166, + "grad_norm": 29.220423074950613, + "learning_rate": 5.398121778670697e-06, + "loss": 0.1785888671875, + "step": 47455 + }, + { + "epoch": 0.4103725864886599, + "grad_norm": 4.124632436297828, + "learning_rate": 5.397999363353796e-06, + "loss": 0.044492340087890624, + "step": 47460 + }, + { + "epoch": 0.4104158200102031, + "grad_norm": 14.003018455694983, + "learning_rate": 5.397876936977534e-06, + "loss": 0.09214649200439454, + "step": 47465 + }, + { + "epoch": 0.4104590535317464, + "grad_norm": 18.41219279194675, + "learning_rate": 5.397754499542475e-06, + "loss": 0.16936492919921875, + "step": 47470 + }, + { + "epoch": 0.4105022870532896, + "grad_norm": 2.030429512558372, + "learning_rate": 5.397632051049186e-06, + "loss": 0.13919219970703126, + "step": 47475 + }, + { + "epoch": 0.4105455205748329, + "grad_norm": 31.166278280410694, + "learning_rate": 5.397509591498227e-06, + "loss": 0.35643310546875, + "step": 47480 + }, + { + "epoch": 0.4105887540963762, + "grad_norm": 10.504223527543296, + "learning_rate": 5.397387120890167e-06, + "loss": 0.15203857421875, + "step": 47485 + }, + { + "epoch": 0.4106319876179194, + "grad_norm": 19.906272737890326, + "learning_rate": 5.397264639225569e-06, + "loss": 0.2745639801025391, + "step": 47490 + }, + { + "epoch": 0.4106752211394627, + "grad_norm": 13.679120716744823, + "learning_rate": 5.397142146504999e-06, + "loss": 0.07603759765625, + "step": 47495 + }, + { + "epoch": 0.410718454661006, + "grad_norm": 1.1241325261632462, + "learning_rate": 5.397019642729021e-06, + "loss": 0.07782516479492188, + "step": 47500 + }, + { + "epoch": 0.4107616881825492, + "grad_norm": 0.21584567672390856, + "learning_rate": 5.3968971278982e-06, + "loss": 0.16926727294921876, + "step": 47505 + }, + { + "epoch": 0.4108049217040925, + "grad_norm": 16.377612269780865, + "learning_rate": 5.3967746020131025e-06, + "loss": 0.21140289306640625, + "step": 47510 + }, + { + "epoch": 0.4108481552256357, + "grad_norm": 24.37752142401969, + "learning_rate": 5.39665206507429e-06, + "loss": 0.2982421875, + "step": 47515 + }, + { + "epoch": 0.410891388747179, + "grad_norm": 8.669190013585004, + "learning_rate": 5.396529517082332e-06, + "loss": 0.0515625, + "step": 47520 + }, + { + "epoch": 0.4109346222687223, + "grad_norm": 45.71097058030223, + "learning_rate": 5.396406958037791e-06, + "loss": 0.21686553955078125, + "step": 47525 + }, + { + "epoch": 0.4109778557902655, + "grad_norm": 52.63875099751974, + "learning_rate": 5.396284387941232e-06, + "loss": 0.4069999694824219, + "step": 47530 + }, + { + "epoch": 0.4110210893118088, + "grad_norm": 35.85636428775196, + "learning_rate": 5.3961618067932225e-06, + "loss": 0.18504180908203124, + "step": 47535 + }, + { + "epoch": 0.4110643228333521, + "grad_norm": 2.057251310567495, + "learning_rate": 5.396039214594325e-06, + "loss": 0.056562042236328124, + "step": 47540 + }, + { + "epoch": 0.4111075563548953, + "grad_norm": 26.298624566958463, + "learning_rate": 5.395916611345107e-06, + "loss": 0.4835479736328125, + "step": 47545 + }, + { + "epoch": 0.4111507898764386, + "grad_norm": 6.940863978303095, + "learning_rate": 5.395793997046134e-06, + "loss": 0.0886138916015625, + "step": 47550 + }, + { + "epoch": 0.41119402339798183, + "grad_norm": 10.886156907478524, + "learning_rate": 5.395671371697971e-06, + "loss": 0.15648193359375, + "step": 47555 + }, + { + "epoch": 0.4112372569195251, + "grad_norm": 10.492541772141937, + "learning_rate": 5.395548735301182e-06, + "loss": 0.35703125, + "step": 47560 + }, + { + "epoch": 0.4112804904410684, + "grad_norm": 11.261587598748049, + "learning_rate": 5.395426087856334e-06, + "loss": 0.11177978515625, + "step": 47565 + }, + { + "epoch": 0.41132372396261163, + "grad_norm": 35.556903265075064, + "learning_rate": 5.395303429363993e-06, + "loss": 0.7778091430664062, + "step": 47570 + }, + { + "epoch": 0.4113669574841549, + "grad_norm": 24.537530211487812, + "learning_rate": 5.3951807598247244e-06, + "loss": 0.26024856567382815, + "step": 47575 + }, + { + "epoch": 0.4114101910056982, + "grad_norm": 0.16709900186109491, + "learning_rate": 5.395058079239093e-06, + "loss": 0.1090972900390625, + "step": 47580 + }, + { + "epoch": 0.41145342452724143, + "grad_norm": 5.596910025156153, + "learning_rate": 5.394935387607666e-06, + "loss": 0.057568359375, + "step": 47585 + }, + { + "epoch": 0.4114966580487847, + "grad_norm": 3.243865814323527, + "learning_rate": 5.394812684931008e-06, + "loss": 0.14328765869140625, + "step": 47590 + }, + { + "epoch": 0.41153989157032794, + "grad_norm": 23.72579127987703, + "learning_rate": 5.394689971209687e-06, + "loss": 0.0805755615234375, + "step": 47595 + }, + { + "epoch": 0.4115831250918712, + "grad_norm": 2.3436412310347654, + "learning_rate": 5.394567246444266e-06, + "loss": 0.2815589904785156, + "step": 47600 + }, + { + "epoch": 0.4116263586134145, + "grad_norm": 3.400832669186298, + "learning_rate": 5.394444510635314e-06, + "loss": 0.3360809326171875, + "step": 47605 + }, + { + "epoch": 0.41166959213495774, + "grad_norm": 0.7322364568433133, + "learning_rate": 5.394321763783394e-06, + "loss": 0.13366851806640626, + "step": 47610 + }, + { + "epoch": 0.411712825656501, + "grad_norm": 23.004030746904267, + "learning_rate": 5.394199005889074e-06, + "loss": 0.24852256774902343, + "step": 47615 + }, + { + "epoch": 0.4117560591780443, + "grad_norm": 24.594439283841158, + "learning_rate": 5.39407623695292e-06, + "loss": 0.17326202392578124, + "step": 47620 + }, + { + "epoch": 0.41179929269958754, + "grad_norm": 12.127203286500412, + "learning_rate": 5.3939534569754974e-06, + "loss": 0.1088836669921875, + "step": 47625 + }, + { + "epoch": 0.4118425262211308, + "grad_norm": 0.7896248592193905, + "learning_rate": 5.393830665957374e-06, + "loss": 0.12652664184570311, + "step": 47630 + }, + { + "epoch": 0.4118857597426741, + "grad_norm": 1.222403573934527, + "learning_rate": 5.393707863899114e-06, + "loss": 0.12577247619628906, + "step": 47635 + }, + { + "epoch": 0.41192899326421734, + "grad_norm": 21.47120275244497, + "learning_rate": 5.393585050801286e-06, + "loss": 0.10240631103515625, + "step": 47640 + }, + { + "epoch": 0.4119722267857606, + "grad_norm": 17.919846590060224, + "learning_rate": 5.393462226664453e-06, + "loss": 0.41741943359375, + "step": 47645 + }, + { + "epoch": 0.41201546030730385, + "grad_norm": 2.112829868070975, + "learning_rate": 5.393339391489186e-06, + "loss": 0.11145172119140626, + "step": 47650 + }, + { + "epoch": 0.41205869382884713, + "grad_norm": 0.8411198105675619, + "learning_rate": 5.3932165452760485e-06, + "loss": 0.195941162109375, + "step": 47655 + }, + { + "epoch": 0.4121019273503904, + "grad_norm": 10.099471895598025, + "learning_rate": 5.3930936880256076e-06, + "loss": 0.3707672119140625, + "step": 47660 + }, + { + "epoch": 0.41214516087193365, + "grad_norm": 5.227441615549836, + "learning_rate": 5.39297081973843e-06, + "loss": 0.300927734375, + "step": 47665 + }, + { + "epoch": 0.41218839439347693, + "grad_norm": 5.467688586800921, + "learning_rate": 5.392847940415082e-06, + "loss": 0.248553466796875, + "step": 47670 + }, + { + "epoch": 0.4122316279150202, + "grad_norm": 5.6419322432773615, + "learning_rate": 5.392725050056132e-06, + "loss": 0.1516265869140625, + "step": 47675 + }, + { + "epoch": 0.41227486143656344, + "grad_norm": 5.222218470040416, + "learning_rate": 5.392602148662145e-06, + "loss": 0.1569580078125, + "step": 47680 + }, + { + "epoch": 0.41231809495810673, + "grad_norm": 7.752874959498346, + "learning_rate": 5.392479236233688e-06, + "loss": 0.21187515258789064, + "step": 47685 + }, + { + "epoch": 0.41236132847964996, + "grad_norm": 0.30838114930731586, + "learning_rate": 5.392356312771329e-06, + "loss": 0.4886589050292969, + "step": 47690 + }, + { + "epoch": 0.41240456200119324, + "grad_norm": 55.463278985702615, + "learning_rate": 5.3922333782756325e-06, + "loss": 0.35928192138671877, + "step": 47695 + }, + { + "epoch": 0.4124477955227365, + "grad_norm": 8.15117000776534, + "learning_rate": 5.392110432747167e-06, + "loss": 0.09578704833984375, + "step": 47700 + }, + { + "epoch": 0.41249102904427976, + "grad_norm": 20.261965577069855, + "learning_rate": 5.391987476186501e-06, + "loss": 0.17192535400390624, + "step": 47705 + }, + { + "epoch": 0.41253426256582304, + "grad_norm": 5.887230188264224, + "learning_rate": 5.3918645085942e-06, + "loss": 0.13030548095703126, + "step": 47710 + }, + { + "epoch": 0.4125774960873663, + "grad_norm": 7.342724705664473, + "learning_rate": 5.39174152997083e-06, + "loss": 0.06733512878417969, + "step": 47715 + }, + { + "epoch": 0.41262072960890955, + "grad_norm": 0.41152374435797867, + "learning_rate": 5.391618540316961e-06, + "loss": 0.07345657348632813, + "step": 47720 + }, + { + "epoch": 0.41266396313045284, + "grad_norm": 29.45504471555402, + "learning_rate": 5.391495539633157e-06, + "loss": 0.220416259765625, + "step": 47725 + }, + { + "epoch": 0.41270719665199607, + "grad_norm": 36.187730109974865, + "learning_rate": 5.3913725279199885e-06, + "loss": 0.1420513153076172, + "step": 47730 + }, + { + "epoch": 0.41275043017353935, + "grad_norm": 2.3658655574418095, + "learning_rate": 5.3912495051780205e-06, + "loss": 0.1229248046875, + "step": 47735 + }, + { + "epoch": 0.41279366369508264, + "grad_norm": 1.732596727463453, + "learning_rate": 5.39112647140782e-06, + "loss": 0.05384902954101563, + "step": 47740 + }, + { + "epoch": 0.41283689721662586, + "grad_norm": 1.0231590979718643, + "learning_rate": 5.391003426609957e-06, + "loss": 0.26710052490234376, + "step": 47745 + }, + { + "epoch": 0.41288013073816915, + "grad_norm": 0.30895810849670546, + "learning_rate": 5.390880370784997e-06, + "loss": 0.36279830932617185, + "step": 47750 + }, + { + "epoch": 0.41292336425971243, + "grad_norm": 5.908672451075166, + "learning_rate": 5.390757303933507e-06, + "loss": 0.04153900146484375, + "step": 47755 + }, + { + "epoch": 0.41296659778125566, + "grad_norm": 5.722797975859519, + "learning_rate": 5.390634226056058e-06, + "loss": 0.13358612060546876, + "step": 47760 + }, + { + "epoch": 0.41300983130279895, + "grad_norm": 10.615007696471425, + "learning_rate": 5.390511137153213e-06, + "loss": 0.10655593872070312, + "step": 47765 + }, + { + "epoch": 0.4130530648243422, + "grad_norm": 0.5357247055111181, + "learning_rate": 5.390388037225543e-06, + "loss": 0.0636962890625, + "step": 47770 + }, + { + "epoch": 0.41309629834588546, + "grad_norm": 1.6290592037652782, + "learning_rate": 5.390264926273614e-06, + "loss": 0.1471435546875, + "step": 47775 + }, + { + "epoch": 0.41313953186742874, + "grad_norm": 15.00886676554821, + "learning_rate": 5.390141804297994e-06, + "loss": 0.3207651138305664, + "step": 47780 + }, + { + "epoch": 0.413182765388972, + "grad_norm": 2.1945919004591947, + "learning_rate": 5.390018671299252e-06, + "loss": 0.142132568359375, + "step": 47785 + }, + { + "epoch": 0.41322599891051526, + "grad_norm": 9.178243305450154, + "learning_rate": 5.389895527277955e-06, + "loss": 0.2260467529296875, + "step": 47790 + }, + { + "epoch": 0.41326923243205854, + "grad_norm": 7.274687846310166, + "learning_rate": 5.38977237223467e-06, + "loss": 0.2301158905029297, + "step": 47795 + }, + { + "epoch": 0.41331246595360177, + "grad_norm": 5.463590259757079, + "learning_rate": 5.389649206169967e-06, + "loss": 0.2890380859375, + "step": 47800 + }, + { + "epoch": 0.41335569947514506, + "grad_norm": 0.440105560561431, + "learning_rate": 5.389526029084413e-06, + "loss": 0.1617431640625, + "step": 47805 + }, + { + "epoch": 0.41339893299668834, + "grad_norm": 19.814785617358407, + "learning_rate": 5.389402840978576e-06, + "loss": 0.23155975341796875, + "step": 47810 + }, + { + "epoch": 0.41344216651823157, + "grad_norm": 36.398519975983994, + "learning_rate": 5.3892796418530235e-06, + "loss": 0.6833076477050781, + "step": 47815 + }, + { + "epoch": 0.41348540003977485, + "grad_norm": 1.7691269995277021, + "learning_rate": 5.389156431708325e-06, + "loss": 0.09036369323730468, + "step": 47820 + }, + { + "epoch": 0.4135286335613181, + "grad_norm": 18.396790877288115, + "learning_rate": 5.389033210545049e-06, + "loss": 0.150384521484375, + "step": 47825 + }, + { + "epoch": 0.41357186708286137, + "grad_norm": 9.651094696702762, + "learning_rate": 5.388909978363762e-06, + "loss": 0.35456008911132814, + "step": 47830 + }, + { + "epoch": 0.41361510060440465, + "grad_norm": 12.756941239432171, + "learning_rate": 5.388786735165033e-06, + "loss": 0.15990982055664063, + "step": 47835 + }, + { + "epoch": 0.4136583341259479, + "grad_norm": 3.8252946582041156, + "learning_rate": 5.388663480949431e-06, + "loss": 0.17564544677734376, + "step": 47840 + }, + { + "epoch": 0.41370156764749116, + "grad_norm": 4.029194860907862, + "learning_rate": 5.388540215717524e-06, + "loss": 0.06718940734863281, + "step": 47845 + }, + { + "epoch": 0.41374480116903445, + "grad_norm": 9.896740413284837, + "learning_rate": 5.388416939469881e-06, + "loss": 0.37871685028076174, + "step": 47850 + }, + { + "epoch": 0.4137880346905777, + "grad_norm": 0.7282580785621091, + "learning_rate": 5.38829365220707e-06, + "loss": 0.1592071533203125, + "step": 47855 + }, + { + "epoch": 0.41383126821212096, + "grad_norm": 0.9548353377389004, + "learning_rate": 5.38817035392966e-06, + "loss": 0.24706268310546875, + "step": 47860 + }, + { + "epoch": 0.4138745017336642, + "grad_norm": 8.673580710919772, + "learning_rate": 5.388047044638218e-06, + "loss": 0.0751708984375, + "step": 47865 + }, + { + "epoch": 0.4139177352552075, + "grad_norm": 20.574835387533135, + "learning_rate": 5.387923724333316e-06, + "loss": 0.502490234375, + "step": 47870 + }, + { + "epoch": 0.41396096877675076, + "grad_norm": 5.195394816018146, + "learning_rate": 5.387800393015519e-06, + "loss": 0.04202117919921875, + "step": 47875 + }, + { + "epoch": 0.414004202298294, + "grad_norm": 0.9724516573322455, + "learning_rate": 5.387677050685398e-06, + "loss": 0.17160491943359374, + "step": 47880 + }, + { + "epoch": 0.4140474358198373, + "grad_norm": 4.9159086874427915, + "learning_rate": 5.387553697343522e-06, + "loss": 0.25988616943359377, + "step": 47885 + }, + { + "epoch": 0.41409066934138056, + "grad_norm": 0.40239452004443205, + "learning_rate": 5.387430332990459e-06, + "loss": 0.10889739990234375, + "step": 47890 + }, + { + "epoch": 0.4141339028629238, + "grad_norm": 7.085107476296256, + "learning_rate": 5.387306957626778e-06, + "loss": 0.5761199951171875, + "step": 47895 + }, + { + "epoch": 0.41417713638446707, + "grad_norm": 10.202010609664725, + "learning_rate": 5.387183571253049e-06, + "loss": 0.09440574645996094, + "step": 47900 + }, + { + "epoch": 0.4142203699060103, + "grad_norm": 4.71721793755036, + "learning_rate": 5.38706017386984e-06, + "loss": 0.19040069580078126, + "step": 47905 + }, + { + "epoch": 0.4142636034275536, + "grad_norm": 0.8464236399174081, + "learning_rate": 5.38693676547772e-06, + "loss": 0.30943756103515624, + "step": 47910 + }, + { + "epoch": 0.41430683694909687, + "grad_norm": 13.691288476653893, + "learning_rate": 5.3868133460772585e-06, + "loss": 0.23440399169921874, + "step": 47915 + }, + { + "epoch": 0.4143500704706401, + "grad_norm": 13.226581031924509, + "learning_rate": 5.386689915669025e-06, + "loss": 0.03840484619140625, + "step": 47920 + }, + { + "epoch": 0.4143933039921834, + "grad_norm": 4.023451073494493, + "learning_rate": 5.3865664742535885e-06, + "loss": 0.18244800567626954, + "step": 47925 + }, + { + "epoch": 0.41443653751372667, + "grad_norm": 28.121325071749425, + "learning_rate": 5.386443021831519e-06, + "loss": 0.2663414001464844, + "step": 47930 + }, + { + "epoch": 0.4144797710352699, + "grad_norm": 6.2385673515485545, + "learning_rate": 5.386319558403384e-06, + "loss": 0.1500091552734375, + "step": 47935 + }, + { + "epoch": 0.4145230045568132, + "grad_norm": 10.142458389667798, + "learning_rate": 5.386196083969754e-06, + "loss": 0.06174545288085938, + "step": 47940 + }, + { + "epoch": 0.4145662380783564, + "grad_norm": 0.9065364573092537, + "learning_rate": 5.3860725985312e-06, + "loss": 0.061783218383789064, + "step": 47945 + }, + { + "epoch": 0.4146094715998997, + "grad_norm": 5.398015044158015, + "learning_rate": 5.385949102088289e-06, + "loss": 0.321636962890625, + "step": 47950 + }, + { + "epoch": 0.414652705121443, + "grad_norm": 10.686618680573746, + "learning_rate": 5.3858255946415915e-06, + "loss": 0.11916580200195312, + "step": 47955 + }, + { + "epoch": 0.4146959386429862, + "grad_norm": 19.796294131854403, + "learning_rate": 5.385702076191677e-06, + "loss": 0.133514404296875, + "step": 47960 + }, + { + "epoch": 0.4147391721645295, + "grad_norm": 25.490149764870765, + "learning_rate": 5.3855785467391155e-06, + "loss": 0.226934814453125, + "step": 47965 + }, + { + "epoch": 0.4147824056860728, + "grad_norm": 10.478009848061122, + "learning_rate": 5.385455006284477e-06, + "loss": 0.15075340270996093, + "step": 47970 + }, + { + "epoch": 0.414825639207616, + "grad_norm": 17.374809853470467, + "learning_rate": 5.38533145482833e-06, + "loss": 0.11982421875, + "step": 47975 + }, + { + "epoch": 0.4148688727291593, + "grad_norm": 1.8507359781405268, + "learning_rate": 5.385207892371246e-06, + "loss": 0.13263168334960937, + "step": 47980 + }, + { + "epoch": 0.4149121062507025, + "grad_norm": 40.20243208985112, + "learning_rate": 5.385084318913794e-06, + "loss": 0.30587158203125, + "step": 47985 + }, + { + "epoch": 0.4149553397722458, + "grad_norm": 9.135492050764588, + "learning_rate": 5.3849607344565425e-06, + "loss": 0.0533905029296875, + "step": 47990 + }, + { + "epoch": 0.4149985732937891, + "grad_norm": 0.3805567269376627, + "learning_rate": 5.384837139000064e-06, + "loss": 0.2799468994140625, + "step": 47995 + }, + { + "epoch": 0.4150418068153323, + "grad_norm": 0.7245610846126471, + "learning_rate": 5.384713532544927e-06, + "loss": 0.02008514404296875, + "step": 48000 + }, + { + "epoch": 0.4150850403368756, + "grad_norm": 7.675853067104411, + "learning_rate": 5.384589915091702e-06, + "loss": 0.06804580688476562, + "step": 48005 + }, + { + "epoch": 0.4151282738584189, + "grad_norm": 68.49901303170691, + "learning_rate": 5.384466286640959e-06, + "loss": 0.36416091918945315, + "step": 48010 + }, + { + "epoch": 0.4151715073799621, + "grad_norm": 5.144576443016999, + "learning_rate": 5.384342647193268e-06, + "loss": 0.3541839599609375, + "step": 48015 + }, + { + "epoch": 0.4152147409015054, + "grad_norm": 33.492380449466815, + "learning_rate": 5.3842189967492e-06, + "loss": 0.5456546783447266, + "step": 48020 + }, + { + "epoch": 0.4152579744230487, + "grad_norm": 23.18558983352477, + "learning_rate": 5.384095335309323e-06, + "loss": 0.20009765625, + "step": 48025 + }, + { + "epoch": 0.4153012079445919, + "grad_norm": 0.24094977742599866, + "learning_rate": 5.383971662874211e-06, + "loss": 0.1589996337890625, + "step": 48030 + }, + { + "epoch": 0.4153444414661352, + "grad_norm": 15.283078326383325, + "learning_rate": 5.383847979444432e-06, + "loss": 0.18464202880859376, + "step": 48035 + }, + { + "epoch": 0.4153876749876784, + "grad_norm": 22.14125230778901, + "learning_rate": 5.3837242850205555e-06, + "loss": 0.214691162109375, + "step": 48040 + }, + { + "epoch": 0.4154309085092217, + "grad_norm": 2.946387268523239, + "learning_rate": 5.3836005796031545e-06, + "loss": 0.0990936279296875, + "step": 48045 + }, + { + "epoch": 0.415474142030765, + "grad_norm": 66.72295050451172, + "learning_rate": 5.383476863192797e-06, + "loss": 0.31996307373046873, + "step": 48050 + }, + { + "epoch": 0.4155173755523082, + "grad_norm": 0.12863559926370746, + "learning_rate": 5.383353135790056e-06, + "loss": 0.07201156616210938, + "step": 48055 + }, + { + "epoch": 0.4155606090738515, + "grad_norm": 16.127927195421, + "learning_rate": 5.383229397395501e-06, + "loss": 0.2562744140625, + "step": 48060 + }, + { + "epoch": 0.4156038425953948, + "grad_norm": 7.281042635850806, + "learning_rate": 5.383105648009702e-06, + "loss": 0.08704681396484375, + "step": 48065 + }, + { + "epoch": 0.415647076116938, + "grad_norm": 12.249041962661815, + "learning_rate": 5.38298188763323e-06, + "loss": 0.07349853515625, + "step": 48070 + }, + { + "epoch": 0.4156903096384813, + "grad_norm": 2.3312195331404433, + "learning_rate": 5.382858116266657e-06, + "loss": 0.12617111206054688, + "step": 48075 + }, + { + "epoch": 0.41573354316002453, + "grad_norm": 0.2939704337644901, + "learning_rate": 5.382734333910552e-06, + "loss": 0.2038543701171875, + "step": 48080 + }, + { + "epoch": 0.4157767766815678, + "grad_norm": 6.128229727861244, + "learning_rate": 5.382610540565487e-06, + "loss": 0.094525146484375, + "step": 48085 + }, + { + "epoch": 0.4158200102031111, + "grad_norm": 1.3905310696246733, + "learning_rate": 5.3824867362320335e-06, + "loss": 0.0238372802734375, + "step": 48090 + }, + { + "epoch": 0.41586324372465433, + "grad_norm": 4.27223669119259, + "learning_rate": 5.382362920910761e-06, + "loss": 0.070660400390625, + "step": 48095 + }, + { + "epoch": 0.4159064772461976, + "grad_norm": 10.0032184127061, + "learning_rate": 5.382239094602242e-06, + "loss": 0.1001434326171875, + "step": 48100 + }, + { + "epoch": 0.4159497107677409, + "grad_norm": 1.633136040919863, + "learning_rate": 5.3821152573070455e-06, + "loss": 0.1045166015625, + "step": 48105 + }, + { + "epoch": 0.41599294428928413, + "grad_norm": 4.984756964191833, + "learning_rate": 5.381991409025745e-06, + "loss": 0.1630615234375, + "step": 48110 + }, + { + "epoch": 0.4160361778108274, + "grad_norm": 29.28263730281253, + "learning_rate": 5.38186754975891e-06, + "loss": 0.1477264404296875, + "step": 48115 + }, + { + "epoch": 0.41607941133237064, + "grad_norm": 0.33587625516712954, + "learning_rate": 5.381743679507112e-06, + "loss": 0.04154815673828125, + "step": 48120 + }, + { + "epoch": 0.4161226448539139, + "grad_norm": 24.52046901414856, + "learning_rate": 5.381619798270924e-06, + "loss": 0.425836181640625, + "step": 48125 + }, + { + "epoch": 0.4161658783754572, + "grad_norm": 17.64726039608274, + "learning_rate": 5.3814959060509145e-06, + "loss": 0.24353485107421874, + "step": 48130 + }, + { + "epoch": 0.41620911189700044, + "grad_norm": 9.391935811462066, + "learning_rate": 5.381372002847657e-06, + "loss": 0.09025115966796875, + "step": 48135 + }, + { + "epoch": 0.4162523454185437, + "grad_norm": 5.230728158759349, + "learning_rate": 5.381248088661723e-06, + "loss": 0.05433578491210937, + "step": 48140 + }, + { + "epoch": 0.416295578940087, + "grad_norm": 21.009044454161376, + "learning_rate": 5.381124163493682e-06, + "loss": 0.0728668212890625, + "step": 48145 + }, + { + "epoch": 0.41633881246163024, + "grad_norm": 72.28570399347501, + "learning_rate": 5.381000227344106e-06, + "loss": 0.374072265625, + "step": 48150 + }, + { + "epoch": 0.4163820459831735, + "grad_norm": 34.17052332564619, + "learning_rate": 5.380876280213568e-06, + "loss": 0.262158203125, + "step": 48155 + }, + { + "epoch": 0.41642527950471675, + "grad_norm": 0.5763978902974357, + "learning_rate": 5.380752322102641e-06, + "loss": 0.07277297973632812, + "step": 48160 + }, + { + "epoch": 0.41646851302626003, + "grad_norm": 4.827671768828529, + "learning_rate": 5.380628353011893e-06, + "loss": 0.10288848876953124, + "step": 48165 + }, + { + "epoch": 0.4165117465478033, + "grad_norm": 1.4843620428808932, + "learning_rate": 5.380504372941897e-06, + "loss": 0.10431365966796875, + "step": 48170 + }, + { + "epoch": 0.41655498006934655, + "grad_norm": 14.329448541886887, + "learning_rate": 5.380380381893226e-06, + "loss": 0.12571029663085936, + "step": 48175 + }, + { + "epoch": 0.41659821359088983, + "grad_norm": 4.209252324787454, + "learning_rate": 5.3802563798664506e-06, + "loss": 0.23634109497070313, + "step": 48180 + }, + { + "epoch": 0.4166414471124331, + "grad_norm": 3.391286684044985, + "learning_rate": 5.380132366862143e-06, + "loss": 0.1631256103515625, + "step": 48185 + }, + { + "epoch": 0.41668468063397635, + "grad_norm": 2.7898672734541163, + "learning_rate": 5.3800083428808754e-06, + "loss": 0.17276229858398437, + "step": 48190 + }, + { + "epoch": 0.41672791415551963, + "grad_norm": 4.544900047257715, + "learning_rate": 5.379884307923219e-06, + "loss": 0.11847991943359375, + "step": 48195 + }, + { + "epoch": 0.4167711476770629, + "grad_norm": 2.851582777794088, + "learning_rate": 5.379760261989747e-06, + "loss": 0.1988494873046875, + "step": 48200 + }, + { + "epoch": 0.41681438119860614, + "grad_norm": 8.922360272547396, + "learning_rate": 5.379636205081031e-06, + "loss": 0.136016845703125, + "step": 48205 + }, + { + "epoch": 0.4168576147201494, + "grad_norm": 26.571935659831592, + "learning_rate": 5.379512137197644e-06, + "loss": 0.16607513427734374, + "step": 48210 + }, + { + "epoch": 0.41690084824169266, + "grad_norm": 2.6026256908447434, + "learning_rate": 5.379388058340156e-06, + "loss": 0.25580835342407227, + "step": 48215 + }, + { + "epoch": 0.41694408176323594, + "grad_norm": 46.97625507672296, + "learning_rate": 5.3792639685091405e-06, + "loss": 0.48223876953125, + "step": 48220 + }, + { + "epoch": 0.4169873152847792, + "grad_norm": 12.688875801431257, + "learning_rate": 5.379139867705171e-06, + "loss": 0.0646820068359375, + "step": 48225 + }, + { + "epoch": 0.41703054880632245, + "grad_norm": 12.276256160158171, + "learning_rate": 5.3790157559288175e-06, + "loss": 0.26214599609375, + "step": 48230 + }, + { + "epoch": 0.41707378232786574, + "grad_norm": 15.710996570614586, + "learning_rate": 5.378891633180654e-06, + "loss": 0.1069305419921875, + "step": 48235 + }, + { + "epoch": 0.417117015849409, + "grad_norm": 24.4350090553633, + "learning_rate": 5.378767499461253e-06, + "loss": 0.2550201416015625, + "step": 48240 + }, + { + "epoch": 0.41716024937095225, + "grad_norm": 2.201772348946407, + "learning_rate": 5.378643354771185e-06, + "loss": 0.23464508056640626, + "step": 48245 + }, + { + "epoch": 0.41720348289249554, + "grad_norm": 5.29063081895081, + "learning_rate": 5.378519199111025e-06, + "loss": 0.1331390380859375, + "step": 48250 + }, + { + "epoch": 0.41724671641403877, + "grad_norm": 0.7825865002700703, + "learning_rate": 5.378395032481345e-06, + "loss": 0.18846893310546875, + "step": 48255 + }, + { + "epoch": 0.41728994993558205, + "grad_norm": 0.6532794968915692, + "learning_rate": 5.378270854882716e-06, + "loss": 0.24623565673828124, + "step": 48260 + }, + { + "epoch": 0.41733318345712533, + "grad_norm": 19.66249892544041, + "learning_rate": 5.378146666315713e-06, + "loss": 0.5084556579589844, + "step": 48265 + }, + { + "epoch": 0.41737641697866856, + "grad_norm": 2.7498509478543673, + "learning_rate": 5.3780224667809085e-06, + "loss": 0.18785018920898439, + "step": 48270 + }, + { + "epoch": 0.41741965050021185, + "grad_norm": 3.502128924844685, + "learning_rate": 5.377898256278872e-06, + "loss": 0.03795166015625, + "step": 48275 + }, + { + "epoch": 0.41746288402175513, + "grad_norm": 5.673253211569263, + "learning_rate": 5.377774034810181e-06, + "loss": 0.1427215576171875, + "step": 48280 + }, + { + "epoch": 0.41750611754329836, + "grad_norm": 0.7673827552937642, + "learning_rate": 5.377649802375406e-06, + "loss": 0.0589080810546875, + "step": 48285 + }, + { + "epoch": 0.41754935106484165, + "grad_norm": 0.674365246058974, + "learning_rate": 5.377525558975119e-06, + "loss": 0.08554840087890625, + "step": 48290 + }, + { + "epoch": 0.4175925845863849, + "grad_norm": 2.393149502111299, + "learning_rate": 5.377401304609896e-06, + "loss": 0.117877197265625, + "step": 48295 + }, + { + "epoch": 0.41763581810792816, + "grad_norm": 14.20949225753329, + "learning_rate": 5.377277039280307e-06, + "loss": 0.15269241333007813, + "step": 48300 + }, + { + "epoch": 0.41767905162947144, + "grad_norm": 0.5250489398982541, + "learning_rate": 5.377152762986927e-06, + "loss": 0.21032791137695311, + "step": 48305 + }, + { + "epoch": 0.41772228515101467, + "grad_norm": 0.4918934694166296, + "learning_rate": 5.3770284757303275e-06, + "loss": 0.0517669677734375, + "step": 48310 + }, + { + "epoch": 0.41776551867255796, + "grad_norm": 44.34938704348792, + "learning_rate": 5.376904177511084e-06, + "loss": 0.5942306041717529, + "step": 48315 + }, + { + "epoch": 0.41780875219410124, + "grad_norm": 2.259898179353893, + "learning_rate": 5.376779868329767e-06, + "loss": 0.17299289703369142, + "step": 48320 + }, + { + "epoch": 0.41785198571564447, + "grad_norm": 17.406118818152184, + "learning_rate": 5.376655548186953e-06, + "loss": 0.09111251831054687, + "step": 48325 + }, + { + "epoch": 0.41789521923718775, + "grad_norm": 0.32932512364706557, + "learning_rate": 5.376531217083212e-06, + "loss": 0.12773284912109376, + "step": 48330 + }, + { + "epoch": 0.417938452758731, + "grad_norm": 8.618201803097834, + "learning_rate": 5.3764068750191204e-06, + "loss": 0.096868896484375, + "step": 48335 + }, + { + "epoch": 0.41798168628027427, + "grad_norm": 33.25484235421268, + "learning_rate": 5.3762825219952486e-06, + "loss": 0.32825164794921874, + "step": 48340 + }, + { + "epoch": 0.41802491980181755, + "grad_norm": 15.534281897138548, + "learning_rate": 5.376158158012173e-06, + "loss": 0.0994333267211914, + "step": 48345 + }, + { + "epoch": 0.4180681533233608, + "grad_norm": 4.316786934235023, + "learning_rate": 5.376033783070466e-06, + "loss": 0.11125373840332031, + "step": 48350 + }, + { + "epoch": 0.41811138684490406, + "grad_norm": 0.8025580871133527, + "learning_rate": 5.375909397170701e-06, + "loss": 0.1758575439453125, + "step": 48355 + }, + { + "epoch": 0.41815462036644735, + "grad_norm": 14.318562779685227, + "learning_rate": 5.375785000313451e-06, + "loss": 0.3245361328125, + "step": 48360 + }, + { + "epoch": 0.4181978538879906, + "grad_norm": 1.2944936683803876, + "learning_rate": 5.3756605924992914e-06, + "loss": 0.25786285400390624, + "step": 48365 + }, + { + "epoch": 0.41824108740953386, + "grad_norm": 28.1522969750525, + "learning_rate": 5.375536173728795e-06, + "loss": 0.23489990234375, + "step": 48370 + }, + { + "epoch": 0.41828432093107715, + "grad_norm": 1.0497266104511909, + "learning_rate": 5.375411744002536e-06, + "loss": 0.12333450317382813, + "step": 48375 + }, + { + "epoch": 0.4183275544526204, + "grad_norm": 5.499149887917257, + "learning_rate": 5.375287303321087e-06, + "loss": 0.06219825744628906, + "step": 48380 + }, + { + "epoch": 0.41837078797416366, + "grad_norm": 1.2742000636076893, + "learning_rate": 5.375162851685023e-06, + "loss": 0.17187881469726562, + "step": 48385 + }, + { + "epoch": 0.4184140214957069, + "grad_norm": 2.46806419401039, + "learning_rate": 5.375038389094918e-06, + "loss": 0.08227462768554687, + "step": 48390 + }, + { + "epoch": 0.4184572550172502, + "grad_norm": 5.126777761736869, + "learning_rate": 5.374913915551346e-06, + "loss": 0.15431251525878906, + "step": 48395 + }, + { + "epoch": 0.41850048853879346, + "grad_norm": 9.424535970657516, + "learning_rate": 5.374789431054881e-06, + "loss": 0.333990478515625, + "step": 48400 + }, + { + "epoch": 0.4185437220603367, + "grad_norm": 8.985425205384834, + "learning_rate": 5.374664935606096e-06, + "loss": 0.15320892333984376, + "step": 48405 + }, + { + "epoch": 0.41858695558187997, + "grad_norm": 1.5238700946008017, + "learning_rate": 5.374540429205568e-06, + "loss": 0.0557708740234375, + "step": 48410 + }, + { + "epoch": 0.41863018910342326, + "grad_norm": 6.064510652458811, + "learning_rate": 5.374415911853867e-06, + "loss": 0.13660049438476562, + "step": 48415 + }, + { + "epoch": 0.4186734226249665, + "grad_norm": 1.4219690807030332, + "learning_rate": 5.374291383551571e-06, + "loss": 0.34859619140625, + "step": 48420 + }, + { + "epoch": 0.41871665614650977, + "grad_norm": 8.971896365189453, + "learning_rate": 5.374166844299254e-06, + "loss": 0.32407379150390625, + "step": 48425 + }, + { + "epoch": 0.418759889668053, + "grad_norm": 7.40214070853429, + "learning_rate": 5.374042294097487e-06, + "loss": 0.047429656982421874, + "step": 48430 + }, + { + "epoch": 0.4188031231895963, + "grad_norm": 2.2031553109711326, + "learning_rate": 5.373917732946848e-06, + "loss": 0.2622528076171875, + "step": 48435 + }, + { + "epoch": 0.41884635671113957, + "grad_norm": 14.850980964066823, + "learning_rate": 5.3737931608479095e-06, + "loss": 0.3306671142578125, + "step": 48440 + }, + { + "epoch": 0.4188895902326828, + "grad_norm": 34.91127784466634, + "learning_rate": 5.373668577801247e-06, + "loss": 0.43642005920410154, + "step": 48445 + }, + { + "epoch": 0.4189328237542261, + "grad_norm": 2.864926735442042, + "learning_rate": 5.373543983807435e-06, + "loss": 0.1025634765625, + "step": 48450 + }, + { + "epoch": 0.41897605727576936, + "grad_norm": 7.599264760069823, + "learning_rate": 5.373419378867048e-06, + "loss": 0.06829605102539063, + "step": 48455 + }, + { + "epoch": 0.4190192907973126, + "grad_norm": 1.0087874708507447, + "learning_rate": 5.373294762980661e-06, + "loss": 0.03948211669921875, + "step": 48460 + }, + { + "epoch": 0.4190625243188559, + "grad_norm": 28.912613788131726, + "learning_rate": 5.373170136148847e-06, + "loss": 0.07109451293945312, + "step": 48465 + }, + { + "epoch": 0.4191057578403991, + "grad_norm": 14.448857801627966, + "learning_rate": 5.3730454983721835e-06, + "loss": 0.07188644409179687, + "step": 48470 + }, + { + "epoch": 0.4191489913619424, + "grad_norm": 19.744876489015052, + "learning_rate": 5.372920849651242e-06, + "loss": 0.20830078125, + "step": 48475 + }, + { + "epoch": 0.4191922248834857, + "grad_norm": 4.3132403845812695, + "learning_rate": 5.3727961899866e-06, + "loss": 0.1010772705078125, + "step": 48480 + }, + { + "epoch": 0.4192354584050289, + "grad_norm": 14.210121430443492, + "learning_rate": 5.372671519378833e-06, + "loss": 0.11201133728027343, + "step": 48485 + }, + { + "epoch": 0.4192786919265722, + "grad_norm": 6.739637701161141, + "learning_rate": 5.372546837828512e-06, + "loss": 0.047012710571289064, + "step": 48490 + }, + { + "epoch": 0.4193219254481155, + "grad_norm": 22.38692040468527, + "learning_rate": 5.372422145336217e-06, + "loss": 0.24859619140625, + "step": 48495 + }, + { + "epoch": 0.4193651589696587, + "grad_norm": 9.686298435481982, + "learning_rate": 5.372297441902519e-06, + "loss": 0.08523178100585938, + "step": 48500 + }, + { + "epoch": 0.419408392491202, + "grad_norm": 71.26845008934431, + "learning_rate": 5.372172727527997e-06, + "loss": 0.48323745727539064, + "step": 48505 + }, + { + "epoch": 0.4194516260127452, + "grad_norm": 3.592126343637349, + "learning_rate": 5.372048002213222e-06, + "loss": 0.4051300048828125, + "step": 48510 + }, + { + "epoch": 0.4194948595342885, + "grad_norm": 10.953203111386992, + "learning_rate": 5.371923265958771e-06, + "loss": 0.26692657470703124, + "step": 48515 + }, + { + "epoch": 0.4195380930558318, + "grad_norm": 2.4677925460706267, + "learning_rate": 5.371798518765221e-06, + "loss": 0.02450714111328125, + "step": 48520 + }, + { + "epoch": 0.419581326577375, + "grad_norm": 4.3898971777625055, + "learning_rate": 5.3716737606331455e-06, + "loss": 0.1552734375, + "step": 48525 + }, + { + "epoch": 0.4196245600989183, + "grad_norm": 10.211918356048306, + "learning_rate": 5.371548991563119e-06, + "loss": 0.23114395141601562, + "step": 48530 + }, + { + "epoch": 0.4196677936204616, + "grad_norm": 3.485148275822201, + "learning_rate": 5.3714242115557195e-06, + "loss": 0.40965728759765624, + "step": 48535 + }, + { + "epoch": 0.4197110271420048, + "grad_norm": 18.99595536312448, + "learning_rate": 5.371299420611519e-06, + "loss": 0.09710845947265626, + "step": 48540 + }, + { + "epoch": 0.4197542606635481, + "grad_norm": 0.343682789796472, + "learning_rate": 5.371174618731097e-06, + "loss": 0.10222148895263672, + "step": 48545 + }, + { + "epoch": 0.4197974941850914, + "grad_norm": 0.2533490856301123, + "learning_rate": 5.371049805915027e-06, + "loss": 0.07027702331542969, + "step": 48550 + }, + { + "epoch": 0.4198407277066346, + "grad_norm": 5.76605533788665, + "learning_rate": 5.370924982163885e-06, + "loss": 0.19874076843261718, + "step": 48555 + }, + { + "epoch": 0.4198839612281779, + "grad_norm": 10.947034964411463, + "learning_rate": 5.370800147478246e-06, + "loss": 0.1596099853515625, + "step": 48560 + }, + { + "epoch": 0.4199271947497211, + "grad_norm": 3.3985492759246148, + "learning_rate": 5.370675301858686e-06, + "loss": 0.16910552978515625, + "step": 48565 + }, + { + "epoch": 0.4199704282712644, + "grad_norm": 2.4933146259559407, + "learning_rate": 5.370550445305781e-06, + "loss": 0.2128753662109375, + "step": 48570 + }, + { + "epoch": 0.4200136617928077, + "grad_norm": 10.561089096789717, + "learning_rate": 5.370425577820107e-06, + "loss": 0.09704723358154296, + "step": 48575 + }, + { + "epoch": 0.4200568953143509, + "grad_norm": 3.192719307723856, + "learning_rate": 5.37030069940224e-06, + "loss": 0.08622512817382813, + "step": 48580 + }, + { + "epoch": 0.4201001288358942, + "grad_norm": 12.2285805541456, + "learning_rate": 5.370175810052755e-06, + "loss": 0.251416015625, + "step": 48585 + }, + { + "epoch": 0.4201433623574375, + "grad_norm": 37.84558642950072, + "learning_rate": 5.370050909772229e-06, + "loss": 0.13362884521484375, + "step": 48590 + }, + { + "epoch": 0.4201865958789807, + "grad_norm": 16.29110340101704, + "learning_rate": 5.369925998561238e-06, + "loss": 0.28201751708984374, + "step": 48595 + }, + { + "epoch": 0.420229829400524, + "grad_norm": 21.110651441023712, + "learning_rate": 5.369801076420356e-06, + "loss": 0.20810470581054688, + "step": 48600 + }, + { + "epoch": 0.42027306292206723, + "grad_norm": 0.7906686725088106, + "learning_rate": 5.3696761433501626e-06, + "loss": 0.08717727661132812, + "step": 48605 + }, + { + "epoch": 0.4203162964436105, + "grad_norm": 11.801228304265377, + "learning_rate": 5.3695511993512314e-06, + "loss": 0.437554931640625, + "step": 48610 + }, + { + "epoch": 0.4203595299651538, + "grad_norm": 0.9921259520813045, + "learning_rate": 5.36942624442414e-06, + "loss": 0.14301700592041017, + "step": 48615 + }, + { + "epoch": 0.42040276348669703, + "grad_norm": 0.4169919904590316, + "learning_rate": 5.369301278569464e-06, + "loss": 0.16676483154296876, + "step": 48620 + }, + { + "epoch": 0.4204459970082403, + "grad_norm": 2.0562940082830767, + "learning_rate": 5.36917630178778e-06, + "loss": 0.18133392333984374, + "step": 48625 + }, + { + "epoch": 0.4204892305297836, + "grad_norm": 8.726651951470851, + "learning_rate": 5.369051314079663e-06, + "loss": 0.1709381103515625, + "step": 48630 + }, + { + "epoch": 0.4205324640513268, + "grad_norm": 45.26454097606495, + "learning_rate": 5.368926315445692e-06, + "loss": 0.3243888854980469, + "step": 48635 + }, + { + "epoch": 0.4205756975728701, + "grad_norm": 6.092609328819526, + "learning_rate": 5.368801305886442e-06, + "loss": 0.1117034912109375, + "step": 48640 + }, + { + "epoch": 0.42061893109441334, + "grad_norm": 6.161779990298926, + "learning_rate": 5.368676285402489e-06, + "loss": 0.10281944274902344, + "step": 48645 + }, + { + "epoch": 0.4206621646159566, + "grad_norm": 0.039532745878404496, + "learning_rate": 5.368551253994411e-06, + "loss": 0.15846595764160157, + "step": 48650 + }, + { + "epoch": 0.4207053981374999, + "grad_norm": 9.502889472528683, + "learning_rate": 5.368426211662784e-06, + "loss": 0.3072906494140625, + "step": 48655 + }, + { + "epoch": 0.42074863165904314, + "grad_norm": 35.60157908705918, + "learning_rate": 5.3683011584081836e-06, + "loss": 0.28159255981445314, + "step": 48660 + }, + { + "epoch": 0.4207918651805864, + "grad_norm": 3.6272859244432287, + "learning_rate": 5.368176094231189e-06, + "loss": 0.2009765625, + "step": 48665 + }, + { + "epoch": 0.4208350987021297, + "grad_norm": 1.0929853318323282, + "learning_rate": 5.368051019132374e-06, + "loss": 0.13524169921875, + "step": 48670 + }, + { + "epoch": 0.42087833222367294, + "grad_norm": 4.293215318785196, + "learning_rate": 5.367925933112318e-06, + "loss": 0.038840103149414065, + "step": 48675 + }, + { + "epoch": 0.4209215657452162, + "grad_norm": 10.546805825448685, + "learning_rate": 5.367800836171597e-06, + "loss": 0.3243400573730469, + "step": 48680 + }, + { + "epoch": 0.42096479926675945, + "grad_norm": 6.664465474969581, + "learning_rate": 5.367675728310787e-06, + "loss": 0.12433547973632812, + "step": 48685 + }, + { + "epoch": 0.42100803278830273, + "grad_norm": 27.056558747853217, + "learning_rate": 5.367550609530465e-06, + "loss": 0.260009765625, + "step": 48690 + }, + { + "epoch": 0.421051266309846, + "grad_norm": 15.442301348898969, + "learning_rate": 5.36742547983121e-06, + "loss": 0.2648712158203125, + "step": 48695 + }, + { + "epoch": 0.42109449983138925, + "grad_norm": 0.6151493330353628, + "learning_rate": 5.367300339213598e-06, + "loss": 0.28626480102539065, + "step": 48700 + }, + { + "epoch": 0.42113773335293253, + "grad_norm": 0.9435227410687332, + "learning_rate": 5.367175187678206e-06, + "loss": 0.16744384765625, + "step": 48705 + }, + { + "epoch": 0.4211809668744758, + "grad_norm": 10.974087346511173, + "learning_rate": 5.367050025225611e-06, + "loss": 0.15498104095458984, + "step": 48710 + }, + { + "epoch": 0.42122420039601904, + "grad_norm": 25.903109029807908, + "learning_rate": 5.366924851856389e-06, + "loss": 0.333868408203125, + "step": 48715 + }, + { + "epoch": 0.42126743391756233, + "grad_norm": 22.909776679837854, + "learning_rate": 5.36679966757112e-06, + "loss": 0.264581298828125, + "step": 48720 + }, + { + "epoch": 0.42131066743910556, + "grad_norm": 2.868071190027785, + "learning_rate": 5.366674472370381e-06, + "loss": 0.17643508911132813, + "step": 48725 + }, + { + "epoch": 0.42135390096064884, + "grad_norm": 0.35468259539814734, + "learning_rate": 5.366549266254747e-06, + "loss": 0.12037353515625, + "step": 48730 + }, + { + "epoch": 0.4213971344821921, + "grad_norm": 1.8746647448763623, + "learning_rate": 5.366424049224798e-06, + "loss": 0.1105316162109375, + "step": 48735 + }, + { + "epoch": 0.42144036800373536, + "grad_norm": 8.142236823204062, + "learning_rate": 5.3662988212811095e-06, + "loss": 0.11075439453125, + "step": 48740 + }, + { + "epoch": 0.42148360152527864, + "grad_norm": 3.9453885850181076, + "learning_rate": 5.366173582424261e-06, + "loss": 0.0267181396484375, + "step": 48745 + }, + { + "epoch": 0.4215268350468219, + "grad_norm": 24.977786286901992, + "learning_rate": 5.366048332654827e-06, + "loss": 0.16832351684570312, + "step": 48750 + }, + { + "epoch": 0.42157006856836515, + "grad_norm": 2.567903860999427, + "learning_rate": 5.365923071973389e-06, + "loss": 0.182611083984375, + "step": 48755 + }, + { + "epoch": 0.42161330208990844, + "grad_norm": 24.845689196626935, + "learning_rate": 5.365797800380522e-06, + "loss": 0.11560821533203125, + "step": 48760 + }, + { + "epoch": 0.4216565356114517, + "grad_norm": 6.538152137768772, + "learning_rate": 5.365672517876805e-06, + "loss": 0.23180084228515624, + "step": 48765 + }, + { + "epoch": 0.42169976913299495, + "grad_norm": 3.34580655708944, + "learning_rate": 5.365547224462815e-06, + "loss": 0.081591796875, + "step": 48770 + }, + { + "epoch": 0.42174300265453823, + "grad_norm": 2.565356229784726, + "learning_rate": 5.3654219201391305e-06, + "loss": 0.184423828125, + "step": 48775 + }, + { + "epoch": 0.42178623617608146, + "grad_norm": 11.542654640115105, + "learning_rate": 5.3652966049063296e-06, + "loss": 0.10995330810546874, + "step": 48780 + }, + { + "epoch": 0.42182946969762475, + "grad_norm": 7.143509776006069, + "learning_rate": 5.365171278764989e-06, + "loss": 0.0375152587890625, + "step": 48785 + }, + { + "epoch": 0.42187270321916803, + "grad_norm": 23.852632984112045, + "learning_rate": 5.365045941715687e-06, + "loss": 0.08684349060058594, + "step": 48790 + }, + { + "epoch": 0.42191593674071126, + "grad_norm": 36.55474011046085, + "learning_rate": 5.364920593759004e-06, + "loss": 0.16559925079345703, + "step": 48795 + }, + { + "epoch": 0.42195917026225455, + "grad_norm": 23.772262726373743, + "learning_rate": 5.364795234895514e-06, + "loss": 0.15311279296875, + "step": 48800 + }, + { + "epoch": 0.42200240378379783, + "grad_norm": 0.45015737692262886, + "learning_rate": 5.364669865125798e-06, + "loss": 0.0705902099609375, + "step": 48805 + }, + { + "epoch": 0.42204563730534106, + "grad_norm": 0.1461572401100519, + "learning_rate": 5.364544484450434e-06, + "loss": 0.0486968994140625, + "step": 48810 + }, + { + "epoch": 0.42208887082688434, + "grad_norm": 10.678897283337653, + "learning_rate": 5.3644190928699995e-06, + "loss": 0.19963302612304687, + "step": 48815 + }, + { + "epoch": 0.4221321043484276, + "grad_norm": 30.622743826816507, + "learning_rate": 5.364293690385073e-06, + "loss": 0.5018102645874023, + "step": 48820 + }, + { + "epoch": 0.42217533786997086, + "grad_norm": 4.452527993816735, + "learning_rate": 5.364168276996233e-06, + "loss": 0.28198699951171874, + "step": 48825 + }, + { + "epoch": 0.42221857139151414, + "grad_norm": 3.537867582383246, + "learning_rate": 5.364042852704058e-06, + "loss": 0.2012392044067383, + "step": 48830 + }, + { + "epoch": 0.42226180491305737, + "grad_norm": 5.242517922349908, + "learning_rate": 5.3639174175091265e-06, + "loss": 0.127069091796875, + "step": 48835 + }, + { + "epoch": 0.42230503843460065, + "grad_norm": 2.9127322645439095, + "learning_rate": 5.3637919714120155e-06, + "loss": 0.24923458099365234, + "step": 48840 + }, + { + "epoch": 0.42234827195614394, + "grad_norm": 8.749044424430117, + "learning_rate": 5.363666514413306e-06, + "loss": 0.27529296875, + "step": 48845 + }, + { + "epoch": 0.42239150547768717, + "grad_norm": 27.12413063271184, + "learning_rate": 5.363541046513576e-06, + "loss": 0.3373594284057617, + "step": 48850 + }, + { + "epoch": 0.42243473899923045, + "grad_norm": 31.561414660386735, + "learning_rate": 5.363415567713402e-06, + "loss": 0.1553863525390625, + "step": 48855 + }, + { + "epoch": 0.4224779725207737, + "grad_norm": 2.3457365399045784, + "learning_rate": 5.363290078013365e-06, + "loss": 0.0626220703125, + "step": 48860 + }, + { + "epoch": 0.42252120604231697, + "grad_norm": 0.062378475848327904, + "learning_rate": 5.3631645774140436e-06, + "loss": 0.047664833068847653, + "step": 48865 + }, + { + "epoch": 0.42256443956386025, + "grad_norm": 13.87972138538957, + "learning_rate": 5.363039065916014e-06, + "loss": 0.30532684326171877, + "step": 48870 + }, + { + "epoch": 0.4226076730854035, + "grad_norm": 2.1522072212632875, + "learning_rate": 5.362913543519859e-06, + "loss": 0.5181365966796875, + "step": 48875 + }, + { + "epoch": 0.42265090660694676, + "grad_norm": 1.5657887754179085, + "learning_rate": 5.3627880102261545e-06, + "loss": 0.05601634979248047, + "step": 48880 + }, + { + "epoch": 0.42269414012849005, + "grad_norm": 0.3767513563723882, + "learning_rate": 5.362662466035481e-06, + "loss": 0.11365966796875, + "step": 48885 + }, + { + "epoch": 0.4227373736500333, + "grad_norm": 4.75593252800932, + "learning_rate": 5.362536910948417e-06, + "loss": 0.08800201416015625, + "step": 48890 + }, + { + "epoch": 0.42278060717157656, + "grad_norm": 6.522338336891661, + "learning_rate": 5.362411344965541e-06, + "loss": 0.19056549072265624, + "step": 48895 + }, + { + "epoch": 0.4228238406931198, + "grad_norm": 4.475083724263929, + "learning_rate": 5.362285768087433e-06, + "loss": 0.6242401123046875, + "step": 48900 + }, + { + "epoch": 0.4228670742146631, + "grad_norm": 0.1391169716968481, + "learning_rate": 5.362160180314671e-06, + "loss": 0.181292724609375, + "step": 48905 + }, + { + "epoch": 0.42291030773620636, + "grad_norm": 23.94902730062781, + "learning_rate": 5.362034581647836e-06, + "loss": 0.1990631103515625, + "step": 48910 + }, + { + "epoch": 0.4229535412577496, + "grad_norm": 1.9650649298829859, + "learning_rate": 5.3619089720875055e-06, + "loss": 0.039215087890625, + "step": 48915 + }, + { + "epoch": 0.4229967747792929, + "grad_norm": 20.002569144142317, + "learning_rate": 5.36178335163426e-06, + "loss": 0.19862213134765624, + "step": 48920 + }, + { + "epoch": 0.42304000830083616, + "grad_norm": 3.1891383699876537, + "learning_rate": 5.361657720288678e-06, + "loss": 0.08207931518554687, + "step": 48925 + }, + { + "epoch": 0.4230832418223794, + "grad_norm": 5.621286770454488, + "learning_rate": 5.3615320780513394e-06, + "loss": 0.0382110595703125, + "step": 48930 + }, + { + "epoch": 0.42312647534392267, + "grad_norm": 4.063202137974767, + "learning_rate": 5.361406424922824e-06, + "loss": 0.07081756591796876, + "step": 48935 + }, + { + "epoch": 0.42316970886546595, + "grad_norm": 8.93290872254875, + "learning_rate": 5.36128076090371e-06, + "loss": 0.10940723419189453, + "step": 48940 + }, + { + "epoch": 0.4232129423870092, + "grad_norm": 30.097535139273162, + "learning_rate": 5.3611550859945785e-06, + "loss": 0.30941162109375, + "step": 48945 + }, + { + "epoch": 0.42325617590855247, + "grad_norm": 4.0970391305934974, + "learning_rate": 5.3610294001960084e-06, + "loss": 0.09401016235351563, + "step": 48950 + }, + { + "epoch": 0.4232994094300957, + "grad_norm": 2.2867185121462086, + "learning_rate": 5.360903703508579e-06, + "loss": 0.06674365997314453, + "step": 48955 + }, + { + "epoch": 0.423342642951639, + "grad_norm": 15.805006854577996, + "learning_rate": 5.36077799593287e-06, + "loss": 0.09000244140625, + "step": 48960 + }, + { + "epoch": 0.42338587647318227, + "grad_norm": 13.941420061620084, + "learning_rate": 5.360652277469462e-06, + "loss": 0.1823566436767578, + "step": 48965 + }, + { + "epoch": 0.4234291099947255, + "grad_norm": 29.839064643292573, + "learning_rate": 5.360526548118934e-06, + "loss": 0.25881309509277345, + "step": 48970 + }, + { + "epoch": 0.4234723435162688, + "grad_norm": 11.584135807552647, + "learning_rate": 5.360400807881867e-06, + "loss": 0.27070159912109376, + "step": 48975 + }, + { + "epoch": 0.42351557703781206, + "grad_norm": 29.912314774847697, + "learning_rate": 5.3602750567588385e-06, + "loss": 0.17341766357421876, + "step": 48980 + }, + { + "epoch": 0.4235588105593553, + "grad_norm": 0.6364189812968031, + "learning_rate": 5.360149294750431e-06, + "loss": 0.12457656860351562, + "step": 48985 + }, + { + "epoch": 0.4236020440808986, + "grad_norm": 0.22731525402048353, + "learning_rate": 5.360023521857223e-06, + "loss": 0.07642974853515624, + "step": 48990 + }, + { + "epoch": 0.4236452776024418, + "grad_norm": 7.365263403527309, + "learning_rate": 5.359897738079797e-06, + "loss": 0.0953826904296875, + "step": 48995 + }, + { + "epoch": 0.4236885111239851, + "grad_norm": 13.204209367212268, + "learning_rate": 5.3597719434187285e-06, + "loss": 0.074798583984375, + "step": 49000 + }, + { + "epoch": 0.4237317446455284, + "grad_norm": 10.210199141900816, + "learning_rate": 5.359646137874601e-06, + "loss": 0.04886131286621094, + "step": 49005 + }, + { + "epoch": 0.4237749781670716, + "grad_norm": 7.593930594458023, + "learning_rate": 5.3595203214479945e-06, + "loss": 0.072552490234375, + "step": 49010 + }, + { + "epoch": 0.4238182116886149, + "grad_norm": 0.7990524329413664, + "learning_rate": 5.359394494139489e-06, + "loss": 0.17041435241699218, + "step": 49015 + }, + { + "epoch": 0.42386144521015817, + "grad_norm": 3.399943353674323, + "learning_rate": 5.3592686559496645e-06, + "loss": 0.23611602783203126, + "step": 49020 + }, + { + "epoch": 0.4239046787317014, + "grad_norm": 9.810223763530148, + "learning_rate": 5.3591428068791005e-06, + "loss": 0.31491947174072266, + "step": 49025 + }, + { + "epoch": 0.4239479122532447, + "grad_norm": 1.1356217642143835, + "learning_rate": 5.359016946928379e-06, + "loss": 0.13575592041015624, + "step": 49030 + }, + { + "epoch": 0.4239911457747879, + "grad_norm": 0.6609547457432228, + "learning_rate": 5.3588910760980795e-06, + "loss": 0.14140472412109376, + "step": 49035 + }, + { + "epoch": 0.4240343792963312, + "grad_norm": 7.15100387037088, + "learning_rate": 5.358765194388783e-06, + "loss": 0.0686920166015625, + "step": 49040 + }, + { + "epoch": 0.4240776128178745, + "grad_norm": 16.097665093760348, + "learning_rate": 5.3586393018010695e-06, + "loss": 0.059236907958984376, + "step": 49045 + }, + { + "epoch": 0.4241208463394177, + "grad_norm": 8.947870302916533, + "learning_rate": 5.358513398335521e-06, + "loss": 0.32078857421875, + "step": 49050 + }, + { + "epoch": 0.424164079860961, + "grad_norm": 4.119154667644559, + "learning_rate": 5.3583874839927165e-06, + "loss": 0.0921356201171875, + "step": 49055 + }, + { + "epoch": 0.4242073133825043, + "grad_norm": 3.286899033042754, + "learning_rate": 5.358261558773237e-06, + "loss": 0.13483734130859376, + "step": 49060 + }, + { + "epoch": 0.4242505469040475, + "grad_norm": 2.0958236642276975, + "learning_rate": 5.358135622677664e-06, + "loss": 0.0130126953125, + "step": 49065 + }, + { + "epoch": 0.4242937804255908, + "grad_norm": 2.8481622964423248, + "learning_rate": 5.358009675706578e-06, + "loss": 0.09256820678710938, + "step": 49070 + }, + { + "epoch": 0.424337013947134, + "grad_norm": 1.357576511625735, + "learning_rate": 5.3578837178605585e-06, + "loss": 0.2632659912109375, + "step": 49075 + }, + { + "epoch": 0.4243802474686773, + "grad_norm": 5.868389788772937, + "learning_rate": 5.357757749140189e-06, + "loss": 0.15185813903808593, + "step": 49080 + }, + { + "epoch": 0.4244234809902206, + "grad_norm": 2.2661043648782324, + "learning_rate": 5.357631769546049e-06, + "loss": 0.02575225830078125, + "step": 49085 + }, + { + "epoch": 0.4244667145117638, + "grad_norm": 14.661212642150264, + "learning_rate": 5.357505779078719e-06, + "loss": 0.13883056640625, + "step": 49090 + }, + { + "epoch": 0.4245099480333071, + "grad_norm": 2.985778575726632, + "learning_rate": 5.357379777738781e-06, + "loss": 0.10321502685546875, + "step": 49095 + }, + { + "epoch": 0.4245531815548504, + "grad_norm": 4.104723098407341, + "learning_rate": 5.357253765526815e-06, + "loss": 0.24532699584960938, + "step": 49100 + }, + { + "epoch": 0.4245964150763936, + "grad_norm": 1.894414543985667, + "learning_rate": 5.357127742443404e-06, + "loss": 0.06774749755859374, + "step": 49105 + }, + { + "epoch": 0.4246396485979369, + "grad_norm": 8.99275960741876, + "learning_rate": 5.357001708489128e-06, + "loss": 0.40802001953125, + "step": 49110 + }, + { + "epoch": 0.4246828821194802, + "grad_norm": 27.26649130148118, + "learning_rate": 5.3568756636645675e-06, + "loss": 0.6159698486328125, + "step": 49115 + }, + { + "epoch": 0.4247261156410234, + "grad_norm": 10.611351767544877, + "learning_rate": 5.356749607970306e-06, + "loss": 0.1162933349609375, + "step": 49120 + }, + { + "epoch": 0.4247693491625667, + "grad_norm": 6.704178626948088, + "learning_rate": 5.3566235414069225e-06, + "loss": 0.061981201171875, + "step": 49125 + }, + { + "epoch": 0.42481258268410993, + "grad_norm": 9.11672004071212, + "learning_rate": 5.356497463975e-06, + "loss": 0.572503662109375, + "step": 49130 + }, + { + "epoch": 0.4248558162056532, + "grad_norm": 39.46407579708512, + "learning_rate": 5.356371375675119e-06, + "loss": 0.351641845703125, + "step": 49135 + }, + { + "epoch": 0.4248990497271965, + "grad_norm": 29.116294317161522, + "learning_rate": 5.356245276507862e-06, + "loss": 0.4670433044433594, + "step": 49140 + }, + { + "epoch": 0.4249422832487397, + "grad_norm": 14.8971156250242, + "learning_rate": 5.35611916647381e-06, + "loss": 0.22562255859375, + "step": 49145 + }, + { + "epoch": 0.424985516770283, + "grad_norm": 8.675361269539355, + "learning_rate": 5.355993045573544e-06, + "loss": 0.06926040649414063, + "step": 49150 + }, + { + "epoch": 0.4250287502918263, + "grad_norm": 5.533047822358799, + "learning_rate": 5.355866913807646e-06, + "loss": 0.08593063354492188, + "step": 49155 + }, + { + "epoch": 0.4250719838133695, + "grad_norm": 1.1482452675326855, + "learning_rate": 5.355740771176699e-06, + "loss": 0.0825042724609375, + "step": 49160 + }, + { + "epoch": 0.4251152173349128, + "grad_norm": 24.182953128077823, + "learning_rate": 5.355614617681283e-06, + "loss": 0.2350128173828125, + "step": 49165 + }, + { + "epoch": 0.42515845085645604, + "grad_norm": 1.9513759816225578, + "learning_rate": 5.355488453321981e-06, + "loss": 0.13569793701171876, + "step": 49170 + }, + { + "epoch": 0.4252016843779993, + "grad_norm": 5.681667499670318, + "learning_rate": 5.355362278099374e-06, + "loss": 0.13138427734375, + "step": 49175 + }, + { + "epoch": 0.4252449178995426, + "grad_norm": 14.195804286890697, + "learning_rate": 5.355236092014045e-06, + "loss": 0.234942626953125, + "step": 49180 + }, + { + "epoch": 0.42528815142108584, + "grad_norm": 0.6156934870042264, + "learning_rate": 5.355109895066574e-06, + "loss": 0.45244140625, + "step": 49185 + }, + { + "epoch": 0.4253313849426291, + "grad_norm": 6.825752114612405, + "learning_rate": 5.354983687257545e-06, + "loss": 0.211553955078125, + "step": 49190 + }, + { + "epoch": 0.4253746184641724, + "grad_norm": 25.986502530073, + "learning_rate": 5.354857468587539e-06, + "loss": 0.380645751953125, + "step": 49195 + }, + { + "epoch": 0.42541785198571563, + "grad_norm": 2.0319890871013753, + "learning_rate": 5.354731239057139e-06, + "loss": 0.3345947265625, + "step": 49200 + }, + { + "epoch": 0.4254610855072589, + "grad_norm": 1.5367866128072372, + "learning_rate": 5.354604998666927e-06, + "loss": 0.0872802734375, + "step": 49205 + }, + { + "epoch": 0.42550431902880215, + "grad_norm": 15.888306289812999, + "learning_rate": 5.354478747417483e-06, + "loss": 0.17704925537109376, + "step": 49210 + }, + { + "epoch": 0.42554755255034543, + "grad_norm": 4.70672338710646, + "learning_rate": 5.354352485309393e-06, + "loss": 0.25621795654296875, + "step": 49215 + }, + { + "epoch": 0.4255907860718887, + "grad_norm": 2.34387593417154, + "learning_rate": 5.354226212343236e-06, + "loss": 0.0739593505859375, + "step": 49220 + }, + { + "epoch": 0.42563401959343194, + "grad_norm": 26.02466287269495, + "learning_rate": 5.354099928519596e-06, + "loss": 0.08116073608398437, + "step": 49225 + }, + { + "epoch": 0.42567725311497523, + "grad_norm": 0.9350924712219246, + "learning_rate": 5.353973633839056e-06, + "loss": 0.160723876953125, + "step": 49230 + }, + { + "epoch": 0.4257204866365185, + "grad_norm": 4.868019301519911, + "learning_rate": 5.353847328302197e-06, + "loss": 0.07953948974609375, + "step": 49235 + }, + { + "epoch": 0.42576372015806174, + "grad_norm": 3.439961298016058, + "learning_rate": 5.353721011909602e-06, + "loss": 0.13011856079101564, + "step": 49240 + }, + { + "epoch": 0.425806953679605, + "grad_norm": 25.553471582367777, + "learning_rate": 5.353594684661853e-06, + "loss": 0.241064453125, + "step": 49245 + }, + { + "epoch": 0.42585018720114826, + "grad_norm": 27.263249707063995, + "learning_rate": 5.3534683465595345e-06, + "loss": 0.11681747436523438, + "step": 49250 + }, + { + "epoch": 0.42589342072269154, + "grad_norm": 1.8880543600123094, + "learning_rate": 5.353341997603227e-06, + "loss": 0.10568351745605468, + "step": 49255 + }, + { + "epoch": 0.4259366542442348, + "grad_norm": 0.9564771524575747, + "learning_rate": 5.353215637793515e-06, + "loss": 0.2141998291015625, + "step": 49260 + }, + { + "epoch": 0.42597988776577805, + "grad_norm": 1.7005661370265537, + "learning_rate": 5.35308926713098e-06, + "loss": 0.3794647216796875, + "step": 49265 + }, + { + "epoch": 0.42602312128732134, + "grad_norm": 0.7383953800944978, + "learning_rate": 5.352962885616205e-06, + "loss": 0.093634033203125, + "step": 49270 + }, + { + "epoch": 0.4260663548088646, + "grad_norm": 3.509298713435007, + "learning_rate": 5.352836493249774e-06, + "loss": 0.37933349609375, + "step": 49275 + }, + { + "epoch": 0.42610958833040785, + "grad_norm": 0.5889288687153322, + "learning_rate": 5.352710090032268e-06, + "loss": 0.05994701385498047, + "step": 49280 + }, + { + "epoch": 0.42615282185195114, + "grad_norm": 34.535616083503946, + "learning_rate": 5.352583675964272e-06, + "loss": 0.11448211669921875, + "step": 49285 + }, + { + "epoch": 0.4261960553734944, + "grad_norm": 0.811552725811715, + "learning_rate": 5.3524572510463675e-06, + "loss": 0.3156410217285156, + "step": 49290 + }, + { + "epoch": 0.42623928889503765, + "grad_norm": 0.46281813510346437, + "learning_rate": 5.352330815279137e-06, + "loss": 0.146575927734375, + "step": 49295 + }, + { + "epoch": 0.42628252241658093, + "grad_norm": 7.925727130184398, + "learning_rate": 5.352204368663166e-06, + "loss": 0.1267791748046875, + "step": 49300 + }, + { + "epoch": 0.42632575593812416, + "grad_norm": 11.760474614598264, + "learning_rate": 5.352077911199035e-06, + "loss": 0.1060791015625, + "step": 49305 + }, + { + "epoch": 0.42636898945966745, + "grad_norm": 15.431028868710158, + "learning_rate": 5.351951442887329e-06, + "loss": 0.07894973754882813, + "step": 49310 + }, + { + "epoch": 0.42641222298121073, + "grad_norm": 12.106168142739909, + "learning_rate": 5.3518249637286315e-06, + "loss": 0.2488800048828125, + "step": 49315 + }, + { + "epoch": 0.42645545650275396, + "grad_norm": 1.097358328522286, + "learning_rate": 5.351698473723524e-06, + "loss": 0.2999481201171875, + "step": 49320 + }, + { + "epoch": 0.42649869002429724, + "grad_norm": 16.01644487867623, + "learning_rate": 5.351571972872592e-06, + "loss": 0.37978515625, + "step": 49325 + }, + { + "epoch": 0.42654192354584053, + "grad_norm": 16.3021155623421, + "learning_rate": 5.351445461176417e-06, + "loss": 0.23549346923828124, + "step": 49330 + }, + { + "epoch": 0.42658515706738376, + "grad_norm": 4.554374399953104, + "learning_rate": 5.351318938635584e-06, + "loss": 0.052407455444335935, + "step": 49335 + }, + { + "epoch": 0.42662839058892704, + "grad_norm": 2.6277517448137107, + "learning_rate": 5.351192405250675e-06, + "loss": 0.3665802001953125, + "step": 49340 + }, + { + "epoch": 0.42667162411047027, + "grad_norm": 32.98036085325871, + "learning_rate": 5.351065861022275e-06, + "loss": 0.3493000030517578, + "step": 49345 + }, + { + "epoch": 0.42671485763201356, + "grad_norm": 2.777928868201427, + "learning_rate": 5.350939305950968e-06, + "loss": 0.0624542236328125, + "step": 49350 + }, + { + "epoch": 0.42675809115355684, + "grad_norm": 8.905778672751751, + "learning_rate": 5.350812740037335e-06, + "loss": 0.12786788940429689, + "step": 49355 + }, + { + "epoch": 0.42680132467510007, + "grad_norm": 0.7190965034431058, + "learning_rate": 5.350686163281962e-06, + "loss": 0.11405410766601562, + "step": 49360 + }, + { + "epoch": 0.42684455819664335, + "grad_norm": 1.5052310057943938, + "learning_rate": 5.350559575685432e-06, + "loss": 0.18720245361328125, + "step": 49365 + }, + { + "epoch": 0.42688779171818664, + "grad_norm": 25.874991005342164, + "learning_rate": 5.350432977248329e-06, + "loss": 0.11692676544189454, + "step": 49370 + }, + { + "epoch": 0.42693102523972987, + "grad_norm": 23.28480593865102, + "learning_rate": 5.350306367971237e-06, + "loss": 0.1903045654296875, + "step": 49375 + }, + { + "epoch": 0.42697425876127315, + "grad_norm": 20.263765729253198, + "learning_rate": 5.350179747854739e-06, + "loss": 0.1173095703125, + "step": 49380 + }, + { + "epoch": 0.4270174922828164, + "grad_norm": 8.310542016619493, + "learning_rate": 5.35005311689942e-06, + "loss": 0.21895294189453124, + "step": 49385 + }, + { + "epoch": 0.42706072580435966, + "grad_norm": 1.511298480602914, + "learning_rate": 5.349926475105864e-06, + "loss": 0.09806976318359376, + "step": 49390 + }, + { + "epoch": 0.42710395932590295, + "grad_norm": 4.79715136862765, + "learning_rate": 5.349799822474655e-06, + "loss": 0.24678955078125, + "step": 49395 + }, + { + "epoch": 0.4271471928474462, + "grad_norm": 0.471558086029848, + "learning_rate": 5.349673159006376e-06, + "loss": 0.35061798095703123, + "step": 49400 + }, + { + "epoch": 0.42719042636898946, + "grad_norm": 4.134655508935678, + "learning_rate": 5.3495464847016125e-06, + "loss": 0.10857620239257812, + "step": 49405 + }, + { + "epoch": 0.42723365989053275, + "grad_norm": 12.016554882532137, + "learning_rate": 5.349419799560948e-06, + "loss": 0.27904739379882815, + "step": 49410 + }, + { + "epoch": 0.427276893412076, + "grad_norm": 0.15618996958880185, + "learning_rate": 5.349293103584967e-06, + "loss": 0.13929519653320313, + "step": 49415 + }, + { + "epoch": 0.42732012693361926, + "grad_norm": 10.26083877395492, + "learning_rate": 5.349166396774254e-06, + "loss": 0.3736747741699219, + "step": 49420 + }, + { + "epoch": 0.4273633604551625, + "grad_norm": 2.9756420904113985, + "learning_rate": 5.349039679129392e-06, + "loss": 0.10055313110351563, + "step": 49425 + }, + { + "epoch": 0.4274065939767058, + "grad_norm": 1.1501066906817616, + "learning_rate": 5.348912950650969e-06, + "loss": 0.2684478759765625, + "step": 49430 + }, + { + "epoch": 0.42744982749824906, + "grad_norm": 8.675261570160366, + "learning_rate": 5.348786211339565e-06, + "loss": 0.11656665802001953, + "step": 49435 + }, + { + "epoch": 0.4274930610197923, + "grad_norm": 18.72322239184233, + "learning_rate": 5.3486594611957675e-06, + "loss": 0.18509521484375, + "step": 49440 + }, + { + "epoch": 0.42753629454133557, + "grad_norm": 8.07738916562591, + "learning_rate": 5.348532700220159e-06, + "loss": 0.22644691467285155, + "step": 49445 + }, + { + "epoch": 0.42757952806287886, + "grad_norm": 22.077199205138673, + "learning_rate": 5.3484059284133265e-06, + "loss": 0.30382614135742186, + "step": 49450 + }, + { + "epoch": 0.4276227615844221, + "grad_norm": 6.692280266012515, + "learning_rate": 5.348279145775853e-06, + "loss": 0.0447357177734375, + "step": 49455 + }, + { + "epoch": 0.42766599510596537, + "grad_norm": 10.983599595519644, + "learning_rate": 5.348152352308324e-06, + "loss": 0.045977783203125, + "step": 49460 + }, + { + "epoch": 0.4277092286275086, + "grad_norm": 22.63756254503497, + "learning_rate": 5.3480255480113225e-06, + "loss": 0.1450531005859375, + "step": 49465 + }, + { + "epoch": 0.4277524621490519, + "grad_norm": 0.2608267669926414, + "learning_rate": 5.347898732885435e-06, + "loss": 0.067108154296875, + "step": 49470 + }, + { + "epoch": 0.42779569567059517, + "grad_norm": 7.931673840026527, + "learning_rate": 5.347771906931248e-06, + "loss": 0.1169342041015625, + "step": 49475 + }, + { + "epoch": 0.4278389291921384, + "grad_norm": 17.87551287038922, + "learning_rate": 5.347645070149342e-06, + "loss": 0.25250091552734377, + "step": 49480 + }, + { + "epoch": 0.4278821627136817, + "grad_norm": 5.137153776578333, + "learning_rate": 5.347518222540306e-06, + "loss": 0.11264381408691407, + "step": 49485 + }, + { + "epoch": 0.42792539623522496, + "grad_norm": 20.82612666094172, + "learning_rate": 5.3473913641047215e-06, + "loss": 0.143475341796875, + "step": 49490 + }, + { + "epoch": 0.4279686297567682, + "grad_norm": 5.880888486872098, + "learning_rate": 5.347264494843177e-06, + "loss": 0.10655784606933594, + "step": 49495 + }, + { + "epoch": 0.4280118632783115, + "grad_norm": 0.47437588723654117, + "learning_rate": 5.347137614756256e-06, + "loss": 0.11059722900390626, + "step": 49500 + }, + { + "epoch": 0.42805509679985476, + "grad_norm": 0.6721740601164206, + "learning_rate": 5.347010723844543e-06, + "loss": 0.023015594482421874, + "step": 49505 + }, + { + "epoch": 0.428098330321398, + "grad_norm": 0.9200499120028219, + "learning_rate": 5.346883822108624e-06, + "loss": 0.327301025390625, + "step": 49510 + }, + { + "epoch": 0.4281415638429413, + "grad_norm": 2.4922917582707407, + "learning_rate": 5.3467569095490845e-06, + "loss": 0.10503997802734374, + "step": 49515 + }, + { + "epoch": 0.4281847973644845, + "grad_norm": 2.865903893575353, + "learning_rate": 5.34662998616651e-06, + "loss": 0.18742141723632813, + "step": 49520 + }, + { + "epoch": 0.4282280308860278, + "grad_norm": 5.589252772957028, + "learning_rate": 5.346503051961485e-06, + "loss": 0.06794204711914062, + "step": 49525 + }, + { + "epoch": 0.4282712644075711, + "grad_norm": 20.45690077236147, + "learning_rate": 5.346376106934594e-06, + "loss": 0.07731170654296875, + "step": 49530 + }, + { + "epoch": 0.4283144979291143, + "grad_norm": 164.89091557890168, + "learning_rate": 5.346249151086425e-06, + "loss": 0.5129119873046875, + "step": 49535 + }, + { + "epoch": 0.4283577314506576, + "grad_norm": 3.572821872457535, + "learning_rate": 5.346122184417562e-06, + "loss": 0.06519775390625, + "step": 49540 + }, + { + "epoch": 0.42840096497220087, + "grad_norm": 2.026837269152825, + "learning_rate": 5.345995206928591e-06, + "loss": 0.23243408203125, + "step": 49545 + }, + { + "epoch": 0.4284441984937441, + "grad_norm": 35.330925812078355, + "learning_rate": 5.345868218620097e-06, + "loss": 0.17958812713623046, + "step": 49550 + }, + { + "epoch": 0.4284874320152874, + "grad_norm": 6.620586739440521, + "learning_rate": 5.345741219492667e-06, + "loss": 0.140509033203125, + "step": 49555 + }, + { + "epoch": 0.4285306655368306, + "grad_norm": 4.980773641558111, + "learning_rate": 5.345614209546885e-06, + "loss": 0.07177581787109374, + "step": 49560 + }, + { + "epoch": 0.4285738990583739, + "grad_norm": 4.049804885382833, + "learning_rate": 5.345487188783337e-06, + "loss": 0.06232376098632812, + "step": 49565 + }, + { + "epoch": 0.4286171325799172, + "grad_norm": 1.9351455234677528, + "learning_rate": 5.345360157202609e-06, + "loss": 0.185662841796875, + "step": 49570 + }, + { + "epoch": 0.4286603661014604, + "grad_norm": 2.8481558116811962, + "learning_rate": 5.345233114805289e-06, + "loss": 0.022556304931640625, + "step": 49575 + }, + { + "epoch": 0.4287035996230037, + "grad_norm": 6.268386721239525, + "learning_rate": 5.34510606159196e-06, + "loss": 0.31872100830078126, + "step": 49580 + }, + { + "epoch": 0.428746833144547, + "grad_norm": 18.856108026561646, + "learning_rate": 5.34497899756321e-06, + "loss": 0.16688461303710939, + "step": 49585 + }, + { + "epoch": 0.4287900666660902, + "grad_norm": 1.547823198387081, + "learning_rate": 5.344851922719623e-06, + "loss": 0.11861152648925781, + "step": 49590 + }, + { + "epoch": 0.4288333001876335, + "grad_norm": 2.4574252561007763, + "learning_rate": 5.344724837061786e-06, + "loss": 0.1296234130859375, + "step": 49595 + }, + { + "epoch": 0.4288765337091767, + "grad_norm": 25.57404197014814, + "learning_rate": 5.344597740590286e-06, + "loss": 0.12423667907714844, + "step": 49600 + }, + { + "epoch": 0.42891976723072, + "grad_norm": 33.740092389414635, + "learning_rate": 5.344470633305708e-06, + "loss": 0.40818634033203127, + "step": 49605 + }, + { + "epoch": 0.4289630007522633, + "grad_norm": 3.7695153881839465, + "learning_rate": 5.344343515208639e-06, + "loss": 0.21760149002075196, + "step": 49610 + }, + { + "epoch": 0.4290062342738065, + "grad_norm": 38.50494987175199, + "learning_rate": 5.3442163862996645e-06, + "loss": 0.22387161254882812, + "step": 49615 + }, + { + "epoch": 0.4290494677953498, + "grad_norm": 0.26155918946675955, + "learning_rate": 5.344089246579371e-06, + "loss": 0.16635894775390625, + "step": 49620 + }, + { + "epoch": 0.4290927013168931, + "grad_norm": 4.999056871855604, + "learning_rate": 5.343962096048345e-06, + "loss": 0.4065155029296875, + "step": 49625 + }, + { + "epoch": 0.4291359348384363, + "grad_norm": 27.366391600112973, + "learning_rate": 5.343834934707173e-06, + "loss": 0.3789031982421875, + "step": 49630 + }, + { + "epoch": 0.4291791683599796, + "grad_norm": 4.767479087611217, + "learning_rate": 5.3437077625564415e-06, + "loss": 0.2710540771484375, + "step": 49635 + }, + { + "epoch": 0.42922240188152283, + "grad_norm": 10.245475618970884, + "learning_rate": 5.343580579596738e-06, + "loss": 0.19004364013671876, + "step": 49640 + }, + { + "epoch": 0.4292656354030661, + "grad_norm": 1.883988701721078, + "learning_rate": 5.343453385828646e-06, + "loss": 0.033856964111328124, + "step": 49645 + }, + { + "epoch": 0.4293088689246094, + "grad_norm": 3.615605206814626, + "learning_rate": 5.343326181252755e-06, + "loss": 0.25201568603515623, + "step": 49650 + }, + { + "epoch": 0.42935210244615263, + "grad_norm": 7.463820736982764, + "learning_rate": 5.343198965869652e-06, + "loss": 0.0765838623046875, + "step": 49655 + }, + { + "epoch": 0.4293953359676959, + "grad_norm": 14.380738241454964, + "learning_rate": 5.34307173967992e-06, + "loss": 0.2503326416015625, + "step": 49660 + }, + { + "epoch": 0.4294385694892392, + "grad_norm": 16.103214404055564, + "learning_rate": 5.3429445026841485e-06, + "loss": 0.29744873046875, + "step": 49665 + }, + { + "epoch": 0.4294818030107824, + "grad_norm": 0.57593308800504, + "learning_rate": 5.342817254882925e-06, + "loss": 0.055157470703125, + "step": 49670 + }, + { + "epoch": 0.4295250365323257, + "grad_norm": 0.7702676786163009, + "learning_rate": 5.342689996276835e-06, + "loss": 0.24418182373046876, + "step": 49675 + }, + { + "epoch": 0.429568270053869, + "grad_norm": 0.7372332910111365, + "learning_rate": 5.3425627268664656e-06, + "loss": 0.027100372314453124, + "step": 49680 + }, + { + "epoch": 0.4296115035754122, + "grad_norm": 0.5773286890889753, + "learning_rate": 5.342435446652404e-06, + "loss": 0.1754669189453125, + "step": 49685 + }, + { + "epoch": 0.4296547370969555, + "grad_norm": 1.7953593175630675, + "learning_rate": 5.342308155635238e-06, + "loss": 0.08187255859375, + "step": 49690 + }, + { + "epoch": 0.42969797061849874, + "grad_norm": 10.679805155344976, + "learning_rate": 5.342180853815552e-06, + "loss": 0.222528076171875, + "step": 49695 + }, + { + "epoch": 0.429741204140042, + "grad_norm": 27.881803341972397, + "learning_rate": 5.342053541193937e-06, + "loss": 0.5006950378417969, + "step": 49700 + }, + { + "epoch": 0.4297844376615853, + "grad_norm": 3.898093119416215, + "learning_rate": 5.3419262177709755e-06, + "loss": 0.07064018249511719, + "step": 49705 + }, + { + "epoch": 0.42982767118312853, + "grad_norm": 4.027092564176058, + "learning_rate": 5.341798883547259e-06, + "loss": 0.090765380859375, + "step": 49710 + }, + { + "epoch": 0.4298709047046718, + "grad_norm": 1.4152376298701703, + "learning_rate": 5.341671538523372e-06, + "loss": 0.10266380310058594, + "step": 49715 + }, + { + "epoch": 0.4299141382262151, + "grad_norm": 24.000032273759786, + "learning_rate": 5.341544182699904e-06, + "loss": 0.26493301391601565, + "step": 49720 + }, + { + "epoch": 0.42995737174775833, + "grad_norm": 4.8328855848324395, + "learning_rate": 5.34141681607744e-06, + "loss": 0.15576629638671874, + "step": 49725 + }, + { + "epoch": 0.4300006052693016, + "grad_norm": 28.927116131511365, + "learning_rate": 5.34128943865657e-06, + "loss": 0.11771621704101562, + "step": 49730 + }, + { + "epoch": 0.43004383879084485, + "grad_norm": 19.062176160896033, + "learning_rate": 5.341162050437879e-06, + "loss": 0.181353759765625, + "step": 49735 + }, + { + "epoch": 0.43008707231238813, + "grad_norm": 12.191821189143397, + "learning_rate": 5.341034651421955e-06, + "loss": 0.10634307861328125, + "step": 49740 + }, + { + "epoch": 0.4301303058339314, + "grad_norm": 21.214442716836782, + "learning_rate": 5.3409072416093854e-06, + "loss": 0.1656280517578125, + "step": 49745 + }, + { + "epoch": 0.43017353935547464, + "grad_norm": 6.706666557585141, + "learning_rate": 5.34077982100076e-06, + "loss": 0.13623046875, + "step": 49750 + }, + { + "epoch": 0.43021677287701793, + "grad_norm": 0.4354171999153345, + "learning_rate": 5.340652389596665e-06, + "loss": 0.2498138427734375, + "step": 49755 + }, + { + "epoch": 0.4302600063985612, + "grad_norm": 16.03521491135773, + "learning_rate": 5.340524947397686e-06, + "loss": 0.1988525390625, + "step": 49760 + }, + { + "epoch": 0.43030323992010444, + "grad_norm": 28.980912967593987, + "learning_rate": 5.340397494404415e-06, + "loss": 0.18259506225585936, + "step": 49765 + }, + { + "epoch": 0.4303464734416477, + "grad_norm": 26.46690884978245, + "learning_rate": 5.3402700306174365e-06, + "loss": 0.6249988555908204, + "step": 49770 + }, + { + "epoch": 0.43038970696319095, + "grad_norm": 16.790710163847837, + "learning_rate": 5.34014255603734e-06, + "loss": 0.15609130859375, + "step": 49775 + }, + { + "epoch": 0.43043294048473424, + "grad_norm": 2.9554143124672607, + "learning_rate": 5.340015070664713e-06, + "loss": 0.0219696044921875, + "step": 49780 + }, + { + "epoch": 0.4304761740062775, + "grad_norm": 1.2414262127759355, + "learning_rate": 5.339887574500142e-06, + "loss": 0.16281204223632811, + "step": 49785 + }, + { + "epoch": 0.43051940752782075, + "grad_norm": 75.95281358235445, + "learning_rate": 5.339760067544217e-06, + "loss": 0.46688232421875, + "step": 49790 + }, + { + "epoch": 0.43056264104936404, + "grad_norm": 1.307923194248045, + "learning_rate": 5.3396325497975255e-06, + "loss": 0.09495086669921875, + "step": 49795 + }, + { + "epoch": 0.4306058745709073, + "grad_norm": 10.011988639916675, + "learning_rate": 5.3395050212606555e-06, + "loss": 0.074078369140625, + "step": 49800 + }, + { + "epoch": 0.43064910809245055, + "grad_norm": 24.51689932558825, + "learning_rate": 5.339377481934194e-06, + "loss": 0.14846343994140626, + "step": 49805 + }, + { + "epoch": 0.43069234161399383, + "grad_norm": 7.483439440414308, + "learning_rate": 5.3392499318187315e-06, + "loss": 0.0329010009765625, + "step": 49810 + }, + { + "epoch": 0.43073557513553706, + "grad_norm": 16.69708340224177, + "learning_rate": 5.339122370914854e-06, + "loss": 0.2128875732421875, + "step": 49815 + }, + { + "epoch": 0.43077880865708035, + "grad_norm": 16.283236890723444, + "learning_rate": 5.338994799223152e-06, + "loss": 0.177734375, + "step": 49820 + }, + { + "epoch": 0.43082204217862363, + "grad_norm": 6.8510929391004165, + "learning_rate": 5.338867216744212e-06, + "loss": 0.09879302978515625, + "step": 49825 + }, + { + "epoch": 0.43086527570016686, + "grad_norm": 0.3274241939635283, + "learning_rate": 5.338739623478623e-06, + "loss": 0.23521728515625, + "step": 49830 + }, + { + "epoch": 0.43090850922171015, + "grad_norm": 11.033248084845093, + "learning_rate": 5.338612019426974e-06, + "loss": 0.19815025329589844, + "step": 49835 + }, + { + "epoch": 0.43095174274325343, + "grad_norm": 3.7794576308550654, + "learning_rate": 5.338484404589852e-06, + "loss": 0.3904296875, + "step": 49840 + }, + { + "epoch": 0.43099497626479666, + "grad_norm": 4.024413506745147, + "learning_rate": 5.338356778967848e-06, + "loss": 0.060595703125, + "step": 49845 + }, + { + "epoch": 0.43103820978633994, + "grad_norm": 11.443778216975376, + "learning_rate": 5.338229142561547e-06, + "loss": 0.26207275390625, + "step": 49850 + }, + { + "epoch": 0.4310814433078832, + "grad_norm": 2.05855985913387, + "learning_rate": 5.3381014953715415e-06, + "loss": 0.0989349365234375, + "step": 49855 + }, + { + "epoch": 0.43112467682942646, + "grad_norm": 2.519411095373522, + "learning_rate": 5.337973837398418e-06, + "loss": 0.170208740234375, + "step": 49860 + }, + { + "epoch": 0.43116791035096974, + "grad_norm": 0.13542901604949212, + "learning_rate": 5.337846168642766e-06, + "loss": 0.0457305908203125, + "step": 49865 + }, + { + "epoch": 0.43121114387251297, + "grad_norm": 0.6135905679864789, + "learning_rate": 5.337718489105174e-06, + "loss": 0.323828125, + "step": 49870 + }, + { + "epoch": 0.43125437739405625, + "grad_norm": 3.0728783512852575, + "learning_rate": 5.3375907987862315e-06, + "loss": 0.1854999542236328, + "step": 49875 + }, + { + "epoch": 0.43129761091559954, + "grad_norm": 0.6578121426633746, + "learning_rate": 5.3374630976865266e-06, + "loss": 0.3281829833984375, + "step": 49880 + }, + { + "epoch": 0.43134084443714277, + "grad_norm": 17.301184538642687, + "learning_rate": 5.337335385806647e-06, + "loss": 0.357769775390625, + "step": 49885 + }, + { + "epoch": 0.43138407795868605, + "grad_norm": 37.07575029333251, + "learning_rate": 5.3372076631471845e-06, + "loss": 0.305035400390625, + "step": 49890 + }, + { + "epoch": 0.43142731148022934, + "grad_norm": 0.2134896166571156, + "learning_rate": 5.3370799297087265e-06, + "loss": 0.23580322265625, + "step": 49895 + }, + { + "epoch": 0.43147054500177257, + "grad_norm": 2.1194379097788474, + "learning_rate": 5.3369521854918614e-06, + "loss": 0.24000244140625, + "step": 49900 + }, + { + "epoch": 0.43151377852331585, + "grad_norm": 36.226254160927084, + "learning_rate": 5.336824430497181e-06, + "loss": 0.2606838226318359, + "step": 49905 + }, + { + "epoch": 0.4315570120448591, + "grad_norm": 0.7491516703468266, + "learning_rate": 5.336696664725272e-06, + "loss": 0.11060638427734375, + "step": 49910 + }, + { + "epoch": 0.43160024556640236, + "grad_norm": 3.0422554822243058, + "learning_rate": 5.336568888176724e-06, + "loss": 0.17860260009765624, + "step": 49915 + }, + { + "epoch": 0.43164347908794565, + "grad_norm": 4.51672314203297, + "learning_rate": 5.336441100852127e-06, + "loss": 0.1689361572265625, + "step": 49920 + }, + { + "epoch": 0.4316867126094889, + "grad_norm": 6.4557087091919225, + "learning_rate": 5.33631330275207e-06, + "loss": 0.12574462890625, + "step": 49925 + }, + { + "epoch": 0.43172994613103216, + "grad_norm": 17.591956295716017, + "learning_rate": 5.336185493877143e-06, + "loss": 0.28558349609375, + "step": 49930 + }, + { + "epoch": 0.43177317965257545, + "grad_norm": 19.296798279775988, + "learning_rate": 5.336057674227935e-06, + "loss": 0.23720779418945312, + "step": 49935 + }, + { + "epoch": 0.4318164131741187, + "grad_norm": 1.0144061287559365, + "learning_rate": 5.3359298438050345e-06, + "loss": 0.317803955078125, + "step": 49940 + }, + { + "epoch": 0.43185964669566196, + "grad_norm": 0.5083514151469403, + "learning_rate": 5.335802002609033e-06, + "loss": 0.22099647521972657, + "step": 49945 + }, + { + "epoch": 0.4319028802172052, + "grad_norm": 1.805312221008229, + "learning_rate": 5.335674150640519e-06, + "loss": 0.04253463745117188, + "step": 49950 + }, + { + "epoch": 0.43194611373874847, + "grad_norm": 3.4248322138204195, + "learning_rate": 5.335546287900082e-06, + "loss": 0.059661865234375, + "step": 49955 + }, + { + "epoch": 0.43198934726029176, + "grad_norm": 2.1817117342020356, + "learning_rate": 5.335418414388311e-06, + "loss": 0.2941741943359375, + "step": 49960 + }, + { + "epoch": 0.432032580781835, + "grad_norm": 14.190716380133207, + "learning_rate": 5.3352905301057975e-06, + "loss": 0.13727645874023436, + "step": 49965 + }, + { + "epoch": 0.43207581430337827, + "grad_norm": 6.765953393988783, + "learning_rate": 5.33516263505313e-06, + "loss": 0.29603271484375, + "step": 49970 + }, + { + "epoch": 0.43211904782492155, + "grad_norm": 1.568434610891301, + "learning_rate": 5.335034729230899e-06, + "loss": 0.2799163818359375, + "step": 49975 + }, + { + "epoch": 0.4321622813464648, + "grad_norm": 20.277559882194463, + "learning_rate": 5.334906812639695e-06, + "loss": 0.17987442016601562, + "step": 49980 + }, + { + "epoch": 0.43220551486800807, + "grad_norm": 2.5180205863964242, + "learning_rate": 5.334778885280105e-06, + "loss": 0.08702239990234376, + "step": 49985 + }, + { + "epoch": 0.4322487483895513, + "grad_norm": 0.39796246836328025, + "learning_rate": 5.334650947152723e-06, + "loss": 0.15127334594726563, + "step": 49990 + }, + { + "epoch": 0.4322919819110946, + "grad_norm": 15.644506760222802, + "learning_rate": 5.334522998258136e-06, + "loss": 0.07010765075683593, + "step": 49995 + }, + { + "epoch": 0.43233521543263786, + "grad_norm": 17.11631208206834, + "learning_rate": 5.334395038596936e-06, + "loss": 0.06564712524414062, + "step": 50000 + }, + { + "epoch": 0.4323784489541811, + "grad_norm": 16.329105285875972, + "learning_rate": 5.334267068169712e-06, + "loss": 0.1686279296875, + "step": 50005 + }, + { + "epoch": 0.4324216824757244, + "grad_norm": 24.6413481164837, + "learning_rate": 5.334139086977054e-06, + "loss": 0.37219390869140623, + "step": 50010 + }, + { + "epoch": 0.43246491599726766, + "grad_norm": 0.5884518852065346, + "learning_rate": 5.334011095019554e-06, + "loss": 0.3291046142578125, + "step": 50015 + }, + { + "epoch": 0.4325081495188109, + "grad_norm": 15.056537802741486, + "learning_rate": 5.333883092297801e-06, + "loss": 0.2341632843017578, + "step": 50020 + }, + { + "epoch": 0.4325513830403542, + "grad_norm": 14.244199242408506, + "learning_rate": 5.333755078812384e-06, + "loss": 0.08370933532714844, + "step": 50025 + }, + { + "epoch": 0.43259461656189746, + "grad_norm": 1.1304320911168129, + "learning_rate": 5.333627054563896e-06, + "loss": 0.24490966796875, + "step": 50030 + }, + { + "epoch": 0.4326378500834407, + "grad_norm": 17.059283341896435, + "learning_rate": 5.333499019552925e-06, + "loss": 0.2090728759765625, + "step": 50035 + }, + { + "epoch": 0.432681083604984, + "grad_norm": 1.1787310143088832, + "learning_rate": 5.333370973780065e-06, + "loss": 0.1338470458984375, + "step": 50040 + }, + { + "epoch": 0.4327243171265272, + "grad_norm": 9.779612142588174, + "learning_rate": 5.333242917245903e-06, + "loss": 0.056499481201171875, + "step": 50045 + }, + { + "epoch": 0.4327675506480705, + "grad_norm": 1.203723705228621, + "learning_rate": 5.33311484995103e-06, + "loss": 0.17972564697265625, + "step": 50050 + }, + { + "epoch": 0.43281078416961377, + "grad_norm": 6.420472140926915, + "learning_rate": 5.332986771896037e-06, + "loss": 0.22974853515625, + "step": 50055 + }, + { + "epoch": 0.432854017691157, + "grad_norm": 28.055596395410788, + "learning_rate": 5.332858683081517e-06, + "loss": 0.28475112915039064, + "step": 50060 + }, + { + "epoch": 0.4328972512127003, + "grad_norm": 1.6575621570310435, + "learning_rate": 5.332730583508058e-06, + "loss": 0.29687652587890623, + "step": 50065 + }, + { + "epoch": 0.43294048473424357, + "grad_norm": 0.6573159642912856, + "learning_rate": 5.332602473176252e-06, + "loss": 0.0492401123046875, + "step": 50070 + }, + { + "epoch": 0.4329837182557868, + "grad_norm": 0.1895684469186362, + "learning_rate": 5.332474352086689e-06, + "loss": 0.04037017822265625, + "step": 50075 + }, + { + "epoch": 0.4330269517773301, + "grad_norm": 1.349112729693872, + "learning_rate": 5.332346220239961e-06, + "loss": 0.14392776489257814, + "step": 50080 + }, + { + "epoch": 0.4330701852988733, + "grad_norm": 9.46257939018291, + "learning_rate": 5.3322180776366585e-06, + "loss": 0.14625396728515624, + "step": 50085 + }, + { + "epoch": 0.4331134188204166, + "grad_norm": 1.4585078481324545, + "learning_rate": 5.3320899242773716e-06, + "loss": 0.1843597412109375, + "step": 50090 + }, + { + "epoch": 0.4331566523419599, + "grad_norm": 3.697680127701567, + "learning_rate": 5.331961760162692e-06, + "loss": 0.311248779296875, + "step": 50095 + }, + { + "epoch": 0.4331998858635031, + "grad_norm": 19.97886115363688, + "learning_rate": 5.331833585293212e-06, + "loss": 0.07776756286621093, + "step": 50100 + }, + { + "epoch": 0.4332431193850464, + "grad_norm": 41.371016690599944, + "learning_rate": 5.331705399669521e-06, + "loss": 0.8070411682128906, + "step": 50105 + }, + { + "epoch": 0.4332863529065897, + "grad_norm": 31.168436184281187, + "learning_rate": 5.33157720329221e-06, + "loss": 0.43757781982421873, + "step": 50110 + }, + { + "epoch": 0.4333295864281329, + "grad_norm": 2.230798709814631, + "learning_rate": 5.331448996161871e-06, + "loss": 0.12845306396484374, + "step": 50115 + }, + { + "epoch": 0.4333728199496762, + "grad_norm": 24.677306560007807, + "learning_rate": 5.3313207782790955e-06, + "loss": 0.12551422119140626, + "step": 50120 + }, + { + "epoch": 0.4334160534712194, + "grad_norm": 3.4749018099689013, + "learning_rate": 5.331192549644475e-06, + "loss": 0.1374744415283203, + "step": 50125 + }, + { + "epoch": 0.4334592869927627, + "grad_norm": 3.449236832404694, + "learning_rate": 5.3310643102586e-06, + "loss": 0.18898468017578124, + "step": 50130 + }, + { + "epoch": 0.433502520514306, + "grad_norm": 15.569421966777869, + "learning_rate": 5.3309360601220625e-06, + "loss": 0.07367172241210937, + "step": 50135 + }, + { + "epoch": 0.4335457540358492, + "grad_norm": 9.541802868647038, + "learning_rate": 5.330807799235454e-06, + "loss": 0.29845733642578126, + "step": 50140 + }, + { + "epoch": 0.4335889875573925, + "grad_norm": 5.1727080642004415, + "learning_rate": 5.330679527599365e-06, + "loss": 0.12880439758300782, + "step": 50145 + }, + { + "epoch": 0.4336322210789358, + "grad_norm": 0.5001147485521618, + "learning_rate": 5.3305512452143894e-06, + "loss": 0.06856803894042969, + "step": 50150 + }, + { + "epoch": 0.433675454600479, + "grad_norm": 6.643418086885186, + "learning_rate": 5.330422952081116e-06, + "loss": 0.443463134765625, + "step": 50155 + }, + { + "epoch": 0.4337186881220223, + "grad_norm": 13.08768125842318, + "learning_rate": 5.330294648200139e-06, + "loss": 0.1722747802734375, + "step": 50160 + }, + { + "epoch": 0.43376192164356553, + "grad_norm": 4.2037203601663204, + "learning_rate": 5.330166333572049e-06, + "loss": 0.176806640625, + "step": 50165 + }, + { + "epoch": 0.4338051551651088, + "grad_norm": 28.99938394325032, + "learning_rate": 5.330038008197437e-06, + "loss": 0.3834075927734375, + "step": 50170 + }, + { + "epoch": 0.4338483886866521, + "grad_norm": 3.44754424420312, + "learning_rate": 5.329909672076895e-06, + "loss": 0.43711395263671876, + "step": 50175 + }, + { + "epoch": 0.4338916222081953, + "grad_norm": 7.937290335106569, + "learning_rate": 5.329781325211017e-06, + "loss": 0.11527099609375, + "step": 50180 + }, + { + "epoch": 0.4339348557297386, + "grad_norm": 23.43178273247363, + "learning_rate": 5.329652967600393e-06, + "loss": 0.20913848876953126, + "step": 50185 + }, + { + "epoch": 0.4339780892512819, + "grad_norm": 20.02855587022471, + "learning_rate": 5.329524599245615e-06, + "loss": 0.2325439453125, + "step": 50190 + }, + { + "epoch": 0.4340213227728251, + "grad_norm": 19.836423366465674, + "learning_rate": 5.329396220147275e-06, + "loss": 0.2693813323974609, + "step": 50195 + }, + { + "epoch": 0.4340645562943684, + "grad_norm": 3.0070911069049466, + "learning_rate": 5.3292678303059664e-06, + "loss": 0.0754119873046875, + "step": 50200 + }, + { + "epoch": 0.43410778981591164, + "grad_norm": 2.2958756161569482, + "learning_rate": 5.3291394297222795e-06, + "loss": 0.14578857421875, + "step": 50205 + }, + { + "epoch": 0.4341510233374549, + "grad_norm": 29.94556953146304, + "learning_rate": 5.329011018396808e-06, + "loss": 0.300653076171875, + "step": 50210 + }, + { + "epoch": 0.4341942568589982, + "grad_norm": 15.033742082476573, + "learning_rate": 5.328882596330143e-06, + "loss": 0.07087020874023438, + "step": 50215 + }, + { + "epoch": 0.43423749038054144, + "grad_norm": 15.223412363922426, + "learning_rate": 5.328754163522877e-06, + "loss": 0.3202484130859375, + "step": 50220 + }, + { + "epoch": 0.4342807239020847, + "grad_norm": 5.072867616977507, + "learning_rate": 5.328625719975603e-06, + "loss": 0.441851806640625, + "step": 50225 + }, + { + "epoch": 0.434323957423628, + "grad_norm": 2.4866770581146285, + "learning_rate": 5.3284972656889135e-06, + "loss": 0.027677154541015624, + "step": 50230 + }, + { + "epoch": 0.43436719094517123, + "grad_norm": 10.886797795166071, + "learning_rate": 5.3283688006634e-06, + "loss": 0.03934135437011719, + "step": 50235 + }, + { + "epoch": 0.4344104244667145, + "grad_norm": 7.506482992709317, + "learning_rate": 5.328240324899656e-06, + "loss": 0.11087646484375, + "step": 50240 + }, + { + "epoch": 0.4344536579882578, + "grad_norm": 21.09474197131581, + "learning_rate": 5.328111838398272e-06, + "loss": 0.2819000244140625, + "step": 50245 + }, + { + "epoch": 0.43449689150980103, + "grad_norm": 5.507634794707332, + "learning_rate": 5.327983341159843e-06, + "loss": 0.226251220703125, + "step": 50250 + }, + { + "epoch": 0.4345401250313443, + "grad_norm": 5.343140301137884, + "learning_rate": 5.32785483318496e-06, + "loss": 0.05497722625732422, + "step": 50255 + }, + { + "epoch": 0.43458335855288754, + "grad_norm": 36.94493735499508, + "learning_rate": 5.327726314474216e-06, + "loss": 0.36237945556640627, + "step": 50260 + }, + { + "epoch": 0.43462659207443083, + "grad_norm": 3.3171369498697625, + "learning_rate": 5.327597785028204e-06, + "loss": 0.12277984619140625, + "step": 50265 + }, + { + "epoch": 0.4346698255959741, + "grad_norm": 12.39695031620033, + "learning_rate": 5.327469244847517e-06, + "loss": 0.3165679931640625, + "step": 50270 + }, + { + "epoch": 0.43471305911751734, + "grad_norm": 2.91391015628754, + "learning_rate": 5.3273406939327475e-06, + "loss": 0.07400360107421874, + "step": 50275 + }, + { + "epoch": 0.4347562926390606, + "grad_norm": 2.1600327164752775, + "learning_rate": 5.327212132284488e-06, + "loss": 0.1022003173828125, + "step": 50280 + }, + { + "epoch": 0.4347995261606039, + "grad_norm": 53.186126893143985, + "learning_rate": 5.327083559903332e-06, + "loss": 0.32683258056640624, + "step": 50285 + }, + { + "epoch": 0.43484275968214714, + "grad_norm": 2.737156549202941, + "learning_rate": 5.326954976789872e-06, + "loss": 0.2912384033203125, + "step": 50290 + }, + { + "epoch": 0.4348859932036904, + "grad_norm": 4.627589296149999, + "learning_rate": 5.3268263829447015e-06, + "loss": 0.13720855712890626, + "step": 50295 + }, + { + "epoch": 0.43492922672523365, + "grad_norm": 16.175386540349173, + "learning_rate": 5.326697778368414e-06, + "loss": 0.166705322265625, + "step": 50300 + }, + { + "epoch": 0.43497246024677694, + "grad_norm": 12.200894401677887, + "learning_rate": 5.326569163061601e-06, + "loss": 0.2410736083984375, + "step": 50305 + }, + { + "epoch": 0.4350156937683202, + "grad_norm": 1.7240100719655824, + "learning_rate": 5.3264405370248565e-06, + "loss": 0.1946807861328125, + "step": 50310 + }, + { + "epoch": 0.43505892728986345, + "grad_norm": 2.4288424957523613, + "learning_rate": 5.326311900258774e-06, + "loss": 0.12052001953125, + "step": 50315 + }, + { + "epoch": 0.43510216081140674, + "grad_norm": 8.594274181810174, + "learning_rate": 5.326183252763947e-06, + "loss": 0.17100830078125, + "step": 50320 + }, + { + "epoch": 0.43514539433295, + "grad_norm": 10.372054285370034, + "learning_rate": 5.326054594540968e-06, + "loss": 0.19785919189453124, + "step": 50325 + }, + { + "epoch": 0.43518862785449325, + "grad_norm": 7.897044878218246, + "learning_rate": 5.325925925590431e-06, + "loss": 0.0972564697265625, + "step": 50330 + }, + { + "epoch": 0.43523186137603653, + "grad_norm": 0.22056323328120356, + "learning_rate": 5.325797245912929e-06, + "loss": 0.38272705078125, + "step": 50335 + }, + { + "epoch": 0.43527509489757976, + "grad_norm": 1.970437813472822, + "learning_rate": 5.325668555509055e-06, + "loss": 0.0688690185546875, + "step": 50340 + }, + { + "epoch": 0.43531832841912305, + "grad_norm": 19.667406890242663, + "learning_rate": 5.325539854379404e-06, + "loss": 0.15965576171875, + "step": 50345 + }, + { + "epoch": 0.43536156194066633, + "grad_norm": 5.159209403455846, + "learning_rate": 5.325411142524568e-06, + "loss": 0.0572662353515625, + "step": 50350 + }, + { + "epoch": 0.43540479546220956, + "grad_norm": 1.8068102821955185, + "learning_rate": 5.325282419945142e-06, + "loss": 0.3080137252807617, + "step": 50355 + }, + { + "epoch": 0.43544802898375284, + "grad_norm": 3.6923183407496776, + "learning_rate": 5.325153686641719e-06, + "loss": 0.07841033935546875, + "step": 50360 + }, + { + "epoch": 0.43549126250529613, + "grad_norm": 48.421765611801305, + "learning_rate": 5.3250249426148925e-06, + "loss": 0.4046875, + "step": 50365 + }, + { + "epoch": 0.43553449602683936, + "grad_norm": 1.5388920786322438, + "learning_rate": 5.324896187865256e-06, + "loss": 0.154888916015625, + "step": 50370 + }, + { + "epoch": 0.43557772954838264, + "grad_norm": 15.159268704168822, + "learning_rate": 5.324767422393404e-06, + "loss": 0.13545608520507812, + "step": 50375 + }, + { + "epoch": 0.43562096306992587, + "grad_norm": 7.289755016961822, + "learning_rate": 5.32463864619993e-06, + "loss": 0.46646728515625, + "step": 50380 + }, + { + "epoch": 0.43566419659146915, + "grad_norm": 3.5311870051961955, + "learning_rate": 5.324509859285428e-06, + "loss": 0.14344940185546876, + "step": 50385 + }, + { + "epoch": 0.43570743011301244, + "grad_norm": 0.019054116692095162, + "learning_rate": 5.324381061650492e-06, + "loss": 0.2743377685546875, + "step": 50390 + }, + { + "epoch": 0.43575066363455567, + "grad_norm": 7.731324854850651, + "learning_rate": 5.324252253295716e-06, + "loss": 0.09292678833007813, + "step": 50395 + }, + { + "epoch": 0.43579389715609895, + "grad_norm": 4.448048873161584, + "learning_rate": 5.324123434221693e-06, + "loss": 0.1250579833984375, + "step": 50400 + }, + { + "epoch": 0.43583713067764224, + "grad_norm": 9.910559359039793, + "learning_rate": 5.323994604429019e-06, + "loss": 0.1100341796875, + "step": 50405 + }, + { + "epoch": 0.43588036419918547, + "grad_norm": 3.418788133429993, + "learning_rate": 5.3238657639182874e-06, + "loss": 0.2791015625, + "step": 50410 + }, + { + "epoch": 0.43592359772072875, + "grad_norm": 4.245830828711862, + "learning_rate": 5.323736912690092e-06, + "loss": 0.33397216796875, + "step": 50415 + }, + { + "epoch": 0.43596683124227203, + "grad_norm": 4.6348951221245605, + "learning_rate": 5.323608050745027e-06, + "loss": 0.111395263671875, + "step": 50420 + }, + { + "epoch": 0.43601006476381526, + "grad_norm": 28.48509119832951, + "learning_rate": 5.323479178083687e-06, + "loss": 0.7175994873046875, + "step": 50425 + }, + { + "epoch": 0.43605329828535855, + "grad_norm": 13.849760383422003, + "learning_rate": 5.323350294706667e-06, + "loss": 0.222821044921875, + "step": 50430 + }, + { + "epoch": 0.4360965318069018, + "grad_norm": 16.173147790196367, + "learning_rate": 5.32322140061456e-06, + "loss": 0.1936492919921875, + "step": 50435 + }, + { + "epoch": 0.43613976532844506, + "grad_norm": 1.692493672184526, + "learning_rate": 5.323092495807961e-06, + "loss": 0.0586151123046875, + "step": 50440 + }, + { + "epoch": 0.43618299884998835, + "grad_norm": 9.505086192339979, + "learning_rate": 5.322963580287466e-06, + "loss": 0.112109375, + "step": 50445 + }, + { + "epoch": 0.4362262323715316, + "grad_norm": 9.698585302145984, + "learning_rate": 5.322834654053668e-06, + "loss": 0.16510066986083985, + "step": 50450 + }, + { + "epoch": 0.43626946589307486, + "grad_norm": 2.1640677441131793, + "learning_rate": 5.322705717107161e-06, + "loss": 0.1670013427734375, + "step": 50455 + }, + { + "epoch": 0.43631269941461814, + "grad_norm": 7.481157165779212, + "learning_rate": 5.32257676944854e-06, + "loss": 0.29864501953125, + "step": 50460 + }, + { + "epoch": 0.4363559329361614, + "grad_norm": 10.783487750959496, + "learning_rate": 5.322447811078401e-06, + "loss": 0.22774505615234375, + "step": 50465 + }, + { + "epoch": 0.43639916645770466, + "grad_norm": 17.35662397515103, + "learning_rate": 5.322318841997338e-06, + "loss": 0.25924072265625, + "step": 50470 + }, + { + "epoch": 0.4364423999792479, + "grad_norm": 28.552703804419725, + "learning_rate": 5.322189862205945e-06, + "loss": 0.2033294677734375, + "step": 50475 + }, + { + "epoch": 0.43648563350079117, + "grad_norm": 8.616931870945786, + "learning_rate": 5.322060871704818e-06, + "loss": 0.188720703125, + "step": 50480 + }, + { + "epoch": 0.43652886702233445, + "grad_norm": 2.7945997408976475, + "learning_rate": 5.321931870494552e-06, + "loss": 0.15059051513671876, + "step": 50485 + }, + { + "epoch": 0.4365721005438777, + "grad_norm": 15.173977049183359, + "learning_rate": 5.321802858575741e-06, + "loss": 0.09093017578125, + "step": 50490 + }, + { + "epoch": 0.43661533406542097, + "grad_norm": 6.9178772275111, + "learning_rate": 5.32167383594898e-06, + "loss": 0.4247894287109375, + "step": 50495 + }, + { + "epoch": 0.43665856758696425, + "grad_norm": 11.53061654979869, + "learning_rate": 5.321544802614864e-06, + "loss": 0.2156951904296875, + "step": 50500 + }, + { + "epoch": 0.4367018011085075, + "grad_norm": 14.434449264360522, + "learning_rate": 5.32141575857399e-06, + "loss": 0.24637451171875, + "step": 50505 + }, + { + "epoch": 0.43674503463005077, + "grad_norm": 4.292825170116138, + "learning_rate": 5.3212867038269506e-06, + "loss": 0.157647705078125, + "step": 50510 + }, + { + "epoch": 0.436788268151594, + "grad_norm": 4.325522912804539, + "learning_rate": 5.321157638374343e-06, + "loss": 0.32332763671875, + "step": 50515 + }, + { + "epoch": 0.4368315016731373, + "grad_norm": 4.692700903645104, + "learning_rate": 5.32102856221676e-06, + "loss": 0.056298828125, + "step": 50520 + }, + { + "epoch": 0.43687473519468056, + "grad_norm": 4.399495176345126, + "learning_rate": 5.320899475354799e-06, + "loss": 0.145440673828125, + "step": 50525 + }, + { + "epoch": 0.4369179687162238, + "grad_norm": 1.5888412997892334, + "learning_rate": 5.320770377789055e-06, + "loss": 0.0970458984375, + "step": 50530 + }, + { + "epoch": 0.4369612022377671, + "grad_norm": 3.9160761829536104, + "learning_rate": 5.320641269520122e-06, + "loss": 0.21341552734375, + "step": 50535 + }, + { + "epoch": 0.43700443575931036, + "grad_norm": 23.39307523196371, + "learning_rate": 5.320512150548597e-06, + "loss": 0.12904815673828124, + "step": 50540 + }, + { + "epoch": 0.4370476692808536, + "grad_norm": 45.881182708137, + "learning_rate": 5.320383020875075e-06, + "loss": 0.2846954345703125, + "step": 50545 + }, + { + "epoch": 0.4370909028023969, + "grad_norm": 32.9581087004743, + "learning_rate": 5.320253880500153e-06, + "loss": 0.23775177001953124, + "step": 50550 + }, + { + "epoch": 0.4371341363239401, + "grad_norm": 2.413736258157776, + "learning_rate": 5.320124729424424e-06, + "loss": 0.21507110595703124, + "step": 50555 + }, + { + "epoch": 0.4371773698454834, + "grad_norm": 23.930486740446536, + "learning_rate": 5.319995567648485e-06, + "loss": 0.20511093139648437, + "step": 50560 + }, + { + "epoch": 0.4372206033670267, + "grad_norm": 41.35073871095312, + "learning_rate": 5.319866395172931e-06, + "loss": 0.343408203125, + "step": 50565 + }, + { + "epoch": 0.4372638368885699, + "grad_norm": 1.6358120979960489, + "learning_rate": 5.319737211998358e-06, + "loss": 0.04457244873046875, + "step": 50570 + }, + { + "epoch": 0.4373070704101132, + "grad_norm": 6.335921735336692, + "learning_rate": 5.3196080181253625e-06, + "loss": 0.0735076904296875, + "step": 50575 + }, + { + "epoch": 0.43735030393165647, + "grad_norm": 5.611028451939467, + "learning_rate": 5.3194788135545395e-06, + "loss": 0.07164649963378907, + "step": 50580 + }, + { + "epoch": 0.4373935374531997, + "grad_norm": 0.7820586393226028, + "learning_rate": 5.319349598286486e-06, + "loss": 0.1562713623046875, + "step": 50585 + }, + { + "epoch": 0.437436770974743, + "grad_norm": 5.860899633505042, + "learning_rate": 5.319220372321795e-06, + "loss": 0.1776031494140625, + "step": 50590 + }, + { + "epoch": 0.43748000449628627, + "grad_norm": 12.959754427098188, + "learning_rate": 5.319091135661066e-06, + "loss": 0.158026123046875, + "step": 50595 + }, + { + "epoch": 0.4375232380178295, + "grad_norm": 0.6389695034074444, + "learning_rate": 5.318961888304894e-06, + "loss": 0.11138801574707032, + "step": 50600 + }, + { + "epoch": 0.4375664715393728, + "grad_norm": 13.40783253573087, + "learning_rate": 5.318832630253874e-06, + "loss": 0.2234405517578125, + "step": 50605 + }, + { + "epoch": 0.437609705060916, + "grad_norm": 4.0670279170334975, + "learning_rate": 5.318703361508604e-06, + "loss": 0.292822265625, + "step": 50610 + }, + { + "epoch": 0.4376529385824593, + "grad_norm": 2.704187035273917, + "learning_rate": 5.318574082069677e-06, + "loss": 0.04290351867675781, + "step": 50615 + }, + { + "epoch": 0.4376961721040026, + "grad_norm": 8.063293593609096, + "learning_rate": 5.318444791937693e-06, + "loss": 0.0677642822265625, + "step": 50620 + }, + { + "epoch": 0.4377394056255458, + "grad_norm": 74.90984610741535, + "learning_rate": 5.318315491113246e-06, + "loss": 0.561669921875, + "step": 50625 + }, + { + "epoch": 0.4377826391470891, + "grad_norm": 3.519950121864673, + "learning_rate": 5.318186179596932e-06, + "loss": 0.5324066162109375, + "step": 50630 + }, + { + "epoch": 0.4378258726686324, + "grad_norm": 25.361559320039504, + "learning_rate": 5.3180568573893485e-06, + "loss": 0.12073135375976562, + "step": 50635 + }, + { + "epoch": 0.4378691061901756, + "grad_norm": 1.1584188024942412, + "learning_rate": 5.317927524491092e-06, + "loss": 0.13606719970703124, + "step": 50640 + }, + { + "epoch": 0.4379123397117189, + "grad_norm": 12.13571267724085, + "learning_rate": 5.317798180902758e-06, + "loss": 0.12872390747070311, + "step": 50645 + }, + { + "epoch": 0.4379555732332621, + "grad_norm": 2.033841060618434, + "learning_rate": 5.3176688266249444e-06, + "loss": 0.059993743896484375, + "step": 50650 + }, + { + "epoch": 0.4379988067548054, + "grad_norm": 31.696245405482493, + "learning_rate": 5.3175394616582465e-06, + "loss": 0.199658203125, + "step": 50655 + }, + { + "epoch": 0.4380420402763487, + "grad_norm": 15.843191429195734, + "learning_rate": 5.3174100860032616e-06, + "loss": 0.16993751525878906, + "step": 50660 + }, + { + "epoch": 0.4380852737978919, + "grad_norm": 1.6567408998557343, + "learning_rate": 5.317280699660585e-06, + "loss": 0.1144866943359375, + "step": 50665 + }, + { + "epoch": 0.4381285073194352, + "grad_norm": 0.06507300173604272, + "learning_rate": 5.317151302630816e-06, + "loss": 0.09998931884765624, + "step": 50670 + }, + { + "epoch": 0.4381717408409785, + "grad_norm": 1.0802207300985562, + "learning_rate": 5.317021894914549e-06, + "loss": 0.04766845703125, + "step": 50675 + }, + { + "epoch": 0.4382149743625217, + "grad_norm": 11.272189030815245, + "learning_rate": 5.316892476512383e-06, + "loss": 0.13753776550292968, + "step": 50680 + }, + { + "epoch": 0.438258207884065, + "grad_norm": 5.365732463966222, + "learning_rate": 5.316763047424913e-06, + "loss": 0.463922119140625, + "step": 50685 + }, + { + "epoch": 0.4383014414056082, + "grad_norm": 12.050068603893646, + "learning_rate": 5.316633607652736e-06, + "loss": 0.21111831665039063, + "step": 50690 + }, + { + "epoch": 0.4383446749271515, + "grad_norm": 9.012260489305756, + "learning_rate": 5.31650415719645e-06, + "loss": 0.18983688354492187, + "step": 50695 + }, + { + "epoch": 0.4383879084486948, + "grad_norm": 4.2126778307989206, + "learning_rate": 5.316374696056651e-06, + "loss": 0.722900390625, + "step": 50700 + }, + { + "epoch": 0.438431141970238, + "grad_norm": 4.431763346086748, + "learning_rate": 5.3162452242339375e-06, + "loss": 0.0929931640625, + "step": 50705 + }, + { + "epoch": 0.4384743754917813, + "grad_norm": 4.796084996160768, + "learning_rate": 5.316115741728905e-06, + "loss": 0.099700927734375, + "step": 50710 + }, + { + "epoch": 0.4385176090133246, + "grad_norm": 2.4696412958972003, + "learning_rate": 5.315986248542151e-06, + "loss": 0.12231597900390626, + "step": 50715 + }, + { + "epoch": 0.4385608425348678, + "grad_norm": 27.97594538041676, + "learning_rate": 5.315856744674274e-06, + "loss": 0.138494873046875, + "step": 50720 + }, + { + "epoch": 0.4386040760564111, + "grad_norm": 0.9219352010264996, + "learning_rate": 5.3157272301258705e-06, + "loss": 0.10689315795898438, + "step": 50725 + }, + { + "epoch": 0.43864730957795434, + "grad_norm": 13.302396119533777, + "learning_rate": 5.315597704897537e-06, + "loss": 0.4214984893798828, + "step": 50730 + }, + { + "epoch": 0.4386905430994976, + "grad_norm": 22.508756510167984, + "learning_rate": 5.315468168989871e-06, + "loss": 0.2258575439453125, + "step": 50735 + }, + { + "epoch": 0.4387337766210409, + "grad_norm": 0.43705479684622545, + "learning_rate": 5.315338622403471e-06, + "loss": 0.080499267578125, + "step": 50740 + }, + { + "epoch": 0.43877701014258413, + "grad_norm": 7.571428876035522, + "learning_rate": 5.3152090651389334e-06, + "loss": 0.3026123046875, + "step": 50745 + }, + { + "epoch": 0.4388202436641274, + "grad_norm": 0.7429247378494237, + "learning_rate": 5.315079497196857e-06, + "loss": 0.10843963623046875, + "step": 50750 + }, + { + "epoch": 0.4388634771856707, + "grad_norm": 2.7184430621576916, + "learning_rate": 5.314949918577838e-06, + "loss": 0.079498291015625, + "step": 50755 + }, + { + "epoch": 0.43890671070721393, + "grad_norm": 14.203314616757686, + "learning_rate": 5.314820329282474e-06, + "loss": 0.07061614990234374, + "step": 50760 + }, + { + "epoch": 0.4389499442287572, + "grad_norm": 0.08793088339289515, + "learning_rate": 5.314690729311365e-06, + "loss": 0.32965888977050783, + "step": 50765 + }, + { + "epoch": 0.4389931777503005, + "grad_norm": 2.7142341552908293, + "learning_rate": 5.314561118665105e-06, + "loss": 0.06386642456054688, + "step": 50770 + }, + { + "epoch": 0.43903641127184373, + "grad_norm": 7.586248088140446, + "learning_rate": 5.314431497344295e-06, + "loss": 0.20377578735351562, + "step": 50775 + }, + { + "epoch": 0.439079644793387, + "grad_norm": 3.616536663541019, + "learning_rate": 5.31430186534953e-06, + "loss": 0.053022003173828124, + "step": 50780 + }, + { + "epoch": 0.43912287831493024, + "grad_norm": 18.088407799231486, + "learning_rate": 5.31417222268141e-06, + "loss": 0.46569976806640623, + "step": 50785 + }, + { + "epoch": 0.4391661118364735, + "grad_norm": 19.818968119229176, + "learning_rate": 5.314042569340533e-06, + "loss": 0.16675949096679688, + "step": 50790 + }, + { + "epoch": 0.4392093453580168, + "grad_norm": 1.7681924742742112, + "learning_rate": 5.313912905327495e-06, + "loss": 0.0396270751953125, + "step": 50795 + }, + { + "epoch": 0.43925257887956004, + "grad_norm": 10.242890837967845, + "learning_rate": 5.313783230642896e-06, + "loss": 0.11188278198242188, + "step": 50800 + }, + { + "epoch": 0.4392958124011033, + "grad_norm": 2.3529140204799153, + "learning_rate": 5.313653545287332e-06, + "loss": 0.42287750244140626, + "step": 50805 + }, + { + "epoch": 0.4393390459226466, + "grad_norm": 1.7925272586833951, + "learning_rate": 5.313523849261403e-06, + "loss": 0.0968841552734375, + "step": 50810 + }, + { + "epoch": 0.43938227944418984, + "grad_norm": 6.077870280736835, + "learning_rate": 5.313394142565707e-06, + "loss": 0.1153329849243164, + "step": 50815 + }, + { + "epoch": 0.4394255129657331, + "grad_norm": 34.35869147891601, + "learning_rate": 5.3132644252008404e-06, + "loss": 0.15479793548583984, + "step": 50820 + }, + { + "epoch": 0.43946874648727635, + "grad_norm": 0.18996973419629845, + "learning_rate": 5.313134697167403e-06, + "loss": 0.13140716552734374, + "step": 50825 + }, + { + "epoch": 0.43951198000881964, + "grad_norm": 1.983443145317102, + "learning_rate": 5.313004958465994e-06, + "loss": 0.13344268798828124, + "step": 50830 + }, + { + "epoch": 0.4395552135303629, + "grad_norm": 0.581197727122752, + "learning_rate": 5.31287520909721e-06, + "loss": 0.1320465087890625, + "step": 50835 + }, + { + "epoch": 0.43959844705190615, + "grad_norm": 6.196289622550735, + "learning_rate": 5.31274544906165e-06, + "loss": 0.18962554931640624, + "step": 50840 + }, + { + "epoch": 0.43964168057344943, + "grad_norm": 4.370497936847818, + "learning_rate": 5.312615678359911e-06, + "loss": 0.23608245849609374, + "step": 50845 + }, + { + "epoch": 0.4396849140949927, + "grad_norm": 13.041780661600379, + "learning_rate": 5.312485896992593e-06, + "loss": 0.0866058349609375, + "step": 50850 + }, + { + "epoch": 0.43972814761653595, + "grad_norm": 3.120766872277138, + "learning_rate": 5.312356104960296e-06, + "loss": 0.1169921875, + "step": 50855 + }, + { + "epoch": 0.43977138113807923, + "grad_norm": 0.27130788787107885, + "learning_rate": 5.312226302263616e-06, + "loss": 0.03269500732421875, + "step": 50860 + }, + { + "epoch": 0.43981461465962246, + "grad_norm": 2.267383509568558, + "learning_rate": 5.312096488903153e-06, + "loss": 0.08865814208984375, + "step": 50865 + }, + { + "epoch": 0.43985784818116574, + "grad_norm": 7.124724971512196, + "learning_rate": 5.311966664879504e-06, + "loss": 0.2683601379394531, + "step": 50870 + }, + { + "epoch": 0.43990108170270903, + "grad_norm": 1.870396600995566, + "learning_rate": 5.3118368301932706e-06, + "loss": 0.1708667755126953, + "step": 50875 + }, + { + "epoch": 0.43994431522425226, + "grad_norm": 22.265273542597985, + "learning_rate": 5.3117069848450494e-06, + "loss": 0.24776458740234375, + "step": 50880 + }, + { + "epoch": 0.43998754874579554, + "grad_norm": 27.27290737907795, + "learning_rate": 5.31157712883544e-06, + "loss": 0.42824592590332033, + "step": 50885 + }, + { + "epoch": 0.4400307822673388, + "grad_norm": 16.105678796008977, + "learning_rate": 5.31144726216504e-06, + "loss": 0.0764068603515625, + "step": 50890 + }, + { + "epoch": 0.44007401578888206, + "grad_norm": 19.437055476523657, + "learning_rate": 5.311317384834452e-06, + "loss": 0.28353271484375, + "step": 50895 + }, + { + "epoch": 0.44011724931042534, + "grad_norm": 2.4206133748014627, + "learning_rate": 5.31118749684427e-06, + "loss": 0.05573616027832031, + "step": 50900 + }, + { + "epoch": 0.44016048283196857, + "grad_norm": 0.5329130660418734, + "learning_rate": 5.311057598195096e-06, + "loss": 0.44871063232421876, + "step": 50905 + }, + { + "epoch": 0.44020371635351185, + "grad_norm": 0.7872578563504905, + "learning_rate": 5.3109276888875295e-06, + "loss": 0.266864013671875, + "step": 50910 + }, + { + "epoch": 0.44024694987505514, + "grad_norm": 12.427024382509602, + "learning_rate": 5.3107977689221686e-06, + "loss": 0.170361328125, + "step": 50915 + }, + { + "epoch": 0.44029018339659837, + "grad_norm": 3.140538478278267, + "learning_rate": 5.310667838299612e-06, + "loss": 0.1795318603515625, + "step": 50920 + }, + { + "epoch": 0.44033341691814165, + "grad_norm": 2.1395899038917268, + "learning_rate": 5.31053789702046e-06, + "loss": 0.17689895629882812, + "step": 50925 + }, + { + "epoch": 0.44037665043968494, + "grad_norm": 6.538908862627098, + "learning_rate": 5.31040794508531e-06, + "loss": 0.2477203369140625, + "step": 50930 + }, + { + "epoch": 0.44041988396122816, + "grad_norm": 6.449224920808832, + "learning_rate": 5.310277982494764e-06, + "loss": 0.48250732421875, + "step": 50935 + }, + { + "epoch": 0.44046311748277145, + "grad_norm": 21.55867434247172, + "learning_rate": 5.310148009249419e-06, + "loss": 0.6079833984375, + "step": 50940 + }, + { + "epoch": 0.4405063510043147, + "grad_norm": 15.916474061176146, + "learning_rate": 5.310018025349877e-06, + "loss": 0.23040390014648438, + "step": 50945 + }, + { + "epoch": 0.44054958452585796, + "grad_norm": 73.15011204828177, + "learning_rate": 5.309888030796736e-06, + "loss": 0.632025146484375, + "step": 50950 + }, + { + "epoch": 0.44059281804740125, + "grad_norm": 8.997796021078925, + "learning_rate": 5.309758025590595e-06, + "loss": 0.06751708984375, + "step": 50955 + }, + { + "epoch": 0.4406360515689445, + "grad_norm": 29.871982403390632, + "learning_rate": 5.3096280097320535e-06, + "loss": 0.16984786987304687, + "step": 50960 + }, + { + "epoch": 0.44067928509048776, + "grad_norm": 16.18630865641065, + "learning_rate": 5.309497983221712e-06, + "loss": 0.40474853515625, + "step": 50965 + }, + { + "epoch": 0.44072251861203104, + "grad_norm": 9.70507931976622, + "learning_rate": 5.30936794606017e-06, + "loss": 0.3509796142578125, + "step": 50970 + }, + { + "epoch": 0.4407657521335743, + "grad_norm": 8.024828888692673, + "learning_rate": 5.3092378982480265e-06, + "loss": 0.1780242919921875, + "step": 50975 + }, + { + "epoch": 0.44080898565511756, + "grad_norm": 7.253293287072224, + "learning_rate": 5.309107839785883e-06, + "loss": 0.4049388885498047, + "step": 50980 + }, + { + "epoch": 0.44085221917666084, + "grad_norm": 0.6686394408399562, + "learning_rate": 5.308977770674337e-06, + "loss": 0.31998138427734374, + "step": 50985 + }, + { + "epoch": 0.44089545269820407, + "grad_norm": 5.059077145524216, + "learning_rate": 5.30884769091399e-06, + "loss": 0.42063217163085936, + "step": 50990 + }, + { + "epoch": 0.44093868621974736, + "grad_norm": 7.134241392448302, + "learning_rate": 5.308717600505442e-06, + "loss": 0.14644737243652345, + "step": 50995 + }, + { + "epoch": 0.4409819197412906, + "grad_norm": 37.615062746422495, + "learning_rate": 5.3085874994492925e-06, + "loss": 0.18441619873046874, + "step": 51000 + }, + { + "epoch": 0.44102515326283387, + "grad_norm": 37.182447531513134, + "learning_rate": 5.308457387746141e-06, + "loss": 0.37697563171386717, + "step": 51005 + }, + { + "epoch": 0.44106838678437715, + "grad_norm": 0.5020365417661562, + "learning_rate": 5.308327265396587e-06, + "loss": 0.083551025390625, + "step": 51010 + }, + { + "epoch": 0.4411116203059204, + "grad_norm": 0.8969209448142316, + "learning_rate": 5.308197132401233e-06, + "loss": 0.07761516571044921, + "step": 51015 + }, + { + "epoch": 0.44115485382746367, + "grad_norm": 3.280551626002569, + "learning_rate": 5.308066988760677e-06, + "loss": 0.061229705810546875, + "step": 51020 + }, + { + "epoch": 0.44119808734900695, + "grad_norm": 0.5056841260103293, + "learning_rate": 5.307936834475522e-06, + "loss": 0.326617431640625, + "step": 51025 + }, + { + "epoch": 0.4412413208705502, + "grad_norm": 16.63873637293999, + "learning_rate": 5.307806669546364e-06, + "loss": 0.30654296875, + "step": 51030 + }, + { + "epoch": 0.44128455439209346, + "grad_norm": 8.283826542803855, + "learning_rate": 5.307676493973806e-06, + "loss": 0.09117202758789063, + "step": 51035 + }, + { + "epoch": 0.4413277879136367, + "grad_norm": 2.636895080320321, + "learning_rate": 5.307546307758448e-06, + "loss": 0.19113426208496093, + "step": 51040 + }, + { + "epoch": 0.44137102143518, + "grad_norm": 0.3361272473845844, + "learning_rate": 5.30741611090089e-06, + "loss": 0.1217041015625, + "step": 51045 + }, + { + "epoch": 0.44141425495672326, + "grad_norm": 1.6890806273948054, + "learning_rate": 5.3072859034017335e-06, + "loss": 0.38533172607421873, + "step": 51050 + }, + { + "epoch": 0.4414574884782665, + "grad_norm": 6.355632534245113, + "learning_rate": 5.307155685261578e-06, + "loss": 0.07388687133789062, + "step": 51055 + }, + { + "epoch": 0.4415007219998098, + "grad_norm": 43.96704395001065, + "learning_rate": 5.307025456481024e-06, + "loss": 0.24201736450195313, + "step": 51060 + }, + { + "epoch": 0.44154395552135306, + "grad_norm": 4.78206318096377, + "learning_rate": 5.306895217060674e-06, + "loss": 0.10308799743652344, + "step": 51065 + }, + { + "epoch": 0.4415871890428963, + "grad_norm": 0.5062048166161857, + "learning_rate": 5.306764967001125e-06, + "loss": 0.016801834106445312, + "step": 51070 + }, + { + "epoch": 0.4416304225644396, + "grad_norm": 11.031333980315736, + "learning_rate": 5.306634706302981e-06, + "loss": 0.05809783935546875, + "step": 51075 + }, + { + "epoch": 0.4416736560859828, + "grad_norm": 13.166804776624549, + "learning_rate": 5.306504434966841e-06, + "loss": 0.12495269775390624, + "step": 51080 + }, + { + "epoch": 0.4417168896075261, + "grad_norm": 17.251687590597523, + "learning_rate": 5.306374152993307e-06, + "loss": 0.2839202880859375, + "step": 51085 + }, + { + "epoch": 0.44176012312906937, + "grad_norm": 0.6297572801095191, + "learning_rate": 5.306243860382978e-06, + "loss": 0.07535858154296875, + "step": 51090 + }, + { + "epoch": 0.4418033566506126, + "grad_norm": 2.291916781758061, + "learning_rate": 5.306113557136458e-06, + "loss": 0.2573066711425781, + "step": 51095 + }, + { + "epoch": 0.4418465901721559, + "grad_norm": 19.59763429491133, + "learning_rate": 5.3059832432543445e-06, + "loss": 0.1966278076171875, + "step": 51100 + }, + { + "epoch": 0.44188982369369917, + "grad_norm": 27.30672773549893, + "learning_rate": 5.3058529187372405e-06, + "loss": 0.2265380859375, + "step": 51105 + }, + { + "epoch": 0.4419330572152424, + "grad_norm": 2.2299847475606835, + "learning_rate": 5.305722583585747e-06, + "loss": 0.04725341796875, + "step": 51110 + }, + { + "epoch": 0.4419762907367857, + "grad_norm": 2.398129129549824, + "learning_rate": 5.305592237800463e-06, + "loss": 0.25234832763671877, + "step": 51115 + }, + { + "epoch": 0.4420195242583289, + "grad_norm": 2.1031302423077953, + "learning_rate": 5.305461881381994e-06, + "loss": 0.16078338623046876, + "step": 51120 + }, + { + "epoch": 0.4420627577798722, + "grad_norm": 4.290460707983218, + "learning_rate": 5.305331514330936e-06, + "loss": 0.05900421142578125, + "step": 51125 + }, + { + "epoch": 0.4421059913014155, + "grad_norm": 0.8849419753650044, + "learning_rate": 5.305201136647894e-06, + "loss": 0.04004974365234375, + "step": 51130 + }, + { + "epoch": 0.4421492248229587, + "grad_norm": 6.051344038470659, + "learning_rate": 5.305070748333468e-06, + "loss": 0.0945709228515625, + "step": 51135 + }, + { + "epoch": 0.442192458344502, + "grad_norm": 11.776377187895841, + "learning_rate": 5.304940349388259e-06, + "loss": 0.2165313720703125, + "step": 51140 + }, + { + "epoch": 0.4422356918660453, + "grad_norm": 32.22475912732325, + "learning_rate": 5.30480993981287e-06, + "loss": 0.33037567138671875, + "step": 51145 + }, + { + "epoch": 0.4422789253875885, + "grad_norm": 3.3845415164254744, + "learning_rate": 5.3046795196079e-06, + "loss": 0.10305023193359375, + "step": 51150 + }, + { + "epoch": 0.4423221589091318, + "grad_norm": 0.702298120057108, + "learning_rate": 5.304549088773951e-06, + "loss": 0.18818435668945313, + "step": 51155 + }, + { + "epoch": 0.4423653924306751, + "grad_norm": 44.1214164074273, + "learning_rate": 5.304418647311627e-06, + "loss": 0.286395263671875, + "step": 51160 + }, + { + "epoch": 0.4424086259522183, + "grad_norm": 5.059534693963146, + "learning_rate": 5.304288195221527e-06, + "loss": 0.03648300170898437, + "step": 51165 + }, + { + "epoch": 0.4424518594737616, + "grad_norm": 10.515432393397322, + "learning_rate": 5.3041577325042535e-06, + "loss": 0.28095703125, + "step": 51170 + }, + { + "epoch": 0.4424950929953048, + "grad_norm": 0.4707047819747574, + "learning_rate": 5.304027259160409e-06, + "loss": 0.16674156188964845, + "step": 51175 + }, + { + "epoch": 0.4425383265168481, + "grad_norm": 11.826687648220785, + "learning_rate": 5.303896775190593e-06, + "loss": 0.06574745178222656, + "step": 51180 + }, + { + "epoch": 0.4425815600383914, + "grad_norm": 9.339281095176785, + "learning_rate": 5.303766280595409e-06, + "loss": 0.1713470458984375, + "step": 51185 + }, + { + "epoch": 0.4426247935599346, + "grad_norm": 1.056078355971625, + "learning_rate": 5.3036357753754586e-06, + "loss": 0.038124370574951175, + "step": 51190 + }, + { + "epoch": 0.4426680270814779, + "grad_norm": 0.4796617175573511, + "learning_rate": 5.303505259531344e-06, + "loss": 0.22856292724609376, + "step": 51195 + }, + { + "epoch": 0.4427112606030212, + "grad_norm": 53.70384482182128, + "learning_rate": 5.303374733063666e-06, + "loss": 0.3397979736328125, + "step": 51200 + }, + { + "epoch": 0.4427544941245644, + "grad_norm": 8.320791006872378, + "learning_rate": 5.303244195973027e-06, + "loss": 0.222113037109375, + "step": 51205 + }, + { + "epoch": 0.4427977276461077, + "grad_norm": 19.28605313032439, + "learning_rate": 5.3031136482600294e-06, + "loss": 0.14373817443847656, + "step": 51210 + }, + { + "epoch": 0.4428409611676509, + "grad_norm": 13.267362403088635, + "learning_rate": 5.302983089925277e-06, + "loss": 0.11071319580078125, + "step": 51215 + }, + { + "epoch": 0.4428841946891942, + "grad_norm": 7.35107761950081, + "learning_rate": 5.302852520969368e-06, + "loss": 0.173028564453125, + "step": 51220 + }, + { + "epoch": 0.4429274282107375, + "grad_norm": 14.974267865088962, + "learning_rate": 5.302721941392907e-06, + "loss": 0.13161163330078124, + "step": 51225 + }, + { + "epoch": 0.4429706617322807, + "grad_norm": 27.036335120460667, + "learning_rate": 5.302591351196496e-06, + "loss": 0.11909866333007812, + "step": 51230 + }, + { + "epoch": 0.443013895253824, + "grad_norm": 12.724402931670616, + "learning_rate": 5.302460750380738e-06, + "loss": 0.1982940673828125, + "step": 51235 + }, + { + "epoch": 0.4430571287753673, + "grad_norm": 0.4905116467581636, + "learning_rate": 5.302330138946233e-06, + "loss": 0.31932220458984373, + "step": 51240 + }, + { + "epoch": 0.4431003622969105, + "grad_norm": 61.82271555073896, + "learning_rate": 5.302199516893586e-06, + "loss": 0.4447021484375, + "step": 51245 + }, + { + "epoch": 0.4431435958184538, + "grad_norm": 20.9034564671512, + "learning_rate": 5.302068884223398e-06, + "loss": 0.2771446228027344, + "step": 51250 + }, + { + "epoch": 0.44318682933999703, + "grad_norm": 14.633956087065524, + "learning_rate": 5.301938240936271e-06, + "loss": 0.1411865234375, + "step": 51255 + }, + { + "epoch": 0.4432300628615403, + "grad_norm": 7.152975923824578, + "learning_rate": 5.30180758703281e-06, + "loss": 0.09401779174804688, + "step": 51260 + }, + { + "epoch": 0.4432732963830836, + "grad_norm": 2.022455925190351, + "learning_rate": 5.301676922513614e-06, + "loss": 0.06562652587890624, + "step": 51265 + }, + { + "epoch": 0.44331652990462683, + "grad_norm": 0.39166424113440435, + "learning_rate": 5.301546247379288e-06, + "loss": 0.01771240234375, + "step": 51270 + }, + { + "epoch": 0.4433597634261701, + "grad_norm": 1.1874487670148302, + "learning_rate": 5.3014155616304345e-06, + "loss": 0.27802581787109376, + "step": 51275 + }, + { + "epoch": 0.4434029969477134, + "grad_norm": 14.472657849663017, + "learning_rate": 5.301284865267655e-06, + "loss": 0.14030532836914061, + "step": 51280 + }, + { + "epoch": 0.44344623046925663, + "grad_norm": 4.882973733400918, + "learning_rate": 5.301154158291553e-06, + "loss": 0.0362091064453125, + "step": 51285 + }, + { + "epoch": 0.4434894639907999, + "grad_norm": 3.6151905000646485, + "learning_rate": 5.301023440702732e-06, + "loss": 0.3492450714111328, + "step": 51290 + }, + { + "epoch": 0.44353269751234314, + "grad_norm": 3.068768103414985, + "learning_rate": 5.300892712501794e-06, + "loss": 0.21491622924804688, + "step": 51295 + }, + { + "epoch": 0.44357593103388643, + "grad_norm": 3.1688137757289194, + "learning_rate": 5.300761973689342e-06, + "loss": 0.082720947265625, + "step": 51300 + }, + { + "epoch": 0.4436191645554297, + "grad_norm": 21.127463355667363, + "learning_rate": 5.30063122426598e-06, + "loss": 0.36368255615234374, + "step": 51305 + }, + { + "epoch": 0.44366239807697294, + "grad_norm": 0.44263996848309034, + "learning_rate": 5.300500464232309e-06, + "loss": 0.1103912353515625, + "step": 51310 + }, + { + "epoch": 0.4437056315985162, + "grad_norm": 7.489850092941041, + "learning_rate": 5.300369693588934e-06, + "loss": 0.077203369140625, + "step": 51315 + }, + { + "epoch": 0.4437488651200595, + "grad_norm": 25.325269658404835, + "learning_rate": 5.300238912336456e-06, + "loss": 0.22301788330078126, + "step": 51320 + }, + { + "epoch": 0.44379209864160274, + "grad_norm": 1.3462868086375015, + "learning_rate": 5.30010812047548e-06, + "loss": 0.17993392944335937, + "step": 51325 + }, + { + "epoch": 0.443835332163146, + "grad_norm": 19.848336516823068, + "learning_rate": 5.299977318006609e-06, + "loss": 0.33089141845703124, + "step": 51330 + }, + { + "epoch": 0.4438785656846893, + "grad_norm": 22.018854296149712, + "learning_rate": 5.299846504930445e-06, + "loss": 0.4886589050292969, + "step": 51335 + }, + { + "epoch": 0.44392179920623254, + "grad_norm": 1.1932524042871684, + "learning_rate": 5.299715681247593e-06, + "loss": 0.32379302978515623, + "step": 51340 + }, + { + "epoch": 0.4439650327277758, + "grad_norm": 10.04167639406654, + "learning_rate": 5.299584846958655e-06, + "loss": 0.1892578125, + "step": 51345 + }, + { + "epoch": 0.44400826624931905, + "grad_norm": 8.903941896608766, + "learning_rate": 5.299454002064235e-06, + "loss": 0.12322235107421875, + "step": 51350 + }, + { + "epoch": 0.44405149977086233, + "grad_norm": 2.5257755264581947, + "learning_rate": 5.299323146564936e-06, + "loss": 0.30629425048828124, + "step": 51355 + }, + { + "epoch": 0.4440947332924056, + "grad_norm": 0.6825936185455448, + "learning_rate": 5.299192280461363e-06, + "loss": 0.03407745361328125, + "step": 51360 + }, + { + "epoch": 0.44413796681394885, + "grad_norm": 25.949702960042597, + "learning_rate": 5.299061403754117e-06, + "loss": 0.22379188537597655, + "step": 51365 + }, + { + "epoch": 0.44418120033549213, + "grad_norm": 10.217422635554385, + "learning_rate": 5.298930516443803e-06, + "loss": 0.11193714141845704, + "step": 51370 + }, + { + "epoch": 0.4442244338570354, + "grad_norm": 45.42320317669624, + "learning_rate": 5.298799618531025e-06, + "loss": 0.21117181777954103, + "step": 51375 + }, + { + "epoch": 0.44426766737857865, + "grad_norm": 6.298176454230298, + "learning_rate": 5.2986687100163875e-06, + "loss": 0.04349517822265625, + "step": 51380 + }, + { + "epoch": 0.44431090090012193, + "grad_norm": 34.01685341362304, + "learning_rate": 5.298537790900491e-06, + "loss": 0.16528778076171874, + "step": 51385 + }, + { + "epoch": 0.44435413442166516, + "grad_norm": 8.334510460988255, + "learning_rate": 5.298406861183943e-06, + "loss": 0.184393310546875, + "step": 51390 + }, + { + "epoch": 0.44439736794320844, + "grad_norm": 19.625662551393656, + "learning_rate": 5.2982759208673444e-06, + "loss": 0.2308380126953125, + "step": 51395 + }, + { + "epoch": 0.44444060146475173, + "grad_norm": 7.84507811628457, + "learning_rate": 5.298144969951301e-06, + "loss": 0.09400558471679688, + "step": 51400 + }, + { + "epoch": 0.44448383498629496, + "grad_norm": 5.838289286283415, + "learning_rate": 5.298014008436416e-06, + "loss": 0.30460052490234374, + "step": 51405 + }, + { + "epoch": 0.44452706850783824, + "grad_norm": 17.755209272729562, + "learning_rate": 5.2978830363232926e-06, + "loss": 0.43386383056640626, + "step": 51410 + }, + { + "epoch": 0.4445703020293815, + "grad_norm": 6.1204994357932465, + "learning_rate": 5.297752053612537e-06, + "loss": 0.0580078125, + "step": 51415 + }, + { + "epoch": 0.44461353555092475, + "grad_norm": 0.19203558639782176, + "learning_rate": 5.297621060304751e-06, + "loss": 0.13881607055664064, + "step": 51420 + }, + { + "epoch": 0.44465676907246804, + "grad_norm": 5.959725991453586, + "learning_rate": 5.29749005640054e-06, + "loss": 0.27552146911621095, + "step": 51425 + }, + { + "epoch": 0.44470000259401127, + "grad_norm": 1.8154718776290584, + "learning_rate": 5.297359041900508e-06, + "loss": 0.13143463134765626, + "step": 51430 + }, + { + "epoch": 0.44474323611555455, + "grad_norm": 1.4797843444230048, + "learning_rate": 5.297228016805259e-06, + "loss": 0.12205810546875, + "step": 51435 + }, + { + "epoch": 0.44478646963709784, + "grad_norm": 10.466761874752045, + "learning_rate": 5.297096981115398e-06, + "loss": 0.32126197814941404, + "step": 51440 + }, + { + "epoch": 0.44482970315864107, + "grad_norm": 20.9093863095009, + "learning_rate": 5.296965934831528e-06, + "loss": 0.08914566040039062, + "step": 51445 + }, + { + "epoch": 0.44487293668018435, + "grad_norm": 28.84059372334747, + "learning_rate": 5.296834877954253e-06, + "loss": 0.289447021484375, + "step": 51450 + }, + { + "epoch": 0.44491617020172763, + "grad_norm": 51.73396766860297, + "learning_rate": 5.29670381048418e-06, + "loss": 0.3655670166015625, + "step": 51455 + }, + { + "epoch": 0.44495940372327086, + "grad_norm": 21.89490390161203, + "learning_rate": 5.296572732421911e-06, + "loss": 0.23726806640625, + "step": 51460 + }, + { + "epoch": 0.44500263724481415, + "grad_norm": 4.9796969925762085, + "learning_rate": 5.296441643768053e-06, + "loss": 0.11979217529296875, + "step": 51465 + }, + { + "epoch": 0.4450458707663574, + "grad_norm": 7.633254724784863, + "learning_rate": 5.296310544523208e-06, + "loss": 0.0685455322265625, + "step": 51470 + }, + { + "epoch": 0.44508910428790066, + "grad_norm": 4.533118260031235, + "learning_rate": 5.296179434687982e-06, + "loss": 0.23627548217773436, + "step": 51475 + }, + { + "epoch": 0.44513233780944395, + "grad_norm": 6.38979819389903, + "learning_rate": 5.296048314262979e-06, + "loss": 0.12304840087890626, + "step": 51480 + }, + { + "epoch": 0.4451755713309872, + "grad_norm": 12.885066302624708, + "learning_rate": 5.295917183248804e-06, + "loss": 0.071514892578125, + "step": 51485 + }, + { + "epoch": 0.44521880485253046, + "grad_norm": 4.876629851755953, + "learning_rate": 5.2957860416460615e-06, + "loss": 0.143359375, + "step": 51490 + }, + { + "epoch": 0.44526203837407374, + "grad_norm": 16.376385564894797, + "learning_rate": 5.295654889455357e-06, + "loss": 0.14546356201171876, + "step": 51495 + }, + { + "epoch": 0.44530527189561697, + "grad_norm": 17.41524140827338, + "learning_rate": 5.295523726677296e-06, + "loss": 0.2609870910644531, + "step": 51500 + }, + { + "epoch": 0.44534850541716026, + "grad_norm": 1.9707902127478518, + "learning_rate": 5.295392553312481e-06, + "loss": 0.2588615417480469, + "step": 51505 + }, + { + "epoch": 0.44539173893870354, + "grad_norm": 3.3653144183036305, + "learning_rate": 5.2952613693615195e-06, + "loss": 0.3116935729980469, + "step": 51510 + }, + { + "epoch": 0.44543497246024677, + "grad_norm": 2.026222628512586, + "learning_rate": 5.295130174825014e-06, + "loss": 0.18980865478515624, + "step": 51515 + }, + { + "epoch": 0.44547820598179005, + "grad_norm": 0.9820564352841479, + "learning_rate": 5.294998969703572e-06, + "loss": 0.1171356201171875, + "step": 51520 + }, + { + "epoch": 0.4455214395033333, + "grad_norm": 8.555430992066192, + "learning_rate": 5.294867753997797e-06, + "loss": 0.29292755126953124, + "step": 51525 + }, + { + "epoch": 0.44556467302487657, + "grad_norm": 10.302286643871422, + "learning_rate": 5.294736527708295e-06, + "loss": 0.47065582275390627, + "step": 51530 + }, + { + "epoch": 0.44560790654641985, + "grad_norm": 11.051632271752018, + "learning_rate": 5.294605290835671e-06, + "loss": 0.220880126953125, + "step": 51535 + }, + { + "epoch": 0.4456511400679631, + "grad_norm": 1.0451071763061548, + "learning_rate": 5.29447404338053e-06, + "loss": 0.03289031982421875, + "step": 51540 + }, + { + "epoch": 0.44569437358950637, + "grad_norm": 37.07533284730781, + "learning_rate": 5.294342785343478e-06, + "loss": 0.367535400390625, + "step": 51545 + }, + { + "epoch": 0.44573760711104965, + "grad_norm": 2.925722033623206, + "learning_rate": 5.2942115167251186e-06, + "loss": 0.2630462646484375, + "step": 51550 + }, + { + "epoch": 0.4457808406325929, + "grad_norm": 2.3644782201006973, + "learning_rate": 5.294080237526059e-06, + "loss": 0.06363677978515625, + "step": 51555 + }, + { + "epoch": 0.44582407415413616, + "grad_norm": 3.200410127491267, + "learning_rate": 5.293948947746905e-06, + "loss": 0.070947265625, + "step": 51560 + }, + { + "epoch": 0.4458673076756794, + "grad_norm": 22.560256938701098, + "learning_rate": 5.29381764738826e-06, + "loss": 0.160687255859375, + "step": 51565 + }, + { + "epoch": 0.4459105411972227, + "grad_norm": 3.3440982188974835, + "learning_rate": 5.293686336450731e-06, + "loss": 0.207818603515625, + "step": 51570 + }, + { + "epoch": 0.44595377471876596, + "grad_norm": 0.24969152057492938, + "learning_rate": 5.293555014934923e-06, + "loss": 0.30934715270996094, + "step": 51575 + }, + { + "epoch": 0.4459970082403092, + "grad_norm": 27.917228844896414, + "learning_rate": 5.293423682841442e-06, + "loss": 0.1761035919189453, + "step": 51580 + }, + { + "epoch": 0.4460402417618525, + "grad_norm": 0.954344902282546, + "learning_rate": 5.293292340170894e-06, + "loss": 0.058380126953125, + "step": 51585 + }, + { + "epoch": 0.44608347528339576, + "grad_norm": 36.199629267441914, + "learning_rate": 5.293160986923884e-06, + "loss": 0.29836273193359375, + "step": 51590 + }, + { + "epoch": 0.446126708804939, + "grad_norm": 11.055544248573343, + "learning_rate": 5.293029623101018e-06, + "loss": 0.119927978515625, + "step": 51595 + }, + { + "epoch": 0.44616994232648227, + "grad_norm": 31.308363492046595, + "learning_rate": 5.292898248702904e-06, + "loss": 0.2210906982421875, + "step": 51600 + }, + { + "epoch": 0.4462131758480255, + "grad_norm": 8.583608633157976, + "learning_rate": 5.292766863730143e-06, + "loss": 0.10447883605957031, + "step": 51605 + }, + { + "epoch": 0.4462564093695688, + "grad_norm": 9.104062823329867, + "learning_rate": 5.292635468183345e-06, + "loss": 0.49171295166015627, + "step": 51610 + }, + { + "epoch": 0.44629964289111207, + "grad_norm": 1.092670508551557, + "learning_rate": 5.292504062063115e-06, + "loss": 0.21410741806030273, + "step": 51615 + }, + { + "epoch": 0.4463428764126553, + "grad_norm": 11.459699850513275, + "learning_rate": 5.292372645370059e-06, + "loss": 0.0687957763671875, + "step": 51620 + }, + { + "epoch": 0.4463861099341986, + "grad_norm": 39.95440576501855, + "learning_rate": 5.292241218104782e-06, + "loss": 0.3255706787109375, + "step": 51625 + }, + { + "epoch": 0.44642934345574187, + "grad_norm": 10.444616107997705, + "learning_rate": 5.292109780267892e-06, + "loss": 0.14816360473632811, + "step": 51630 + }, + { + "epoch": 0.4464725769772851, + "grad_norm": 2.6325802581728164, + "learning_rate": 5.291978331859994e-06, + "loss": 0.5240997314453125, + "step": 51635 + }, + { + "epoch": 0.4465158104988284, + "grad_norm": 4.657560918203397, + "learning_rate": 5.291846872881694e-06, + "loss": 0.181610107421875, + "step": 51640 + }, + { + "epoch": 0.4465590440203716, + "grad_norm": 3.388697329556475, + "learning_rate": 5.2917154033335994e-06, + "loss": 0.03619537353515625, + "step": 51645 + }, + { + "epoch": 0.4466022775419149, + "grad_norm": 31.02451135196828, + "learning_rate": 5.291583923216315e-06, + "loss": 0.17789306640625, + "step": 51650 + }, + { + "epoch": 0.4466455110634582, + "grad_norm": 9.02099762598934, + "learning_rate": 5.291452432530449e-06, + "loss": 0.058953857421875, + "step": 51655 + }, + { + "epoch": 0.4466887445850014, + "grad_norm": 3.4802851333877736, + "learning_rate": 5.2913209312766065e-06, + "loss": 0.30171890258789064, + "step": 51660 + }, + { + "epoch": 0.4467319781065447, + "grad_norm": 8.833432138053476, + "learning_rate": 5.291189419455394e-06, + "loss": 0.24147777557373046, + "step": 51665 + }, + { + "epoch": 0.446775211628088, + "grad_norm": 0.3674571137083815, + "learning_rate": 5.291057897067419e-06, + "loss": 0.059112548828125, + "step": 51670 + }, + { + "epoch": 0.4468184451496312, + "grad_norm": 6.524686581006485, + "learning_rate": 5.290926364113286e-06, + "loss": 0.052593231201171875, + "step": 51675 + }, + { + "epoch": 0.4468616786711745, + "grad_norm": 8.905330526737979, + "learning_rate": 5.290794820593605e-06, + "loss": 0.20643157958984376, + "step": 51680 + }, + { + "epoch": 0.4469049121927177, + "grad_norm": 6.689786093760969, + "learning_rate": 5.290663266508979e-06, + "loss": 0.1128448486328125, + "step": 51685 + }, + { + "epoch": 0.446948145714261, + "grad_norm": 0.33233324103965217, + "learning_rate": 5.290531701860017e-06, + "loss": 0.1892017364501953, + "step": 51690 + }, + { + "epoch": 0.4469913792358043, + "grad_norm": 29.979401351808157, + "learning_rate": 5.2904001266473255e-06, + "loss": 0.343896484375, + "step": 51695 + }, + { + "epoch": 0.4470346127573475, + "grad_norm": 11.547544496031518, + "learning_rate": 5.290268540871511e-06, + "loss": 0.13920440673828124, + "step": 51700 + }, + { + "epoch": 0.4470778462788908, + "grad_norm": 10.683751170679626, + "learning_rate": 5.290136944533181e-06, + "loss": 0.16014556884765624, + "step": 51705 + }, + { + "epoch": 0.4471210798004341, + "grad_norm": 9.529598538890982, + "learning_rate": 5.290005337632941e-06, + "loss": 0.1525665283203125, + "step": 51710 + }, + { + "epoch": 0.4471643133219773, + "grad_norm": 0.4566580890472328, + "learning_rate": 5.2898737201713975e-06, + "loss": 0.18934860229492187, + "step": 51715 + }, + { + "epoch": 0.4472075468435206, + "grad_norm": 5.807348749179954, + "learning_rate": 5.289742092149161e-06, + "loss": 0.18888931274414061, + "step": 51720 + }, + { + "epoch": 0.4472507803650639, + "grad_norm": 4.320252569136774, + "learning_rate": 5.289610453566834e-06, + "loss": 0.13046722412109374, + "step": 51725 + }, + { + "epoch": 0.4472940138866071, + "grad_norm": 3.4398990564343292, + "learning_rate": 5.289478804425028e-06, + "loss": 0.34952621459960936, + "step": 51730 + }, + { + "epoch": 0.4473372474081504, + "grad_norm": 10.766005861462274, + "learning_rate": 5.289347144724347e-06, + "loss": 0.16774520874023438, + "step": 51735 + }, + { + "epoch": 0.4473804809296936, + "grad_norm": 28.209586511819932, + "learning_rate": 5.2892154744654e-06, + "loss": 0.391607666015625, + "step": 51740 + }, + { + "epoch": 0.4474237144512369, + "grad_norm": 33.50181146380638, + "learning_rate": 5.289083793648792e-06, + "loss": 0.45179290771484376, + "step": 51745 + }, + { + "epoch": 0.4474669479727802, + "grad_norm": 13.563677339178536, + "learning_rate": 5.288952102275134e-06, + "loss": 0.0981109619140625, + "step": 51750 + }, + { + "epoch": 0.4475101814943234, + "grad_norm": 8.299016924665587, + "learning_rate": 5.288820400345029e-06, + "loss": 0.12227630615234375, + "step": 51755 + }, + { + "epoch": 0.4475534150158667, + "grad_norm": 2.2027342231269547, + "learning_rate": 5.288688687859088e-06, + "loss": 0.11338310241699219, + "step": 51760 + }, + { + "epoch": 0.44759664853741, + "grad_norm": 2.3811438108606002, + "learning_rate": 5.288556964817917e-06, + "loss": 0.09878616333007813, + "step": 51765 + }, + { + "epoch": 0.4476398820589532, + "grad_norm": 10.496975441731355, + "learning_rate": 5.288425231222122e-06, + "loss": 0.06291046142578124, + "step": 51770 + }, + { + "epoch": 0.4476831155804965, + "grad_norm": 3.6795895896199804, + "learning_rate": 5.288293487072313e-06, + "loss": 0.03028564453125, + "step": 51775 + }, + { + "epoch": 0.44772634910203973, + "grad_norm": 9.79109350207462, + "learning_rate": 5.288161732369097e-06, + "loss": 0.232611083984375, + "step": 51780 + }, + { + "epoch": 0.447769582623583, + "grad_norm": 52.95416003431599, + "learning_rate": 5.288029967113081e-06, + "loss": 0.31653289794921874, + "step": 51785 + }, + { + "epoch": 0.4478128161451263, + "grad_norm": 0.2637442643639685, + "learning_rate": 5.287898191304873e-06, + "loss": 0.07521400451660157, + "step": 51790 + }, + { + "epoch": 0.44785604966666953, + "grad_norm": 3.2131903700803286, + "learning_rate": 5.2877664049450805e-06, + "loss": 0.140533447265625, + "step": 51795 + }, + { + "epoch": 0.4478992831882128, + "grad_norm": 30.44777784485469, + "learning_rate": 5.287634608034312e-06, + "loss": 0.243896484375, + "step": 51800 + }, + { + "epoch": 0.4479425167097561, + "grad_norm": 49.88362369926768, + "learning_rate": 5.287502800573174e-06, + "loss": 0.4409534454345703, + "step": 51805 + }, + { + "epoch": 0.44798575023129933, + "grad_norm": 6.083336963417873, + "learning_rate": 5.287370982562276e-06, + "loss": 0.4169647216796875, + "step": 51810 + }, + { + "epoch": 0.4480289837528426, + "grad_norm": 13.534570190579, + "learning_rate": 5.287239154002225e-06, + "loss": 0.085400390625, + "step": 51815 + }, + { + "epoch": 0.44807221727438584, + "grad_norm": 18.156149476570725, + "learning_rate": 5.287107314893629e-06, + "loss": 0.18821640014648439, + "step": 51820 + }, + { + "epoch": 0.4481154507959291, + "grad_norm": 1.1900066760306673, + "learning_rate": 5.286975465237096e-06, + "loss": 0.1536529541015625, + "step": 51825 + }, + { + "epoch": 0.4481586843174724, + "grad_norm": 1.4563462603460073, + "learning_rate": 5.286843605033234e-06, + "loss": 0.25012054443359377, + "step": 51830 + }, + { + "epoch": 0.44820191783901564, + "grad_norm": 2.3366796269185994, + "learning_rate": 5.286711734282652e-06, + "loss": 0.082684326171875, + "step": 51835 + }, + { + "epoch": 0.4482451513605589, + "grad_norm": 1.5187879540694416, + "learning_rate": 5.286579852985956e-06, + "loss": 0.231524658203125, + "step": 51840 + }, + { + "epoch": 0.4482883848821022, + "grad_norm": 14.41095618284111, + "learning_rate": 5.286447961143758e-06, + "loss": 0.1024993896484375, + "step": 51845 + }, + { + "epoch": 0.44833161840364544, + "grad_norm": 2.9895315060862235, + "learning_rate": 5.286316058756662e-06, + "loss": 0.2674095153808594, + "step": 51850 + }, + { + "epoch": 0.4483748519251887, + "grad_norm": 29.160104343408847, + "learning_rate": 5.28618414582528e-06, + "loss": 0.1644073486328125, + "step": 51855 + }, + { + "epoch": 0.44841808544673195, + "grad_norm": 2.906195106759367, + "learning_rate": 5.286052222350218e-06, + "loss": 0.0252593994140625, + "step": 51860 + }, + { + "epoch": 0.44846131896827524, + "grad_norm": 4.434327540100117, + "learning_rate": 5.285920288332086e-06, + "loss": 0.2224609375, + "step": 51865 + }, + { + "epoch": 0.4485045524898185, + "grad_norm": 0.5550506179577476, + "learning_rate": 5.28578834377149e-06, + "loss": 0.23480491638183593, + "step": 51870 + }, + { + "epoch": 0.44854778601136175, + "grad_norm": 6.697031659942536, + "learning_rate": 5.285656388669041e-06, + "loss": 0.218890380859375, + "step": 51875 + }, + { + "epoch": 0.44859101953290503, + "grad_norm": 9.304560481644765, + "learning_rate": 5.285524423025347e-06, + "loss": 0.2789501190185547, + "step": 51880 + }, + { + "epoch": 0.4486342530544483, + "grad_norm": 6.981814239271641, + "learning_rate": 5.285392446841016e-06, + "loss": 0.0569091796875, + "step": 51885 + }, + { + "epoch": 0.44867748657599155, + "grad_norm": 3.189450146731803, + "learning_rate": 5.285260460116658e-06, + "loss": 0.085345458984375, + "step": 51890 + }, + { + "epoch": 0.44872072009753483, + "grad_norm": 9.20776956977403, + "learning_rate": 5.285128462852879e-06, + "loss": 0.05399932861328125, + "step": 51895 + }, + { + "epoch": 0.4487639536190781, + "grad_norm": 1.1509154250707248, + "learning_rate": 5.284996455050289e-06, + "loss": 0.0671142578125, + "step": 51900 + }, + { + "epoch": 0.44880718714062134, + "grad_norm": 11.962775164515566, + "learning_rate": 5.2848644367094995e-06, + "loss": 0.2437713623046875, + "step": 51905 + }, + { + "epoch": 0.44885042066216463, + "grad_norm": 1.5750209191888564, + "learning_rate": 5.284732407831115e-06, + "loss": 0.04903554916381836, + "step": 51910 + }, + { + "epoch": 0.44889365418370786, + "grad_norm": 10.873476763006467, + "learning_rate": 5.284600368415748e-06, + "loss": 0.1598804473876953, + "step": 51915 + }, + { + "epoch": 0.44893688770525114, + "grad_norm": 19.87290573833856, + "learning_rate": 5.284468318464006e-06, + "loss": 0.233148193359375, + "step": 51920 + }, + { + "epoch": 0.4489801212267944, + "grad_norm": 1.620979808791717, + "learning_rate": 5.284336257976497e-06, + "loss": 0.0586212158203125, + "step": 51925 + }, + { + "epoch": 0.44902335474833766, + "grad_norm": 1.7137965457173492, + "learning_rate": 5.284204186953832e-06, + "loss": 0.201690673828125, + "step": 51930 + }, + { + "epoch": 0.44906658826988094, + "grad_norm": 15.088486007885608, + "learning_rate": 5.284072105396618e-06, + "loss": 0.23426513671875, + "step": 51935 + }, + { + "epoch": 0.4491098217914242, + "grad_norm": 28.045672259348922, + "learning_rate": 5.283940013305466e-06, + "loss": 0.200018310546875, + "step": 51940 + }, + { + "epoch": 0.44915305531296745, + "grad_norm": 0.8497710809067327, + "learning_rate": 5.283807910680984e-06, + "loss": 0.09764556884765625, + "step": 51945 + }, + { + "epoch": 0.44919628883451074, + "grad_norm": 46.53292040491562, + "learning_rate": 5.283675797523782e-06, + "loss": 0.6413284301757812, + "step": 51950 + }, + { + "epoch": 0.44923952235605397, + "grad_norm": 116.09687731819012, + "learning_rate": 5.28354367383447e-06, + "loss": 0.15789794921875, + "step": 51955 + }, + { + "epoch": 0.44928275587759725, + "grad_norm": 59.42989889610305, + "learning_rate": 5.283411539613655e-06, + "loss": 0.28061904907226565, + "step": 51960 + }, + { + "epoch": 0.44932598939914054, + "grad_norm": 5.614045971810457, + "learning_rate": 5.283279394861948e-06, + "loss": 0.173077392578125, + "step": 51965 + }, + { + "epoch": 0.44936922292068376, + "grad_norm": 3.438332528076015, + "learning_rate": 5.283147239579959e-06, + "loss": 0.126495361328125, + "step": 51970 + }, + { + "epoch": 0.44941245644222705, + "grad_norm": 1.453356112801442, + "learning_rate": 5.283015073768295e-06, + "loss": 0.14817047119140625, + "step": 51975 + }, + { + "epoch": 0.44945568996377033, + "grad_norm": 14.092978847825089, + "learning_rate": 5.282882897427569e-06, + "loss": 0.13434906005859376, + "step": 51980 + }, + { + "epoch": 0.44949892348531356, + "grad_norm": 0.9038457386643622, + "learning_rate": 5.282750710558388e-06, + "loss": 0.026319122314453124, + "step": 51985 + }, + { + "epoch": 0.44954215700685685, + "grad_norm": 2.587193032176388, + "learning_rate": 5.282618513161362e-06, + "loss": 0.1160980224609375, + "step": 51990 + }, + { + "epoch": 0.4495853905284001, + "grad_norm": 43.91224698339289, + "learning_rate": 5.282486305237102e-06, + "loss": 0.18560562133789063, + "step": 51995 + }, + { + "epoch": 0.44962862404994336, + "grad_norm": 0.0814013631852053, + "learning_rate": 5.282354086786216e-06, + "loss": 0.23006210327148438, + "step": 52000 + }, + { + "epoch": 0.44967185757148664, + "grad_norm": 0.174298220016862, + "learning_rate": 5.282221857809315e-06, + "loss": 0.036550140380859374, + "step": 52005 + }, + { + "epoch": 0.4497150910930299, + "grad_norm": 62.66842251613572, + "learning_rate": 5.282089618307009e-06, + "loss": 0.6867385864257812, + "step": 52010 + }, + { + "epoch": 0.44975832461457316, + "grad_norm": 1.8482774803667221, + "learning_rate": 5.281957368279906e-06, + "loss": 0.0420928955078125, + "step": 52015 + }, + { + "epoch": 0.44980155813611644, + "grad_norm": 2.520928675854837, + "learning_rate": 5.281825107728619e-06, + "loss": 0.4444000244140625, + "step": 52020 + }, + { + "epoch": 0.44984479165765967, + "grad_norm": 1.1544525811188238, + "learning_rate": 5.281692836653755e-06, + "loss": 0.037469482421875, + "step": 52025 + }, + { + "epoch": 0.44988802517920295, + "grad_norm": 18.145778065956303, + "learning_rate": 5.281560555055926e-06, + "loss": 0.276434326171875, + "step": 52030 + }, + { + "epoch": 0.4499312587007462, + "grad_norm": 3.681934669912415, + "learning_rate": 5.28142826293574e-06, + "loss": 0.1809326171875, + "step": 52035 + }, + { + "epoch": 0.44997449222228947, + "grad_norm": 0.7123270241625956, + "learning_rate": 5.28129596029381e-06, + "loss": 0.15504150390625, + "step": 52040 + }, + { + "epoch": 0.45001772574383275, + "grad_norm": 0.9678931701393981, + "learning_rate": 5.281163647130743e-06, + "loss": 0.3460090637207031, + "step": 52045 + }, + { + "epoch": 0.450060959265376, + "grad_norm": 47.67296557090035, + "learning_rate": 5.281031323447153e-06, + "loss": 0.3962860107421875, + "step": 52050 + }, + { + "epoch": 0.45010419278691927, + "grad_norm": 27.676202247215155, + "learning_rate": 5.280898989243646e-06, + "loss": 0.17516937255859374, + "step": 52055 + }, + { + "epoch": 0.45014742630846255, + "grad_norm": 16.547378874815738, + "learning_rate": 5.2807666445208355e-06, + "loss": 0.3332633972167969, + "step": 52060 + }, + { + "epoch": 0.4501906598300058, + "grad_norm": 1.53372449674899, + "learning_rate": 5.28063428927933e-06, + "loss": 0.19610595703125, + "step": 52065 + }, + { + "epoch": 0.45023389335154906, + "grad_norm": 13.02027552057072, + "learning_rate": 5.280501923519742e-06, + "loss": 0.1571500778198242, + "step": 52070 + }, + { + "epoch": 0.45027712687309235, + "grad_norm": 13.041876633773924, + "learning_rate": 5.280369547242679e-06, + "loss": 0.1268951416015625, + "step": 52075 + }, + { + "epoch": 0.4503203603946356, + "grad_norm": 0.27280936231688063, + "learning_rate": 5.280237160448755e-06, + "loss": 0.3382415771484375, + "step": 52080 + }, + { + "epoch": 0.45036359391617886, + "grad_norm": 13.256250334063878, + "learning_rate": 5.280104763138577e-06, + "loss": 0.22670326232910157, + "step": 52085 + }, + { + "epoch": 0.4504068274377221, + "grad_norm": 18.642992261230997, + "learning_rate": 5.2799723553127585e-06, + "loss": 0.180133056640625, + "step": 52090 + }, + { + "epoch": 0.4504500609592654, + "grad_norm": 0.313050689375663, + "learning_rate": 5.279839936971908e-06, + "loss": 0.04416351318359375, + "step": 52095 + }, + { + "epoch": 0.45049329448080866, + "grad_norm": 20.48488330362285, + "learning_rate": 5.279707508116638e-06, + "loss": 0.23798370361328125, + "step": 52100 + }, + { + "epoch": 0.4505365280023519, + "grad_norm": 1.8442225154333793, + "learning_rate": 5.279575068747558e-06, + "loss": 0.18292236328125, + "step": 52105 + }, + { + "epoch": 0.4505797615238952, + "grad_norm": 0.18628880144491192, + "learning_rate": 5.279442618865279e-06, + "loss": 0.1426849365234375, + "step": 52110 + }, + { + "epoch": 0.45062299504543846, + "grad_norm": 2.4893584066054077, + "learning_rate": 5.279310158470412e-06, + "loss": 0.1530292510986328, + "step": 52115 + }, + { + "epoch": 0.4506662285669817, + "grad_norm": 10.747474239020203, + "learning_rate": 5.279177687563569e-06, + "loss": 0.40223846435546873, + "step": 52120 + }, + { + "epoch": 0.45070946208852497, + "grad_norm": 24.033744196521425, + "learning_rate": 5.279045206145359e-06, + "loss": 0.11183662414550781, + "step": 52125 + }, + { + "epoch": 0.4507526956100682, + "grad_norm": 0.3360982119787993, + "learning_rate": 5.278912714216394e-06, + "loss": 0.11035690307617188, + "step": 52130 + }, + { + "epoch": 0.4507959291316115, + "grad_norm": 0.5464301781873095, + "learning_rate": 5.2787802117772845e-06, + "loss": 0.1569000244140625, + "step": 52135 + }, + { + "epoch": 0.45083916265315477, + "grad_norm": 35.83988948347069, + "learning_rate": 5.278647698828643e-06, + "loss": 0.3151054382324219, + "step": 52140 + }, + { + "epoch": 0.450882396174698, + "grad_norm": 2.117905643332883, + "learning_rate": 5.278515175371078e-06, + "loss": 0.20906982421875, + "step": 52145 + }, + { + "epoch": 0.4509256296962413, + "grad_norm": 12.430562514642071, + "learning_rate": 5.278382641405205e-06, + "loss": 0.34287109375, + "step": 52150 + }, + { + "epoch": 0.45096886321778457, + "grad_norm": 13.311993563997168, + "learning_rate": 5.27825009693163e-06, + "loss": 0.1608154296875, + "step": 52155 + }, + { + "epoch": 0.4510120967393278, + "grad_norm": 0.5697330122506176, + "learning_rate": 5.2781175419509685e-06, + "loss": 0.0653564453125, + "step": 52160 + }, + { + "epoch": 0.4510553302608711, + "grad_norm": 5.133824898052441, + "learning_rate": 5.277984976463829e-06, + "loss": 0.040586090087890624, + "step": 52165 + }, + { + "epoch": 0.4510985637824143, + "grad_norm": 13.877377572414634, + "learning_rate": 5.277852400470825e-06, + "loss": 0.14807281494140626, + "step": 52170 + }, + { + "epoch": 0.4511417973039576, + "grad_norm": 0.33124593614999187, + "learning_rate": 5.277719813972567e-06, + "loss": 0.190350341796875, + "step": 52175 + }, + { + "epoch": 0.4511850308255009, + "grad_norm": 15.917106099497845, + "learning_rate": 5.277587216969666e-06, + "loss": 0.2957763671875, + "step": 52180 + }, + { + "epoch": 0.4512282643470441, + "grad_norm": 11.409204500493237, + "learning_rate": 5.277454609462734e-06, + "loss": 0.054036712646484374, + "step": 52185 + }, + { + "epoch": 0.4512714978685874, + "grad_norm": 2.2485205544673383, + "learning_rate": 5.277321991452383e-06, + "loss": 0.10841598510742187, + "step": 52190 + }, + { + "epoch": 0.4513147313901307, + "grad_norm": 1.7202243686387984, + "learning_rate": 5.277189362939223e-06, + "loss": 0.12644462585449218, + "step": 52195 + }, + { + "epoch": 0.4513579649116739, + "grad_norm": 12.605196958436863, + "learning_rate": 5.277056723923869e-06, + "loss": 0.1364950180053711, + "step": 52200 + }, + { + "epoch": 0.4514011984332172, + "grad_norm": 0.31258004477529505, + "learning_rate": 5.276924074406929e-06, + "loss": 0.2743633270263672, + "step": 52205 + }, + { + "epoch": 0.4514444319547604, + "grad_norm": 37.41504640118985, + "learning_rate": 5.276791414389016e-06, + "loss": 0.62872314453125, + "step": 52210 + }, + { + "epoch": 0.4514876654763037, + "grad_norm": 3.1693258750198186, + "learning_rate": 5.276658743870744e-06, + "loss": 0.28739013671875, + "step": 52215 + }, + { + "epoch": 0.451530898997847, + "grad_norm": 2.997160285935509, + "learning_rate": 5.276526062852722e-06, + "loss": 0.36666259765625, + "step": 52220 + }, + { + "epoch": 0.4515741325193902, + "grad_norm": 1.7030514308432734, + "learning_rate": 5.276393371335563e-06, + "loss": 0.124395751953125, + "step": 52225 + }, + { + "epoch": 0.4516173660409335, + "grad_norm": 31.773597376287558, + "learning_rate": 5.276260669319879e-06, + "loss": 0.2219757080078125, + "step": 52230 + }, + { + "epoch": 0.4516605995624768, + "grad_norm": 42.20028277103073, + "learning_rate": 5.276127956806281e-06, + "loss": 0.2911712646484375, + "step": 52235 + }, + { + "epoch": 0.45170383308402, + "grad_norm": 23.846543418351075, + "learning_rate": 5.275995233795383e-06, + "loss": 0.362689208984375, + "step": 52240 + }, + { + "epoch": 0.4517470666055633, + "grad_norm": 2.0505973144404446, + "learning_rate": 5.275862500287796e-06, + "loss": 0.03620452880859375, + "step": 52245 + }, + { + "epoch": 0.4517903001271066, + "grad_norm": 1.3014045094999902, + "learning_rate": 5.275729756284132e-06, + "loss": 0.15931472778320313, + "step": 52250 + }, + { + "epoch": 0.4518335336486498, + "grad_norm": 0.8743797237044391, + "learning_rate": 5.275597001785003e-06, + "loss": 0.01842803955078125, + "step": 52255 + }, + { + "epoch": 0.4518767671701931, + "grad_norm": 9.39315247432143, + "learning_rate": 5.275464236791023e-06, + "loss": 0.39247894287109375, + "step": 52260 + }, + { + "epoch": 0.4519200006917363, + "grad_norm": 5.27453448773578, + "learning_rate": 5.275331461302802e-06, + "loss": 0.18377685546875, + "step": 52265 + }, + { + "epoch": 0.4519632342132796, + "grad_norm": 29.96418654006927, + "learning_rate": 5.275198675320954e-06, + "loss": 0.1803924560546875, + "step": 52270 + }, + { + "epoch": 0.4520064677348229, + "grad_norm": 12.681913532627066, + "learning_rate": 5.27506587884609e-06, + "loss": 0.14517059326171874, + "step": 52275 + }, + { + "epoch": 0.4520497012563661, + "grad_norm": 1.2280927741621668, + "learning_rate": 5.274933071878824e-06, + "loss": 0.20180435180664064, + "step": 52280 + }, + { + "epoch": 0.4520929347779094, + "grad_norm": 33.63115353823529, + "learning_rate": 5.274800254419767e-06, + "loss": 0.3639434814453125, + "step": 52285 + }, + { + "epoch": 0.4521361682994527, + "grad_norm": 17.041368828367357, + "learning_rate": 5.274667426469532e-06, + "loss": 0.19546165466308593, + "step": 52290 + }, + { + "epoch": 0.4521794018209959, + "grad_norm": 12.960549388828282, + "learning_rate": 5.274534588028733e-06, + "loss": 0.1468292236328125, + "step": 52295 + }, + { + "epoch": 0.4522226353425392, + "grad_norm": 17.632242845148856, + "learning_rate": 5.27440173909798e-06, + "loss": 0.236224365234375, + "step": 52300 + }, + { + "epoch": 0.45226586886408243, + "grad_norm": 16.761445333347048, + "learning_rate": 5.274268879677889e-06, + "loss": 0.11197662353515625, + "step": 52305 + }, + { + "epoch": 0.4523091023856257, + "grad_norm": 21.259156737069784, + "learning_rate": 5.274136009769069e-06, + "loss": 0.10072746276855468, + "step": 52310 + }, + { + "epoch": 0.452352335907169, + "grad_norm": 8.070154410896501, + "learning_rate": 5.274003129372136e-06, + "loss": 0.17117881774902344, + "step": 52315 + }, + { + "epoch": 0.45239556942871223, + "grad_norm": 1.0066086250704285, + "learning_rate": 5.273870238487701e-06, + "loss": 0.08038406372070313, + "step": 52320 + }, + { + "epoch": 0.4524388029502555, + "grad_norm": 79.53949093189577, + "learning_rate": 5.273737337116377e-06, + "loss": 0.8503128051757812, + "step": 52325 + }, + { + "epoch": 0.4524820364717988, + "grad_norm": 0.2830825767771316, + "learning_rate": 5.273604425258778e-06, + "loss": 0.19214401245117188, + "step": 52330 + }, + { + "epoch": 0.452525269993342, + "grad_norm": 0.3124473419506187, + "learning_rate": 5.273471502915515e-06, + "loss": 0.1194183349609375, + "step": 52335 + }, + { + "epoch": 0.4525685035148853, + "grad_norm": 3.1647688379512156, + "learning_rate": 5.273338570087204e-06, + "loss": 0.18073348999023436, + "step": 52340 + }, + { + "epoch": 0.45261173703642854, + "grad_norm": 8.0934328154736, + "learning_rate": 5.273205626774456e-06, + "loss": 0.469189453125, + "step": 52345 + }, + { + "epoch": 0.4526549705579718, + "grad_norm": 8.834756638165015, + "learning_rate": 5.273072672977884e-06, + "loss": 0.09007797241210938, + "step": 52350 + }, + { + "epoch": 0.4526982040795151, + "grad_norm": 1.3027845313790414, + "learning_rate": 5.272939708698102e-06, + "loss": 0.15366668701171876, + "step": 52355 + }, + { + "epoch": 0.45274143760105834, + "grad_norm": 3.635581794066471, + "learning_rate": 5.272806733935723e-06, + "loss": 0.15182952880859374, + "step": 52360 + }, + { + "epoch": 0.4527846711226016, + "grad_norm": 13.160218206496527, + "learning_rate": 5.27267374869136e-06, + "loss": 0.08268051147460938, + "step": 52365 + }, + { + "epoch": 0.4528279046441449, + "grad_norm": 11.105820666198326, + "learning_rate": 5.2725407529656255e-06, + "loss": 0.202557373046875, + "step": 52370 + }, + { + "epoch": 0.45287113816568814, + "grad_norm": 2.44206543927478, + "learning_rate": 5.272407746759136e-06, + "loss": 0.133123779296875, + "step": 52375 + }, + { + "epoch": 0.4529143716872314, + "grad_norm": 7.888604590847069, + "learning_rate": 5.272274730072501e-06, + "loss": 0.468353271484375, + "step": 52380 + }, + { + "epoch": 0.45295760520877465, + "grad_norm": 28.25842484697609, + "learning_rate": 5.272141702906336e-06, + "loss": 0.3313629150390625, + "step": 52385 + }, + { + "epoch": 0.45300083873031793, + "grad_norm": 36.010171915320676, + "learning_rate": 5.272008665261255e-06, + "loss": 0.3293426513671875, + "step": 52390 + }, + { + "epoch": 0.4530440722518612, + "grad_norm": 29.71357550402593, + "learning_rate": 5.27187561713787e-06, + "loss": 0.14788360595703126, + "step": 52395 + }, + { + "epoch": 0.45308730577340445, + "grad_norm": 1.829237226190377, + "learning_rate": 5.2717425585367956e-06, + "loss": 0.0715799331665039, + "step": 52400 + }, + { + "epoch": 0.45313053929494773, + "grad_norm": 0.3206072320387917, + "learning_rate": 5.271609489458646e-06, + "loss": 0.067041015625, + "step": 52405 + }, + { + "epoch": 0.453173772816491, + "grad_norm": 16.40684588370265, + "learning_rate": 5.271476409904032e-06, + "loss": 0.2019927978515625, + "step": 52410 + }, + { + "epoch": 0.45321700633803425, + "grad_norm": 18.061538824165957, + "learning_rate": 5.271343319873572e-06, + "loss": 0.06700973510742188, + "step": 52415 + }, + { + "epoch": 0.45326023985957753, + "grad_norm": 44.98841698480656, + "learning_rate": 5.271210219367876e-06, + "loss": 0.47020263671875, + "step": 52420 + }, + { + "epoch": 0.45330347338112076, + "grad_norm": 1.6491932442939266, + "learning_rate": 5.27107710838756e-06, + "loss": 0.08503875732421876, + "step": 52425 + }, + { + "epoch": 0.45334670690266404, + "grad_norm": 0.6628311812169073, + "learning_rate": 5.270943986933236e-06, + "loss": 0.08234710693359375, + "step": 52430 + }, + { + "epoch": 0.4533899404242073, + "grad_norm": 3.8403158387703242, + "learning_rate": 5.27081085500552e-06, + "loss": 0.06284379959106445, + "step": 52435 + }, + { + "epoch": 0.45343317394575056, + "grad_norm": 1.2412415296804638, + "learning_rate": 5.270677712605025e-06, + "loss": 0.3441581726074219, + "step": 52440 + }, + { + "epoch": 0.45347640746729384, + "grad_norm": 19.91476065593642, + "learning_rate": 5.2705445597323635e-06, + "loss": 0.06088104248046875, + "step": 52445 + }, + { + "epoch": 0.4535196409888371, + "grad_norm": 1.5661257118470504, + "learning_rate": 5.270411396388153e-06, + "loss": 0.29962997436523436, + "step": 52450 + }, + { + "epoch": 0.45356287451038035, + "grad_norm": 1.7662117929240826, + "learning_rate": 5.270278222573005e-06, + "loss": 0.06461029052734375, + "step": 52455 + }, + { + "epoch": 0.45360610803192364, + "grad_norm": 0.5783090562464447, + "learning_rate": 5.270145038287534e-06, + "loss": 0.062252235412597653, + "step": 52460 + }, + { + "epoch": 0.4536493415534669, + "grad_norm": 32.74376288389766, + "learning_rate": 5.2700118435323545e-06, + "loss": 0.259307861328125, + "step": 52465 + }, + { + "epoch": 0.45369257507501015, + "grad_norm": 11.793839227422303, + "learning_rate": 5.269878638308082e-06, + "loss": 0.4215911865234375, + "step": 52470 + }, + { + "epoch": 0.45373580859655344, + "grad_norm": 1.1006894324879821, + "learning_rate": 5.269745422615329e-06, + "loss": 0.1521484375, + "step": 52475 + }, + { + "epoch": 0.45377904211809666, + "grad_norm": 27.12971230154841, + "learning_rate": 5.269612196454711e-06, + "loss": 0.1384674072265625, + "step": 52480 + }, + { + "epoch": 0.45382227563963995, + "grad_norm": 9.1415351674009, + "learning_rate": 5.269478959826842e-06, + "loss": 0.07102241516113281, + "step": 52485 + }, + { + "epoch": 0.45386550916118323, + "grad_norm": 24.76675337550535, + "learning_rate": 5.2693457127323365e-06, + "loss": 0.27721710205078126, + "step": 52490 + }, + { + "epoch": 0.45390874268272646, + "grad_norm": 8.238313113987061, + "learning_rate": 5.269212455171809e-06, + "loss": 0.27103271484375, + "step": 52495 + }, + { + "epoch": 0.45395197620426975, + "grad_norm": 5.63343042084283, + "learning_rate": 5.269079187145875e-06, + "loss": 0.18654937744140626, + "step": 52500 + }, + { + "epoch": 0.45399520972581303, + "grad_norm": 0.38674076698870724, + "learning_rate": 5.268945908655148e-06, + "loss": 0.1546875, + "step": 52505 + }, + { + "epoch": 0.45403844324735626, + "grad_norm": 22.489585000561032, + "learning_rate": 5.2688126197002426e-06, + "loss": 0.40334625244140626, + "step": 52510 + }, + { + "epoch": 0.45408167676889954, + "grad_norm": 4.190306992164747, + "learning_rate": 5.2686793202817736e-06, + "loss": 0.0975250244140625, + "step": 52515 + }, + { + "epoch": 0.4541249102904428, + "grad_norm": 18.594809643774656, + "learning_rate": 5.268546010400356e-06, + "loss": 0.28153076171875, + "step": 52520 + }, + { + "epoch": 0.45416814381198606, + "grad_norm": 9.296692384573559, + "learning_rate": 5.268412690056605e-06, + "loss": 0.12519493103027343, + "step": 52525 + }, + { + "epoch": 0.45421137733352934, + "grad_norm": 17.07717571932885, + "learning_rate": 5.268279359251135e-06, + "loss": 0.26297607421875, + "step": 52530 + }, + { + "epoch": 0.45425461085507257, + "grad_norm": 2.472154474685067, + "learning_rate": 5.268146017984561e-06, + "loss": 0.4065895080566406, + "step": 52535 + }, + { + "epoch": 0.45429784437661586, + "grad_norm": 34.94221186276995, + "learning_rate": 5.268012666257499e-06, + "loss": 0.2671966552734375, + "step": 52540 + }, + { + "epoch": 0.45434107789815914, + "grad_norm": 0.35434385149656206, + "learning_rate": 5.2678793040705615e-06, + "loss": 0.17914466857910155, + "step": 52545 + }, + { + "epoch": 0.45438431141970237, + "grad_norm": 2.1008110795632136, + "learning_rate": 5.267745931424366e-06, + "loss": 0.08944244384765625, + "step": 52550 + }, + { + "epoch": 0.45442754494124565, + "grad_norm": 3.7895991310425874, + "learning_rate": 5.267612548319527e-06, + "loss": 0.099029541015625, + "step": 52555 + }, + { + "epoch": 0.4544707784627889, + "grad_norm": 6.3286467714776, + "learning_rate": 5.267479154756658e-06, + "loss": 0.21631011962890626, + "step": 52560 + }, + { + "epoch": 0.45451401198433217, + "grad_norm": 3.500508527326245, + "learning_rate": 5.267345750736375e-06, + "loss": 0.3109611511230469, + "step": 52565 + }, + { + "epoch": 0.45455724550587545, + "grad_norm": 25.77266283329916, + "learning_rate": 5.267212336259296e-06, + "loss": 0.22406005859375, + "step": 52570 + }, + { + "epoch": 0.4546004790274187, + "grad_norm": 0.7447632324296173, + "learning_rate": 5.267078911326033e-06, + "loss": 0.07877883911132813, + "step": 52575 + }, + { + "epoch": 0.45464371254896196, + "grad_norm": 1.0352410021415106, + "learning_rate": 5.2669454759372015e-06, + "loss": 0.0484161376953125, + "step": 52580 + }, + { + "epoch": 0.45468694607050525, + "grad_norm": 19.310573963571315, + "learning_rate": 5.266812030093419e-06, + "loss": 0.08804702758789062, + "step": 52585 + }, + { + "epoch": 0.4547301795920485, + "grad_norm": 0.2121894010511059, + "learning_rate": 5.2666785737952996e-06, + "loss": 0.07958526611328125, + "step": 52590 + }, + { + "epoch": 0.45477341311359176, + "grad_norm": 7.592607147116051, + "learning_rate": 5.2665451070434584e-06, + "loss": 0.0905303955078125, + "step": 52595 + }, + { + "epoch": 0.454816646635135, + "grad_norm": 9.26400913712698, + "learning_rate": 5.266411629838512e-06, + "loss": 0.102349853515625, + "step": 52600 + }, + { + "epoch": 0.4548598801566783, + "grad_norm": 32.678489537408545, + "learning_rate": 5.266278142181075e-06, + "loss": 0.25148773193359375, + "step": 52605 + }, + { + "epoch": 0.45490311367822156, + "grad_norm": 30.48254885481344, + "learning_rate": 5.266144644071762e-06, + "loss": 0.18634910583496095, + "step": 52610 + }, + { + "epoch": 0.4549463471997648, + "grad_norm": 1.1758592149328928, + "learning_rate": 5.266011135511193e-06, + "loss": 0.12392578125, + "step": 52615 + }, + { + "epoch": 0.4549895807213081, + "grad_norm": 20.006173065270215, + "learning_rate": 5.265877616499979e-06, + "loss": 0.150054931640625, + "step": 52620 + }, + { + "epoch": 0.45503281424285136, + "grad_norm": 2.1455671146080655, + "learning_rate": 5.265744087038738e-06, + "loss": 0.029901123046875, + "step": 52625 + }, + { + "epoch": 0.4550760477643946, + "grad_norm": 7.531306331638058, + "learning_rate": 5.265610547128086e-06, + "loss": 0.29703636169433595, + "step": 52630 + }, + { + "epoch": 0.45511928128593787, + "grad_norm": 2.024898637874384, + "learning_rate": 5.265476996768638e-06, + "loss": 0.091839599609375, + "step": 52635 + }, + { + "epoch": 0.45516251480748116, + "grad_norm": 17.293207963741928, + "learning_rate": 5.26534343596101e-06, + "loss": 0.36468849182128904, + "step": 52640 + }, + { + "epoch": 0.4552057483290244, + "grad_norm": 4.137292137050127, + "learning_rate": 5.265209864705818e-06, + "loss": 0.11787261962890624, + "step": 52645 + }, + { + "epoch": 0.45524898185056767, + "grad_norm": 14.473256696704217, + "learning_rate": 5.265076283003679e-06, + "loss": 0.3420448303222656, + "step": 52650 + }, + { + "epoch": 0.4552922153721109, + "grad_norm": 4.37675367731807, + "learning_rate": 5.264942690855208e-06, + "loss": 0.11860275268554688, + "step": 52655 + }, + { + "epoch": 0.4553354488936542, + "grad_norm": 2.6862996035539304, + "learning_rate": 5.264809088261022e-06, + "loss": 0.24197998046875, + "step": 52660 + }, + { + "epoch": 0.45537868241519747, + "grad_norm": 0.7563132211250306, + "learning_rate": 5.264675475221736e-06, + "loss": 0.12650680541992188, + "step": 52665 + }, + { + "epoch": 0.4554219159367407, + "grad_norm": 3.3241744022970687, + "learning_rate": 5.264541851737967e-06, + "loss": 0.0452392578125, + "step": 52670 + }, + { + "epoch": 0.455465149458284, + "grad_norm": 23.01234424785175, + "learning_rate": 5.264408217810331e-06, + "loss": 0.59814453125, + "step": 52675 + }, + { + "epoch": 0.45550838297982726, + "grad_norm": 9.661030539777684, + "learning_rate": 5.264274573439444e-06, + "loss": 0.5052047729492187, + "step": 52680 + }, + { + "epoch": 0.4555516165013705, + "grad_norm": 2.8718036397867936, + "learning_rate": 5.264140918625923e-06, + "loss": 0.115069580078125, + "step": 52685 + }, + { + "epoch": 0.4555948500229138, + "grad_norm": 0.9479018136581496, + "learning_rate": 5.2640072533703845e-06, + "loss": 0.090570068359375, + "step": 52690 + }, + { + "epoch": 0.455638083544457, + "grad_norm": 6.992509062131595, + "learning_rate": 5.2638735776734444e-06, + "loss": 0.0988037109375, + "step": 52695 + }, + { + "epoch": 0.4556813170660003, + "grad_norm": 9.688807917077575, + "learning_rate": 5.26373989153572e-06, + "loss": 0.3822490692138672, + "step": 52700 + }, + { + "epoch": 0.4557245505875436, + "grad_norm": 7.056160772752559, + "learning_rate": 5.263606194957826e-06, + "loss": 0.13660659790039062, + "step": 52705 + }, + { + "epoch": 0.4557677841090868, + "grad_norm": 51.02909663105215, + "learning_rate": 5.2634724879403806e-06, + "loss": 0.265069580078125, + "step": 52710 + }, + { + "epoch": 0.4558110176306301, + "grad_norm": 6.058383994435742, + "learning_rate": 5.263338770484001e-06, + "loss": 0.0817626953125, + "step": 52715 + }, + { + "epoch": 0.4558542511521734, + "grad_norm": 8.375882978291395, + "learning_rate": 5.263205042589302e-06, + "loss": 0.09897003173828126, + "step": 52720 + }, + { + "epoch": 0.4558974846737166, + "grad_norm": 15.756810597295356, + "learning_rate": 5.2630713042569014e-06, + "loss": 0.535516357421875, + "step": 52725 + }, + { + "epoch": 0.4559407181952599, + "grad_norm": 37.96479581296046, + "learning_rate": 5.262937555487417e-06, + "loss": 0.241448974609375, + "step": 52730 + }, + { + "epoch": 0.4559839517168031, + "grad_norm": 21.428216834185132, + "learning_rate": 5.262803796281464e-06, + "loss": 0.27870025634765627, + "step": 52735 + }, + { + "epoch": 0.4560271852383464, + "grad_norm": 5.75474678245408, + "learning_rate": 5.262670026639659e-06, + "loss": 0.0898162841796875, + "step": 52740 + }, + { + "epoch": 0.4560704187598897, + "grad_norm": 0.6961759895525983, + "learning_rate": 5.26253624656262e-06, + "loss": 0.16969146728515624, + "step": 52745 + }, + { + "epoch": 0.4561136522814329, + "grad_norm": 2.4642620643885644, + "learning_rate": 5.262402456050964e-06, + "loss": 0.086810302734375, + "step": 52750 + }, + { + "epoch": 0.4561568858029762, + "grad_norm": 12.785449361134285, + "learning_rate": 5.262268655105308e-06, + "loss": 0.0453277587890625, + "step": 52755 + }, + { + "epoch": 0.4562001193245195, + "grad_norm": 16.309562604660044, + "learning_rate": 5.262134843726269e-06, + "loss": 0.09546966552734375, + "step": 52760 + }, + { + "epoch": 0.4562433528460627, + "grad_norm": 1.9722047643855505, + "learning_rate": 5.2620010219144635e-06, + "loss": 0.22203636169433594, + "step": 52765 + }, + { + "epoch": 0.456286586367606, + "grad_norm": 21.591828005227484, + "learning_rate": 5.2618671896705094e-06, + "loss": 0.153875732421875, + "step": 52770 + }, + { + "epoch": 0.4563298198891492, + "grad_norm": 0.25152189983232504, + "learning_rate": 5.261733346995024e-06, + "loss": 0.21417770385742188, + "step": 52775 + }, + { + "epoch": 0.4563730534106925, + "grad_norm": 1.2671731543734357, + "learning_rate": 5.2615994938886234e-06, + "loss": 0.07421112060546875, + "step": 52780 + }, + { + "epoch": 0.4564162869322358, + "grad_norm": 2.063605744407149, + "learning_rate": 5.2614656303519265e-06, + "loss": 0.22644882202148436, + "step": 52785 + }, + { + "epoch": 0.456459520453779, + "grad_norm": 42.16801790739766, + "learning_rate": 5.26133175638555e-06, + "loss": 0.464385986328125, + "step": 52790 + }, + { + "epoch": 0.4565027539753223, + "grad_norm": 20.96134447033408, + "learning_rate": 5.2611978719901105e-06, + "loss": 0.076043701171875, + "step": 52795 + }, + { + "epoch": 0.4565459874968656, + "grad_norm": 2.77146789563965, + "learning_rate": 5.261063977166227e-06, + "loss": 0.11514453887939453, + "step": 52800 + }, + { + "epoch": 0.4565892210184088, + "grad_norm": 4.799803522386148, + "learning_rate": 5.260930071914516e-06, + "loss": 0.3005706787109375, + "step": 52805 + }, + { + "epoch": 0.4566324545399521, + "grad_norm": 25.85917810696473, + "learning_rate": 5.260796156235595e-06, + "loss": 0.6109466552734375, + "step": 52810 + }, + { + "epoch": 0.4566756880614954, + "grad_norm": 4.282563233188572, + "learning_rate": 5.260662230130083e-06, + "loss": 0.076336669921875, + "step": 52815 + }, + { + "epoch": 0.4567189215830386, + "grad_norm": 0.5393646601824634, + "learning_rate": 5.260528293598595e-06, + "loss": 0.1941680908203125, + "step": 52820 + }, + { + "epoch": 0.4567621551045819, + "grad_norm": 4.829315191599655, + "learning_rate": 5.26039434664175e-06, + "loss": 0.18703765869140626, + "step": 52825 + }, + { + "epoch": 0.45680538862612513, + "grad_norm": 0.9121842934502588, + "learning_rate": 5.2602603892601675e-06, + "loss": 0.02948760986328125, + "step": 52830 + }, + { + "epoch": 0.4568486221476684, + "grad_norm": 2.248435853072685, + "learning_rate": 5.260126421454464e-06, + "loss": 0.2880828857421875, + "step": 52835 + }, + { + "epoch": 0.4568918556692117, + "grad_norm": 23.71551825352607, + "learning_rate": 5.259992443225256e-06, + "loss": 0.22174072265625, + "step": 52840 + }, + { + "epoch": 0.45693508919075493, + "grad_norm": 21.10913125498691, + "learning_rate": 5.259858454573163e-06, + "loss": 0.23839111328125, + "step": 52845 + }, + { + "epoch": 0.4569783227122982, + "grad_norm": 4.342348804767783, + "learning_rate": 5.259724455498802e-06, + "loss": 0.1331268310546875, + "step": 52850 + }, + { + "epoch": 0.4570215562338415, + "grad_norm": 1.0917342654137263, + "learning_rate": 5.2595904460027925e-06, + "loss": 0.13935317993164062, + "step": 52855 + }, + { + "epoch": 0.4570647897553847, + "grad_norm": 4.769837667661137, + "learning_rate": 5.2594564260857506e-06, + "loss": 0.05006103515625, + "step": 52860 + }, + { + "epoch": 0.457108023276928, + "grad_norm": 0.2654216029531642, + "learning_rate": 5.259322395748296e-06, + "loss": 0.3565044403076172, + "step": 52865 + }, + { + "epoch": 0.45715125679847124, + "grad_norm": 6.253187324585854, + "learning_rate": 5.259188354991045e-06, + "loss": 0.08596954345703126, + "step": 52870 + }, + { + "epoch": 0.4571944903200145, + "grad_norm": 2.3798103157567327, + "learning_rate": 5.259054303814618e-06, + "loss": 0.1256561279296875, + "step": 52875 + }, + { + "epoch": 0.4572377238415578, + "grad_norm": 11.12193064122295, + "learning_rate": 5.258920242219632e-06, + "loss": 0.412591552734375, + "step": 52880 + }, + { + "epoch": 0.45728095736310104, + "grad_norm": 52.59510535905525, + "learning_rate": 5.258786170206706e-06, + "loss": 0.40526885986328126, + "step": 52885 + }, + { + "epoch": 0.4573241908846443, + "grad_norm": 26.6420732900989, + "learning_rate": 5.258652087776457e-06, + "loss": 0.25135650634765627, + "step": 52890 + }, + { + "epoch": 0.4573674244061876, + "grad_norm": 3.2651671982196855, + "learning_rate": 5.258517994929504e-06, + "loss": 0.185980224609375, + "step": 52895 + }, + { + "epoch": 0.45741065792773083, + "grad_norm": 7.122760009919293, + "learning_rate": 5.2583838916664655e-06, + "loss": 0.037335205078125, + "step": 52900 + }, + { + "epoch": 0.4574538914492741, + "grad_norm": 14.2237491688562, + "learning_rate": 5.25824977798796e-06, + "loss": 0.20105056762695311, + "step": 52905 + }, + { + "epoch": 0.45749712497081735, + "grad_norm": 4.604618196151857, + "learning_rate": 5.258115653894607e-06, + "loss": 0.24130287170410156, + "step": 52910 + }, + { + "epoch": 0.45754035849236063, + "grad_norm": 10.492176517607723, + "learning_rate": 5.257981519387024e-06, + "loss": 0.10464248657226563, + "step": 52915 + }, + { + "epoch": 0.4575835920139039, + "grad_norm": 11.172965971706617, + "learning_rate": 5.257847374465828e-06, + "loss": 0.1302001953125, + "step": 52920 + }, + { + "epoch": 0.45762682553544715, + "grad_norm": 2.9231470692893575, + "learning_rate": 5.257713219131641e-06, + "loss": 0.08710594177246093, + "step": 52925 + }, + { + "epoch": 0.45767005905699043, + "grad_norm": 15.173535268541624, + "learning_rate": 5.25757905338508e-06, + "loss": 0.44625701904296877, + "step": 52930 + }, + { + "epoch": 0.4577132925785337, + "grad_norm": 0.18746520004736686, + "learning_rate": 5.257444877226763e-06, + "loss": 0.052850341796875, + "step": 52935 + }, + { + "epoch": 0.45775652610007694, + "grad_norm": 3.8213771589713494, + "learning_rate": 5.257310690657311e-06, + "loss": 0.1453857421875, + "step": 52940 + }, + { + "epoch": 0.45779975962162023, + "grad_norm": 1.9278367726631767, + "learning_rate": 5.257176493677341e-06, + "loss": 0.11253433227539063, + "step": 52945 + }, + { + "epoch": 0.45784299314316346, + "grad_norm": 3.324339055624797, + "learning_rate": 5.257042286287473e-06, + "loss": 0.13884429931640624, + "step": 52950 + }, + { + "epoch": 0.45788622666470674, + "grad_norm": 28.133293139690572, + "learning_rate": 5.256908068488324e-06, + "loss": 0.2477783203125, + "step": 52955 + }, + { + "epoch": 0.45792946018625, + "grad_norm": 3.765191656750019, + "learning_rate": 5.2567738402805155e-06, + "loss": 0.10144081115722656, + "step": 52960 + }, + { + "epoch": 0.45797269370779325, + "grad_norm": 18.444632682556648, + "learning_rate": 5.256639601664666e-06, + "loss": 0.09005966186523437, + "step": 52965 + }, + { + "epoch": 0.45801592722933654, + "grad_norm": 0.8152519269568812, + "learning_rate": 5.256505352641393e-06, + "loss": 0.18925876617431642, + "step": 52970 + }, + { + "epoch": 0.4580591607508798, + "grad_norm": 0.5602462310180392, + "learning_rate": 5.256371093211316e-06, + "loss": 0.041057586669921875, + "step": 52975 + }, + { + "epoch": 0.45810239427242305, + "grad_norm": 14.713574321387279, + "learning_rate": 5.256236823375057e-06, + "loss": 0.3322998046875, + "step": 52980 + }, + { + "epoch": 0.45814562779396634, + "grad_norm": 13.032824363025512, + "learning_rate": 5.256102543133232e-06, + "loss": 0.10627670288085937, + "step": 52985 + }, + { + "epoch": 0.4581888613155096, + "grad_norm": 0.2289069556065604, + "learning_rate": 5.255968252486462e-06, + "loss": 0.07387237548828125, + "step": 52990 + }, + { + "epoch": 0.45823209483705285, + "grad_norm": 13.971647102837283, + "learning_rate": 5.2558339514353645e-06, + "loss": 0.16626548767089844, + "step": 52995 + }, + { + "epoch": 0.45827532835859613, + "grad_norm": 27.429321983244222, + "learning_rate": 5.2556996399805605e-06, + "loss": 0.3166259765625, + "step": 53000 + }, + { + "epoch": 0.45831856188013936, + "grad_norm": 3.8617868648314904, + "learning_rate": 5.25556531812267e-06, + "loss": 0.01616973876953125, + "step": 53005 + }, + { + "epoch": 0.45836179540168265, + "grad_norm": 0.5368208443386508, + "learning_rate": 5.255430985862311e-06, + "loss": 0.09862060546875, + "step": 53010 + }, + { + "epoch": 0.45840502892322593, + "grad_norm": 2.3455980108165497, + "learning_rate": 5.255296643200103e-06, + "loss": 0.0146881103515625, + "step": 53015 + }, + { + "epoch": 0.45844826244476916, + "grad_norm": 0.4537829888776514, + "learning_rate": 5.255162290136667e-06, + "loss": 0.08002166748046875, + "step": 53020 + }, + { + "epoch": 0.45849149596631245, + "grad_norm": 4.763688082387246, + "learning_rate": 5.255027926672622e-06, + "loss": 0.2590057373046875, + "step": 53025 + }, + { + "epoch": 0.45853472948785573, + "grad_norm": 1.4543087126669427, + "learning_rate": 5.254893552808586e-06, + "loss": 0.23210906982421875, + "step": 53030 + }, + { + "epoch": 0.45857796300939896, + "grad_norm": 30.441637595527506, + "learning_rate": 5.254759168545182e-06, + "loss": 0.20701026916503906, + "step": 53035 + }, + { + "epoch": 0.45862119653094224, + "grad_norm": 5.5096167148461, + "learning_rate": 5.254624773883027e-06, + "loss": 0.146026611328125, + "step": 53040 + }, + { + "epoch": 0.4586644300524855, + "grad_norm": 5.101490342888864, + "learning_rate": 5.254490368822742e-06, + "loss": 0.24098625183105468, + "step": 53045 + }, + { + "epoch": 0.45870766357402876, + "grad_norm": 18.443921058476644, + "learning_rate": 5.2543559533649465e-06, + "loss": 0.220794677734375, + "step": 53050 + }, + { + "epoch": 0.45875089709557204, + "grad_norm": 3.147852042484174, + "learning_rate": 5.25422152751026e-06, + "loss": 0.1110107421875, + "step": 53055 + }, + { + "epoch": 0.45879413061711527, + "grad_norm": 1.7894306910395963, + "learning_rate": 5.254087091259304e-06, + "loss": 0.23988037109375, + "step": 53060 + }, + { + "epoch": 0.45883736413865855, + "grad_norm": 17.277524676207257, + "learning_rate": 5.253952644612697e-06, + "loss": 0.2127532958984375, + "step": 53065 + }, + { + "epoch": 0.45888059766020184, + "grad_norm": 10.835520324855288, + "learning_rate": 5.25381818757106e-06, + "loss": 0.3294708251953125, + "step": 53070 + }, + { + "epoch": 0.45892383118174507, + "grad_norm": 7.0979304414717586, + "learning_rate": 5.2536837201350116e-06, + "loss": 0.21049346923828124, + "step": 53075 + }, + { + "epoch": 0.45896706470328835, + "grad_norm": 38.660260443055975, + "learning_rate": 5.2535492423051736e-06, + "loss": 0.32159271240234377, + "step": 53080 + }, + { + "epoch": 0.4590102982248316, + "grad_norm": 8.608565756166646, + "learning_rate": 5.253414754082167e-06, + "loss": 0.1234527587890625, + "step": 53085 + }, + { + "epoch": 0.45905353174637487, + "grad_norm": 13.692448693683463, + "learning_rate": 5.253280255466608e-06, + "loss": 0.08283309936523438, + "step": 53090 + }, + { + "epoch": 0.45909676526791815, + "grad_norm": 5.631299961092897, + "learning_rate": 5.253145746459121e-06, + "loss": 0.04577560424804687, + "step": 53095 + }, + { + "epoch": 0.4591399987894614, + "grad_norm": 1.8027149987085769, + "learning_rate": 5.253011227060325e-06, + "loss": 0.13577418327331542, + "step": 53100 + }, + { + "epoch": 0.45918323231100466, + "grad_norm": 2.9782441059052593, + "learning_rate": 5.252876697270841e-06, + "loss": 0.28601913452148436, + "step": 53105 + }, + { + "epoch": 0.45922646583254795, + "grad_norm": 0.5527416477230643, + "learning_rate": 5.252742157091287e-06, + "loss": 0.044708251953125, + "step": 53110 + }, + { + "epoch": 0.4592696993540912, + "grad_norm": 6.995876630709148, + "learning_rate": 5.252607606522287e-06, + "loss": 0.11136627197265625, + "step": 53115 + }, + { + "epoch": 0.45931293287563446, + "grad_norm": 7.962720951261976, + "learning_rate": 5.252473045564458e-06, + "loss": 0.4496978759765625, + "step": 53120 + }, + { + "epoch": 0.4593561663971777, + "grad_norm": 16.90933082210724, + "learning_rate": 5.252338474218424e-06, + "loss": 0.3511039733886719, + "step": 53125 + }, + { + "epoch": 0.459399399918721, + "grad_norm": 14.234771144381359, + "learning_rate": 5.252203892484802e-06, + "loss": 0.14024009704589843, + "step": 53130 + }, + { + "epoch": 0.45944263344026426, + "grad_norm": 36.743181443544394, + "learning_rate": 5.252069300364217e-06, + "loss": 0.370074462890625, + "step": 53135 + }, + { + "epoch": 0.4594858669618075, + "grad_norm": 53.399366396786895, + "learning_rate": 5.251934697857286e-06, + "loss": 0.2128173828125, + "step": 53140 + }, + { + "epoch": 0.45952910048335077, + "grad_norm": 6.11488667470479, + "learning_rate": 5.251800084964631e-06, + "loss": 0.07979564666748047, + "step": 53145 + }, + { + "epoch": 0.45957233400489406, + "grad_norm": 26.722578831563776, + "learning_rate": 5.2516654616868734e-06, + "loss": 0.3233154296875, + "step": 53150 + }, + { + "epoch": 0.4596155675264373, + "grad_norm": 3.7652565425217395, + "learning_rate": 5.2515308280246335e-06, + "loss": 0.08329010009765625, + "step": 53155 + }, + { + "epoch": 0.45965880104798057, + "grad_norm": 7.520629907346344, + "learning_rate": 5.251396183978533e-06, + "loss": 0.37559814453125, + "step": 53160 + }, + { + "epoch": 0.4597020345695238, + "grad_norm": 0.9859145889458545, + "learning_rate": 5.251261529549191e-06, + "loss": 0.1990142822265625, + "step": 53165 + }, + { + "epoch": 0.4597452680910671, + "grad_norm": 14.024991246545937, + "learning_rate": 5.251126864737232e-06, + "loss": 0.16224517822265624, + "step": 53170 + }, + { + "epoch": 0.45978850161261037, + "grad_norm": 6.0586244989567, + "learning_rate": 5.250992189543272e-06, + "loss": 0.303338623046875, + "step": 53175 + }, + { + "epoch": 0.4598317351341536, + "grad_norm": 0.2996787474961059, + "learning_rate": 5.250857503967936e-06, + "loss": 0.13653945922851562, + "step": 53180 + }, + { + "epoch": 0.4598749686556969, + "grad_norm": 0.2931543163890525, + "learning_rate": 5.250722808011845e-06, + "loss": 0.30996246337890626, + "step": 53185 + }, + { + "epoch": 0.45991820217724017, + "grad_norm": 7.786236501332197, + "learning_rate": 5.250588101675618e-06, + "loss": 0.14552021026611328, + "step": 53190 + }, + { + "epoch": 0.4599614356987834, + "grad_norm": 1.9542057727759172, + "learning_rate": 5.250453384959879e-06, + "loss": 0.025995635986328126, + "step": 53195 + }, + { + "epoch": 0.4600046692203267, + "grad_norm": 0.8676502895022601, + "learning_rate": 5.250318657865247e-06, + "loss": 0.15231781005859374, + "step": 53200 + }, + { + "epoch": 0.46004790274186996, + "grad_norm": 55.107861578187816, + "learning_rate": 5.250183920392344e-06, + "loss": 0.662750244140625, + "step": 53205 + }, + { + "epoch": 0.4600911362634132, + "grad_norm": 2.651652600791953, + "learning_rate": 5.250049172541792e-06, + "loss": 0.08308868408203125, + "step": 53210 + }, + { + "epoch": 0.4601343697849565, + "grad_norm": 0.4874506091846176, + "learning_rate": 5.2499144143142114e-06, + "loss": 0.18854026794433593, + "step": 53215 + }, + { + "epoch": 0.4601776033064997, + "grad_norm": 3.0728692467536263, + "learning_rate": 5.249779645710225e-06, + "loss": 0.2186737060546875, + "step": 53220 + }, + { + "epoch": 0.460220836828043, + "grad_norm": 7.700480859964304, + "learning_rate": 5.249644866730454e-06, + "loss": 0.25439453125, + "step": 53225 + }, + { + "epoch": 0.4602640703495863, + "grad_norm": 18.789530596051463, + "learning_rate": 5.249510077375519e-06, + "loss": 0.0811004638671875, + "step": 53230 + }, + { + "epoch": 0.4603073038711295, + "grad_norm": 15.766068311094802, + "learning_rate": 5.2493752776460414e-06, + "loss": 0.08191566467285157, + "step": 53235 + }, + { + "epoch": 0.4603505373926728, + "grad_norm": 2.2618933168146635, + "learning_rate": 5.249240467542645e-06, + "loss": 0.16413421630859376, + "step": 53240 + }, + { + "epoch": 0.46039377091421607, + "grad_norm": 1.9587089506378053, + "learning_rate": 5.24910564706595e-06, + "loss": 0.03710861206054687, + "step": 53245 + }, + { + "epoch": 0.4604370044357593, + "grad_norm": 67.43384217410193, + "learning_rate": 5.248970816216579e-06, + "loss": 0.533984375, + "step": 53250 + }, + { + "epoch": 0.4604802379573026, + "grad_norm": 1.1414853262304996, + "learning_rate": 5.248835974995153e-06, + "loss": 0.3383941650390625, + "step": 53255 + }, + { + "epoch": 0.4605234714788458, + "grad_norm": 10.08273797867039, + "learning_rate": 5.248701123402293e-06, + "loss": 0.190924072265625, + "step": 53260 + }, + { + "epoch": 0.4605667050003891, + "grad_norm": 9.121703019297861, + "learning_rate": 5.248566261438624e-06, + "loss": 0.1893798828125, + "step": 53265 + }, + { + "epoch": 0.4606099385219324, + "grad_norm": 33.539502404346656, + "learning_rate": 5.248431389104765e-06, + "loss": 0.2158447265625, + "step": 53270 + }, + { + "epoch": 0.4606531720434756, + "grad_norm": 6.591418743683457, + "learning_rate": 5.24829650640134e-06, + "loss": 0.2324676513671875, + "step": 53275 + }, + { + "epoch": 0.4606964055650189, + "grad_norm": 57.25532645712394, + "learning_rate": 5.248161613328969e-06, + "loss": 0.322357177734375, + "step": 53280 + }, + { + "epoch": 0.4607396390865622, + "grad_norm": 40.246211689889016, + "learning_rate": 5.248026709888276e-06, + "loss": 0.33010215759277345, + "step": 53285 + }, + { + "epoch": 0.4607828726081054, + "grad_norm": 7.436749265369944, + "learning_rate": 5.247891796079882e-06, + "loss": 0.12262077331542968, + "step": 53290 + }, + { + "epoch": 0.4608261061296487, + "grad_norm": 9.145563068368867, + "learning_rate": 5.247756871904409e-06, + "loss": 0.50048828125, + "step": 53295 + }, + { + "epoch": 0.4608693396511919, + "grad_norm": 9.566180854590723, + "learning_rate": 5.247621937362482e-06, + "loss": 0.10711040496826171, + "step": 53300 + }, + { + "epoch": 0.4609125731727352, + "grad_norm": 4.002937057012922, + "learning_rate": 5.24748699245472e-06, + "loss": 0.09601325988769531, + "step": 53305 + }, + { + "epoch": 0.4609558066942785, + "grad_norm": 19.447507588720587, + "learning_rate": 5.247352037181746e-06, + "loss": 0.16734619140625, + "step": 53310 + }, + { + "epoch": 0.4609990402158217, + "grad_norm": 11.155928721841054, + "learning_rate": 5.247217071544184e-06, + "loss": 0.19208755493164062, + "step": 53315 + }, + { + "epoch": 0.461042273737365, + "grad_norm": 1.619431629062086, + "learning_rate": 5.247082095542655e-06, + "loss": 0.12023658752441406, + "step": 53320 + }, + { + "epoch": 0.4610855072589083, + "grad_norm": 2.9847251832813524, + "learning_rate": 5.246947109177782e-06, + "loss": 0.32143325805664064, + "step": 53325 + }, + { + "epoch": 0.4611287407804515, + "grad_norm": 1.71682872721188, + "learning_rate": 5.246812112450187e-06, + "loss": 0.1411590576171875, + "step": 53330 + }, + { + "epoch": 0.4611719743019948, + "grad_norm": 10.693626028733817, + "learning_rate": 5.246677105360494e-06, + "loss": 0.09267425537109375, + "step": 53335 + }, + { + "epoch": 0.46121520782353803, + "grad_norm": 16.181990007334424, + "learning_rate": 5.246542087909324e-06, + "loss": 0.34690399169921876, + "step": 53340 + }, + { + "epoch": 0.4612584413450813, + "grad_norm": 12.93280432445726, + "learning_rate": 5.246407060097301e-06, + "loss": 0.21936187744140626, + "step": 53345 + }, + { + "epoch": 0.4613016748666246, + "grad_norm": 1.5113634120306088, + "learning_rate": 5.246272021925047e-06, + "loss": 0.062482452392578124, + "step": 53350 + }, + { + "epoch": 0.46134490838816783, + "grad_norm": 4.614869200247083, + "learning_rate": 5.246136973393184e-06, + "loss": 0.21139907836914062, + "step": 53355 + }, + { + "epoch": 0.4613881419097111, + "grad_norm": 6.569564073260533, + "learning_rate": 5.246001914502337e-06, + "loss": 0.19033660888671874, + "step": 53360 + }, + { + "epoch": 0.4614313754312544, + "grad_norm": 1.3055111104309856, + "learning_rate": 5.245866845253126e-06, + "loss": 0.02552947998046875, + "step": 53365 + }, + { + "epoch": 0.4614746089527976, + "grad_norm": 37.75894965886728, + "learning_rate": 5.245731765646178e-06, + "loss": 0.6375, + "step": 53370 + }, + { + "epoch": 0.4615178424743409, + "grad_norm": 0.6909882578166023, + "learning_rate": 5.245596675682112e-06, + "loss": 0.10046768188476562, + "step": 53375 + }, + { + "epoch": 0.4615610759958842, + "grad_norm": 31.593941959256465, + "learning_rate": 5.245461575361553e-06, + "loss": 0.20330581665039063, + "step": 53380 + }, + { + "epoch": 0.4616043095174274, + "grad_norm": 3.868770750694305, + "learning_rate": 5.245326464685123e-06, + "loss": 0.21771240234375, + "step": 53385 + }, + { + "epoch": 0.4616475430389707, + "grad_norm": 2.540161550719531, + "learning_rate": 5.245191343653447e-06, + "loss": 0.2414989471435547, + "step": 53390 + }, + { + "epoch": 0.46169077656051394, + "grad_norm": 0.6486959326279635, + "learning_rate": 5.245056212267146e-06, + "loss": 0.2266510009765625, + "step": 53395 + }, + { + "epoch": 0.4617340100820572, + "grad_norm": 5.425997789614013, + "learning_rate": 5.2449210705268434e-06, + "loss": 0.060491943359375, + "step": 53400 + }, + { + "epoch": 0.4617772436036005, + "grad_norm": 4.534417402901393, + "learning_rate": 5.244785918433164e-06, + "loss": 0.206304931640625, + "step": 53405 + }, + { + "epoch": 0.46182047712514374, + "grad_norm": 32.568701527508146, + "learning_rate": 5.24465075598673e-06, + "loss": 0.3898963928222656, + "step": 53410 + }, + { + "epoch": 0.461863710646687, + "grad_norm": 14.993979288804018, + "learning_rate": 5.244515583188166e-06, + "loss": 0.1115386962890625, + "step": 53415 + }, + { + "epoch": 0.4619069441682303, + "grad_norm": 3.581032928986328, + "learning_rate": 5.244380400038093e-06, + "loss": 0.5192020416259766, + "step": 53420 + }, + { + "epoch": 0.46195017768977353, + "grad_norm": 13.107536262139346, + "learning_rate": 5.244245206537137e-06, + "loss": 0.1477203369140625, + "step": 53425 + }, + { + "epoch": 0.4619934112113168, + "grad_norm": 28.312613558264793, + "learning_rate": 5.24411000268592e-06, + "loss": 0.136077880859375, + "step": 53430 + }, + { + "epoch": 0.46203664473286005, + "grad_norm": 8.348682431325583, + "learning_rate": 5.243974788485067e-06, + "loss": 0.182379150390625, + "step": 53435 + }, + { + "epoch": 0.46207987825440333, + "grad_norm": 33.53007003061809, + "learning_rate": 5.243839563935199e-06, + "loss": 0.17811355590820313, + "step": 53440 + }, + { + "epoch": 0.4621231117759466, + "grad_norm": 13.879143779836669, + "learning_rate": 5.243704329036941e-06, + "loss": 0.24383392333984374, + "step": 53445 + }, + { + "epoch": 0.46216634529748984, + "grad_norm": 4.336992865604491, + "learning_rate": 5.243569083790918e-06, + "loss": 0.05511970520019531, + "step": 53450 + }, + { + "epoch": 0.46220957881903313, + "grad_norm": 1.4872826035512445, + "learning_rate": 5.2434338281977525e-06, + "loss": 0.23068723678588868, + "step": 53455 + }, + { + "epoch": 0.4622528123405764, + "grad_norm": 1.2234501742891317, + "learning_rate": 5.243298562258068e-06, + "loss": 0.0544830322265625, + "step": 53460 + }, + { + "epoch": 0.46229604586211964, + "grad_norm": 0.20316492061788194, + "learning_rate": 5.243163285972489e-06, + "loss": 0.21521377563476562, + "step": 53465 + }, + { + "epoch": 0.4623392793836629, + "grad_norm": 12.722478787307647, + "learning_rate": 5.24302799934164e-06, + "loss": 0.21300201416015624, + "step": 53470 + }, + { + "epoch": 0.46238251290520616, + "grad_norm": 7.331010052525075, + "learning_rate": 5.242892702366143e-06, + "loss": 0.0798248291015625, + "step": 53475 + }, + { + "epoch": 0.46242574642674944, + "grad_norm": 20.583772494092518, + "learning_rate": 5.242757395046622e-06, + "loss": 0.09219741821289062, + "step": 53480 + }, + { + "epoch": 0.4624689799482927, + "grad_norm": 31.637674835109213, + "learning_rate": 5.242622077383703e-06, + "loss": 0.2111175537109375, + "step": 53485 + }, + { + "epoch": 0.46251221346983595, + "grad_norm": 26.163510438421124, + "learning_rate": 5.24248674937801e-06, + "loss": 0.32833251953125, + "step": 53490 + }, + { + "epoch": 0.46255544699137924, + "grad_norm": 8.038158939278393, + "learning_rate": 5.242351411030166e-06, + "loss": 0.0509002685546875, + "step": 53495 + }, + { + "epoch": 0.4625986805129225, + "grad_norm": 7.699906909233647, + "learning_rate": 5.242216062340795e-06, + "loss": 0.2133941650390625, + "step": 53500 + }, + { + "epoch": 0.46264191403446575, + "grad_norm": 4.273669398004509, + "learning_rate": 5.2420807033105224e-06, + "loss": 0.32807579040527346, + "step": 53505 + }, + { + "epoch": 0.46268514755600904, + "grad_norm": 3.279938106448311, + "learning_rate": 5.24194533393997e-06, + "loss": 0.10926265716552734, + "step": 53510 + }, + { + "epoch": 0.46272838107755226, + "grad_norm": 0.6909699121579876, + "learning_rate": 5.241809954229765e-06, + "loss": 0.11248016357421875, + "step": 53515 + }, + { + "epoch": 0.46277161459909555, + "grad_norm": 1.6467248436137438, + "learning_rate": 5.241674564180531e-06, + "loss": 0.46463699340820314, + "step": 53520 + }, + { + "epoch": 0.46281484812063883, + "grad_norm": 23.18887113539039, + "learning_rate": 5.241539163792892e-06, + "loss": 0.3097393035888672, + "step": 53525 + }, + { + "epoch": 0.46285808164218206, + "grad_norm": 19.131787799431407, + "learning_rate": 5.241403753067471e-06, + "loss": 0.48744049072265627, + "step": 53530 + }, + { + "epoch": 0.46290131516372535, + "grad_norm": 16.141568201950506, + "learning_rate": 5.241268332004896e-06, + "loss": 0.3947021484375, + "step": 53535 + }, + { + "epoch": 0.46294454868526863, + "grad_norm": 0.7560628333197709, + "learning_rate": 5.241132900605788e-06, + "loss": 0.07792739868164063, + "step": 53540 + }, + { + "epoch": 0.46298778220681186, + "grad_norm": 5.736817184610858, + "learning_rate": 5.240997458870773e-06, + "loss": 0.0958953857421875, + "step": 53545 + }, + { + "epoch": 0.46303101572835514, + "grad_norm": 26.76894412830002, + "learning_rate": 5.240862006800476e-06, + "loss": 0.4024139404296875, + "step": 53550 + }, + { + "epoch": 0.46307424924989843, + "grad_norm": 32.11197158039299, + "learning_rate": 5.240726544395522e-06, + "loss": 0.232037353515625, + "step": 53555 + }, + { + "epoch": 0.46311748277144166, + "grad_norm": 0.24185032694288355, + "learning_rate": 5.240591071656535e-06, + "loss": 0.05707006454467774, + "step": 53560 + }, + { + "epoch": 0.46316071629298494, + "grad_norm": 16.05040634507801, + "learning_rate": 5.24045558858414e-06, + "loss": 0.37490692138671877, + "step": 53565 + }, + { + "epoch": 0.46320394981452817, + "grad_norm": 9.161074626718516, + "learning_rate": 5.240320095178961e-06, + "loss": 0.226788330078125, + "step": 53570 + }, + { + "epoch": 0.46324718333607146, + "grad_norm": 11.686170985066246, + "learning_rate": 5.240184591441624e-06, + "loss": 0.20245132446289063, + "step": 53575 + }, + { + "epoch": 0.46329041685761474, + "grad_norm": 0.18882606220733586, + "learning_rate": 5.240049077372754e-06, + "loss": 0.12882232666015625, + "step": 53580 + }, + { + "epoch": 0.46333365037915797, + "grad_norm": 11.499348372038094, + "learning_rate": 5.239913552972975e-06, + "loss": 0.30801849365234374, + "step": 53585 + }, + { + "epoch": 0.46337688390070125, + "grad_norm": 2.6717192383707515, + "learning_rate": 5.239778018242914e-06, + "loss": 0.05263595581054688, + "step": 53590 + }, + { + "epoch": 0.46342011742224454, + "grad_norm": 13.143549390116021, + "learning_rate": 5.239642473183193e-06, + "loss": 0.1110748291015625, + "step": 53595 + }, + { + "epoch": 0.46346335094378777, + "grad_norm": 19.482541360599186, + "learning_rate": 5.239506917794439e-06, + "loss": 0.17566490173339844, + "step": 53600 + }, + { + "epoch": 0.46350658446533105, + "grad_norm": 1.5320074241406423, + "learning_rate": 5.239371352077278e-06, + "loss": 0.2570167541503906, + "step": 53605 + }, + { + "epoch": 0.4635498179868743, + "grad_norm": 7.983179903960531, + "learning_rate": 5.239235776032334e-06, + "loss": 0.214654541015625, + "step": 53610 + }, + { + "epoch": 0.46359305150841756, + "grad_norm": 7.383288807381018, + "learning_rate": 5.239100189660232e-06, + "loss": 0.149102783203125, + "step": 53615 + }, + { + "epoch": 0.46363628502996085, + "grad_norm": 38.215279606971144, + "learning_rate": 5.238964592961597e-06, + "loss": 0.49298858642578125, + "step": 53620 + }, + { + "epoch": 0.4636795185515041, + "grad_norm": 2.903513597093627, + "learning_rate": 5.2388289859370574e-06, + "loss": 0.09618091583251953, + "step": 53625 + }, + { + "epoch": 0.46372275207304736, + "grad_norm": 21.582268883750217, + "learning_rate": 5.238693368587234e-06, + "loss": 0.12201995849609375, + "step": 53630 + }, + { + "epoch": 0.46376598559459065, + "grad_norm": 7.654424978638285, + "learning_rate": 5.238557740912756e-06, + "loss": 0.0559661865234375, + "step": 53635 + }, + { + "epoch": 0.4638092191161339, + "grad_norm": 0.6954474189437122, + "learning_rate": 5.238422102914247e-06, + "loss": 0.04877166748046875, + "step": 53640 + }, + { + "epoch": 0.46385245263767716, + "grad_norm": 27.874287206601633, + "learning_rate": 5.238286454592333e-06, + "loss": 0.2025726318359375, + "step": 53645 + }, + { + "epoch": 0.4638956861592204, + "grad_norm": 34.10373254105453, + "learning_rate": 5.2381507959476406e-06, + "loss": 0.37633056640625, + "step": 53650 + }, + { + "epoch": 0.4639389196807637, + "grad_norm": 8.058159444894514, + "learning_rate": 5.2380151269807935e-06, + "loss": 0.073431396484375, + "step": 53655 + }, + { + "epoch": 0.46398215320230696, + "grad_norm": 33.10994514828901, + "learning_rate": 5.237879447692418e-06, + "loss": 0.2860905170440674, + "step": 53660 + }, + { + "epoch": 0.4640253867238502, + "grad_norm": 11.15626025714809, + "learning_rate": 5.237743758083142e-06, + "loss": 0.105096435546875, + "step": 53665 + }, + { + "epoch": 0.46406862024539347, + "grad_norm": 156.11202356385442, + "learning_rate": 5.237608058153587e-06, + "loss": 0.2889495849609375, + "step": 53670 + }, + { + "epoch": 0.46411185376693675, + "grad_norm": 2.5420135654720406, + "learning_rate": 5.237472347904384e-06, + "loss": 0.1537445068359375, + "step": 53675 + }, + { + "epoch": 0.46415508728848, + "grad_norm": 29.277001634399166, + "learning_rate": 5.237336627336154e-06, + "loss": 0.10216827392578125, + "step": 53680 + }, + { + "epoch": 0.46419832081002327, + "grad_norm": 34.402717169516364, + "learning_rate": 5.237200896449527e-06, + "loss": 0.41478424072265624, + "step": 53685 + }, + { + "epoch": 0.4642415543315665, + "grad_norm": 12.045631955650261, + "learning_rate": 5.237065155245126e-06, + "loss": 0.11534423828125, + "step": 53690 + }, + { + "epoch": 0.4642847878531098, + "grad_norm": 6.334071316039667, + "learning_rate": 5.2369294037235786e-06, + "loss": 0.0447906494140625, + "step": 53695 + }, + { + "epoch": 0.46432802137465307, + "grad_norm": 20.48429208996695, + "learning_rate": 5.236793641885509e-06, + "loss": 0.16540908813476562, + "step": 53700 + }, + { + "epoch": 0.4643712548961963, + "grad_norm": 22.373054162829256, + "learning_rate": 5.236657869731547e-06, + "loss": 0.37611083984375, + "step": 53705 + }, + { + "epoch": 0.4644144884177396, + "grad_norm": 4.432941616971352, + "learning_rate": 5.236522087262315e-06, + "loss": 0.092156982421875, + "step": 53710 + }, + { + "epoch": 0.46445772193928286, + "grad_norm": 7.584471584256683, + "learning_rate": 5.2363862944784415e-06, + "loss": 0.0471221923828125, + "step": 53715 + }, + { + "epoch": 0.4645009554608261, + "grad_norm": 7.233356902079127, + "learning_rate": 5.236250491380552e-06, + "loss": 0.06080169677734375, + "step": 53720 + }, + { + "epoch": 0.4645441889823694, + "grad_norm": 7.533985215827705, + "learning_rate": 5.236114677969273e-06, + "loss": 0.215460205078125, + "step": 53725 + }, + { + "epoch": 0.4645874225039126, + "grad_norm": 5.18867316790253, + "learning_rate": 5.2359788542452305e-06, + "loss": 0.22772178649902344, + "step": 53730 + }, + { + "epoch": 0.4646306560254559, + "grad_norm": 1.6663022092592557, + "learning_rate": 5.23584302020905e-06, + "loss": 0.1325286865234375, + "step": 53735 + }, + { + "epoch": 0.4646738895469992, + "grad_norm": 9.561600217921388, + "learning_rate": 5.2357071758613605e-06, + "loss": 0.09808921813964844, + "step": 53740 + }, + { + "epoch": 0.4647171230685424, + "grad_norm": 8.950324742168608, + "learning_rate": 5.235571321202786e-06, + "loss": 0.3599029541015625, + "step": 53745 + }, + { + "epoch": 0.4647603565900857, + "grad_norm": 29.018071163915806, + "learning_rate": 5.235435456233955e-06, + "loss": 0.2917236328125, + "step": 53750 + }, + { + "epoch": 0.464803590111629, + "grad_norm": 12.590084040370018, + "learning_rate": 5.235299580955493e-06, + "loss": 0.10230865478515624, + "step": 53755 + }, + { + "epoch": 0.4648468236331722, + "grad_norm": 1.080151537043692, + "learning_rate": 5.235163695368027e-06, + "loss": 0.39306793212890623, + "step": 53760 + }, + { + "epoch": 0.4648900571547155, + "grad_norm": 0.7507147528947086, + "learning_rate": 5.235027799472183e-06, + "loss": 0.1071624755859375, + "step": 53765 + }, + { + "epoch": 0.46493329067625877, + "grad_norm": 7.503105308601275, + "learning_rate": 5.234891893268589e-06, + "loss": 0.262103271484375, + "step": 53770 + }, + { + "epoch": 0.464976524197802, + "grad_norm": 15.780533861950703, + "learning_rate": 5.23475597675787e-06, + "loss": 0.23124847412109376, + "step": 53775 + }, + { + "epoch": 0.4650197577193453, + "grad_norm": 14.704702276307886, + "learning_rate": 5.234620049940655e-06, + "loss": 0.0599212646484375, + "step": 53780 + }, + { + "epoch": 0.4650629912408885, + "grad_norm": 1.0041691790259732, + "learning_rate": 5.234484112817569e-06, + "loss": 0.1431488037109375, + "step": 53785 + }, + { + "epoch": 0.4651062247624318, + "grad_norm": 9.819568374140754, + "learning_rate": 5.2343481653892406e-06, + "loss": 0.11380043029785156, + "step": 53790 + }, + { + "epoch": 0.4651494582839751, + "grad_norm": 0.7910620506255637, + "learning_rate": 5.234212207656295e-06, + "loss": 0.14437332153320312, + "step": 53795 + }, + { + "epoch": 0.4651926918055183, + "grad_norm": 3.3629251733672185, + "learning_rate": 5.234076239619361e-06, + "loss": 0.18051834106445314, + "step": 53800 + }, + { + "epoch": 0.4652359253270616, + "grad_norm": 8.94550846225434, + "learning_rate": 5.233940261279064e-06, + "loss": 0.34639892578125, + "step": 53805 + }, + { + "epoch": 0.4652791588486049, + "grad_norm": 44.92961445043032, + "learning_rate": 5.233804272636032e-06, + "loss": 0.47334747314453124, + "step": 53810 + }, + { + "epoch": 0.4653223923701481, + "grad_norm": 7.393441828162928, + "learning_rate": 5.233668273690892e-06, + "loss": 0.22359390258789064, + "step": 53815 + }, + { + "epoch": 0.4653656258916914, + "grad_norm": 24.082497766910887, + "learning_rate": 5.233532264444271e-06, + "loss": 0.20709228515625, + "step": 53820 + }, + { + "epoch": 0.4654088594132346, + "grad_norm": 35.25405634259536, + "learning_rate": 5.233396244896798e-06, + "loss": 0.376043701171875, + "step": 53825 + }, + { + "epoch": 0.4654520929347779, + "grad_norm": 0.8510702258189041, + "learning_rate": 5.233260215049098e-06, + "loss": 0.34589691162109376, + "step": 53830 + }, + { + "epoch": 0.4654953264563212, + "grad_norm": 0.06520656116450017, + "learning_rate": 5.233124174901799e-06, + "loss": 0.21961898803710939, + "step": 53835 + }, + { + "epoch": 0.4655385599778644, + "grad_norm": 19.118523373365893, + "learning_rate": 5.232988124455529e-06, + "loss": 0.0727325439453125, + "step": 53840 + }, + { + "epoch": 0.4655817934994077, + "grad_norm": 9.121853483245873, + "learning_rate": 5.232852063710914e-06, + "loss": 0.06180267333984375, + "step": 53845 + }, + { + "epoch": 0.465625027020951, + "grad_norm": 2.964661529234741, + "learning_rate": 5.232715992668584e-06, + "loss": 0.12635269165039062, + "step": 53850 + }, + { + "epoch": 0.4656682605424942, + "grad_norm": 25.059428020537705, + "learning_rate": 5.232579911329165e-06, + "loss": 0.19915275573730468, + "step": 53855 + }, + { + "epoch": 0.4657114940640375, + "grad_norm": 28.74225487313068, + "learning_rate": 5.232443819693284e-06, + "loss": 0.31638641357421876, + "step": 53860 + }, + { + "epoch": 0.46575472758558073, + "grad_norm": 36.222154292771776, + "learning_rate": 5.23230771776157e-06, + "loss": 0.14734649658203125, + "step": 53865 + }, + { + "epoch": 0.465797961107124, + "grad_norm": 46.03684901637095, + "learning_rate": 5.232171605534649e-06, + "loss": 0.1087127685546875, + "step": 53870 + }, + { + "epoch": 0.4658411946286673, + "grad_norm": 4.455776911612412, + "learning_rate": 5.23203548301315e-06, + "loss": 0.04075126647949219, + "step": 53875 + }, + { + "epoch": 0.46588442815021053, + "grad_norm": 8.101163465840745, + "learning_rate": 5.231899350197701e-06, + "loss": 0.44351348876953123, + "step": 53880 + }, + { + "epoch": 0.4659276616717538, + "grad_norm": 1.959605503354787, + "learning_rate": 5.2317632070889305e-06, + "loss": 0.2350433349609375, + "step": 53885 + }, + { + "epoch": 0.4659708951932971, + "grad_norm": 13.878505487873491, + "learning_rate": 5.231627053687464e-06, + "loss": 0.10246543884277344, + "step": 53890 + }, + { + "epoch": 0.4660141287148403, + "grad_norm": 14.289601884522604, + "learning_rate": 5.231490889993931e-06, + "loss": 0.09921646118164062, + "step": 53895 + }, + { + "epoch": 0.4660573622363836, + "grad_norm": 1.4545171393019647, + "learning_rate": 5.231354716008958e-06, + "loss": 0.11439971923828125, + "step": 53900 + }, + { + "epoch": 0.46610059575792684, + "grad_norm": 6.31228148546009, + "learning_rate": 5.231218531733177e-06, + "loss": 0.23050994873046876, + "step": 53905 + }, + { + "epoch": 0.4661438292794701, + "grad_norm": 2.470077374759564, + "learning_rate": 5.2310823371672115e-06, + "loss": 0.1503936767578125, + "step": 53910 + }, + { + "epoch": 0.4661870628010134, + "grad_norm": 4.849465561905961, + "learning_rate": 5.2309461323116906e-06, + "loss": 0.3072685241699219, + "step": 53915 + }, + { + "epoch": 0.46623029632255664, + "grad_norm": 7.799458416562272, + "learning_rate": 5.230809917167245e-06, + "loss": 0.13865966796875, + "step": 53920 + }, + { + "epoch": 0.4662735298440999, + "grad_norm": 0.5519665334512408, + "learning_rate": 5.2306736917345005e-06, + "loss": 0.08980636596679688, + "step": 53925 + }, + { + "epoch": 0.4663167633656432, + "grad_norm": 23.342859670363133, + "learning_rate": 5.230537456014085e-06, + "loss": 0.170404052734375, + "step": 53930 + }, + { + "epoch": 0.46635999688718643, + "grad_norm": 23.009458465435788, + "learning_rate": 5.23040121000663e-06, + "loss": 0.18080902099609375, + "step": 53935 + }, + { + "epoch": 0.4664032304087297, + "grad_norm": 12.884736699610404, + "learning_rate": 5.23026495371276e-06, + "loss": 0.0714935302734375, + "step": 53940 + }, + { + "epoch": 0.466446463930273, + "grad_norm": 11.384851936120729, + "learning_rate": 5.230128687133105e-06, + "loss": 0.05354156494140625, + "step": 53945 + }, + { + "epoch": 0.46648969745181623, + "grad_norm": 18.832472093861046, + "learning_rate": 5.229992410268295e-06, + "loss": 0.23473892211914063, + "step": 53950 + }, + { + "epoch": 0.4665329309733595, + "grad_norm": 0.8807769775144513, + "learning_rate": 5.2298561231189555e-06, + "loss": 0.096575927734375, + "step": 53955 + }, + { + "epoch": 0.46657616449490275, + "grad_norm": 4.077322515509897, + "learning_rate": 5.229719825685718e-06, + "loss": 0.05079498291015625, + "step": 53960 + }, + { + "epoch": 0.46661939801644603, + "grad_norm": 1.204539261695771, + "learning_rate": 5.229583517969209e-06, + "loss": 0.2104511260986328, + "step": 53965 + }, + { + "epoch": 0.4666626315379893, + "grad_norm": 23.31722334694456, + "learning_rate": 5.229447199970058e-06, + "loss": 0.167425537109375, + "step": 53970 + }, + { + "epoch": 0.46670586505953254, + "grad_norm": 5.191373023544115, + "learning_rate": 5.229310871688894e-06, + "loss": 0.16988525390625, + "step": 53975 + }, + { + "epoch": 0.4667490985810758, + "grad_norm": 9.447179989770682, + "learning_rate": 5.229174533126344e-06, + "loss": 0.1552947998046875, + "step": 53980 + }, + { + "epoch": 0.4667923321026191, + "grad_norm": 49.56007452622469, + "learning_rate": 5.2290381842830384e-06, + "loss": 0.14847831726074218, + "step": 53985 + }, + { + "epoch": 0.46683556562416234, + "grad_norm": 1.888565188784062, + "learning_rate": 5.228901825159606e-06, + "loss": 0.10466880798339843, + "step": 53990 + }, + { + "epoch": 0.4668787991457056, + "grad_norm": 4.257882772737681, + "learning_rate": 5.228765455756675e-06, + "loss": 0.19278030395507811, + "step": 53995 + }, + { + "epoch": 0.46692203266724885, + "grad_norm": 45.32131463031867, + "learning_rate": 5.2286290760748746e-06, + "loss": 0.23804397583007814, + "step": 54000 + }, + { + "epoch": 0.46696526618879214, + "grad_norm": 2.900614915863492, + "learning_rate": 5.228492686114835e-06, + "loss": 0.06533889770507813, + "step": 54005 + }, + { + "epoch": 0.4670084997103354, + "grad_norm": 0.8488217819612184, + "learning_rate": 5.228356285877183e-06, + "loss": 0.20366668701171875, + "step": 54010 + }, + { + "epoch": 0.46705173323187865, + "grad_norm": 9.420317870243313, + "learning_rate": 5.228219875362548e-06, + "loss": 0.23575439453125, + "step": 54015 + }, + { + "epoch": 0.46709496675342194, + "grad_norm": 0.9270796293052375, + "learning_rate": 5.228083454571561e-06, + "loss": 0.1869354248046875, + "step": 54020 + }, + { + "epoch": 0.4671382002749652, + "grad_norm": 0.2039730941553693, + "learning_rate": 5.227947023504849e-06, + "loss": 0.13194656372070312, + "step": 54025 + }, + { + "epoch": 0.46718143379650845, + "grad_norm": 2.3422527339323342, + "learning_rate": 5.2278105821630435e-06, + "loss": 0.04014854431152344, + "step": 54030 + }, + { + "epoch": 0.46722466731805173, + "grad_norm": 5.667875951735816, + "learning_rate": 5.2276741305467715e-06, + "loss": 0.1322509765625, + "step": 54035 + }, + { + "epoch": 0.46726790083959496, + "grad_norm": 12.567068952879795, + "learning_rate": 5.227537668656664e-06, + "loss": 0.1536895751953125, + "step": 54040 + }, + { + "epoch": 0.46731113436113825, + "grad_norm": 8.700422582416115, + "learning_rate": 5.227401196493349e-06, + "loss": 0.0395233154296875, + "step": 54045 + }, + { + "epoch": 0.46735436788268153, + "grad_norm": 0.6344259313326663, + "learning_rate": 5.227264714057456e-06, + "loss": 0.012941741943359375, + "step": 54050 + }, + { + "epoch": 0.46739760140422476, + "grad_norm": 34.059388258311316, + "learning_rate": 5.227128221349616e-06, + "loss": 0.2089141845703125, + "step": 54055 + }, + { + "epoch": 0.46744083492576805, + "grad_norm": 2.6007022926757095, + "learning_rate": 5.226991718370457e-06, + "loss": 0.06075439453125, + "step": 54060 + }, + { + "epoch": 0.46748406844731133, + "grad_norm": 4.83296170525919, + "learning_rate": 5.22685520512061e-06, + "loss": 0.09933013916015625, + "step": 54065 + }, + { + "epoch": 0.46752730196885456, + "grad_norm": 19.26618823596071, + "learning_rate": 5.226718681600702e-06, + "loss": 0.13660736083984376, + "step": 54070 + }, + { + "epoch": 0.46757053549039784, + "grad_norm": 5.372147202515213, + "learning_rate": 5.226582147811365e-06, + "loss": 0.24031219482421876, + "step": 54075 + }, + { + "epoch": 0.46761376901194107, + "grad_norm": 35.71528246525784, + "learning_rate": 5.2264456037532274e-06, + "loss": 0.20716171264648436, + "step": 54080 + }, + { + "epoch": 0.46765700253348436, + "grad_norm": 13.921905718773008, + "learning_rate": 5.226309049426919e-06, + "loss": 0.16783714294433594, + "step": 54085 + }, + { + "epoch": 0.46770023605502764, + "grad_norm": 7.1069902737054, + "learning_rate": 5.226172484833071e-06, + "loss": 0.3747894287109375, + "step": 54090 + }, + { + "epoch": 0.46774346957657087, + "grad_norm": 1.4028847168773466, + "learning_rate": 5.226035909972312e-06, + "loss": 0.1318981170654297, + "step": 54095 + }, + { + "epoch": 0.46778670309811415, + "grad_norm": 9.003811797027495, + "learning_rate": 5.225899324845271e-06, + "loss": 0.1173828125, + "step": 54100 + }, + { + "epoch": 0.46782993661965744, + "grad_norm": 11.255078689729878, + "learning_rate": 5.2257627294525805e-06, + "loss": 0.1044097900390625, + "step": 54105 + }, + { + "epoch": 0.46787317014120067, + "grad_norm": 23.405507343775398, + "learning_rate": 5.225626123794869e-06, + "loss": 0.28922500610351565, + "step": 54110 + }, + { + "epoch": 0.46791640366274395, + "grad_norm": 2.03640286382012, + "learning_rate": 5.225489507872765e-06, + "loss": 0.48909759521484375, + "step": 54115 + }, + { + "epoch": 0.46795963718428724, + "grad_norm": 0.5500247444603028, + "learning_rate": 5.225352881686901e-06, + "loss": 0.0471435546875, + "step": 54120 + }, + { + "epoch": 0.46800287070583046, + "grad_norm": 5.358533481227161, + "learning_rate": 5.225216245237906e-06, + "loss": 0.1737396240234375, + "step": 54125 + }, + { + "epoch": 0.46804610422737375, + "grad_norm": 21.28952858454706, + "learning_rate": 5.225079598526411e-06, + "loss": 0.15412750244140624, + "step": 54130 + }, + { + "epoch": 0.468089337748917, + "grad_norm": 5.049477278261306, + "learning_rate": 5.224942941553044e-06, + "loss": 0.1720428466796875, + "step": 54135 + }, + { + "epoch": 0.46813257127046026, + "grad_norm": 26.21583950955644, + "learning_rate": 5.224806274318438e-06, + "loss": 0.8486736297607422, + "step": 54140 + }, + { + "epoch": 0.46817580479200355, + "grad_norm": 0.6916273513734793, + "learning_rate": 5.2246695968232215e-06, + "loss": 0.1567657470703125, + "step": 54145 + }, + { + "epoch": 0.4682190383135468, + "grad_norm": 18.73664502207775, + "learning_rate": 5.224532909068026e-06, + "loss": 0.39931640625, + "step": 54150 + }, + { + "epoch": 0.46826227183509006, + "grad_norm": 0.2544502880490479, + "learning_rate": 5.224396211053479e-06, + "loss": 0.10921440124511719, + "step": 54155 + }, + { + "epoch": 0.46830550535663334, + "grad_norm": 1.3177567923977713, + "learning_rate": 5.224259502780216e-06, + "loss": 0.14791946411132811, + "step": 54160 + }, + { + "epoch": 0.4683487388781766, + "grad_norm": 6.442087030364317, + "learning_rate": 5.224122784248864e-06, + "loss": 0.2991455078125, + "step": 54165 + }, + { + "epoch": 0.46839197239971986, + "grad_norm": 18.24085067599001, + "learning_rate": 5.2239860554600535e-06, + "loss": 0.14456253051757811, + "step": 54170 + }, + { + "epoch": 0.4684352059212631, + "grad_norm": 21.44935086994579, + "learning_rate": 5.223849316414416e-06, + "loss": 0.14177703857421875, + "step": 54175 + }, + { + "epoch": 0.46847843944280637, + "grad_norm": 3.469613590322474, + "learning_rate": 5.223712567112583e-06, + "loss": 0.07231292724609376, + "step": 54180 + }, + { + "epoch": 0.46852167296434966, + "grad_norm": 0.6323275559701357, + "learning_rate": 5.223575807555184e-06, + "loss": 0.09552001953125, + "step": 54185 + }, + { + "epoch": 0.4685649064858929, + "grad_norm": 25.87332886986931, + "learning_rate": 5.223439037742849e-06, + "loss": 0.24393310546875, + "step": 54190 + }, + { + "epoch": 0.46860814000743617, + "grad_norm": 5.818892827068028, + "learning_rate": 5.22330225767621e-06, + "loss": 0.23844032287597655, + "step": 54195 + }, + { + "epoch": 0.46865137352897945, + "grad_norm": 2.9383304343162813, + "learning_rate": 5.2231654673558966e-06, + "loss": 0.16606369018554687, + "step": 54200 + }, + { + "epoch": 0.4686946070505227, + "grad_norm": 1.2397111331871333, + "learning_rate": 5.223028666782542e-06, + "loss": 0.10997314453125, + "step": 54205 + }, + { + "epoch": 0.46873784057206597, + "grad_norm": 0.5425765100915423, + "learning_rate": 5.222891855956775e-06, + "loss": 0.06396484375, + "step": 54210 + }, + { + "epoch": 0.4687810740936092, + "grad_norm": 0.8828448410173891, + "learning_rate": 5.2227550348792265e-06, + "loss": 0.120965576171875, + "step": 54215 + }, + { + "epoch": 0.4688243076151525, + "grad_norm": 7.9950663915733, + "learning_rate": 5.222618203550529e-06, + "loss": 0.061153411865234375, + "step": 54220 + }, + { + "epoch": 0.46886754113669576, + "grad_norm": 2.285589690217619, + "learning_rate": 5.222481361971311e-06, + "loss": 0.09974231719970703, + "step": 54225 + }, + { + "epoch": 0.468910774658239, + "grad_norm": 4.696312826877375, + "learning_rate": 5.222344510142208e-06, + "loss": 0.18407859802246093, + "step": 54230 + }, + { + "epoch": 0.4689540081797823, + "grad_norm": 3.8375359714233803, + "learning_rate": 5.222207648063847e-06, + "loss": 0.12265472412109375, + "step": 54235 + }, + { + "epoch": 0.46899724170132556, + "grad_norm": 2.830130191306775, + "learning_rate": 5.222070775736861e-06, + "loss": 0.1988056182861328, + "step": 54240 + }, + { + "epoch": 0.4690404752228688, + "grad_norm": 0.4808350388245072, + "learning_rate": 5.221933893161881e-06, + "loss": 0.10199394226074218, + "step": 54245 + }, + { + "epoch": 0.4690837087444121, + "grad_norm": 3.8158848876370057, + "learning_rate": 5.221797000339538e-06, + "loss": 0.22203216552734376, + "step": 54250 + }, + { + "epoch": 0.4691269422659553, + "grad_norm": 1.1192912181535004, + "learning_rate": 5.2216600972704635e-06, + "loss": 0.0651641845703125, + "step": 54255 + }, + { + "epoch": 0.4691701757874986, + "grad_norm": 3.609513433862667, + "learning_rate": 5.221523183955289e-06, + "loss": 0.179827880859375, + "step": 54260 + }, + { + "epoch": 0.4692134093090419, + "grad_norm": 37.71566349938839, + "learning_rate": 5.221386260394646e-06, + "loss": 0.2905731201171875, + "step": 54265 + }, + { + "epoch": 0.4692566428305851, + "grad_norm": 0.8472980156478767, + "learning_rate": 5.221249326589166e-06, + "loss": 0.12646026611328126, + "step": 54270 + }, + { + "epoch": 0.4692998763521284, + "grad_norm": 1.9691041500943987, + "learning_rate": 5.2211123825394805e-06, + "loss": 0.21142578125, + "step": 54275 + }, + { + "epoch": 0.46934310987367167, + "grad_norm": 21.83671568372901, + "learning_rate": 5.2209754282462205e-06, + "loss": 0.07260017395019532, + "step": 54280 + }, + { + "epoch": 0.4693863433952149, + "grad_norm": 7.240554347427703, + "learning_rate": 5.220838463710018e-06, + "loss": 0.25511932373046875, + "step": 54285 + }, + { + "epoch": 0.4694295769167582, + "grad_norm": 26.85577342157639, + "learning_rate": 5.220701488931505e-06, + "loss": 0.26706886291503906, + "step": 54290 + }, + { + "epoch": 0.46947281043830147, + "grad_norm": 0.09882788035467235, + "learning_rate": 5.220564503911313e-06, + "loss": 0.17859268188476562, + "step": 54295 + }, + { + "epoch": 0.4695160439598447, + "grad_norm": 1.070395325354451, + "learning_rate": 5.2204275086500735e-06, + "loss": 0.24735641479492188, + "step": 54300 + }, + { + "epoch": 0.469559277481388, + "grad_norm": 7.576371506453631, + "learning_rate": 5.2202905031484186e-06, + "loss": 0.128558349609375, + "step": 54305 + }, + { + "epoch": 0.4696025110029312, + "grad_norm": 22.89146784712096, + "learning_rate": 5.220153487406981e-06, + "loss": 0.38497772216796877, + "step": 54310 + }, + { + "epoch": 0.4696457445244745, + "grad_norm": 22.572053707577922, + "learning_rate": 5.22001646142639e-06, + "loss": 0.18703193664550782, + "step": 54315 + }, + { + "epoch": 0.4696889780460178, + "grad_norm": 16.655437707490204, + "learning_rate": 5.21987942520728e-06, + "loss": 0.1137420654296875, + "step": 54320 + }, + { + "epoch": 0.469732211567561, + "grad_norm": 7.613553269401451, + "learning_rate": 5.219742378750283e-06, + "loss": 0.20185546875, + "step": 54325 + }, + { + "epoch": 0.4697754450891043, + "grad_norm": 3.7034289744998854, + "learning_rate": 5.219605322056029e-06, + "loss": 0.16717529296875, + "step": 54330 + }, + { + "epoch": 0.4698186786106476, + "grad_norm": 0.2401653932103256, + "learning_rate": 5.219468255125153e-06, + "loss": 0.20364990234375, + "step": 54335 + }, + { + "epoch": 0.4698619121321908, + "grad_norm": 9.279317263263117, + "learning_rate": 5.219331177958284e-06, + "loss": 0.0420989990234375, + "step": 54340 + }, + { + "epoch": 0.4699051456537341, + "grad_norm": 17.972076478187258, + "learning_rate": 5.219194090556056e-06, + "loss": 0.201580810546875, + "step": 54345 + }, + { + "epoch": 0.4699483791752773, + "grad_norm": 8.776584475734065, + "learning_rate": 5.219056992919101e-06, + "loss": 0.14196128845214845, + "step": 54350 + }, + { + "epoch": 0.4699916126968206, + "grad_norm": 30.30140112572815, + "learning_rate": 5.2189198850480515e-06, + "loss": 0.233514404296875, + "step": 54355 + }, + { + "epoch": 0.4700348462183639, + "grad_norm": 1.868252183492629, + "learning_rate": 5.21878276694354e-06, + "loss": 0.2839263916015625, + "step": 54360 + }, + { + "epoch": 0.4700780797399071, + "grad_norm": 9.818683613517353, + "learning_rate": 5.218645638606198e-06, + "loss": 0.186468505859375, + "step": 54365 + }, + { + "epoch": 0.4701213132614504, + "grad_norm": 38.88318458398492, + "learning_rate": 5.218508500036657e-06, + "loss": 0.328082275390625, + "step": 54370 + }, + { + "epoch": 0.4701645467829937, + "grad_norm": 9.492098806973196, + "learning_rate": 5.218371351235553e-06, + "loss": 0.18897743225097657, + "step": 54375 + }, + { + "epoch": 0.4702077803045369, + "grad_norm": 6.238886991595005, + "learning_rate": 5.2182341922035156e-06, + "loss": 0.206231689453125, + "step": 54380 + }, + { + "epoch": 0.4702510138260802, + "grad_norm": 10.656305402737704, + "learning_rate": 5.218097022941179e-06, + "loss": 0.04517059326171875, + "step": 54385 + }, + { + "epoch": 0.47029424734762343, + "grad_norm": 5.2170291786672935, + "learning_rate": 5.217959843449172e-06, + "loss": 0.07300262451171875, + "step": 54390 + }, + { + "epoch": 0.4703374808691667, + "grad_norm": 6.483169588353465, + "learning_rate": 5.217822653728133e-06, + "loss": 0.165814208984375, + "step": 54395 + }, + { + "epoch": 0.47038071439071, + "grad_norm": 11.878092965184315, + "learning_rate": 5.217685453778691e-06, + "loss": 0.5700668334960938, + "step": 54400 + }, + { + "epoch": 0.4704239479122532, + "grad_norm": 1.4017821011755192, + "learning_rate": 5.217548243601479e-06, + "loss": 0.090777587890625, + "step": 54405 + }, + { + "epoch": 0.4704671814337965, + "grad_norm": 0.044101916081702636, + "learning_rate": 5.217411023197131e-06, + "loss": 0.10714797973632813, + "step": 54410 + }, + { + "epoch": 0.4705104149553398, + "grad_norm": 2.5615112646070246, + "learning_rate": 5.21727379256628e-06, + "loss": 0.029929351806640626, + "step": 54415 + }, + { + "epoch": 0.470553648476883, + "grad_norm": 8.030125182681159, + "learning_rate": 5.217136551709557e-06, + "loss": 0.04923553466796875, + "step": 54420 + }, + { + "epoch": 0.4705968819984263, + "grad_norm": 0.8786222317825861, + "learning_rate": 5.216999300627597e-06, + "loss": 0.05918731689453125, + "step": 54425 + }, + { + "epoch": 0.47064011551996954, + "grad_norm": 30.467768892560944, + "learning_rate": 5.216862039321032e-06, + "loss": 0.3644233703613281, + "step": 54430 + }, + { + "epoch": 0.4706833490415128, + "grad_norm": 2.3888870417325956, + "learning_rate": 5.2167247677904945e-06, + "loss": 0.62049560546875, + "step": 54435 + }, + { + "epoch": 0.4707265825630561, + "grad_norm": 10.890078532601207, + "learning_rate": 5.216587486036618e-06, + "loss": 0.095068359375, + "step": 54440 + }, + { + "epoch": 0.47076981608459934, + "grad_norm": 24.346628813098206, + "learning_rate": 5.216450194060037e-06, + "loss": 0.13940343856811524, + "step": 54445 + }, + { + "epoch": 0.4708130496061426, + "grad_norm": 3.8707685305719104, + "learning_rate": 5.216312891861383e-06, + "loss": 0.1062042236328125, + "step": 54450 + }, + { + "epoch": 0.4708562831276859, + "grad_norm": 6.635229302945502, + "learning_rate": 5.21617557944129e-06, + "loss": 0.05343704223632813, + "step": 54455 + }, + { + "epoch": 0.47089951664922913, + "grad_norm": 3.6624167424788485, + "learning_rate": 5.2160382568003915e-06, + "loss": 0.12098407745361328, + "step": 54460 + }, + { + "epoch": 0.4709427501707724, + "grad_norm": 6.160970715277446, + "learning_rate": 5.215900923939319e-06, + "loss": 0.1812713623046875, + "step": 54465 + }, + { + "epoch": 0.47098598369231565, + "grad_norm": 7.595118005214445, + "learning_rate": 5.215763580858709e-06, + "loss": 0.15461502075195313, + "step": 54470 + }, + { + "epoch": 0.47102921721385893, + "grad_norm": 3.1801200996111145, + "learning_rate": 5.2156262275591926e-06, + "loss": 0.05171051025390625, + "step": 54475 + }, + { + "epoch": 0.4710724507354022, + "grad_norm": 15.271295827239156, + "learning_rate": 5.215488864041403e-06, + "loss": 0.0999481201171875, + "step": 54480 + }, + { + "epoch": 0.47111568425694544, + "grad_norm": 6.209010902456406, + "learning_rate": 5.215351490305975e-06, + "loss": 0.112872314453125, + "step": 54485 + }, + { + "epoch": 0.47115891777848873, + "grad_norm": 2.137851231861489, + "learning_rate": 5.215214106353541e-06, + "loss": 0.045809173583984376, + "step": 54490 + }, + { + "epoch": 0.471202151300032, + "grad_norm": 26.268864983896826, + "learning_rate": 5.215076712184737e-06, + "loss": 0.30650482177734373, + "step": 54495 + }, + { + "epoch": 0.47124538482157524, + "grad_norm": 5.5170691918632695, + "learning_rate": 5.214939307800193e-06, + "loss": 0.0882537841796875, + "step": 54500 + }, + { + "epoch": 0.4712886183431185, + "grad_norm": 0.39791606282423364, + "learning_rate": 5.214801893200546e-06, + "loss": 0.13221817016601561, + "step": 54505 + }, + { + "epoch": 0.4713318518646618, + "grad_norm": 11.7311361441409, + "learning_rate": 5.214664468386426e-06, + "loss": 0.121783447265625, + "step": 54510 + }, + { + "epoch": 0.47137508538620504, + "grad_norm": 1.5491841552756667, + "learning_rate": 5.214527033358473e-06, + "loss": 0.062160491943359375, + "step": 54515 + }, + { + "epoch": 0.4714183189077483, + "grad_norm": 3.979566362476111, + "learning_rate": 5.214389588117314e-06, + "loss": 0.1407012939453125, + "step": 54520 + }, + { + "epoch": 0.47146155242929155, + "grad_norm": 10.95774690030013, + "learning_rate": 5.214252132663587e-06, + "loss": 0.2350341796875, + "step": 54525 + }, + { + "epoch": 0.47150478595083484, + "grad_norm": 2.851144293146635, + "learning_rate": 5.214114666997925e-06, + "loss": 0.03719329833984375, + "step": 54530 + }, + { + "epoch": 0.4715480194723781, + "grad_norm": 4.917368048553287, + "learning_rate": 5.213977191120961e-06, + "loss": 0.1154327392578125, + "step": 54535 + }, + { + "epoch": 0.47159125299392135, + "grad_norm": 11.76798444056278, + "learning_rate": 5.21383970503333e-06, + "loss": 0.254510498046875, + "step": 54540 + }, + { + "epoch": 0.47163448651546463, + "grad_norm": 3.5700599654607537, + "learning_rate": 5.213702208735667e-06, + "loss": 0.08874626159667968, + "step": 54545 + }, + { + "epoch": 0.4716777200370079, + "grad_norm": 8.617170955727627, + "learning_rate": 5.213564702228604e-06, + "loss": 0.106512451171875, + "step": 54550 + }, + { + "epoch": 0.47172095355855115, + "grad_norm": 25.854815072214507, + "learning_rate": 5.213427185512777e-06, + "loss": 0.2822662353515625, + "step": 54555 + }, + { + "epoch": 0.47176418708009443, + "grad_norm": 1.29648547402808, + "learning_rate": 5.2132896585888184e-06, + "loss": 0.0436798095703125, + "step": 54560 + }, + { + "epoch": 0.47180742060163766, + "grad_norm": 23.617228706981564, + "learning_rate": 5.2131521214573645e-06, + "loss": 0.17462615966796874, + "step": 54565 + }, + { + "epoch": 0.47185065412318095, + "grad_norm": 0.717023731466904, + "learning_rate": 5.2130145741190475e-06, + "loss": 0.10099067687988281, + "step": 54570 + }, + { + "epoch": 0.47189388764472423, + "grad_norm": 4.577175694734675, + "learning_rate": 5.212877016574504e-06, + "loss": 0.065728759765625, + "step": 54575 + }, + { + "epoch": 0.47193712116626746, + "grad_norm": 30.742592623661, + "learning_rate": 5.2127394488243655e-06, + "loss": 0.28267669677734375, + "step": 54580 + }, + { + "epoch": 0.47198035468781074, + "grad_norm": 0.9380511909041629, + "learning_rate": 5.212601870869269e-06, + "loss": 0.6706829071044922, + "step": 54585 + }, + { + "epoch": 0.47202358820935403, + "grad_norm": 0.1420826890355089, + "learning_rate": 5.212464282709849e-06, + "loss": 0.1866128921508789, + "step": 54590 + }, + { + "epoch": 0.47206682173089726, + "grad_norm": 0.37734500112220126, + "learning_rate": 5.212326684346739e-06, + "loss": 0.3335784912109375, + "step": 54595 + }, + { + "epoch": 0.47211005525244054, + "grad_norm": 3.219666578030081, + "learning_rate": 5.212189075780572e-06, + "loss": 0.26724853515625, + "step": 54600 + }, + { + "epoch": 0.47215328877398377, + "grad_norm": 23.490353905041772, + "learning_rate": 5.212051457011986e-06, + "loss": 0.5014083862304688, + "step": 54605 + }, + { + "epoch": 0.47219652229552705, + "grad_norm": 0.847623187816254, + "learning_rate": 5.2119138280416145e-06, + "loss": 0.545626449584961, + "step": 54610 + }, + { + "epoch": 0.47223975581707034, + "grad_norm": 7.611738408077201, + "learning_rate": 5.211776188870091e-06, + "loss": 0.042417144775390624, + "step": 54615 + }, + { + "epoch": 0.47228298933861357, + "grad_norm": 7.562912122204761, + "learning_rate": 5.2116385394980515e-06, + "loss": 0.11398391723632813, + "step": 54620 + }, + { + "epoch": 0.47232622286015685, + "grad_norm": 14.528805453101928, + "learning_rate": 5.211500879926132e-06, + "loss": 0.1780029296875, + "step": 54625 + }, + { + "epoch": 0.47236945638170014, + "grad_norm": 1.1044976644245346, + "learning_rate": 5.211363210154963e-06, + "loss": 0.1037384033203125, + "step": 54630 + }, + { + "epoch": 0.47241268990324337, + "grad_norm": 20.37571208955411, + "learning_rate": 5.2112255301851845e-06, + "loss": 0.23611373901367189, + "step": 54635 + }, + { + "epoch": 0.47245592342478665, + "grad_norm": 0.26865501127189356, + "learning_rate": 5.2110878400174284e-06, + "loss": 0.0862091064453125, + "step": 54640 + }, + { + "epoch": 0.4724991569463299, + "grad_norm": 2.0470588005720733, + "learning_rate": 5.210950139652331e-06, + "loss": 0.2551727294921875, + "step": 54645 + }, + { + "epoch": 0.47254239046787316, + "grad_norm": 7.9018186350303194, + "learning_rate": 5.210812429090526e-06, + "loss": 0.05784273147583008, + "step": 54650 + }, + { + "epoch": 0.47258562398941645, + "grad_norm": 5.108928554871379, + "learning_rate": 5.21067470833265e-06, + "loss": 0.257958984375, + "step": 54655 + }, + { + "epoch": 0.4726288575109597, + "grad_norm": 0.7612820841245982, + "learning_rate": 5.210536977379337e-06, + "loss": 0.1671905517578125, + "step": 54660 + }, + { + "epoch": 0.47267209103250296, + "grad_norm": 0.8413348009439593, + "learning_rate": 5.210399236231223e-06, + "loss": 0.1787689208984375, + "step": 54665 + }, + { + "epoch": 0.47271532455404625, + "grad_norm": 0.5054349656995898, + "learning_rate": 5.210261484888944e-06, + "loss": 0.3558540344238281, + "step": 54670 + }, + { + "epoch": 0.4727585580755895, + "grad_norm": 10.09941178745989, + "learning_rate": 5.210123723353134e-06, + "loss": 0.12506103515625, + "step": 54675 + }, + { + "epoch": 0.47280179159713276, + "grad_norm": 13.821247633662137, + "learning_rate": 5.209985951624428e-06, + "loss": 0.21620330810546876, + "step": 54680 + }, + { + "epoch": 0.47284502511867604, + "grad_norm": 6.256439470085384, + "learning_rate": 5.209848169703463e-06, + "loss": 0.10887603759765625, + "step": 54685 + }, + { + "epoch": 0.4728882586402193, + "grad_norm": 28.439387445422984, + "learning_rate": 5.209710377590872e-06, + "loss": 0.23781471252441405, + "step": 54690 + }, + { + "epoch": 0.47293149216176256, + "grad_norm": 0.20315219490686937, + "learning_rate": 5.2095725752872935e-06, + "loss": 0.04085273742675781, + "step": 54695 + }, + { + "epoch": 0.4729747256833058, + "grad_norm": 15.232611420476237, + "learning_rate": 5.209434762793361e-06, + "loss": 0.07846527099609375, + "step": 54700 + }, + { + "epoch": 0.47301795920484907, + "grad_norm": 15.17585122679387, + "learning_rate": 5.209296940109709e-06, + "loss": 0.10500030517578125, + "step": 54705 + }, + { + "epoch": 0.47306119272639235, + "grad_norm": 12.802744768030939, + "learning_rate": 5.209159107236977e-06, + "loss": 0.12600173950195312, + "step": 54710 + }, + { + "epoch": 0.4731044262479356, + "grad_norm": 0.7221078653814014, + "learning_rate": 5.209021264175798e-06, + "loss": 0.07945556640625, + "step": 54715 + }, + { + "epoch": 0.47314765976947887, + "grad_norm": 0.35292567563161203, + "learning_rate": 5.208883410926807e-06, + "loss": 0.1072998046875, + "step": 54720 + }, + { + "epoch": 0.47319089329102215, + "grad_norm": 4.413424187757666, + "learning_rate": 5.208745547490642e-06, + "loss": 0.41105804443359373, + "step": 54725 + }, + { + "epoch": 0.4732341268125654, + "grad_norm": 0.5414106666424027, + "learning_rate": 5.2086076738679375e-06, + "loss": 0.26307373046875, + "step": 54730 + }, + { + "epoch": 0.47327736033410867, + "grad_norm": 16.130724367151714, + "learning_rate": 5.20846979005933e-06, + "loss": 0.2119457244873047, + "step": 54735 + }, + { + "epoch": 0.4733205938556519, + "grad_norm": 22.12255487868381, + "learning_rate": 5.208331896065454e-06, + "loss": 0.23148193359375, + "step": 54740 + }, + { + "epoch": 0.4733638273771952, + "grad_norm": 3.8468979430891426, + "learning_rate": 5.208193991886947e-06, + "loss": 0.0801849365234375, + "step": 54745 + }, + { + "epoch": 0.47340706089873846, + "grad_norm": 29.01374294558721, + "learning_rate": 5.208056077524444e-06, + "loss": 0.21926422119140626, + "step": 54750 + }, + { + "epoch": 0.4734502944202817, + "grad_norm": 14.858328703794227, + "learning_rate": 5.207918152978582e-06, + "loss": 0.5307159423828125, + "step": 54755 + }, + { + "epoch": 0.473493527941825, + "grad_norm": 17.270839477638248, + "learning_rate": 5.207780218249996e-06, + "loss": 0.255474853515625, + "step": 54760 + }, + { + "epoch": 0.47353676146336826, + "grad_norm": 7.016484206245722, + "learning_rate": 5.207642273339323e-06, + "loss": 0.614016342163086, + "step": 54765 + }, + { + "epoch": 0.4735799949849115, + "grad_norm": 1.7327012034069627, + "learning_rate": 5.207504318247198e-06, + "loss": 0.24102859497070311, + "step": 54770 + }, + { + "epoch": 0.4736232285064548, + "grad_norm": 27.701921939613293, + "learning_rate": 5.20736635297426e-06, + "loss": 0.505523681640625, + "step": 54775 + }, + { + "epoch": 0.473666462027998, + "grad_norm": 12.34106449320718, + "learning_rate": 5.207228377521143e-06, + "loss": 0.2740386962890625, + "step": 54780 + }, + { + "epoch": 0.4737096955495413, + "grad_norm": 6.856837441185299, + "learning_rate": 5.207090391888483e-06, + "loss": 0.0661041259765625, + "step": 54785 + }, + { + "epoch": 0.47375292907108457, + "grad_norm": 28.454345892230936, + "learning_rate": 5.206952396076917e-06, + "loss": 0.349267578125, + "step": 54790 + }, + { + "epoch": 0.4737961625926278, + "grad_norm": 4.455927554256163, + "learning_rate": 5.206814390087082e-06, + "loss": 0.121710205078125, + "step": 54795 + }, + { + "epoch": 0.4738393961141711, + "grad_norm": 1.6349929249973016, + "learning_rate": 5.2066763739196146e-06, + "loss": 0.19115676879882812, + "step": 54800 + }, + { + "epoch": 0.47388262963571437, + "grad_norm": 15.478833803160784, + "learning_rate": 5.206538347575149e-06, + "loss": 0.1403289794921875, + "step": 54805 + }, + { + "epoch": 0.4739258631572576, + "grad_norm": 0.27849206728234355, + "learning_rate": 5.206400311054325e-06, + "loss": 0.10364723205566406, + "step": 54810 + }, + { + "epoch": 0.4739690966788009, + "grad_norm": 2.1041578056824077, + "learning_rate": 5.206262264357777e-06, + "loss": 0.30409698486328124, + "step": 54815 + }, + { + "epoch": 0.4740123302003441, + "grad_norm": 1.8707500795087615, + "learning_rate": 5.206124207486143e-06, + "loss": 0.04868621826171875, + "step": 54820 + }, + { + "epoch": 0.4740555637218874, + "grad_norm": 1.9467301963411558, + "learning_rate": 5.205986140440059e-06, + "loss": 0.2128469467163086, + "step": 54825 + }, + { + "epoch": 0.4740987972434307, + "grad_norm": 0.7456480430284391, + "learning_rate": 5.205848063220162e-06, + "loss": 0.33518218994140625, + "step": 54830 + }, + { + "epoch": 0.4741420307649739, + "grad_norm": 2.005102745107461, + "learning_rate": 5.205709975827088e-06, + "loss": 0.43274917602539065, + "step": 54835 + }, + { + "epoch": 0.4741852642865172, + "grad_norm": 11.608044532779834, + "learning_rate": 5.205571878261475e-06, + "loss": 0.4431755065917969, + "step": 54840 + }, + { + "epoch": 0.4742284978080605, + "grad_norm": 2.8608152478810087, + "learning_rate": 5.20543377052396e-06, + "loss": 0.10206222534179688, + "step": 54845 + }, + { + "epoch": 0.4742717313296037, + "grad_norm": 25.403490150568125, + "learning_rate": 5.205295652615178e-06, + "loss": 0.17978363037109374, + "step": 54850 + }, + { + "epoch": 0.474314964851147, + "grad_norm": 4.774038276863444, + "learning_rate": 5.205157524535767e-06, + "loss": 0.02127685546875, + "step": 54855 + }, + { + "epoch": 0.4743581983726903, + "grad_norm": 2.178108811333432, + "learning_rate": 5.205019386286366e-06, + "loss": 0.107257080078125, + "step": 54860 + }, + { + "epoch": 0.4744014318942335, + "grad_norm": 3.045062937392494, + "learning_rate": 5.2048812378676094e-06, + "loss": 0.0874856948852539, + "step": 54865 + }, + { + "epoch": 0.4744446654157768, + "grad_norm": 1.083839829769938, + "learning_rate": 5.204743079280135e-06, + "loss": 0.5148567199707031, + "step": 54870 + }, + { + "epoch": 0.47448789893732, + "grad_norm": 20.28849107592872, + "learning_rate": 5.204604910524581e-06, + "loss": 0.09999885559082031, + "step": 54875 + }, + { + "epoch": 0.4745311324588633, + "grad_norm": 2.2557176788636624, + "learning_rate": 5.204466731601584e-06, + "loss": 0.060699462890625, + "step": 54880 + }, + { + "epoch": 0.4745743659804066, + "grad_norm": 5.603685505100074, + "learning_rate": 5.204328542511781e-06, + "loss": 0.12602386474609376, + "step": 54885 + }, + { + "epoch": 0.4746175995019498, + "grad_norm": 10.068594087138107, + "learning_rate": 5.20419034325581e-06, + "loss": 0.159576416015625, + "step": 54890 + }, + { + "epoch": 0.4746608330234931, + "grad_norm": 3.3374375800954827, + "learning_rate": 5.204052133834308e-06, + "loss": 0.0704864501953125, + "step": 54895 + }, + { + "epoch": 0.4747040665450364, + "grad_norm": 9.129153382831806, + "learning_rate": 5.203913914247912e-06, + "loss": 0.654400634765625, + "step": 54900 + }, + { + "epoch": 0.4747473000665796, + "grad_norm": 3.801172329590553, + "learning_rate": 5.2037756844972595e-06, + "loss": 0.041933441162109376, + "step": 54905 + }, + { + "epoch": 0.4747905335881229, + "grad_norm": 1.0098191203100797, + "learning_rate": 5.203637444582988e-06, + "loss": 0.2758209228515625, + "step": 54910 + }, + { + "epoch": 0.4748337671096661, + "grad_norm": 1.1487231149023047, + "learning_rate": 5.203499194505736e-06, + "loss": 0.2357421875, + "step": 54915 + }, + { + "epoch": 0.4748770006312094, + "grad_norm": 0.009177256994800995, + "learning_rate": 5.20336093426614e-06, + "loss": 0.12438697814941406, + "step": 54920 + }, + { + "epoch": 0.4749202341527527, + "grad_norm": 8.553033588653213, + "learning_rate": 5.203222663864838e-06, + "loss": 0.27060394287109374, + "step": 54925 + }, + { + "epoch": 0.4749634676742959, + "grad_norm": 5.781402943868396, + "learning_rate": 5.203084383302468e-06, + "loss": 0.13507537841796874, + "step": 54930 + }, + { + "epoch": 0.4750067011958392, + "grad_norm": 2.322226983718601, + "learning_rate": 5.202946092579667e-06, + "loss": 0.059637451171875, + "step": 54935 + }, + { + "epoch": 0.4750499347173825, + "grad_norm": 9.34575358601457, + "learning_rate": 5.202807791697075e-06, + "loss": 0.08948974609375, + "step": 54940 + }, + { + "epoch": 0.4750931682389257, + "grad_norm": 9.858904550579547, + "learning_rate": 5.202669480655325e-06, + "loss": 0.05271148681640625, + "step": 54945 + }, + { + "epoch": 0.475136401760469, + "grad_norm": 3.6327136557029984, + "learning_rate": 5.202531159455061e-06, + "loss": 0.15299072265625, + "step": 54950 + }, + { + "epoch": 0.47517963528201224, + "grad_norm": 0.5191602354292225, + "learning_rate": 5.202392828096917e-06, + "loss": 0.3298637390136719, + "step": 54955 + }, + { + "epoch": 0.4752228688035555, + "grad_norm": 4.0428658180661845, + "learning_rate": 5.202254486581531e-06, + "loss": 0.1686187744140625, + "step": 54960 + }, + { + "epoch": 0.4752661023250988, + "grad_norm": 42.38650074676914, + "learning_rate": 5.202116134909542e-06, + "loss": 0.26139144897460936, + "step": 54965 + }, + { + "epoch": 0.47530933584664203, + "grad_norm": 4.1733374395986695, + "learning_rate": 5.201977773081589e-06, + "loss": 0.26473846435546877, + "step": 54970 + }, + { + "epoch": 0.4753525693681853, + "grad_norm": 4.398438546170717, + "learning_rate": 5.201839401098308e-06, + "loss": 0.0907470703125, + "step": 54975 + }, + { + "epoch": 0.4753958028897286, + "grad_norm": 0.7959792812641038, + "learning_rate": 5.201701018960339e-06, + "loss": 0.6866607666015625, + "step": 54980 + }, + { + "epoch": 0.47543903641127183, + "grad_norm": 0.42017127574168806, + "learning_rate": 5.201562626668319e-06, + "loss": 0.08617935180664063, + "step": 54985 + }, + { + "epoch": 0.4754822699328151, + "grad_norm": 19.19685378936307, + "learning_rate": 5.201424224222887e-06, + "loss": 0.111456298828125, + "step": 54990 + }, + { + "epoch": 0.47552550345435834, + "grad_norm": 19.840322922722017, + "learning_rate": 5.201285811624681e-06, + "loss": 0.147186279296875, + "step": 54995 + }, + { + "epoch": 0.47556873697590163, + "grad_norm": 8.382096914780965, + "learning_rate": 5.201147388874339e-06, + "loss": 0.22015838623046874, + "step": 55000 + }, + { + "epoch": 0.4756119704974449, + "grad_norm": 51.53332311539002, + "learning_rate": 5.201008955972501e-06, + "loss": 0.5092575073242187, + "step": 55005 + }, + { + "epoch": 0.47565520401898814, + "grad_norm": 0.7924880474989994, + "learning_rate": 5.200870512919803e-06, + "loss": 0.28613967895507814, + "step": 55010 + }, + { + "epoch": 0.4756984375405314, + "grad_norm": 2.596188232640346, + "learning_rate": 5.200732059716885e-06, + "loss": 0.1477203369140625, + "step": 55015 + }, + { + "epoch": 0.4757416710620747, + "grad_norm": 0.6849248867124644, + "learning_rate": 5.200593596364384e-06, + "loss": 0.05752105712890625, + "step": 55020 + }, + { + "epoch": 0.47578490458361794, + "grad_norm": 1.4295940693726075, + "learning_rate": 5.200455122862941e-06, + "loss": 0.059454345703125, + "step": 55025 + }, + { + "epoch": 0.4758281381051612, + "grad_norm": 6.922204958033318, + "learning_rate": 5.200316639213194e-06, + "loss": 0.18430938720703124, + "step": 55030 + }, + { + "epoch": 0.4758713716267045, + "grad_norm": 1.6465113860849632, + "learning_rate": 5.20017814541578e-06, + "loss": 0.13086700439453125, + "step": 55035 + }, + { + "epoch": 0.47591460514824774, + "grad_norm": 44.27794534917041, + "learning_rate": 5.200039641471339e-06, + "loss": 0.14779720306396485, + "step": 55040 + }, + { + "epoch": 0.475957838669791, + "grad_norm": 19.008130906566734, + "learning_rate": 5.1999011273805096e-06, + "loss": 0.2566619873046875, + "step": 55045 + }, + { + "epoch": 0.47600107219133425, + "grad_norm": 5.58458679719837, + "learning_rate": 5.199762603143931e-06, + "loss": 0.14023361206054688, + "step": 55050 + }, + { + "epoch": 0.47604430571287754, + "grad_norm": 19.90350522000485, + "learning_rate": 5.199624068762241e-06, + "loss": 0.230859375, + "step": 55055 + }, + { + "epoch": 0.4760875392344208, + "grad_norm": 13.848305538102787, + "learning_rate": 5.1994855242360795e-06, + "loss": 0.11077919006347656, + "step": 55060 + }, + { + "epoch": 0.47613077275596405, + "grad_norm": 7.22753292703026, + "learning_rate": 5.199346969566086e-06, + "loss": 0.0453521728515625, + "step": 55065 + }, + { + "epoch": 0.47617400627750733, + "grad_norm": 17.534955806038273, + "learning_rate": 5.199208404752897e-06, + "loss": 0.3528106689453125, + "step": 55070 + }, + { + "epoch": 0.4762172397990506, + "grad_norm": 21.520474643910315, + "learning_rate": 5.199069829797153e-06, + "loss": 0.4915863037109375, + "step": 55075 + }, + { + "epoch": 0.47626047332059385, + "grad_norm": 6.720041949215632, + "learning_rate": 5.198931244699496e-06, + "loss": 0.42994537353515627, + "step": 55080 + }, + { + "epoch": 0.47630370684213713, + "grad_norm": 5.999779456476179, + "learning_rate": 5.198792649460559e-06, + "loss": 0.25429229736328124, + "step": 55085 + }, + { + "epoch": 0.47634694036368036, + "grad_norm": 9.379142659809753, + "learning_rate": 5.198654044080987e-06, + "loss": 0.0461761474609375, + "step": 55090 + }, + { + "epoch": 0.47639017388522364, + "grad_norm": 1.204177287000002, + "learning_rate": 5.198515428561416e-06, + "loss": 0.1824920654296875, + "step": 55095 + }, + { + "epoch": 0.47643340740676693, + "grad_norm": 7.055517528960598, + "learning_rate": 5.198376802902485e-06, + "loss": 0.22248249053955077, + "step": 55100 + }, + { + "epoch": 0.47647664092831016, + "grad_norm": 29.58063213838333, + "learning_rate": 5.198238167104836e-06, + "loss": 0.2294586181640625, + "step": 55105 + }, + { + "epoch": 0.47651987444985344, + "grad_norm": 22.527015992450032, + "learning_rate": 5.1980995211691064e-06, + "loss": 0.208209228515625, + "step": 55110 + }, + { + "epoch": 0.4765631079713967, + "grad_norm": 3.5865391452522686, + "learning_rate": 5.197960865095936e-06, + "loss": 0.100311279296875, + "step": 55115 + }, + { + "epoch": 0.47660634149293996, + "grad_norm": 9.835313571253021, + "learning_rate": 5.197822198885965e-06, + "loss": 0.17143030166625978, + "step": 55120 + }, + { + "epoch": 0.47664957501448324, + "grad_norm": 0.09077894702837339, + "learning_rate": 5.1976835225398306e-06, + "loss": 0.1510852813720703, + "step": 55125 + }, + { + "epoch": 0.47669280853602647, + "grad_norm": 0.20529960301558708, + "learning_rate": 5.197544836058175e-06, + "loss": 0.04691619873046875, + "step": 55130 + }, + { + "epoch": 0.47673604205756975, + "grad_norm": 6.907199008613505, + "learning_rate": 5.197406139441637e-06, + "loss": 0.0394500732421875, + "step": 55135 + }, + { + "epoch": 0.47677927557911304, + "grad_norm": 40.99530390664415, + "learning_rate": 5.197267432690856e-06, + "loss": 0.2925628662109375, + "step": 55140 + }, + { + "epoch": 0.47682250910065627, + "grad_norm": 6.8383417430160165, + "learning_rate": 5.197128715806471e-06, + "loss": 0.156781005859375, + "step": 55145 + }, + { + "epoch": 0.47686574262219955, + "grad_norm": 0.766598520702361, + "learning_rate": 5.196989988789123e-06, + "loss": 0.21975479125976563, + "step": 55150 + }, + { + "epoch": 0.47690897614374284, + "grad_norm": 17.17344693898155, + "learning_rate": 5.196851251639451e-06, + "loss": 0.09734039306640625, + "step": 55155 + }, + { + "epoch": 0.47695220966528606, + "grad_norm": 47.44029047444439, + "learning_rate": 5.196712504358095e-06, + "loss": 0.40573463439941404, + "step": 55160 + }, + { + "epoch": 0.47699544318682935, + "grad_norm": 2.3320705622818454, + "learning_rate": 5.196573746945696e-06, + "loss": 0.7500930786132812, + "step": 55165 + }, + { + "epoch": 0.4770386767083726, + "grad_norm": 5.346760240823603, + "learning_rate": 5.196434979402892e-06, + "loss": 0.1351348876953125, + "step": 55170 + }, + { + "epoch": 0.47708191022991586, + "grad_norm": 15.94890220322219, + "learning_rate": 5.196296201730325e-06, + "loss": 0.11497650146484376, + "step": 55175 + }, + { + "epoch": 0.47712514375145915, + "grad_norm": 2.2622756387525906, + "learning_rate": 5.196157413928633e-06, + "loss": 0.18864212036132813, + "step": 55180 + }, + { + "epoch": 0.4771683772730024, + "grad_norm": 2.5820506544266832, + "learning_rate": 5.196018615998457e-06, + "loss": 0.2339080810546875, + "step": 55185 + }, + { + "epoch": 0.47721161079454566, + "grad_norm": 1.2710998720189561, + "learning_rate": 5.1958798079404385e-06, + "loss": 0.1859039306640625, + "step": 55190 + }, + { + "epoch": 0.47725484431608894, + "grad_norm": 2.8235733536744694, + "learning_rate": 5.195740989755216e-06, + "loss": 0.15524368286132811, + "step": 55195 + }, + { + "epoch": 0.4772980778376322, + "grad_norm": 8.719504635478062, + "learning_rate": 5.195602161443429e-06, + "loss": 0.12230453491210938, + "step": 55200 + }, + { + "epoch": 0.47734131135917546, + "grad_norm": 4.974453703180536, + "learning_rate": 5.195463323005719e-06, + "loss": 0.4694374084472656, + "step": 55205 + }, + { + "epoch": 0.4773845448807187, + "grad_norm": 37.658777366167776, + "learning_rate": 5.195324474442727e-06, + "loss": 0.2092926025390625, + "step": 55210 + }, + { + "epoch": 0.47742777840226197, + "grad_norm": 3.710419796789032, + "learning_rate": 5.195185615755092e-06, + "loss": 0.075921630859375, + "step": 55215 + }, + { + "epoch": 0.47747101192380526, + "grad_norm": 20.263411656232506, + "learning_rate": 5.195046746943454e-06, + "loss": 0.122796630859375, + "step": 55220 + }, + { + "epoch": 0.4775142454453485, + "grad_norm": 21.59442638668985, + "learning_rate": 5.194907868008455e-06, + "loss": 0.2107940673828125, + "step": 55225 + }, + { + "epoch": 0.47755747896689177, + "grad_norm": 5.1428410442199795, + "learning_rate": 5.194768978950736e-06, + "loss": 0.04544219970703125, + "step": 55230 + }, + { + "epoch": 0.47760071248843505, + "grad_norm": 5.881168796207397, + "learning_rate": 5.194630079770935e-06, + "loss": 0.125238037109375, + "step": 55235 + }, + { + "epoch": 0.4776439460099783, + "grad_norm": 9.238198644619539, + "learning_rate": 5.194491170469695e-06, + "loss": 0.15278778076171876, + "step": 55240 + }, + { + "epoch": 0.47768717953152157, + "grad_norm": 0.24522666748280844, + "learning_rate": 5.194352251047655e-06, + "loss": 0.15642242431640624, + "step": 55245 + }, + { + "epoch": 0.47773041305306485, + "grad_norm": 30.239839345947285, + "learning_rate": 5.194213321505457e-06, + "loss": 0.31133270263671875, + "step": 55250 + }, + { + "epoch": 0.4777736465746081, + "grad_norm": 0.3058056678436305, + "learning_rate": 5.19407438184374e-06, + "loss": 0.2655548095703125, + "step": 55255 + }, + { + "epoch": 0.47781688009615136, + "grad_norm": 0.8644115393722386, + "learning_rate": 5.1939354320631465e-06, + "loss": 0.39952850341796875, + "step": 55260 + }, + { + "epoch": 0.4778601136176946, + "grad_norm": 15.8206864729461, + "learning_rate": 5.193796472164317e-06, + "loss": 0.1880706787109375, + "step": 55265 + }, + { + "epoch": 0.4779033471392379, + "grad_norm": 2.7860006441252843, + "learning_rate": 5.193657502147891e-06, + "loss": 0.17047653198242188, + "step": 55270 + }, + { + "epoch": 0.47794658066078116, + "grad_norm": 19.68453487285245, + "learning_rate": 5.193518522014511e-06, + "loss": 0.380792236328125, + "step": 55275 + }, + { + "epoch": 0.4779898141823244, + "grad_norm": 0.022292399759477253, + "learning_rate": 5.193379531764818e-06, + "loss": 0.08718757629394532, + "step": 55280 + }, + { + "epoch": 0.4780330477038677, + "grad_norm": 40.75963098003976, + "learning_rate": 5.193240531399451e-06, + "loss": 0.3442390441894531, + "step": 55285 + }, + { + "epoch": 0.47807628122541096, + "grad_norm": 15.431597467014532, + "learning_rate": 5.1931015209190535e-06, + "loss": 0.2558277130126953, + "step": 55290 + }, + { + "epoch": 0.4781195147469542, + "grad_norm": 8.017702390855712, + "learning_rate": 5.192962500324265e-06, + "loss": 0.06364784240722657, + "step": 55295 + }, + { + "epoch": 0.4781627482684975, + "grad_norm": 17.356134661859244, + "learning_rate": 5.192823469615728e-06, + "loss": 0.15259552001953125, + "step": 55300 + }, + { + "epoch": 0.4782059817900407, + "grad_norm": 24.54666859063393, + "learning_rate": 5.1926844287940825e-06, + "loss": 0.1609344482421875, + "step": 55305 + }, + { + "epoch": 0.478249215311584, + "grad_norm": 2.1045082306699605, + "learning_rate": 5.19254537785997e-06, + "loss": 0.18837814331054686, + "step": 55310 + }, + { + "epoch": 0.47829244883312727, + "grad_norm": 3.4843689398294706, + "learning_rate": 5.192406316814032e-06, + "loss": 0.2666107177734375, + "step": 55315 + }, + { + "epoch": 0.4783356823546705, + "grad_norm": 13.243076115991045, + "learning_rate": 5.19226724565691e-06, + "loss": 0.07855377197265626, + "step": 55320 + }, + { + "epoch": 0.4783789158762138, + "grad_norm": 7.399384748325295, + "learning_rate": 5.192128164389245e-06, + "loss": 0.02898826599121094, + "step": 55325 + }, + { + "epoch": 0.47842214939775707, + "grad_norm": 52.064961793931424, + "learning_rate": 5.191989073011678e-06, + "loss": 0.23653106689453124, + "step": 55330 + }, + { + "epoch": 0.4784653829193003, + "grad_norm": 22.87585638892171, + "learning_rate": 5.191849971524852e-06, + "loss": 0.32025890350341796, + "step": 55335 + }, + { + "epoch": 0.4785086164408436, + "grad_norm": 2.0045604220951017, + "learning_rate": 5.191710859929407e-06, + "loss": 0.02518768310546875, + "step": 55340 + }, + { + "epoch": 0.4785518499623868, + "grad_norm": 3.5480460768334585, + "learning_rate": 5.191571738225986e-06, + "loss": 0.26664199829101565, + "step": 55345 + }, + { + "epoch": 0.4785950834839301, + "grad_norm": 5.502133156606713, + "learning_rate": 5.191432606415229e-06, + "loss": 0.09314422607421875, + "step": 55350 + }, + { + "epoch": 0.4786383170054734, + "grad_norm": 39.40310571573184, + "learning_rate": 5.191293464497778e-06, + "loss": 0.300225830078125, + "step": 55355 + }, + { + "epoch": 0.4786815505270166, + "grad_norm": 0.20338176479805314, + "learning_rate": 5.191154312474277e-06, + "loss": 0.41373519897460936, + "step": 55360 + }, + { + "epoch": 0.4787247840485599, + "grad_norm": 28.42086290994849, + "learning_rate": 5.191015150345365e-06, + "loss": 0.2893074035644531, + "step": 55365 + }, + { + "epoch": 0.4787680175701032, + "grad_norm": 6.680585501291874, + "learning_rate": 5.190875978111684e-06, + "loss": 0.23260498046875, + "step": 55370 + }, + { + "epoch": 0.4788112510916464, + "grad_norm": 40.19907461686883, + "learning_rate": 5.190736795773878e-06, + "loss": 0.28732452392578123, + "step": 55375 + }, + { + "epoch": 0.4788544846131897, + "grad_norm": 1.428463532175745, + "learning_rate": 5.1905976033325864e-06, + "loss": 0.15601043701171874, + "step": 55380 + }, + { + "epoch": 0.4788977181347329, + "grad_norm": 10.156381122447964, + "learning_rate": 5.190458400788452e-06, + "loss": 0.11127223968505859, + "step": 55385 + }, + { + "epoch": 0.4789409516562762, + "grad_norm": 0.14683812159544873, + "learning_rate": 5.1903191881421175e-06, + "loss": 0.04395370483398438, + "step": 55390 + }, + { + "epoch": 0.4789841851778195, + "grad_norm": 6.744067351477487, + "learning_rate": 5.190179965394224e-06, + "loss": 0.370135498046875, + "step": 55395 + }, + { + "epoch": 0.4790274186993627, + "grad_norm": 13.778310497772162, + "learning_rate": 5.190040732545416e-06, + "loss": 0.1138031005859375, + "step": 55400 + }, + { + "epoch": 0.479070652220906, + "grad_norm": 27.72214068353694, + "learning_rate": 5.189901489596331e-06, + "loss": 0.48510894775390623, + "step": 55405 + }, + { + "epoch": 0.4791138857424493, + "grad_norm": 7.696937849803471, + "learning_rate": 5.189762236547615e-06, + "loss": 0.0634918212890625, + "step": 55410 + }, + { + "epoch": 0.4791571192639925, + "grad_norm": 2.4754738023621807, + "learning_rate": 5.189622973399909e-06, + "loss": 0.2696052551269531, + "step": 55415 + }, + { + "epoch": 0.4792003527855358, + "grad_norm": 53.87474720477299, + "learning_rate": 5.189483700153856e-06, + "loss": 0.3600128173828125, + "step": 55420 + }, + { + "epoch": 0.4792435863070791, + "grad_norm": 0.23510767219408546, + "learning_rate": 5.189344416810096e-06, + "loss": 0.129620361328125, + "step": 55425 + }, + { + "epoch": 0.4792868198286223, + "grad_norm": 5.380329730259943, + "learning_rate": 5.1892051233692745e-06, + "loss": 0.228082275390625, + "step": 55430 + }, + { + "epoch": 0.4793300533501656, + "grad_norm": 8.138615015379164, + "learning_rate": 5.189065819832032e-06, + "loss": 0.04323272705078125, + "step": 55435 + }, + { + "epoch": 0.4793732868717088, + "grad_norm": 8.411741932572738, + "learning_rate": 5.18892650619901e-06, + "loss": 0.2018157958984375, + "step": 55440 + }, + { + "epoch": 0.4794165203932521, + "grad_norm": 14.098297779894398, + "learning_rate": 5.188787182470854e-06, + "loss": 0.3283428192138672, + "step": 55445 + }, + { + "epoch": 0.4794597539147954, + "grad_norm": 55.21649287890213, + "learning_rate": 5.188647848648204e-06, + "loss": 0.25366363525390623, + "step": 55450 + }, + { + "epoch": 0.4795029874363386, + "grad_norm": 0.558524079151177, + "learning_rate": 5.188508504731705e-06, + "loss": 0.036547088623046876, + "step": 55455 + }, + { + "epoch": 0.4795462209578819, + "grad_norm": 13.377291653619052, + "learning_rate": 5.188369150721997e-06, + "loss": 0.0566864013671875, + "step": 55460 + }, + { + "epoch": 0.4795894544794252, + "grad_norm": 9.27823809315112, + "learning_rate": 5.188229786619724e-06, + "loss": 0.233056640625, + "step": 55465 + }, + { + "epoch": 0.4796326880009684, + "grad_norm": 2.4224058409011073, + "learning_rate": 5.188090412425528e-06, + "loss": 0.09912910461425781, + "step": 55470 + }, + { + "epoch": 0.4796759215225117, + "grad_norm": 0.9757646895723459, + "learning_rate": 5.1879510281400535e-06, + "loss": 0.0726470947265625, + "step": 55475 + }, + { + "epoch": 0.47971915504405493, + "grad_norm": 27.76037836908487, + "learning_rate": 5.187811633763942e-06, + "loss": 0.29234619140625, + "step": 55480 + }, + { + "epoch": 0.4797623885655982, + "grad_norm": 1.2559745505110724, + "learning_rate": 5.187672229297836e-06, + "loss": 0.1718048095703125, + "step": 55485 + }, + { + "epoch": 0.4798056220871415, + "grad_norm": 17.02664963088991, + "learning_rate": 5.187532814742379e-06, + "loss": 0.17124862670898439, + "step": 55490 + }, + { + "epoch": 0.47984885560868473, + "grad_norm": 5.170519840696632, + "learning_rate": 5.187393390098215e-06, + "loss": 0.16211700439453125, + "step": 55495 + }, + { + "epoch": 0.479892089130228, + "grad_norm": 2.9774876538308908, + "learning_rate": 5.1872539553659845e-06, + "loss": 0.174456787109375, + "step": 55500 + }, + { + "epoch": 0.4799353226517713, + "grad_norm": 8.14504485825263, + "learning_rate": 5.187114510546332e-06, + "loss": 0.32061519622802737, + "step": 55505 + }, + { + "epoch": 0.47997855617331453, + "grad_norm": 1.0498920063181942, + "learning_rate": 5.1869750556399015e-06, + "loss": 0.242230224609375, + "step": 55510 + }, + { + "epoch": 0.4800217896948578, + "grad_norm": 2.57238144629386, + "learning_rate": 5.186835590647335e-06, + "loss": 0.04133415222167969, + "step": 55515 + }, + { + "epoch": 0.48006502321640104, + "grad_norm": 3.3406851298612934, + "learning_rate": 5.186696115569276e-06, + "loss": 0.08154640197753907, + "step": 55520 + }, + { + "epoch": 0.48010825673794433, + "grad_norm": 14.077696724426872, + "learning_rate": 5.186556630406368e-06, + "loss": 0.2603141784667969, + "step": 55525 + }, + { + "epoch": 0.4801514902594876, + "grad_norm": 22.514578828545986, + "learning_rate": 5.186417135159253e-06, + "loss": 0.22874832153320312, + "step": 55530 + }, + { + "epoch": 0.48019472378103084, + "grad_norm": 16.743556662916774, + "learning_rate": 5.186277629828577e-06, + "loss": 0.4261138916015625, + "step": 55535 + }, + { + "epoch": 0.4802379573025741, + "grad_norm": 19.22889274554012, + "learning_rate": 5.18613811441498e-06, + "loss": 0.26673583984375, + "step": 55540 + }, + { + "epoch": 0.4802811908241174, + "grad_norm": 1.2584619085537962, + "learning_rate": 5.185998588919108e-06, + "loss": 0.3995147705078125, + "step": 55545 + }, + { + "epoch": 0.48032442434566064, + "grad_norm": 7.2962290729078125, + "learning_rate": 5.185859053341604e-06, + "loss": 0.05885467529296875, + "step": 55550 + }, + { + "epoch": 0.4803676578672039, + "grad_norm": 39.36819186864807, + "learning_rate": 5.185719507683112e-06, + "loss": 0.2759735107421875, + "step": 55555 + }, + { + "epoch": 0.48041089138874715, + "grad_norm": 18.582744959183007, + "learning_rate": 5.185579951944274e-06, + "loss": 0.07141876220703125, + "step": 55560 + }, + { + "epoch": 0.48045412491029044, + "grad_norm": 16.761380267311395, + "learning_rate": 5.185440386125734e-06, + "loss": 0.26339263916015626, + "step": 55565 + }, + { + "epoch": 0.4804973584318337, + "grad_norm": 31.696534243635952, + "learning_rate": 5.185300810228135e-06, + "loss": 0.095025634765625, + "step": 55570 + }, + { + "epoch": 0.48054059195337695, + "grad_norm": 5.779544537274712, + "learning_rate": 5.185161224252124e-06, + "loss": 0.09030990600585938, + "step": 55575 + }, + { + "epoch": 0.48058382547492023, + "grad_norm": 29.10313645431587, + "learning_rate": 5.185021628198341e-06, + "loss": 0.298663330078125, + "step": 55580 + }, + { + "epoch": 0.4806270589964635, + "grad_norm": 42.8856498343719, + "learning_rate": 5.184882022067432e-06, + "loss": 0.4150146484375, + "step": 55585 + }, + { + "epoch": 0.48067029251800675, + "grad_norm": 6.66788905407223, + "learning_rate": 5.184742405860039e-06, + "loss": 0.256036376953125, + "step": 55590 + }, + { + "epoch": 0.48071352603955003, + "grad_norm": 18.907633698145553, + "learning_rate": 5.184602779576809e-06, + "loss": 0.49672393798828124, + "step": 55595 + }, + { + "epoch": 0.4807567595610933, + "grad_norm": 1.2401191734843282, + "learning_rate": 5.184463143218383e-06, + "loss": 0.08011245727539062, + "step": 55600 + }, + { + "epoch": 0.48079999308263655, + "grad_norm": 2.5948638043008856, + "learning_rate": 5.1843234967854055e-06, + "loss": 0.0853515625, + "step": 55605 + }, + { + "epoch": 0.48084322660417983, + "grad_norm": 11.442647066968837, + "learning_rate": 5.1841838402785215e-06, + "loss": 0.2602783203125, + "step": 55610 + }, + { + "epoch": 0.48088646012572306, + "grad_norm": 2.0237973729249457, + "learning_rate": 5.184044173698375e-06, + "loss": 0.4216331481933594, + "step": 55615 + }, + { + "epoch": 0.48092969364726634, + "grad_norm": 4.927596975651443, + "learning_rate": 5.1839044970456095e-06, + "loss": 0.060232925415039065, + "step": 55620 + }, + { + "epoch": 0.4809729271688096, + "grad_norm": 0.26271757082136094, + "learning_rate": 5.183764810320869e-06, + "loss": 0.439715576171875, + "step": 55625 + }, + { + "epoch": 0.48101616069035286, + "grad_norm": 0.8352404703002763, + "learning_rate": 5.183625113524799e-06, + "loss": 0.23300094604492189, + "step": 55630 + }, + { + "epoch": 0.48105939421189614, + "grad_norm": 5.520327312114517, + "learning_rate": 5.183485406658042e-06, + "loss": 0.09412565231323242, + "step": 55635 + }, + { + "epoch": 0.4811026277334394, + "grad_norm": 6.502057612553929, + "learning_rate": 5.1833456897212435e-06, + "loss": 0.08272285461425781, + "step": 55640 + }, + { + "epoch": 0.48114586125498265, + "grad_norm": 2.976117706333362, + "learning_rate": 5.183205962715048e-06, + "loss": 0.423077392578125, + "step": 55645 + }, + { + "epoch": 0.48118909477652594, + "grad_norm": 15.695785951469691, + "learning_rate": 5.1830662256401e-06, + "loss": 0.15608367919921876, + "step": 55650 + }, + { + "epoch": 0.48123232829806917, + "grad_norm": 28.107131258264598, + "learning_rate": 5.182926478497043e-06, + "loss": 0.2850677490234375, + "step": 55655 + }, + { + "epoch": 0.48127556181961245, + "grad_norm": 7.68785017121419, + "learning_rate": 5.182786721286522e-06, + "loss": 0.5809783935546875, + "step": 55660 + }, + { + "epoch": 0.48131879534115574, + "grad_norm": 17.410197387621047, + "learning_rate": 5.18264695400918e-06, + "loss": 0.08368682861328125, + "step": 55665 + }, + { + "epoch": 0.48136202886269897, + "grad_norm": 68.6887868713102, + "learning_rate": 5.182507176665664e-06, + "loss": 0.17236328125, + "step": 55670 + }, + { + "epoch": 0.48140526238424225, + "grad_norm": 3.8807068881919986, + "learning_rate": 5.1823673892566195e-06, + "loss": 0.04619159698486328, + "step": 55675 + }, + { + "epoch": 0.48144849590578553, + "grad_norm": 7.011319092434789, + "learning_rate": 5.182227591782687e-06, + "loss": 0.2328369140625, + "step": 55680 + }, + { + "epoch": 0.48149172942732876, + "grad_norm": 2.5281547019516197, + "learning_rate": 5.182087784244516e-06, + "loss": 0.107391357421875, + "step": 55685 + }, + { + "epoch": 0.48153496294887205, + "grad_norm": 11.460327175984474, + "learning_rate": 5.181947966642747e-06, + "loss": 0.09232177734375, + "step": 55690 + }, + { + "epoch": 0.4815781964704153, + "grad_norm": 4.709244230949936, + "learning_rate": 5.181808138978027e-06, + "loss": 0.12809295654296876, + "step": 55695 + }, + { + "epoch": 0.48162142999195856, + "grad_norm": 3.0195591190743056, + "learning_rate": 5.181668301251001e-06, + "loss": 0.07872333526611328, + "step": 55700 + }, + { + "epoch": 0.48166466351350185, + "grad_norm": 2.0443098498215457, + "learning_rate": 5.181528453462315e-06, + "loss": 0.0811269760131836, + "step": 55705 + }, + { + "epoch": 0.4817078970350451, + "grad_norm": 2.7338118508145297, + "learning_rate": 5.181388595612611e-06, + "loss": 0.157073974609375, + "step": 55710 + }, + { + "epoch": 0.48175113055658836, + "grad_norm": 8.015023027121362, + "learning_rate": 5.181248727702535e-06, + "loss": 0.232281494140625, + "step": 55715 + }, + { + "epoch": 0.48179436407813164, + "grad_norm": 6.860040923901108, + "learning_rate": 5.181108849732734e-06, + "loss": 0.07379150390625, + "step": 55720 + }, + { + "epoch": 0.48183759759967487, + "grad_norm": 1.2117289399328106, + "learning_rate": 5.18096896170385e-06, + "loss": 0.035430908203125, + "step": 55725 + }, + { + "epoch": 0.48188083112121816, + "grad_norm": 1.3132518073799058, + "learning_rate": 5.180829063616532e-06, + "loss": 0.08892822265625, + "step": 55730 + }, + { + "epoch": 0.4819240646427614, + "grad_norm": 20.06506139714173, + "learning_rate": 5.180689155471421e-06, + "loss": 0.0421783447265625, + "step": 55735 + }, + { + "epoch": 0.48196729816430467, + "grad_norm": 20.32557135066423, + "learning_rate": 5.180549237269166e-06, + "loss": 0.27823333740234374, + "step": 55740 + }, + { + "epoch": 0.48201053168584795, + "grad_norm": 41.03339815546922, + "learning_rate": 5.18040930901041e-06, + "loss": 0.635455322265625, + "step": 55745 + }, + { + "epoch": 0.4820537652073912, + "grad_norm": 9.361833343306444, + "learning_rate": 5.180269370695799e-06, + "loss": 0.29647216796875, + "step": 55750 + }, + { + "epoch": 0.48209699872893447, + "grad_norm": 4.647969941380001, + "learning_rate": 5.180129422325978e-06, + "loss": 0.03816642761230469, + "step": 55755 + }, + { + "epoch": 0.48214023225047775, + "grad_norm": 1.231808137758927, + "learning_rate": 5.179989463901593e-06, + "loss": 0.07912712097167969, + "step": 55760 + }, + { + "epoch": 0.482183465772021, + "grad_norm": 4.338472469803928, + "learning_rate": 5.179849495423289e-06, + "loss": 0.07733001708984374, + "step": 55765 + }, + { + "epoch": 0.48222669929356426, + "grad_norm": 48.29307482114393, + "learning_rate": 5.179709516891712e-06, + "loss": 0.29933319091796873, + "step": 55770 + }, + { + "epoch": 0.48226993281510755, + "grad_norm": 8.521473199418397, + "learning_rate": 5.179569528307507e-06, + "loss": 0.1831298828125, + "step": 55775 + }, + { + "epoch": 0.4823131663366508, + "grad_norm": 9.239278562355599, + "learning_rate": 5.17942952967132e-06, + "loss": 0.2890350341796875, + "step": 55780 + }, + { + "epoch": 0.48235639985819406, + "grad_norm": 29.907283914718874, + "learning_rate": 5.179289520983796e-06, + "loss": 0.1961833953857422, + "step": 55785 + }, + { + "epoch": 0.4823996333797373, + "grad_norm": 30.215239934213514, + "learning_rate": 5.179149502245582e-06, + "loss": 0.4522195816040039, + "step": 55790 + }, + { + "epoch": 0.4824428669012806, + "grad_norm": 3.5914309008779033, + "learning_rate": 5.179009473457324e-06, + "loss": 0.25756988525390623, + "step": 55795 + }, + { + "epoch": 0.48248610042282386, + "grad_norm": 24.017766654785916, + "learning_rate": 5.178869434619666e-06, + "loss": 0.2488372802734375, + "step": 55800 + }, + { + "epoch": 0.4825293339443671, + "grad_norm": 0.18959612808267404, + "learning_rate": 5.178729385733254e-06, + "loss": 0.07229461669921874, + "step": 55805 + }, + { + "epoch": 0.4825725674659104, + "grad_norm": 0.3446637416519122, + "learning_rate": 5.1785893267987354e-06, + "loss": 0.0538848876953125, + "step": 55810 + }, + { + "epoch": 0.48261580098745366, + "grad_norm": 18.996026411510364, + "learning_rate": 5.178449257816755e-06, + "loss": 0.18275680541992187, + "step": 55815 + }, + { + "epoch": 0.4826590345089969, + "grad_norm": 0.7548560260311499, + "learning_rate": 5.178309178787959e-06, + "loss": 0.036663150787353514, + "step": 55820 + }, + { + "epoch": 0.48270226803054017, + "grad_norm": 21.497311828560505, + "learning_rate": 5.178169089712994e-06, + "loss": 0.4245208740234375, + "step": 55825 + }, + { + "epoch": 0.4827455015520834, + "grad_norm": 5.56594595764626, + "learning_rate": 5.178028990592505e-06, + "loss": 0.08128013610839843, + "step": 55830 + }, + { + "epoch": 0.4827887350736267, + "grad_norm": 5.615571718363559, + "learning_rate": 5.17788888142714e-06, + "loss": 0.12260894775390625, + "step": 55835 + }, + { + "epoch": 0.48283196859516997, + "grad_norm": 4.87044367771341, + "learning_rate": 5.177748762217543e-06, + "loss": 0.09200820922851563, + "step": 55840 + }, + { + "epoch": 0.4828752021167132, + "grad_norm": 1.9001998896327645, + "learning_rate": 5.177608632964361e-06, + "loss": 0.13180007934570312, + "step": 55845 + }, + { + "epoch": 0.4829184356382565, + "grad_norm": 9.68318129414547, + "learning_rate": 5.17746849366824e-06, + "loss": 0.1164459228515625, + "step": 55850 + }, + { + "epoch": 0.48296166915979977, + "grad_norm": 4.118804617199151, + "learning_rate": 5.177328344329828e-06, + "loss": 0.2529605865478516, + "step": 55855 + }, + { + "epoch": 0.483004902681343, + "grad_norm": 0.6209653268236769, + "learning_rate": 5.177188184949768e-06, + "loss": 0.29884185791015627, + "step": 55860 + }, + { + "epoch": 0.4830481362028863, + "grad_norm": 2.7872959817712344, + "learning_rate": 5.17704801552871e-06, + "loss": 0.1597137451171875, + "step": 55865 + }, + { + "epoch": 0.4830913697244295, + "grad_norm": 1.051654825585242, + "learning_rate": 5.1769078360673e-06, + "loss": 0.18420562744140626, + "step": 55870 + }, + { + "epoch": 0.4831346032459728, + "grad_norm": 1.5652617575921277, + "learning_rate": 5.176767646566181e-06, + "loss": 0.09072723388671874, + "step": 55875 + }, + { + "epoch": 0.4831778367675161, + "grad_norm": 1.2127478251322452, + "learning_rate": 5.176627447026004e-06, + "loss": 0.4214202880859375, + "step": 55880 + }, + { + "epoch": 0.4832210702890593, + "grad_norm": 3.174866884620236, + "learning_rate": 5.176487237447413e-06, + "loss": 0.08237838745117188, + "step": 55885 + }, + { + "epoch": 0.4832643038106026, + "grad_norm": 0.641198768236389, + "learning_rate": 5.176347017831054e-06, + "loss": 0.3151885986328125, + "step": 55890 + }, + { + "epoch": 0.4833075373321459, + "grad_norm": 1.1635838119105528, + "learning_rate": 5.1762067881775755e-06, + "loss": 0.087103271484375, + "step": 55895 + }, + { + "epoch": 0.4833507708536891, + "grad_norm": 1.6696164599261663, + "learning_rate": 5.176066548487624e-06, + "loss": 0.04519805908203125, + "step": 55900 + }, + { + "epoch": 0.4833940043752324, + "grad_norm": 0.23508387829609664, + "learning_rate": 5.175926298761845e-06, + "loss": 0.06605072021484375, + "step": 55905 + }, + { + "epoch": 0.4834372378967756, + "grad_norm": 3.0034502844109814, + "learning_rate": 5.175786039000887e-06, + "loss": 0.16195144653320312, + "step": 55910 + }, + { + "epoch": 0.4834804714183189, + "grad_norm": 5.093454649094102, + "learning_rate": 5.1756457692053965e-06, + "loss": 0.1379241943359375, + "step": 55915 + }, + { + "epoch": 0.4835237049398622, + "grad_norm": 29.868262890309737, + "learning_rate": 5.175505489376019e-06, + "loss": 0.23451995849609375, + "step": 55920 + }, + { + "epoch": 0.4835669384614054, + "grad_norm": 25.24891193965005, + "learning_rate": 5.175365199513403e-06, + "loss": 0.179852294921875, + "step": 55925 + }, + { + "epoch": 0.4836101719829487, + "grad_norm": 54.28848169192977, + "learning_rate": 5.175224899618194e-06, + "loss": 0.38994293212890624, + "step": 55930 + }, + { + "epoch": 0.483653405504492, + "grad_norm": 7.302857592669793, + "learning_rate": 5.175084589691041e-06, + "loss": 0.37467384338378906, + "step": 55935 + }, + { + "epoch": 0.4836966390260352, + "grad_norm": 0.5673777167185543, + "learning_rate": 5.17494426973259e-06, + "loss": 0.2705474853515625, + "step": 55940 + }, + { + "epoch": 0.4837398725475785, + "grad_norm": 6.845244987227553, + "learning_rate": 5.174803939743488e-06, + "loss": 0.05357513427734375, + "step": 55945 + }, + { + "epoch": 0.4837831060691217, + "grad_norm": 5.5533391281235165, + "learning_rate": 5.174663599724382e-06, + "loss": 0.1535125732421875, + "step": 55950 + }, + { + "epoch": 0.483826339590665, + "grad_norm": 25.73318936087367, + "learning_rate": 5.1745232496759206e-06, + "loss": 0.194439697265625, + "step": 55955 + }, + { + "epoch": 0.4838695731122083, + "grad_norm": 15.889203685372028, + "learning_rate": 5.174382889598749e-06, + "loss": 0.155865478515625, + "step": 55960 + }, + { + "epoch": 0.4839128066337515, + "grad_norm": 6.11912747133954, + "learning_rate": 5.174242519493516e-06, + "loss": 0.303662109375, + "step": 55965 + }, + { + "epoch": 0.4839560401552948, + "grad_norm": 16.99200921018908, + "learning_rate": 5.174102139360869e-06, + "loss": 0.336334228515625, + "step": 55970 + }, + { + "epoch": 0.4839992736768381, + "grad_norm": 2.3849082614239308, + "learning_rate": 5.173961749201455e-06, + "loss": 0.09530181884765625, + "step": 55975 + }, + { + "epoch": 0.4840425071983813, + "grad_norm": 1.3093266007284163, + "learning_rate": 5.173821349015921e-06, + "loss": 0.10666351318359375, + "step": 55980 + }, + { + "epoch": 0.4840857407199246, + "grad_norm": 8.41981055685932, + "learning_rate": 5.173680938804915e-06, + "loss": 0.393316650390625, + "step": 55985 + }, + { + "epoch": 0.4841289742414679, + "grad_norm": 35.40864953190369, + "learning_rate": 5.1735405185690845e-06, + "loss": 0.27686309814453125, + "step": 55990 + }, + { + "epoch": 0.4841722077630111, + "grad_norm": 0.06351007165768964, + "learning_rate": 5.173400088309078e-06, + "loss": 0.046081924438476564, + "step": 55995 + }, + { + "epoch": 0.4842154412845544, + "grad_norm": 28.336132099388013, + "learning_rate": 5.173259648025542e-06, + "loss": 0.23893890380859376, + "step": 56000 + }, + { + "epoch": 0.48425867480609763, + "grad_norm": 0.8057791192956555, + "learning_rate": 5.173119197719124e-06, + "loss": 0.36682281494140623, + "step": 56005 + }, + { + "epoch": 0.4843019083276409, + "grad_norm": 22.51333105659888, + "learning_rate": 5.1729787373904726e-06, + "loss": 0.16152896881103515, + "step": 56010 + }, + { + "epoch": 0.4843451418491842, + "grad_norm": 33.16696364207748, + "learning_rate": 5.172838267040236e-06, + "loss": 0.54246826171875, + "step": 56015 + }, + { + "epoch": 0.48438837537072743, + "grad_norm": 0.052764279018517754, + "learning_rate": 5.17269778666906e-06, + "loss": 0.1351165771484375, + "step": 56020 + }, + { + "epoch": 0.4844316088922707, + "grad_norm": 20.35585433366604, + "learning_rate": 5.172557296277594e-06, + "loss": 0.12852306365966798, + "step": 56025 + }, + { + "epoch": 0.484474842413814, + "grad_norm": 4.364605327472669, + "learning_rate": 5.172416795866487e-06, + "loss": 0.2838775634765625, + "step": 56030 + }, + { + "epoch": 0.48451807593535723, + "grad_norm": 0.16545220607922032, + "learning_rate": 5.1722762854363845e-06, + "loss": 0.15768051147460938, + "step": 56035 + }, + { + "epoch": 0.4845613094569005, + "grad_norm": 6.067207562031843, + "learning_rate": 5.172135764987937e-06, + "loss": 0.09642791748046875, + "step": 56040 + }, + { + "epoch": 0.48460454297844374, + "grad_norm": 0.38809023007868637, + "learning_rate": 5.17199523452179e-06, + "loss": 0.18368797302246093, + "step": 56045 + }, + { + "epoch": 0.484647776499987, + "grad_norm": 3.3150247645174002, + "learning_rate": 5.171854694038593e-06, + "loss": 0.3822299957275391, + "step": 56050 + }, + { + "epoch": 0.4846910100215303, + "grad_norm": 15.834953476898782, + "learning_rate": 5.171714143538995e-06, + "loss": 0.20450897216796876, + "step": 56055 + }, + { + "epoch": 0.48473424354307354, + "grad_norm": 0.8964807877172061, + "learning_rate": 5.1715735830236425e-06, + "loss": 0.1898345947265625, + "step": 56060 + }, + { + "epoch": 0.4847774770646168, + "grad_norm": 35.20107155864394, + "learning_rate": 5.171433012493184e-06, + "loss": 0.3064861297607422, + "step": 56065 + }, + { + "epoch": 0.4848207105861601, + "grad_norm": 5.208675241633695, + "learning_rate": 5.171292431948269e-06, + "loss": 0.0945587158203125, + "step": 56070 + }, + { + "epoch": 0.48486394410770334, + "grad_norm": 4.781226630078502, + "learning_rate": 5.171151841389545e-06, + "loss": 0.0481903076171875, + "step": 56075 + }, + { + "epoch": 0.4849071776292466, + "grad_norm": 14.057190858213133, + "learning_rate": 5.17101124081766e-06, + "loss": 0.2393157958984375, + "step": 56080 + }, + { + "epoch": 0.48495041115078985, + "grad_norm": 25.794539095480847, + "learning_rate": 5.170870630233264e-06, + "loss": 0.20457305908203124, + "step": 56085 + }, + { + "epoch": 0.48499364467233314, + "grad_norm": 0.25004881050104383, + "learning_rate": 5.1707300096370046e-06, + "loss": 0.1405120849609375, + "step": 56090 + }, + { + "epoch": 0.4850368781938764, + "grad_norm": 37.27436720595142, + "learning_rate": 5.17058937902953e-06, + "loss": 0.19626007080078126, + "step": 56095 + }, + { + "epoch": 0.48508011171541965, + "grad_norm": 23.483056632584315, + "learning_rate": 5.170448738411488e-06, + "loss": 0.3340087890625, + "step": 56100 + }, + { + "epoch": 0.48512334523696293, + "grad_norm": 1.733010114282187, + "learning_rate": 5.170308087783529e-06, + "loss": 0.02863311767578125, + "step": 56105 + }, + { + "epoch": 0.4851665787585062, + "grad_norm": 8.649639437622382, + "learning_rate": 5.170167427146301e-06, + "loss": 0.24090576171875, + "step": 56110 + }, + { + "epoch": 0.48520981228004945, + "grad_norm": 19.887777990776506, + "learning_rate": 5.170026756500452e-06, + "loss": 0.30810546875, + "step": 56115 + }, + { + "epoch": 0.48525304580159273, + "grad_norm": 3.211058065366551, + "learning_rate": 5.169886075846632e-06, + "loss": 0.49651031494140624, + "step": 56120 + }, + { + "epoch": 0.48529627932313596, + "grad_norm": 7.034833317690693, + "learning_rate": 5.169745385185489e-06, + "loss": 0.542205810546875, + "step": 56125 + }, + { + "epoch": 0.48533951284467924, + "grad_norm": 1.0502812931636418, + "learning_rate": 5.169604684517672e-06, + "loss": 0.0848388671875, + "step": 56130 + }, + { + "epoch": 0.48538274636622253, + "grad_norm": 33.28325068963491, + "learning_rate": 5.169463973843829e-06, + "loss": 0.22337646484375, + "step": 56135 + }, + { + "epoch": 0.48542597988776576, + "grad_norm": 7.917090159868217, + "learning_rate": 5.169323253164612e-06, + "loss": 0.10498428344726562, + "step": 56140 + }, + { + "epoch": 0.48546921340930904, + "grad_norm": 21.05405622602094, + "learning_rate": 5.169182522480665e-06, + "loss": 0.403387451171875, + "step": 56145 + }, + { + "epoch": 0.4855124469308523, + "grad_norm": 22.835942073834556, + "learning_rate": 5.169041781792641e-06, + "loss": 0.064874267578125, + "step": 56150 + }, + { + "epoch": 0.48555568045239555, + "grad_norm": 31.29772551123734, + "learning_rate": 5.168901031101189e-06, + "loss": 0.21482696533203124, + "step": 56155 + }, + { + "epoch": 0.48559891397393884, + "grad_norm": 6.461107604370018, + "learning_rate": 5.168760270406957e-06, + "loss": 0.08847312927246094, + "step": 56160 + }, + { + "epoch": 0.4856421474954821, + "grad_norm": 3.9683935320148778, + "learning_rate": 5.1686194997105925e-06, + "loss": 0.3825286865234375, + "step": 56165 + }, + { + "epoch": 0.48568538101702535, + "grad_norm": 0.2791406278381878, + "learning_rate": 5.168478719012748e-06, + "loss": 0.023724365234375, + "step": 56170 + }, + { + "epoch": 0.48572861453856864, + "grad_norm": 1.1028322837449023, + "learning_rate": 5.168337928314071e-06, + "loss": 0.04063720703125, + "step": 56175 + }, + { + "epoch": 0.48577184806011187, + "grad_norm": 6.590461138830963, + "learning_rate": 5.168197127615211e-06, + "loss": 0.18667755126953126, + "step": 56180 + }, + { + "epoch": 0.48581508158165515, + "grad_norm": 1.423234395891659, + "learning_rate": 5.1680563169168175e-06, + "loss": 0.05765838623046875, + "step": 56185 + }, + { + "epoch": 0.48585831510319843, + "grad_norm": 0.05910053139415682, + "learning_rate": 5.16791549621954e-06, + "loss": 0.08246536254882812, + "step": 56190 + }, + { + "epoch": 0.48590154862474166, + "grad_norm": 15.64953662508808, + "learning_rate": 5.167774665524028e-06, + "loss": 0.3083099365234375, + "step": 56195 + }, + { + "epoch": 0.48594478214628495, + "grad_norm": 5.139781150005331, + "learning_rate": 5.167633824830929e-06, + "loss": 0.02957611083984375, + "step": 56200 + }, + { + "epoch": 0.48598801566782823, + "grad_norm": 34.61743999645468, + "learning_rate": 5.1674929741408965e-06, + "loss": 0.22674407958984374, + "step": 56205 + }, + { + "epoch": 0.48603124918937146, + "grad_norm": 2.060817977642594, + "learning_rate": 5.167352113454577e-06, + "loss": 0.1188568115234375, + "step": 56210 + }, + { + "epoch": 0.48607448271091475, + "grad_norm": 25.677700154961528, + "learning_rate": 5.1672112427726204e-06, + "loss": 0.19457550048828126, + "step": 56215 + }, + { + "epoch": 0.486117716232458, + "grad_norm": 14.507856619745109, + "learning_rate": 5.167070362095678e-06, + "loss": 0.06508102416992187, + "step": 56220 + }, + { + "epoch": 0.48616094975400126, + "grad_norm": 11.809374620030654, + "learning_rate": 5.1669294714243986e-06, + "loss": 0.12397003173828125, + "step": 56225 + }, + { + "epoch": 0.48620418327554454, + "grad_norm": 1.9451275563494341, + "learning_rate": 5.166788570759432e-06, + "loss": 0.08303680419921874, + "step": 56230 + }, + { + "epoch": 0.4862474167970878, + "grad_norm": 0.11711526766796428, + "learning_rate": 5.166647660101428e-06, + "loss": 0.21847686767578126, + "step": 56235 + }, + { + "epoch": 0.48629065031863106, + "grad_norm": 10.034434999148566, + "learning_rate": 5.166506739451035e-06, + "loss": 0.20168991088867189, + "step": 56240 + }, + { + "epoch": 0.48633388384017434, + "grad_norm": 2.7934188871289938, + "learning_rate": 5.166365808808906e-06, + "loss": 0.1527679443359375, + "step": 56245 + }, + { + "epoch": 0.48637711736171757, + "grad_norm": 4.837853052347621, + "learning_rate": 5.16622486817569e-06, + "loss": 0.1554901123046875, + "step": 56250 + }, + { + "epoch": 0.48642035088326085, + "grad_norm": 0.5466032534668754, + "learning_rate": 5.166083917552034e-06, + "loss": 0.18203582763671874, + "step": 56255 + }, + { + "epoch": 0.4864635844048041, + "grad_norm": 3.2997298705479605, + "learning_rate": 5.1659429569385925e-06, + "loss": 0.2262958526611328, + "step": 56260 + }, + { + "epoch": 0.48650681792634737, + "grad_norm": 16.490412755809967, + "learning_rate": 5.165801986336012e-06, + "loss": 0.0926361083984375, + "step": 56265 + }, + { + "epoch": 0.48655005144789065, + "grad_norm": 2.892810584454145, + "learning_rate": 5.165661005744944e-06, + "loss": 0.11423873901367188, + "step": 56270 + }, + { + "epoch": 0.4865932849694339, + "grad_norm": 6.63826705318064, + "learning_rate": 5.16552001516604e-06, + "loss": 0.05649013519287109, + "step": 56275 + }, + { + "epoch": 0.48663651849097717, + "grad_norm": 0.5886523521573849, + "learning_rate": 5.1653790145999475e-06, + "loss": 0.2062915802001953, + "step": 56280 + }, + { + "epoch": 0.48667975201252045, + "grad_norm": 18.690115121077646, + "learning_rate": 5.16523800404732e-06, + "loss": 0.10446701049804688, + "step": 56285 + }, + { + "epoch": 0.4867229855340637, + "grad_norm": 37.34619691583385, + "learning_rate": 5.1650969835088056e-06, + "loss": 0.23114013671875, + "step": 56290 + }, + { + "epoch": 0.48676621905560696, + "grad_norm": 6.050202527710181, + "learning_rate": 5.164955952985054e-06, + "loss": 0.083160400390625, + "step": 56295 + }, + { + "epoch": 0.4868094525771502, + "grad_norm": 7.938956773990561, + "learning_rate": 5.164814912476719e-06, + "loss": 0.2715362548828125, + "step": 56300 + }, + { + "epoch": 0.4868526860986935, + "grad_norm": 18.0206346367021, + "learning_rate": 5.164673861984448e-06, + "loss": 0.0977294921875, + "step": 56305 + }, + { + "epoch": 0.48689591962023676, + "grad_norm": 30.844994531963906, + "learning_rate": 5.164532801508892e-06, + "loss": 0.43543319702148436, + "step": 56310 + }, + { + "epoch": 0.48693915314178, + "grad_norm": 4.680057280987548, + "learning_rate": 5.1643917310507015e-06, + "loss": 0.0950347900390625, + "step": 56315 + }, + { + "epoch": 0.4869823866633233, + "grad_norm": 22.266425815422238, + "learning_rate": 5.164250650610529e-06, + "loss": 0.36281585693359375, + "step": 56320 + }, + { + "epoch": 0.48702562018486656, + "grad_norm": 0.9058660427271337, + "learning_rate": 5.164109560189022e-06, + "loss": 0.13702545166015626, + "step": 56325 + }, + { + "epoch": 0.4870688537064098, + "grad_norm": 5.938854215083321, + "learning_rate": 5.1639684597868335e-06, + "loss": 0.1512969970703125, + "step": 56330 + }, + { + "epoch": 0.48711208722795307, + "grad_norm": 15.446165477319607, + "learning_rate": 5.1638273494046145e-06, + "loss": 0.255096435546875, + "step": 56335 + }, + { + "epoch": 0.48715532074949636, + "grad_norm": 5.773502075096628, + "learning_rate": 5.163686229043014e-06, + "loss": 0.11641387939453125, + "step": 56340 + }, + { + "epoch": 0.4871985542710396, + "grad_norm": 3.090438885105016, + "learning_rate": 5.1635450987026855e-06, + "loss": 0.3839752197265625, + "step": 56345 + }, + { + "epoch": 0.48724178779258287, + "grad_norm": 10.548906662586882, + "learning_rate": 5.1634039583842766e-06, + "loss": 0.13975486755371094, + "step": 56350 + }, + { + "epoch": 0.4872850213141261, + "grad_norm": 15.801514929863526, + "learning_rate": 5.1632628080884395e-06, + "loss": 0.08871917724609375, + "step": 56355 + }, + { + "epoch": 0.4873282548356694, + "grad_norm": 11.48905562527221, + "learning_rate": 5.163121647815827e-06, + "loss": 0.17842025756835939, + "step": 56360 + }, + { + "epoch": 0.48737148835721267, + "grad_norm": 52.76244361814703, + "learning_rate": 5.1629804775670875e-06, + "loss": 0.45127105712890625, + "step": 56365 + }, + { + "epoch": 0.4874147218787559, + "grad_norm": 1.384612425679205, + "learning_rate": 5.1628392973428735e-06, + "loss": 0.07398681640625, + "step": 56370 + }, + { + "epoch": 0.4874579554002992, + "grad_norm": 21.104532667636708, + "learning_rate": 5.162698107143836e-06, + "loss": 0.5539131164550781, + "step": 56375 + }, + { + "epoch": 0.48750118892184247, + "grad_norm": 22.24240845649266, + "learning_rate": 5.162556906970626e-06, + "loss": 0.1137237548828125, + "step": 56380 + }, + { + "epoch": 0.4875444224433857, + "grad_norm": 5.411910085506967, + "learning_rate": 5.162415696823894e-06, + "loss": 0.14801769256591796, + "step": 56385 + }, + { + "epoch": 0.487587655964929, + "grad_norm": 4.376286049822477, + "learning_rate": 5.1622744767042925e-06, + "loss": 0.07687835693359375, + "step": 56390 + }, + { + "epoch": 0.4876308894864722, + "grad_norm": 9.963417709902458, + "learning_rate": 5.1621332466124715e-06, + "loss": 0.14764404296875, + "step": 56395 + }, + { + "epoch": 0.4876741230080155, + "grad_norm": 9.88640294347525, + "learning_rate": 5.161992006549083e-06, + "loss": 0.5513961791992188, + "step": 56400 + }, + { + "epoch": 0.4877173565295588, + "grad_norm": 27.98344775459492, + "learning_rate": 5.161850756514779e-06, + "loss": 0.4012176513671875, + "step": 56405 + }, + { + "epoch": 0.487760590051102, + "grad_norm": 3.8019521594618704, + "learning_rate": 5.16170949651021e-06, + "loss": 0.434320068359375, + "step": 56410 + }, + { + "epoch": 0.4878038235726453, + "grad_norm": 2.32027664031765, + "learning_rate": 5.161568226536028e-06, + "loss": 0.5915069580078125, + "step": 56415 + }, + { + "epoch": 0.4878470570941886, + "grad_norm": 4.657682513918844, + "learning_rate": 5.1614269465928845e-06, + "loss": 0.05504150390625, + "step": 56420 + }, + { + "epoch": 0.4878902906157318, + "grad_norm": 20.075058531496627, + "learning_rate": 5.16128565668143e-06, + "loss": 0.3477439880371094, + "step": 56425 + }, + { + "epoch": 0.4879335241372751, + "grad_norm": 2.368391138739467, + "learning_rate": 5.161144356802318e-06, + "loss": 0.19188232421875, + "step": 56430 + }, + { + "epoch": 0.4879767576588183, + "grad_norm": 4.77300672709036, + "learning_rate": 5.161003046956199e-06, + "loss": 0.0837249755859375, + "step": 56435 + }, + { + "epoch": 0.4880199911803616, + "grad_norm": 0.3261072941120313, + "learning_rate": 5.160861727143725e-06, + "loss": 0.10208511352539062, + "step": 56440 + }, + { + "epoch": 0.4880632247019049, + "grad_norm": 0.20119113810695433, + "learning_rate": 5.160720397365547e-06, + "loss": 0.2927711486816406, + "step": 56445 + }, + { + "epoch": 0.4881064582234481, + "grad_norm": 9.089858809317983, + "learning_rate": 5.1605790576223176e-06, + "loss": 0.20836715698242186, + "step": 56450 + }, + { + "epoch": 0.4881496917449914, + "grad_norm": 19.42411475055847, + "learning_rate": 5.160437707914689e-06, + "loss": 0.2022705078125, + "step": 56455 + }, + { + "epoch": 0.4881929252665347, + "grad_norm": 12.98224230400183, + "learning_rate": 5.160296348243312e-06, + "loss": 0.12274360656738281, + "step": 56460 + }, + { + "epoch": 0.4882361587880779, + "grad_norm": 1.657730622477332, + "learning_rate": 5.160154978608841e-06, + "loss": 0.3503927230834961, + "step": 56465 + }, + { + "epoch": 0.4882793923096212, + "grad_norm": 0.7230028394266766, + "learning_rate": 5.160013599011924e-06, + "loss": 0.03316497802734375, + "step": 56470 + }, + { + "epoch": 0.4883226258311644, + "grad_norm": 3.6190639274950893, + "learning_rate": 5.159872209453217e-06, + "loss": 0.20361194610595704, + "step": 56475 + }, + { + "epoch": 0.4883658593527077, + "grad_norm": 7.276024999050233, + "learning_rate": 5.159730809933367e-06, + "loss": 0.3785888671875, + "step": 56480 + }, + { + "epoch": 0.488409092874251, + "grad_norm": 17.862821084352635, + "learning_rate": 5.1595894004530315e-06, + "loss": 0.12563819885253907, + "step": 56485 + }, + { + "epoch": 0.4884523263957942, + "grad_norm": 31.300385676034587, + "learning_rate": 5.159447981012861e-06, + "loss": 0.43977012634277346, + "step": 56490 + }, + { + "epoch": 0.4884955599173375, + "grad_norm": 24.14866826552675, + "learning_rate": 5.159306551613506e-06, + "loss": 0.30538787841796877, + "step": 56495 + }, + { + "epoch": 0.4885387934388808, + "grad_norm": 0.576707992175473, + "learning_rate": 5.15916511225562e-06, + "loss": 0.20006561279296875, + "step": 56500 + }, + { + "epoch": 0.488582026960424, + "grad_norm": 4.5278060994103555, + "learning_rate": 5.1590236629398565e-06, + "loss": 0.09695358276367187, + "step": 56505 + }, + { + "epoch": 0.4886252604819673, + "grad_norm": 2.4425590500385677, + "learning_rate": 5.158882203666866e-06, + "loss": 0.2793212890625, + "step": 56510 + }, + { + "epoch": 0.4886684940035106, + "grad_norm": 12.858804263108235, + "learning_rate": 5.158740734437301e-06, + "loss": 0.28567657470703123, + "step": 56515 + }, + { + "epoch": 0.4887117275250538, + "grad_norm": 35.33507114174121, + "learning_rate": 5.158599255251816e-06, + "loss": 0.22517681121826172, + "step": 56520 + }, + { + "epoch": 0.4887549610465971, + "grad_norm": 2.959425733346562, + "learning_rate": 5.1584577661110606e-06, + "loss": 0.03376693725585937, + "step": 56525 + }, + { + "epoch": 0.48879819456814033, + "grad_norm": 6.603189661957555, + "learning_rate": 5.15831626701569e-06, + "loss": 0.35143585205078126, + "step": 56530 + }, + { + "epoch": 0.4888414280896836, + "grad_norm": 5.458104953071412, + "learning_rate": 5.1581747579663534e-06, + "loss": 0.0560943603515625, + "step": 56535 + }, + { + "epoch": 0.4888846616112269, + "grad_norm": 0.30553929361483667, + "learning_rate": 5.158033238963707e-06, + "loss": 0.1212860107421875, + "step": 56540 + }, + { + "epoch": 0.48892789513277013, + "grad_norm": 7.437080026685106, + "learning_rate": 5.157891710008402e-06, + "loss": 0.027094268798828126, + "step": 56545 + }, + { + "epoch": 0.4889711286543134, + "grad_norm": 4.518704182778885, + "learning_rate": 5.15775017110109e-06, + "loss": 0.3953826904296875, + "step": 56550 + }, + { + "epoch": 0.4890143621758567, + "grad_norm": 8.654923697713864, + "learning_rate": 5.157608622242426e-06, + "loss": 0.185205078125, + "step": 56555 + }, + { + "epoch": 0.4890575956973999, + "grad_norm": 21.460642634208472, + "learning_rate": 5.1574670634330615e-06, + "loss": 0.1763580322265625, + "step": 56560 + }, + { + "epoch": 0.4891008292189432, + "grad_norm": 8.429553895377389, + "learning_rate": 5.157325494673649e-06, + "loss": 0.33036231994628906, + "step": 56565 + }, + { + "epoch": 0.48914406274048644, + "grad_norm": 2.5061628092356663, + "learning_rate": 5.157183915964842e-06, + "loss": 0.13249969482421875, + "step": 56570 + }, + { + "epoch": 0.4891872962620297, + "grad_norm": 40.64686572631644, + "learning_rate": 5.157042327307294e-06, + "loss": 0.27046661376953124, + "step": 56575 + }, + { + "epoch": 0.489230529783573, + "grad_norm": 1.129598546398456, + "learning_rate": 5.1569007287016565e-06, + "loss": 0.15151290893554686, + "step": 56580 + }, + { + "epoch": 0.48927376330511624, + "grad_norm": 1.217241953212972, + "learning_rate": 5.156759120148585e-06, + "loss": 0.15259933471679688, + "step": 56585 + }, + { + "epoch": 0.4893169968266595, + "grad_norm": 2.6265884241007353, + "learning_rate": 5.15661750164873e-06, + "loss": 0.1151885986328125, + "step": 56590 + }, + { + "epoch": 0.4893602303482028, + "grad_norm": 15.829940914415596, + "learning_rate": 5.156475873202745e-06, + "loss": 0.18881454467773437, + "step": 56595 + }, + { + "epoch": 0.48940346386974604, + "grad_norm": 13.800989201167045, + "learning_rate": 5.156334234811285e-06, + "loss": 0.2195098876953125, + "step": 56600 + }, + { + "epoch": 0.4894466973912893, + "grad_norm": 0.11649835694814042, + "learning_rate": 5.1561925864750015e-06, + "loss": 0.11179962158203124, + "step": 56605 + }, + { + "epoch": 0.48948993091283255, + "grad_norm": 2.357548612303094, + "learning_rate": 5.156050928194548e-06, + "loss": 0.26094398498535154, + "step": 56610 + }, + { + "epoch": 0.48953316443437583, + "grad_norm": 38.390186030193064, + "learning_rate": 5.1559092599705785e-06, + "loss": 0.18875732421875, + "step": 56615 + }, + { + "epoch": 0.4895763979559191, + "grad_norm": 12.038528704961225, + "learning_rate": 5.155767581803746e-06, + "loss": 0.06758918762207031, + "step": 56620 + }, + { + "epoch": 0.48961963147746235, + "grad_norm": 17.702624416700235, + "learning_rate": 5.155625893694704e-06, + "loss": 0.159588623046875, + "step": 56625 + }, + { + "epoch": 0.48966286499900563, + "grad_norm": 13.781152424846905, + "learning_rate": 5.155484195644106e-06, + "loss": 0.24018363952636718, + "step": 56630 + }, + { + "epoch": 0.4897060985205489, + "grad_norm": 0.383896586615762, + "learning_rate": 5.155342487652605e-06, + "loss": 0.20889434814453126, + "step": 56635 + }, + { + "epoch": 0.48974933204209214, + "grad_norm": 1.1817796228787374, + "learning_rate": 5.155200769720855e-06, + "loss": 0.16400604248046874, + "step": 56640 + }, + { + "epoch": 0.48979256556363543, + "grad_norm": 17.4857015647999, + "learning_rate": 5.15505904184951e-06, + "loss": 0.30469818115234376, + "step": 56645 + }, + { + "epoch": 0.48983579908517866, + "grad_norm": 5.508918247729092, + "learning_rate": 5.154917304039222e-06, + "loss": 0.3279022216796875, + "step": 56650 + }, + { + "epoch": 0.48987903260672194, + "grad_norm": 3.122202858858433, + "learning_rate": 5.154775556290646e-06, + "loss": 0.040618896484375, + "step": 56655 + }, + { + "epoch": 0.4899222661282652, + "grad_norm": 1.0818612793310152, + "learning_rate": 5.1546337986044365e-06, + "loss": 0.150140380859375, + "step": 56660 + }, + { + "epoch": 0.48996549964980846, + "grad_norm": 24.970437414336253, + "learning_rate": 5.154492030981245e-06, + "loss": 0.2901702880859375, + "step": 56665 + }, + { + "epoch": 0.49000873317135174, + "grad_norm": 26.004126139770555, + "learning_rate": 5.1543502534217275e-06, + "loss": 0.44759979248046877, + "step": 56670 + }, + { + "epoch": 0.490051966692895, + "grad_norm": 18.062089659460487, + "learning_rate": 5.154208465926537e-06, + "loss": 0.048999977111816403, + "step": 56675 + }, + { + "epoch": 0.49009520021443825, + "grad_norm": 1.3079038744197395, + "learning_rate": 5.1540666684963276e-06, + "loss": 0.13128662109375, + "step": 56680 + }, + { + "epoch": 0.49013843373598154, + "grad_norm": 1.1729749117507404, + "learning_rate": 5.153924861131753e-06, + "loss": 0.12982025146484374, + "step": 56685 + }, + { + "epoch": 0.49018166725752477, + "grad_norm": 2.6054545379141767, + "learning_rate": 5.153783043833466e-06, + "loss": 0.11060752868652343, + "step": 56690 + }, + { + "epoch": 0.49022490077906805, + "grad_norm": 42.08251402881635, + "learning_rate": 5.153641216602123e-06, + "loss": 0.28135986328125, + "step": 56695 + }, + { + "epoch": 0.49026813430061134, + "grad_norm": 0.725830865175852, + "learning_rate": 5.153499379438377e-06, + "loss": 0.40496063232421875, + "step": 56700 + }, + { + "epoch": 0.49031136782215456, + "grad_norm": 7.117606282355342, + "learning_rate": 5.1533575323428825e-06, + "loss": 0.15123748779296875, + "step": 56705 + }, + { + "epoch": 0.49035460134369785, + "grad_norm": 3.6961054103546016, + "learning_rate": 5.1532156753162925e-06, + "loss": 0.07662353515625, + "step": 56710 + }, + { + "epoch": 0.49039783486524113, + "grad_norm": 23.063730840918822, + "learning_rate": 5.153073808359262e-06, + "loss": 0.3405914306640625, + "step": 56715 + }, + { + "epoch": 0.49044106838678436, + "grad_norm": 2.0161498894438177, + "learning_rate": 5.1529319314724455e-06, + "loss": 0.0965576171875, + "step": 56720 + }, + { + "epoch": 0.49048430190832765, + "grad_norm": 17.45286048752997, + "learning_rate": 5.152790044656498e-06, + "loss": 0.18444061279296875, + "step": 56725 + }, + { + "epoch": 0.49052753542987093, + "grad_norm": 4.970235708565524, + "learning_rate": 5.152648147912072e-06, + "loss": 0.137896728515625, + "step": 56730 + }, + { + "epoch": 0.49057076895141416, + "grad_norm": 37.83625594580188, + "learning_rate": 5.152506241239824e-06, + "loss": 0.061053466796875, + "step": 56735 + }, + { + "epoch": 0.49061400247295744, + "grad_norm": 1.3489111919713679, + "learning_rate": 5.1523643246404075e-06, + "loss": 0.28594970703125, + "step": 56740 + }, + { + "epoch": 0.4906572359945007, + "grad_norm": 19.501237841887527, + "learning_rate": 5.152222398114476e-06, + "loss": 0.1366119384765625, + "step": 56745 + }, + { + "epoch": 0.49070046951604396, + "grad_norm": 14.624213068042659, + "learning_rate": 5.152080461662685e-06, + "loss": 0.2734771728515625, + "step": 56750 + }, + { + "epoch": 0.49074370303758724, + "grad_norm": 2.3712786022651846, + "learning_rate": 5.15193851528569e-06, + "loss": 0.16068267822265625, + "step": 56755 + }, + { + "epoch": 0.49078693655913047, + "grad_norm": 18.803618760652498, + "learning_rate": 5.151796558984143e-06, + "loss": 0.30308074951171876, + "step": 56760 + }, + { + "epoch": 0.49083017008067376, + "grad_norm": 15.179748306099997, + "learning_rate": 5.151654592758702e-06, + "loss": 0.618096923828125, + "step": 56765 + }, + { + "epoch": 0.49087340360221704, + "grad_norm": 9.846846549907779, + "learning_rate": 5.151512616610019e-06, + "loss": 0.3500518798828125, + "step": 56770 + }, + { + "epoch": 0.49091663712376027, + "grad_norm": 0.986848758580302, + "learning_rate": 5.151370630538751e-06, + "loss": 0.12043914794921876, + "step": 56775 + }, + { + "epoch": 0.49095987064530355, + "grad_norm": 30.595120324613745, + "learning_rate": 5.151228634545551e-06, + "loss": 0.43048095703125, + "step": 56780 + }, + { + "epoch": 0.4910031041668468, + "grad_norm": 1.8094725626482409, + "learning_rate": 5.151086628631076e-06, + "loss": 0.1480224609375, + "step": 56785 + }, + { + "epoch": 0.49104633768839007, + "grad_norm": 6.3065884597814135, + "learning_rate": 5.150944612795977e-06, + "loss": 0.11935577392578126, + "step": 56790 + }, + { + "epoch": 0.49108957120993335, + "grad_norm": 7.2603756021956904, + "learning_rate": 5.1508025870409134e-06, + "loss": 0.028099822998046874, + "step": 56795 + }, + { + "epoch": 0.4911328047314766, + "grad_norm": 33.023097220501484, + "learning_rate": 5.150660551366538e-06, + "loss": 0.142901611328125, + "step": 56800 + }, + { + "epoch": 0.49117603825301986, + "grad_norm": 0.5308639202497739, + "learning_rate": 5.150518505773505e-06, + "loss": 0.167620849609375, + "step": 56805 + }, + { + "epoch": 0.49121927177456315, + "grad_norm": 51.259930568715525, + "learning_rate": 5.150376450262472e-06, + "loss": 0.27802886962890627, + "step": 56810 + }, + { + "epoch": 0.4912625052961064, + "grad_norm": 35.6067449524151, + "learning_rate": 5.150234384834093e-06, + "loss": 0.139910888671875, + "step": 56815 + }, + { + "epoch": 0.49130573881764966, + "grad_norm": 3.1813378176162557, + "learning_rate": 5.150092309489023e-06, + "loss": 0.19151611328125, + "step": 56820 + }, + { + "epoch": 0.4913489723391929, + "grad_norm": 1.426227138553698, + "learning_rate": 5.149950224227917e-06, + "loss": 0.054866790771484375, + "step": 56825 + }, + { + "epoch": 0.4913922058607362, + "grad_norm": 6.146795339738322, + "learning_rate": 5.14980812905143e-06, + "loss": 0.20952415466308594, + "step": 56830 + }, + { + "epoch": 0.49143543938227946, + "grad_norm": 3.8508435751475445, + "learning_rate": 5.149666023960218e-06, + "loss": 0.0672637939453125, + "step": 56835 + }, + { + "epoch": 0.4914786729038227, + "grad_norm": 4.943209800162351, + "learning_rate": 5.149523908954937e-06, + "loss": 0.4585845947265625, + "step": 56840 + }, + { + "epoch": 0.491521906425366, + "grad_norm": 3.2500616846466204, + "learning_rate": 5.1493817840362405e-06, + "loss": 0.06726608276367188, + "step": 56845 + }, + { + "epoch": 0.49156513994690926, + "grad_norm": 24.75020443924539, + "learning_rate": 5.149239649204785e-06, + "loss": 0.2952789306640625, + "step": 56850 + }, + { + "epoch": 0.4916083734684525, + "grad_norm": 13.67941754573838, + "learning_rate": 5.149097504461228e-06, + "loss": 0.1128631591796875, + "step": 56855 + }, + { + "epoch": 0.49165160698999577, + "grad_norm": 2.340121670869607, + "learning_rate": 5.148955349806222e-06, + "loss": 0.21388587951660157, + "step": 56860 + }, + { + "epoch": 0.491694840511539, + "grad_norm": 42.783829724204395, + "learning_rate": 5.148813185240423e-06, + "loss": 0.17795028686523437, + "step": 56865 + }, + { + "epoch": 0.4917380740330823, + "grad_norm": 12.458520995191298, + "learning_rate": 5.14867101076449e-06, + "loss": 0.350384521484375, + "step": 56870 + }, + { + "epoch": 0.49178130755462557, + "grad_norm": 26.230341874616148, + "learning_rate": 5.148528826379073e-06, + "loss": 0.11828346252441406, + "step": 56875 + }, + { + "epoch": 0.4918245410761688, + "grad_norm": 21.32885210275375, + "learning_rate": 5.148386632084832e-06, + "loss": 0.2336761474609375, + "step": 56880 + }, + { + "epoch": 0.4918677745977121, + "grad_norm": 8.181868011628483, + "learning_rate": 5.148244427882423e-06, + "loss": 0.22941207885742188, + "step": 56885 + }, + { + "epoch": 0.49191100811925537, + "grad_norm": 2.3515843151131572, + "learning_rate": 5.148102213772499e-06, + "loss": 0.101275634765625, + "step": 56890 + }, + { + "epoch": 0.4919542416407986, + "grad_norm": 1.57531594635461, + "learning_rate": 5.147959989755719e-06, + "loss": 0.14952964782714845, + "step": 56895 + }, + { + "epoch": 0.4919974751623419, + "grad_norm": 1.3673295176393878, + "learning_rate": 5.147817755832735e-06, + "loss": 0.15784034729003907, + "step": 56900 + }, + { + "epoch": 0.49204070868388516, + "grad_norm": 24.49957847660843, + "learning_rate": 5.147675512004208e-06, + "loss": 0.1332315444946289, + "step": 56905 + }, + { + "epoch": 0.4920839422054284, + "grad_norm": 2.4686029187398524, + "learning_rate": 5.147533258270789e-06, + "loss": 0.39904632568359377, + "step": 56910 + }, + { + "epoch": 0.4921271757269717, + "grad_norm": 7.618499402539596, + "learning_rate": 5.147390994633138e-06, + "loss": 0.199847412109375, + "step": 56915 + }, + { + "epoch": 0.4921704092485149, + "grad_norm": 5.372633906539865, + "learning_rate": 5.147248721091909e-06, + "loss": 0.2326171875, + "step": 56920 + }, + { + "epoch": 0.4922136427700582, + "grad_norm": 12.682100563786303, + "learning_rate": 5.147106437647758e-06, + "loss": 0.15748100280761718, + "step": 56925 + }, + { + "epoch": 0.4922568762916015, + "grad_norm": 4.232739187790294, + "learning_rate": 5.1469641443013425e-06, + "loss": 0.13297576904296876, + "step": 56930 + }, + { + "epoch": 0.4923001098131447, + "grad_norm": 21.065099201917143, + "learning_rate": 5.146821841053318e-06, + "loss": 0.5898391723632812, + "step": 56935 + }, + { + "epoch": 0.492343343334688, + "grad_norm": 1.1171234957579699, + "learning_rate": 5.14667952790434e-06, + "loss": 0.03323822021484375, + "step": 56940 + }, + { + "epoch": 0.4923865768562313, + "grad_norm": 1.9056759190780963, + "learning_rate": 5.146537204855067e-06, + "loss": 0.28826904296875, + "step": 56945 + }, + { + "epoch": 0.4924298103777745, + "grad_norm": 2.657114679959573, + "learning_rate": 5.146394871906153e-06, + "loss": 0.07840309143066407, + "step": 56950 + }, + { + "epoch": 0.4924730438993178, + "grad_norm": 0.14293723819130877, + "learning_rate": 5.146252529058256e-06, + "loss": 0.09500579833984375, + "step": 56955 + }, + { + "epoch": 0.492516277420861, + "grad_norm": 0.26152529478512093, + "learning_rate": 5.146110176312032e-06, + "loss": 0.15377235412597656, + "step": 56960 + }, + { + "epoch": 0.4925595109424043, + "grad_norm": 34.96075772387097, + "learning_rate": 5.1459678136681374e-06, + "loss": 0.1745941162109375, + "step": 56965 + }, + { + "epoch": 0.4926027444639476, + "grad_norm": 0.9695365038595399, + "learning_rate": 5.145825441127229e-06, + "loss": 0.30641632080078124, + "step": 56970 + }, + { + "epoch": 0.4926459779854908, + "grad_norm": 6.587250473303711, + "learning_rate": 5.145683058689963e-06, + "loss": 0.17233200073242189, + "step": 56975 + }, + { + "epoch": 0.4926892115070341, + "grad_norm": 1.351370120978682, + "learning_rate": 5.1455406663569965e-06, + "loss": 0.21410369873046875, + "step": 56980 + }, + { + "epoch": 0.4927324450285774, + "grad_norm": 0.5893423735660102, + "learning_rate": 5.1453982641289866e-06, + "loss": 0.07620391845703126, + "step": 56985 + }, + { + "epoch": 0.4927756785501206, + "grad_norm": 15.821927966974583, + "learning_rate": 5.145255852006588e-06, + "loss": 0.08991222381591797, + "step": 56990 + }, + { + "epoch": 0.4928189120716639, + "grad_norm": 38.43889917001406, + "learning_rate": 5.1451134299904615e-06, + "loss": 0.3322662353515625, + "step": 56995 + }, + { + "epoch": 0.4928621455932071, + "grad_norm": 0.421853822104974, + "learning_rate": 5.144970998081259e-06, + "loss": 0.19520149230957032, + "step": 57000 + }, + { + "epoch": 0.4929053791147504, + "grad_norm": 7.231232705875412, + "learning_rate": 5.14482855627964e-06, + "loss": 0.12813491821289064, + "step": 57005 + }, + { + "epoch": 0.4929486126362937, + "grad_norm": 8.737612681381812, + "learning_rate": 5.1446861045862625e-06, + "loss": 0.05033454895019531, + "step": 57010 + }, + { + "epoch": 0.4929918461578369, + "grad_norm": 2.1855740194962134, + "learning_rate": 5.144543643001782e-06, + "loss": 0.22195663452148437, + "step": 57015 + }, + { + "epoch": 0.4930350796793802, + "grad_norm": 0.978992005145196, + "learning_rate": 5.144401171526856e-06, + "loss": 0.059651947021484374, + "step": 57020 + }, + { + "epoch": 0.4930783132009235, + "grad_norm": 9.837332981748164, + "learning_rate": 5.144258690162139e-06, + "loss": 0.0608245849609375, + "step": 57025 + }, + { + "epoch": 0.4931215467224667, + "grad_norm": 2.690259366881713, + "learning_rate": 5.144116198908292e-06, + "loss": 0.10889892578125, + "step": 57030 + }, + { + "epoch": 0.49316478024401, + "grad_norm": 3.395121544112416, + "learning_rate": 5.143973697765971e-06, + "loss": 0.11601715087890625, + "step": 57035 + }, + { + "epoch": 0.49320801376555323, + "grad_norm": 1.3093984112709125, + "learning_rate": 5.143831186735832e-06, + "loss": 0.4786525726318359, + "step": 57040 + }, + { + "epoch": 0.4932512472870965, + "grad_norm": 19.926196502129823, + "learning_rate": 5.143688665818533e-06, + "loss": 0.04384918212890625, + "step": 57045 + }, + { + "epoch": 0.4932944808086398, + "grad_norm": 15.741086954576355, + "learning_rate": 5.143546135014732e-06, + "loss": 0.5914306640625, + "step": 57050 + }, + { + "epoch": 0.49333771433018303, + "grad_norm": 3.9262912532957035, + "learning_rate": 5.143403594325085e-06, + "loss": 0.19832000732421876, + "step": 57055 + }, + { + "epoch": 0.4933809478517263, + "grad_norm": 3.6754323933824793, + "learning_rate": 5.143261043750251e-06, + "loss": 0.06412429809570312, + "step": 57060 + }, + { + "epoch": 0.4934241813732696, + "grad_norm": 9.798280495180054, + "learning_rate": 5.143118483290885e-06, + "loss": 0.1984527587890625, + "step": 57065 + }, + { + "epoch": 0.49346741489481283, + "grad_norm": 0.3583088031869488, + "learning_rate": 5.1429759129476465e-06, + "loss": 0.048004150390625, + "step": 57070 + }, + { + "epoch": 0.4935106484163561, + "grad_norm": 1.3883746103723291, + "learning_rate": 5.142833332721192e-06, + "loss": 0.03349113464355469, + "step": 57075 + }, + { + "epoch": 0.4935538819378994, + "grad_norm": 0.25514986249081517, + "learning_rate": 5.14269074261218e-06, + "loss": 0.16940078735351563, + "step": 57080 + }, + { + "epoch": 0.4935971154594426, + "grad_norm": 0.6493100039595604, + "learning_rate": 5.142548142621267e-06, + "loss": 0.14896240234375, + "step": 57085 + }, + { + "epoch": 0.4936403489809859, + "grad_norm": 4.956774148996727, + "learning_rate": 5.142405532749112e-06, + "loss": 0.061077880859375, + "step": 57090 + }, + { + "epoch": 0.49368358250252914, + "grad_norm": 8.637374314881754, + "learning_rate": 5.1422629129963725e-06, + "loss": 0.3005615234375, + "step": 57095 + }, + { + "epoch": 0.4937268160240724, + "grad_norm": 9.073418853349816, + "learning_rate": 5.142120283363705e-06, + "loss": 0.13638458251953126, + "step": 57100 + }, + { + "epoch": 0.4937700495456157, + "grad_norm": 4.42140191193418, + "learning_rate": 5.141977643851769e-06, + "loss": 0.03909759521484375, + "step": 57105 + }, + { + "epoch": 0.49381328306715894, + "grad_norm": 0.9778845849377112, + "learning_rate": 5.141834994461221e-06, + "loss": 0.14965057373046875, + "step": 57110 + }, + { + "epoch": 0.4938565165887022, + "grad_norm": 0.19675874986567243, + "learning_rate": 5.141692335192717e-06, + "loss": 0.15378255844116212, + "step": 57115 + }, + { + "epoch": 0.4938997501102455, + "grad_norm": 7.496291539083036, + "learning_rate": 5.14154966604692e-06, + "loss": 0.3339954376220703, + "step": 57120 + }, + { + "epoch": 0.49394298363178873, + "grad_norm": 15.18819447623076, + "learning_rate": 5.141406987024485e-06, + "loss": 0.05316619873046875, + "step": 57125 + }, + { + "epoch": 0.493986217153332, + "grad_norm": 3.207806890347766, + "learning_rate": 5.141264298126068e-06, + "loss": 0.347393798828125, + "step": 57130 + }, + { + "epoch": 0.49402945067487525, + "grad_norm": 3.717239090258167, + "learning_rate": 5.141121599352331e-06, + "loss": 0.07989768981933594, + "step": 57135 + }, + { + "epoch": 0.49407268419641853, + "grad_norm": 6.598388754696503, + "learning_rate": 5.14097889070393e-06, + "loss": 0.386114501953125, + "step": 57140 + }, + { + "epoch": 0.4941159177179618, + "grad_norm": 34.65067485674405, + "learning_rate": 5.140836172181524e-06, + "loss": 0.3119041442871094, + "step": 57145 + }, + { + "epoch": 0.49415915123950505, + "grad_norm": 4.236464572591932, + "learning_rate": 5.140693443785769e-06, + "loss": 0.07241439819335938, + "step": 57150 + }, + { + "epoch": 0.49420238476104833, + "grad_norm": 2.0211161532871893, + "learning_rate": 5.140550705517327e-06, + "loss": 0.01638946533203125, + "step": 57155 + }, + { + "epoch": 0.4942456182825916, + "grad_norm": 5.150320139440606, + "learning_rate": 5.140407957376853e-06, + "loss": 0.219622802734375, + "step": 57160 + }, + { + "epoch": 0.49428885180413484, + "grad_norm": 0.39196816307152416, + "learning_rate": 5.140265199365008e-06, + "loss": 0.11102294921875, + "step": 57165 + }, + { + "epoch": 0.49433208532567813, + "grad_norm": 6.434815248715271, + "learning_rate": 5.140122431482448e-06, + "loss": 0.3472747802734375, + "step": 57170 + }, + { + "epoch": 0.49437531884722136, + "grad_norm": 9.534734392380752, + "learning_rate": 5.139979653729833e-06, + "loss": 0.212603759765625, + "step": 57175 + }, + { + "epoch": 0.49441855236876464, + "grad_norm": 7.25981772493794, + "learning_rate": 5.1398368661078205e-06, + "loss": 0.0931427001953125, + "step": 57180 + }, + { + "epoch": 0.4944617858903079, + "grad_norm": 1.7175959211967242, + "learning_rate": 5.139694068617069e-06, + "loss": 0.2695167541503906, + "step": 57185 + }, + { + "epoch": 0.49450501941185115, + "grad_norm": 4.128468020336775, + "learning_rate": 5.1395512612582375e-06, + "loss": 0.10444869995117187, + "step": 57190 + }, + { + "epoch": 0.49454825293339444, + "grad_norm": 0.18252665078988198, + "learning_rate": 5.139408444031986e-06, + "loss": 0.03315248489379883, + "step": 57195 + }, + { + "epoch": 0.4945914864549377, + "grad_norm": 23.040955580299304, + "learning_rate": 5.139265616938971e-06, + "loss": 0.26868743896484376, + "step": 57200 + }, + { + "epoch": 0.49463471997648095, + "grad_norm": 11.245636119601716, + "learning_rate": 5.139122779979851e-06, + "loss": 0.2544586181640625, + "step": 57205 + }, + { + "epoch": 0.49467795349802424, + "grad_norm": 0.3478518547347926, + "learning_rate": 5.138979933155288e-06, + "loss": 0.14685287475585937, + "step": 57210 + }, + { + "epoch": 0.49472118701956747, + "grad_norm": 0.8714748983316298, + "learning_rate": 5.138837076465937e-06, + "loss": 0.224359130859375, + "step": 57215 + }, + { + "epoch": 0.49476442054111075, + "grad_norm": 7.801782937758989, + "learning_rate": 5.138694209912459e-06, + "loss": 0.3937042236328125, + "step": 57220 + }, + { + "epoch": 0.49480765406265403, + "grad_norm": 8.767959974034744, + "learning_rate": 5.1385513334955115e-06, + "loss": 0.11879119873046876, + "step": 57225 + }, + { + "epoch": 0.49485088758419726, + "grad_norm": 30.296875136164708, + "learning_rate": 5.138408447215755e-06, + "loss": 0.6384063720703125, + "step": 57230 + }, + { + "epoch": 0.49489412110574055, + "grad_norm": 54.5302536479765, + "learning_rate": 5.1382655510738465e-06, + "loss": 0.3310302734375, + "step": 57235 + }, + { + "epoch": 0.49493735462728383, + "grad_norm": 0.6350531615888715, + "learning_rate": 5.138122645070448e-06, + "loss": 0.2109283447265625, + "step": 57240 + }, + { + "epoch": 0.49498058814882706, + "grad_norm": 11.223113253203278, + "learning_rate": 5.1379797292062155e-06, + "loss": 0.100341796875, + "step": 57245 + }, + { + "epoch": 0.49502382167037035, + "grad_norm": 18.57993306971831, + "learning_rate": 5.137836803481809e-06, + "loss": 0.34489593505859373, + "step": 57250 + }, + { + "epoch": 0.49506705519191363, + "grad_norm": 1.8789632697305996, + "learning_rate": 5.1376938678978895e-06, + "loss": 0.25206298828125, + "step": 57255 + }, + { + "epoch": 0.49511028871345686, + "grad_norm": 32.99821548587454, + "learning_rate": 5.1375509224551136e-06, + "loss": 0.3959716796875, + "step": 57260 + }, + { + "epoch": 0.49515352223500014, + "grad_norm": 27.678664092619034, + "learning_rate": 5.137407967154143e-06, + "loss": 0.3490692138671875, + "step": 57265 + }, + { + "epoch": 0.49519675575654337, + "grad_norm": 2.06866124624915, + "learning_rate": 5.137265001995635e-06, + "loss": 0.3010009765625, + "step": 57270 + }, + { + "epoch": 0.49523998927808666, + "grad_norm": 23.359328373951765, + "learning_rate": 5.137122026980249e-06, + "loss": 0.15490455627441407, + "step": 57275 + }, + { + "epoch": 0.49528322279962994, + "grad_norm": 42.99248825858913, + "learning_rate": 5.1369790421086465e-06, + "loss": 0.27078857421875, + "step": 57280 + }, + { + "epoch": 0.49532645632117317, + "grad_norm": 2.9571479478191423, + "learning_rate": 5.136836047381485e-06, + "loss": 0.06490936279296874, + "step": 57285 + }, + { + "epoch": 0.49536968984271645, + "grad_norm": 7.023811557251552, + "learning_rate": 5.136693042799423e-06, + "loss": 0.08618106842041015, + "step": 57290 + }, + { + "epoch": 0.49541292336425974, + "grad_norm": 0.46234196098285457, + "learning_rate": 5.136550028363123e-06, + "loss": 0.13500823974609374, + "step": 57295 + }, + { + "epoch": 0.49545615688580297, + "grad_norm": 6.232508314691749, + "learning_rate": 5.136407004073243e-06, + "loss": 0.2014862060546875, + "step": 57300 + }, + { + "epoch": 0.49549939040734625, + "grad_norm": 27.546250033195914, + "learning_rate": 5.1362639699304415e-06, + "loss": 0.29672775268554685, + "step": 57305 + }, + { + "epoch": 0.4955426239288895, + "grad_norm": 23.581728285561347, + "learning_rate": 5.1361209259353804e-06, + "loss": 0.1660186767578125, + "step": 57310 + }, + { + "epoch": 0.49558585745043277, + "grad_norm": 28.208452169650617, + "learning_rate": 5.135977872088719e-06, + "loss": 0.2466602325439453, + "step": 57315 + }, + { + "epoch": 0.49562909097197605, + "grad_norm": 4.041477218303325, + "learning_rate": 5.135834808391115e-06, + "loss": 0.2859100341796875, + "step": 57320 + }, + { + "epoch": 0.4956723244935193, + "grad_norm": 5.4887658313047, + "learning_rate": 5.13569173484323e-06, + "loss": 0.3352783203125, + "step": 57325 + }, + { + "epoch": 0.49571555801506256, + "grad_norm": 2.5001436518569746, + "learning_rate": 5.135548651445724e-06, + "loss": 0.04841766357421875, + "step": 57330 + }, + { + "epoch": 0.49575879153660585, + "grad_norm": 4.2919987509777675, + "learning_rate": 5.135405558199255e-06, + "loss": 0.1943634033203125, + "step": 57335 + }, + { + "epoch": 0.4958020250581491, + "grad_norm": 3.81339301506086, + "learning_rate": 5.1352624551044845e-06, + "loss": 0.07997817993164062, + "step": 57340 + }, + { + "epoch": 0.49584525857969236, + "grad_norm": 4.030776960632459, + "learning_rate": 5.135119342162073e-06, + "loss": 0.124346923828125, + "step": 57345 + }, + { + "epoch": 0.4958884921012356, + "grad_norm": 0.6632187610942714, + "learning_rate": 5.13497621937268e-06, + "loss": 0.13354949951171874, + "step": 57350 + }, + { + "epoch": 0.4959317256227789, + "grad_norm": 3.639163607486372, + "learning_rate": 5.134833086736965e-06, + "loss": 0.03887786865234375, + "step": 57355 + }, + { + "epoch": 0.49597495914432216, + "grad_norm": 0.09584400488875758, + "learning_rate": 5.134689944255588e-06, + "loss": 0.07680740356445312, + "step": 57360 + }, + { + "epoch": 0.4960181926658654, + "grad_norm": 26.549047597690503, + "learning_rate": 5.134546791929211e-06, + "loss": 0.1106048583984375, + "step": 57365 + }, + { + "epoch": 0.49606142618740867, + "grad_norm": 4.171254516763449, + "learning_rate": 5.134403629758492e-06, + "loss": 0.26507568359375, + "step": 57370 + }, + { + "epoch": 0.49610465970895196, + "grad_norm": 0.2457667858642076, + "learning_rate": 5.1342604577440915e-06, + "loss": 0.24896240234375, + "step": 57375 + }, + { + "epoch": 0.4961478932304952, + "grad_norm": 1.946047523454154, + "learning_rate": 5.1341172758866705e-06, + "loss": 0.1227294921875, + "step": 57380 + }, + { + "epoch": 0.49619112675203847, + "grad_norm": 7.798028082950219, + "learning_rate": 5.133974084186889e-06, + "loss": 0.09343147277832031, + "step": 57385 + }, + { + "epoch": 0.4962343602735817, + "grad_norm": 13.917571773116267, + "learning_rate": 5.1338308826454085e-06, + "loss": 0.13219451904296875, + "step": 57390 + }, + { + "epoch": 0.496277593795125, + "grad_norm": 2.225383513246604, + "learning_rate": 5.133687671262888e-06, + "loss": 0.2272735595703125, + "step": 57395 + }, + { + "epoch": 0.49632082731666827, + "grad_norm": 44.48275924079175, + "learning_rate": 5.133544450039988e-06, + "loss": 0.10752525329589843, + "step": 57400 + }, + { + "epoch": 0.4963640608382115, + "grad_norm": 6.2315917087387644, + "learning_rate": 5.133401218977371e-06, + "loss": 0.3755767822265625, + "step": 57405 + }, + { + "epoch": 0.4964072943597548, + "grad_norm": 0.4264740427709329, + "learning_rate": 5.133257978075694e-06, + "loss": 0.20479393005371094, + "step": 57410 + }, + { + "epoch": 0.49645052788129806, + "grad_norm": 0.6329484273196743, + "learning_rate": 5.1331147273356205e-06, + "loss": 0.3633026123046875, + "step": 57415 + }, + { + "epoch": 0.4964937614028413, + "grad_norm": 0.5707111489728635, + "learning_rate": 5.132971466757811e-06, + "loss": 0.1604034423828125, + "step": 57420 + }, + { + "epoch": 0.4965369949243846, + "grad_norm": 25.92500055066992, + "learning_rate": 5.132828196342926e-06, + "loss": 0.32982177734375, + "step": 57425 + }, + { + "epoch": 0.4965802284459278, + "grad_norm": 5.366542980415289, + "learning_rate": 5.132684916091624e-06, + "loss": 0.1373046875, + "step": 57430 + }, + { + "epoch": 0.4966234619674711, + "grad_norm": 0.11248876537126856, + "learning_rate": 5.132541626004569e-06, + "loss": 0.2034515380859375, + "step": 57435 + }, + { + "epoch": 0.4966666954890144, + "grad_norm": 23.561778873459758, + "learning_rate": 5.13239832608242e-06, + "loss": 0.419732666015625, + "step": 57440 + }, + { + "epoch": 0.4967099290105576, + "grad_norm": 0.6775763319962287, + "learning_rate": 5.132255016325839e-06, + "loss": 0.042926025390625, + "step": 57445 + }, + { + "epoch": 0.4967531625321009, + "grad_norm": 26.689805589257052, + "learning_rate": 5.132111696735485e-06, + "loss": 0.14911727905273436, + "step": 57450 + }, + { + "epoch": 0.4967963960536442, + "grad_norm": 10.255060984138533, + "learning_rate": 5.131968367312022e-06, + "loss": 0.10155105590820312, + "step": 57455 + }, + { + "epoch": 0.4968396295751874, + "grad_norm": 2.660882800208732, + "learning_rate": 5.131825028056108e-06, + "loss": 0.05526046752929688, + "step": 57460 + }, + { + "epoch": 0.4968828630967307, + "grad_norm": 14.791282353046428, + "learning_rate": 5.1316816789684045e-06, + "loss": 0.5109954833984375, + "step": 57465 + }, + { + "epoch": 0.49692609661827397, + "grad_norm": 0.511914092049935, + "learning_rate": 5.131538320049575e-06, + "loss": 0.09277496337890626, + "step": 57470 + }, + { + "epoch": 0.4969693301398172, + "grad_norm": 6.610101171819164, + "learning_rate": 5.131394951300279e-06, + "loss": 0.23571929931640626, + "step": 57475 + }, + { + "epoch": 0.4970125636613605, + "grad_norm": 22.384499854717735, + "learning_rate": 5.131251572721178e-06, + "loss": 0.10643463134765625, + "step": 57480 + }, + { + "epoch": 0.4970557971829037, + "grad_norm": 4.222897507398039, + "learning_rate": 5.131108184312932e-06, + "loss": 0.0643524169921875, + "step": 57485 + }, + { + "epoch": 0.497099030704447, + "grad_norm": 5.697502798483105, + "learning_rate": 5.130964786076204e-06, + "loss": 0.35028076171875, + "step": 57490 + }, + { + "epoch": 0.4971422642259903, + "grad_norm": 1.2859465487945125, + "learning_rate": 5.130821378011654e-06, + "loss": 0.42697601318359374, + "step": 57495 + }, + { + "epoch": 0.4971854977475335, + "grad_norm": 14.664206568871782, + "learning_rate": 5.130677960119945e-06, + "loss": 0.4390869140625, + "step": 57500 + }, + { + "epoch": 0.4972287312690768, + "grad_norm": 10.81664115885757, + "learning_rate": 5.130534532401737e-06, + "loss": 0.0690399169921875, + "step": 57505 + }, + { + "epoch": 0.4972719647906201, + "grad_norm": 7.314452888621677, + "learning_rate": 5.130391094857692e-06, + "loss": 0.2096923828125, + "step": 57510 + }, + { + "epoch": 0.4973151983121633, + "grad_norm": 0.6346040843270241, + "learning_rate": 5.130247647488472e-06, + "loss": 0.10455055236816406, + "step": 57515 + }, + { + "epoch": 0.4973584318337066, + "grad_norm": 5.561489789862974, + "learning_rate": 5.130104190294738e-06, + "loss": 0.5196136474609375, + "step": 57520 + }, + { + "epoch": 0.4974016653552498, + "grad_norm": 17.28661069853044, + "learning_rate": 5.129960723277152e-06, + "loss": 0.18984146118164064, + "step": 57525 + }, + { + "epoch": 0.4974448988767931, + "grad_norm": 0.8285790407470337, + "learning_rate": 5.129817246436374e-06, + "loss": 0.0915863037109375, + "step": 57530 + }, + { + "epoch": 0.4974881323983364, + "grad_norm": 13.349731930238658, + "learning_rate": 5.129673759773068e-06, + "loss": 0.077276611328125, + "step": 57535 + }, + { + "epoch": 0.4975313659198796, + "grad_norm": 14.165402587725104, + "learning_rate": 5.1295302632878954e-06, + "loss": 0.3261383056640625, + "step": 57540 + }, + { + "epoch": 0.4975745994414229, + "grad_norm": 8.155906375876715, + "learning_rate": 5.129386756981516e-06, + "loss": 0.29839630126953126, + "step": 57545 + }, + { + "epoch": 0.4976178329629662, + "grad_norm": 1.9711593501819613, + "learning_rate": 5.129243240854594e-06, + "loss": 0.06038322448730469, + "step": 57550 + }, + { + "epoch": 0.4976610664845094, + "grad_norm": 4.363650082206104, + "learning_rate": 5.12909971490779e-06, + "loss": 0.0715606689453125, + "step": 57555 + }, + { + "epoch": 0.4977043000060527, + "grad_norm": 0.41187542303611757, + "learning_rate": 5.128956179141766e-06, + "loss": 0.2205474853515625, + "step": 57560 + }, + { + "epoch": 0.49774753352759593, + "grad_norm": 3.650625769475153, + "learning_rate": 5.1288126335571845e-06, + "loss": 0.09096832275390625, + "step": 57565 + }, + { + "epoch": 0.4977907670491392, + "grad_norm": 1.9617640771164426, + "learning_rate": 5.128669078154708e-06, + "loss": 0.2564208984375, + "step": 57570 + }, + { + "epoch": 0.4978340005706825, + "grad_norm": 47.444009439313035, + "learning_rate": 5.128525512934996e-06, + "loss": 0.694476318359375, + "step": 57575 + }, + { + "epoch": 0.49787723409222573, + "grad_norm": 7.034787601120349, + "learning_rate": 5.128381937898714e-06, + "loss": 0.17872161865234376, + "step": 57580 + }, + { + "epoch": 0.497920467613769, + "grad_norm": 1.8084495027103809, + "learning_rate": 5.1282383530465216e-06, + "loss": 0.28543815612792967, + "step": 57585 + }, + { + "epoch": 0.4979637011353123, + "grad_norm": 6.2880388692394735, + "learning_rate": 5.128094758379082e-06, + "loss": 0.22676849365234375, + "step": 57590 + }, + { + "epoch": 0.4980069346568555, + "grad_norm": 11.853315271948862, + "learning_rate": 5.127951153897059e-06, + "loss": 0.10940284729003906, + "step": 57595 + }, + { + "epoch": 0.4980501681783988, + "grad_norm": 6.702544767301986, + "learning_rate": 5.127807539601112e-06, + "loss": 0.0945465087890625, + "step": 57600 + }, + { + "epoch": 0.49809340169994204, + "grad_norm": 8.56737676319973, + "learning_rate": 5.127663915491905e-06, + "loss": 0.052501678466796875, + "step": 57605 + }, + { + "epoch": 0.4981366352214853, + "grad_norm": 0.2592770851144326, + "learning_rate": 5.127520281570099e-06, + "loss": 0.046459197998046875, + "step": 57610 + }, + { + "epoch": 0.4981798687430286, + "grad_norm": 2.1694192307757545, + "learning_rate": 5.127376637836358e-06, + "loss": 0.23058929443359374, + "step": 57615 + }, + { + "epoch": 0.49822310226457184, + "grad_norm": 0.9164415694136511, + "learning_rate": 5.127232984291344e-06, + "loss": 0.08163948059082031, + "step": 57620 + }, + { + "epoch": 0.4982663357861151, + "grad_norm": 1.986673608585496, + "learning_rate": 5.1270893209357205e-06, + "loss": 0.12833786010742188, + "step": 57625 + }, + { + "epoch": 0.4983095693076584, + "grad_norm": 5.64372217884177, + "learning_rate": 5.126945647770148e-06, + "loss": 0.1349884033203125, + "step": 57630 + }, + { + "epoch": 0.49835280282920164, + "grad_norm": 0.3076194670240015, + "learning_rate": 5.126801964795289e-06, + "loss": 0.11060600280761719, + "step": 57635 + }, + { + "epoch": 0.4983960363507449, + "grad_norm": 3.4776683324040842, + "learning_rate": 5.12665827201181e-06, + "loss": 0.5196941375732422, + "step": 57640 + }, + { + "epoch": 0.4984392698722882, + "grad_norm": 6.784865319240239, + "learning_rate": 5.126514569420369e-06, + "loss": 0.2277587890625, + "step": 57645 + }, + { + "epoch": 0.49848250339383143, + "grad_norm": 7.514901687155419, + "learning_rate": 5.126370857021631e-06, + "loss": 0.10029296875, + "step": 57650 + }, + { + "epoch": 0.4985257369153747, + "grad_norm": 3.7131662003052286, + "learning_rate": 5.126227134816258e-06, + "loss": 0.09840087890625, + "step": 57655 + }, + { + "epoch": 0.49856897043691795, + "grad_norm": 8.126489482878238, + "learning_rate": 5.126083402804915e-06, + "loss": 0.162628173828125, + "step": 57660 + }, + { + "epoch": 0.49861220395846123, + "grad_norm": 1.6181459880772566, + "learning_rate": 5.125939660988261e-06, + "loss": 0.445172119140625, + "step": 57665 + }, + { + "epoch": 0.4986554374800045, + "grad_norm": 4.6009927136167095, + "learning_rate": 5.125795909366963e-06, + "loss": 0.03623199462890625, + "step": 57670 + }, + { + "epoch": 0.49869867100154774, + "grad_norm": 16.862023886930693, + "learning_rate": 5.125652147941681e-06, + "loss": 0.33719940185546876, + "step": 57675 + }, + { + "epoch": 0.49874190452309103, + "grad_norm": 12.631896865137072, + "learning_rate": 5.12550837671308e-06, + "loss": 0.044346046447753903, + "step": 57680 + }, + { + "epoch": 0.4987851380446343, + "grad_norm": 44.57579837886999, + "learning_rate": 5.125364595681822e-06, + "loss": 0.475775146484375, + "step": 57685 + }, + { + "epoch": 0.49882837156617754, + "grad_norm": 13.603357965534258, + "learning_rate": 5.12522080484857e-06, + "loss": 0.3851478576660156, + "step": 57690 + }, + { + "epoch": 0.4988716050877208, + "grad_norm": 15.199435824137073, + "learning_rate": 5.125077004213987e-06, + "loss": 0.20382041931152345, + "step": 57695 + }, + { + "epoch": 0.49891483860926406, + "grad_norm": 0.9686347934463637, + "learning_rate": 5.124933193778738e-06, + "loss": 0.12021255493164062, + "step": 57700 + }, + { + "epoch": 0.49895807213080734, + "grad_norm": 17.804174969325757, + "learning_rate": 5.124789373543483e-06, + "loss": 0.121331787109375, + "step": 57705 + }, + { + "epoch": 0.4990013056523506, + "grad_norm": 9.321479056726195, + "learning_rate": 5.124645543508889e-06, + "loss": 0.27947235107421875, + "step": 57710 + }, + { + "epoch": 0.49904453917389385, + "grad_norm": 12.742892361589734, + "learning_rate": 5.124501703675617e-06, + "loss": 0.13438720703125, + "step": 57715 + }, + { + "epoch": 0.49908777269543714, + "grad_norm": 5.697428843037649, + "learning_rate": 5.12435785404433e-06, + "loss": 0.16612396240234376, + "step": 57720 + }, + { + "epoch": 0.4991310062169804, + "grad_norm": 3.829707477569238, + "learning_rate": 5.124213994615692e-06, + "loss": 0.332183837890625, + "step": 57725 + }, + { + "epoch": 0.49917423973852365, + "grad_norm": 8.85824111824502, + "learning_rate": 5.1240701253903676e-06, + "loss": 0.07637367248535157, + "step": 57730 + }, + { + "epoch": 0.49921747326006694, + "grad_norm": 21.330813059457757, + "learning_rate": 5.123926246369019e-06, + "loss": 0.10408515930175781, + "step": 57735 + }, + { + "epoch": 0.49926070678161016, + "grad_norm": 0.04364331009398846, + "learning_rate": 5.1237823575523115e-06, + "loss": 0.20440101623535156, + "step": 57740 + }, + { + "epoch": 0.49930394030315345, + "grad_norm": 18.454347415345538, + "learning_rate": 5.123638458940906e-06, + "loss": 0.16748733520507814, + "step": 57745 + }, + { + "epoch": 0.49934717382469673, + "grad_norm": 22.205106453049435, + "learning_rate": 5.123494550535468e-06, + "loss": 0.3615119934082031, + "step": 57750 + }, + { + "epoch": 0.49939040734623996, + "grad_norm": 1.0673938916732748, + "learning_rate": 5.1233506323366604e-06, + "loss": 0.13404464721679688, + "step": 57755 + }, + { + "epoch": 0.49943364086778325, + "grad_norm": 16.16182136156761, + "learning_rate": 5.123206704345148e-06, + "loss": 0.14160919189453125, + "step": 57760 + }, + { + "epoch": 0.49947687438932653, + "grad_norm": 9.959687802513091, + "learning_rate": 5.123062766561595e-06, + "loss": 0.1192230224609375, + "step": 57765 + }, + { + "epoch": 0.49952010791086976, + "grad_norm": 6.850690614233785, + "learning_rate": 5.122918818986662e-06, + "loss": 0.14494171142578124, + "step": 57770 + }, + { + "epoch": 0.49956334143241304, + "grad_norm": 5.192361156262009, + "learning_rate": 5.122774861621015e-06, + "loss": 0.3276153564453125, + "step": 57775 + }, + { + "epoch": 0.4996065749539563, + "grad_norm": 13.228579646643464, + "learning_rate": 5.12263089446532e-06, + "loss": 0.15825042724609376, + "step": 57780 + }, + { + "epoch": 0.49964980847549956, + "grad_norm": 2.0128700966660755, + "learning_rate": 5.122486917520237e-06, + "loss": 0.19557876586914064, + "step": 57785 + }, + { + "epoch": 0.49969304199704284, + "grad_norm": 2.421076594385165, + "learning_rate": 5.1223429307864326e-06, + "loss": 0.29468765258789065, + "step": 57790 + }, + { + "epoch": 0.49973627551858607, + "grad_norm": 9.002332659292616, + "learning_rate": 5.12219893426457e-06, + "loss": 0.1904022216796875, + "step": 57795 + }, + { + "epoch": 0.49977950904012935, + "grad_norm": 3.1067458766609777, + "learning_rate": 5.122054927955313e-06, + "loss": 0.0309295654296875, + "step": 57800 + }, + { + "epoch": 0.49982274256167264, + "grad_norm": 4.604300654026297, + "learning_rate": 5.121910911859327e-06, + "loss": 0.2540580749511719, + "step": 57805 + }, + { + "epoch": 0.49986597608321587, + "grad_norm": 2.4109523512285027, + "learning_rate": 5.121766885977274e-06, + "loss": 0.1367584228515625, + "step": 57810 + }, + { + "epoch": 0.49990920960475915, + "grad_norm": 0.7223692933154939, + "learning_rate": 5.121622850309821e-06, + "loss": 0.09977645874023437, + "step": 57815 + }, + { + "epoch": 0.49995244312630244, + "grad_norm": 34.94418767281978, + "learning_rate": 5.121478804857631e-06, + "loss": 0.37558135986328123, + "step": 57820 + }, + { + "epoch": 0.49999567664784567, + "grad_norm": 0.3533320116142757, + "learning_rate": 5.1213347496213674e-06, + "loss": 0.30629501342773435, + "step": 57825 + }, + { + "epoch": 0.500038910169389, + "grad_norm": 3.845712474052244, + "learning_rate": 5.1211906846016965e-06, + "loss": 0.2660400390625, + "step": 57830 + }, + { + "epoch": 0.5000821436909322, + "grad_norm": 8.404699851548074, + "learning_rate": 5.1210466097992795e-06, + "loss": 0.11290855407714843, + "step": 57835 + }, + { + "epoch": 0.5001253772124755, + "grad_norm": 12.822482186044413, + "learning_rate": 5.120902525214784e-06, + "loss": 0.06322669982910156, + "step": 57840 + }, + { + "epoch": 0.5001686107340187, + "grad_norm": 2.846187446515203, + "learning_rate": 5.120758430848875e-06, + "loss": 0.0895843505859375, + "step": 57845 + }, + { + "epoch": 0.500211844255562, + "grad_norm": 15.86927375186087, + "learning_rate": 5.120614326702213e-06, + "loss": 0.2301422119140625, + "step": 57850 + }, + { + "epoch": 0.5002550777771052, + "grad_norm": 10.497833116008364, + "learning_rate": 5.120470212775467e-06, + "loss": 0.063311767578125, + "step": 57855 + }, + { + "epoch": 0.5002983112986485, + "grad_norm": 38.67036109189676, + "learning_rate": 5.120326089069298e-06, + "loss": 0.209283447265625, + "step": 57860 + }, + { + "epoch": 0.5003415448201918, + "grad_norm": 17.012463458433775, + "learning_rate": 5.120181955584374e-06, + "loss": 0.26985931396484375, + "step": 57865 + }, + { + "epoch": 0.500384778341735, + "grad_norm": 8.305513396074371, + "learning_rate": 5.120037812321358e-06, + "loss": 0.09510040283203125, + "step": 57870 + }, + { + "epoch": 0.5004280118632783, + "grad_norm": 4.48991672030308, + "learning_rate": 5.119893659280915e-06, + "loss": 0.13021087646484375, + "step": 57875 + }, + { + "epoch": 0.5004712453848216, + "grad_norm": 14.809111757373831, + "learning_rate": 5.1197494964637085e-06, + "loss": 0.11537628173828125, + "step": 57880 + }, + { + "epoch": 0.5005144789063648, + "grad_norm": 8.831959350155685, + "learning_rate": 5.119605323870406e-06, + "loss": 0.1733673095703125, + "step": 57885 + }, + { + "epoch": 0.5005577124279081, + "grad_norm": 7.229379235883895, + "learning_rate": 5.11946114150167e-06, + "loss": 0.0649688720703125, + "step": 57890 + }, + { + "epoch": 0.5006009459494514, + "grad_norm": 35.94408301258955, + "learning_rate": 5.119316949358167e-06, + "loss": 0.3302825927734375, + "step": 57895 + }, + { + "epoch": 0.5006441794709946, + "grad_norm": 21.684563562403795, + "learning_rate": 5.119172747440562e-06, + "loss": 0.19376068115234374, + "step": 57900 + }, + { + "epoch": 0.5006874129925379, + "grad_norm": 4.217060635426231, + "learning_rate": 5.119028535749519e-06, + "loss": 0.122344970703125, + "step": 57905 + }, + { + "epoch": 0.5007306465140812, + "grad_norm": 2.0799251899953255, + "learning_rate": 5.118884314285703e-06, + "loss": 0.2557964324951172, + "step": 57910 + }, + { + "epoch": 0.5007738800356244, + "grad_norm": 6.76314851252023, + "learning_rate": 5.118740083049781e-06, + "loss": 0.021317672729492188, + "step": 57915 + }, + { + "epoch": 0.5008171135571677, + "grad_norm": 2.446768749790831, + "learning_rate": 5.1185958420424156e-06, + "loss": 0.07679214477539062, + "step": 57920 + }, + { + "epoch": 0.500860347078711, + "grad_norm": 20.38457037054321, + "learning_rate": 5.118451591264274e-06, + "loss": 0.09161605834960937, + "step": 57925 + }, + { + "epoch": 0.5009035806002542, + "grad_norm": 2.083374968242004, + "learning_rate": 5.118307330716021e-06, + "loss": 0.2692596435546875, + "step": 57930 + }, + { + "epoch": 0.5009468141217974, + "grad_norm": 1.81360354761919, + "learning_rate": 5.118163060398322e-06, + "loss": 0.22018966674804688, + "step": 57935 + }, + { + "epoch": 0.5009900476433408, + "grad_norm": 0.5655767717664514, + "learning_rate": 5.118018780311842e-06, + "loss": 0.080804443359375, + "step": 57940 + }, + { + "epoch": 0.501033281164884, + "grad_norm": 5.401904330798626, + "learning_rate": 5.117874490457246e-06, + "loss": 0.05297698974609375, + "step": 57945 + }, + { + "epoch": 0.5010765146864272, + "grad_norm": 10.40435064983009, + "learning_rate": 5.1177301908352005e-06, + "loss": 0.1313018798828125, + "step": 57950 + }, + { + "epoch": 0.5011197482079706, + "grad_norm": 14.57777845751251, + "learning_rate": 5.11758588144637e-06, + "loss": 0.13642578125, + "step": 57955 + }, + { + "epoch": 0.5011629817295138, + "grad_norm": 14.286073489047645, + "learning_rate": 5.117441562291422e-06, + "loss": 0.167156982421875, + "step": 57960 + }, + { + "epoch": 0.501206215251057, + "grad_norm": 3.514706759249179, + "learning_rate": 5.117297233371019e-06, + "loss": 0.2768756866455078, + "step": 57965 + }, + { + "epoch": 0.5012494487726004, + "grad_norm": 5.481189916836924, + "learning_rate": 5.1171528946858285e-06, + "loss": 0.08923187255859374, + "step": 57970 + }, + { + "epoch": 0.5012926822941436, + "grad_norm": 1.278627333542684, + "learning_rate": 5.117008546236517e-06, + "loss": 0.08538131713867188, + "step": 57975 + }, + { + "epoch": 0.5013359158156868, + "grad_norm": 19.964503436744096, + "learning_rate": 5.1168641880237485e-06, + "loss": 0.56292724609375, + "step": 57980 + }, + { + "epoch": 0.5013791493372302, + "grad_norm": 4.742593452021101, + "learning_rate": 5.1167198200481885e-06, + "loss": 0.361810302734375, + "step": 57985 + }, + { + "epoch": 0.5014223828587734, + "grad_norm": 33.92872196489868, + "learning_rate": 5.116575442310505e-06, + "loss": 0.147943115234375, + "step": 57990 + }, + { + "epoch": 0.5014656163803166, + "grad_norm": 1.6438418725214723, + "learning_rate": 5.116431054811362e-06, + "loss": 0.11577301025390625, + "step": 57995 + }, + { + "epoch": 0.50150884990186, + "grad_norm": 34.67915386093295, + "learning_rate": 5.116286657551426e-06, + "loss": 0.13792648315429687, + "step": 58000 + }, + { + "epoch": 0.5015520834234032, + "grad_norm": 8.068409829915668, + "learning_rate": 5.116142250531362e-06, + "loss": 0.21726951599121094, + "step": 58005 + }, + { + "epoch": 0.5015953169449464, + "grad_norm": 4.488976043915484, + "learning_rate": 5.115997833751839e-06, + "loss": 0.1768798828125, + "step": 58010 + }, + { + "epoch": 0.5016385504664898, + "grad_norm": 5.525024847183343, + "learning_rate": 5.11585340721352e-06, + "loss": 0.07719268798828124, + "step": 58015 + }, + { + "epoch": 0.501681783988033, + "grad_norm": 17.97793584737625, + "learning_rate": 5.115708970917072e-06, + "loss": 0.41983795166015625, + "step": 58020 + }, + { + "epoch": 0.5017250175095762, + "grad_norm": 21.62022961196236, + "learning_rate": 5.115564524863161e-06, + "loss": 0.319268798828125, + "step": 58025 + }, + { + "epoch": 0.5017682510311194, + "grad_norm": 5.404395266584007, + "learning_rate": 5.115420069052453e-06, + "loss": 0.24136276245117189, + "step": 58030 + }, + { + "epoch": 0.5018114845526628, + "grad_norm": 2.3835588713414686, + "learning_rate": 5.115275603485615e-06, + "loss": 0.08918838500976563, + "step": 58035 + }, + { + "epoch": 0.501854718074206, + "grad_norm": 0.5616307409244186, + "learning_rate": 5.115131128163314e-06, + "loss": 0.07995719909667968, + "step": 58040 + }, + { + "epoch": 0.5018979515957492, + "grad_norm": 0.2721931156354146, + "learning_rate": 5.114986643086214e-06, + "loss": 0.047503662109375, + "step": 58045 + }, + { + "epoch": 0.5019411851172926, + "grad_norm": 16.095457753036907, + "learning_rate": 5.114842148254981e-06, + "loss": 0.14856643676757814, + "step": 58050 + }, + { + "epoch": 0.5019844186388358, + "grad_norm": 1.2537853184513936, + "learning_rate": 5.114697643670285e-06, + "loss": 0.022088623046875, + "step": 58055 + }, + { + "epoch": 0.502027652160379, + "grad_norm": 8.526768643095359, + "learning_rate": 5.11455312933279e-06, + "loss": 0.058147430419921875, + "step": 58060 + }, + { + "epoch": 0.5020708856819224, + "grad_norm": 2.3332590362991943, + "learning_rate": 5.1144086052431614e-06, + "loss": 0.24812698364257812, + "step": 58065 + }, + { + "epoch": 0.5021141192034656, + "grad_norm": 2.2217592048688877, + "learning_rate": 5.114264071402069e-06, + "loss": 0.08983154296875, + "step": 58070 + }, + { + "epoch": 0.5021573527250088, + "grad_norm": 1.7882547183943975, + "learning_rate": 5.114119527810177e-06, + "loss": 0.0587371826171875, + "step": 58075 + }, + { + "epoch": 0.5022005862465522, + "grad_norm": 2.1221150716778627, + "learning_rate": 5.1139749744681514e-06, + "loss": 0.43649826049804685, + "step": 58080 + }, + { + "epoch": 0.5022438197680954, + "grad_norm": 3.675958150162464, + "learning_rate": 5.1138304113766615e-06, + "loss": 0.23875732421875, + "step": 58085 + }, + { + "epoch": 0.5022870532896386, + "grad_norm": 7.033405938013344, + "learning_rate": 5.113685838536371e-06, + "loss": 0.2024749755859375, + "step": 58090 + }, + { + "epoch": 0.502330286811182, + "grad_norm": 0.5763750450904697, + "learning_rate": 5.11354125594795e-06, + "loss": 0.0965179443359375, + "step": 58095 + }, + { + "epoch": 0.5023735203327252, + "grad_norm": 24.976298429539014, + "learning_rate": 5.113396663612063e-06, + "loss": 0.15729904174804688, + "step": 58100 + }, + { + "epoch": 0.5024167538542684, + "grad_norm": 15.97629122331392, + "learning_rate": 5.113252061529376e-06, + "loss": 0.142144775390625, + "step": 58105 + }, + { + "epoch": 0.5024599873758117, + "grad_norm": 3.042754208723532, + "learning_rate": 5.113107449700559e-06, + "loss": 0.11105194091796874, + "step": 58110 + }, + { + "epoch": 0.502503220897355, + "grad_norm": 0.22713047937749417, + "learning_rate": 5.112962828126276e-06, + "loss": 0.10142669677734376, + "step": 58115 + }, + { + "epoch": 0.5025464544188982, + "grad_norm": 14.21114547378822, + "learning_rate": 5.112818196807195e-06, + "loss": 0.3303314208984375, + "step": 58120 + }, + { + "epoch": 0.5025896879404415, + "grad_norm": 0.5221272734941349, + "learning_rate": 5.112673555743985e-06, + "loss": 0.18174896240234376, + "step": 58125 + }, + { + "epoch": 0.5026329214619848, + "grad_norm": 3.6370696713865778, + "learning_rate": 5.112528904937309e-06, + "loss": 0.10067596435546874, + "step": 58130 + }, + { + "epoch": 0.502676154983528, + "grad_norm": 0.684254976617005, + "learning_rate": 5.1123842443878365e-06, + "loss": 0.06774368286132812, + "step": 58135 + }, + { + "epoch": 0.5027193885050713, + "grad_norm": 8.866483461599787, + "learning_rate": 5.112239574096235e-06, + "loss": 0.05024871826171875, + "step": 58140 + }, + { + "epoch": 0.5027626220266146, + "grad_norm": 2.3004234907431265, + "learning_rate": 5.112094894063172e-06, + "loss": 0.23303375244140626, + "step": 58145 + }, + { + "epoch": 0.5028058555481578, + "grad_norm": 15.756613373680269, + "learning_rate": 5.111950204289314e-06, + "loss": 0.070928955078125, + "step": 58150 + }, + { + "epoch": 0.502849089069701, + "grad_norm": 28.928831660260457, + "learning_rate": 5.111805504775327e-06, + "loss": 0.4179847717285156, + "step": 58155 + }, + { + "epoch": 0.5028923225912444, + "grad_norm": 25.874550675612078, + "learning_rate": 5.11166079552188e-06, + "loss": 0.27395477294921877, + "step": 58160 + }, + { + "epoch": 0.5029355561127876, + "grad_norm": 1.5074464273009773, + "learning_rate": 5.111516076529641e-06, + "loss": 0.08956680297851563, + "step": 58165 + }, + { + "epoch": 0.5029787896343308, + "grad_norm": 85.24319783469986, + "learning_rate": 5.111371347799276e-06, + "loss": 0.305816650390625, + "step": 58170 + }, + { + "epoch": 0.5030220231558742, + "grad_norm": 5.227825871129071, + "learning_rate": 5.111226609331451e-06, + "loss": 0.053118896484375, + "step": 58175 + }, + { + "epoch": 0.5030652566774174, + "grad_norm": 6.323069382940032, + "learning_rate": 5.111081861126838e-06, + "loss": 0.1377716064453125, + "step": 58180 + }, + { + "epoch": 0.5031084901989606, + "grad_norm": 1.1469184427799737, + "learning_rate": 5.110937103186101e-06, + "loss": 0.073492431640625, + "step": 58185 + }, + { + "epoch": 0.503151723720504, + "grad_norm": 7.869502194688199, + "learning_rate": 5.110792335509909e-06, + "loss": 0.12546558380126954, + "step": 58190 + }, + { + "epoch": 0.5031949572420472, + "grad_norm": 22.492534625057427, + "learning_rate": 5.110647558098929e-06, + "loss": 0.0971221923828125, + "step": 58195 + }, + { + "epoch": 0.5032381907635904, + "grad_norm": 6.823477032161401, + "learning_rate": 5.110502770953829e-06, + "loss": 0.224560546875, + "step": 58200 + }, + { + "epoch": 0.5032814242851337, + "grad_norm": 14.283368852536281, + "learning_rate": 5.110357974075276e-06, + "loss": 0.125909423828125, + "step": 58205 + }, + { + "epoch": 0.503324657806677, + "grad_norm": 4.941379944680487, + "learning_rate": 5.110213167463939e-06, + "loss": 0.2491851806640625, + "step": 58210 + }, + { + "epoch": 0.5033678913282202, + "grad_norm": 23.015994107081152, + "learning_rate": 5.110068351120486e-06, + "loss": 0.2579833984375, + "step": 58215 + }, + { + "epoch": 0.5034111248497635, + "grad_norm": 16.540219384261025, + "learning_rate": 5.109923525045584e-06, + "loss": 0.395166015625, + "step": 58220 + }, + { + "epoch": 0.5034543583713068, + "grad_norm": 25.965905599335226, + "learning_rate": 5.1097786892399e-06, + "loss": 0.140460205078125, + "step": 58225 + }, + { + "epoch": 0.50349759189285, + "grad_norm": 2.1578406311982943, + "learning_rate": 5.109633843704104e-06, + "loss": 0.09796142578125, + "step": 58230 + }, + { + "epoch": 0.5035408254143933, + "grad_norm": 7.101431508601173, + "learning_rate": 5.109488988438863e-06, + "loss": 0.11928558349609375, + "step": 58235 + }, + { + "epoch": 0.5035840589359366, + "grad_norm": 2.0191240751507853, + "learning_rate": 5.109344123444845e-06, + "loss": 0.4951080322265625, + "step": 58240 + }, + { + "epoch": 0.5036272924574798, + "grad_norm": 0.5896727531617437, + "learning_rate": 5.109199248722719e-06, + "loss": 0.173016357421875, + "step": 58245 + }, + { + "epoch": 0.5036705259790231, + "grad_norm": 17.531437891974345, + "learning_rate": 5.109054364273152e-06, + "loss": 0.3712738037109375, + "step": 58250 + }, + { + "epoch": 0.5037137595005664, + "grad_norm": 11.868196235363355, + "learning_rate": 5.108909470096813e-06, + "loss": 0.0634429931640625, + "step": 58255 + }, + { + "epoch": 0.5037569930221096, + "grad_norm": 2.8172046664399804, + "learning_rate": 5.108764566194368e-06, + "loss": 0.050382232666015624, + "step": 58260 + }, + { + "epoch": 0.5038002265436529, + "grad_norm": 2.2844209178361896, + "learning_rate": 5.10861965256649e-06, + "loss": 0.043695831298828126, + "step": 58265 + }, + { + "epoch": 0.5038434600651962, + "grad_norm": 8.665054419928172, + "learning_rate": 5.108474729213842e-06, + "loss": 0.114129638671875, + "step": 58270 + }, + { + "epoch": 0.5038866935867394, + "grad_norm": 0.2960368659219461, + "learning_rate": 5.1083297961370965e-06, + "loss": 0.092010498046875, + "step": 58275 + }, + { + "epoch": 0.5039299271082827, + "grad_norm": 36.87212861043012, + "learning_rate": 5.10818485333692e-06, + "loss": 0.239404296875, + "step": 58280 + }, + { + "epoch": 0.5039731606298259, + "grad_norm": 2.0146760393337404, + "learning_rate": 5.10803990081398e-06, + "loss": 0.1269927978515625, + "step": 58285 + }, + { + "epoch": 0.5040163941513692, + "grad_norm": 2.6508790944047855, + "learning_rate": 5.107894938568948e-06, + "loss": 0.1364429473876953, + "step": 58290 + }, + { + "epoch": 0.5040596276729125, + "grad_norm": 0.3605493778205464, + "learning_rate": 5.107749966602489e-06, + "loss": 0.1863006591796875, + "step": 58295 + }, + { + "epoch": 0.5041028611944557, + "grad_norm": 111.72431846754994, + "learning_rate": 5.107604984915275e-06, + "loss": 0.29903564453125, + "step": 58300 + }, + { + "epoch": 0.504146094715999, + "grad_norm": 3.229780271984453, + "learning_rate": 5.107459993507972e-06, + "loss": 0.0803445816040039, + "step": 58305 + }, + { + "epoch": 0.5041893282375423, + "grad_norm": 41.02010137088256, + "learning_rate": 5.10731499238125e-06, + "loss": 0.16351318359375, + "step": 58310 + }, + { + "epoch": 0.5042325617590855, + "grad_norm": 1.7301687790526543, + "learning_rate": 5.107169981535778e-06, + "loss": 0.04747467041015625, + "step": 58315 + }, + { + "epoch": 0.5042757952806288, + "grad_norm": 9.334574866162173, + "learning_rate": 5.107024960972224e-06, + "loss": 0.07204055786132812, + "step": 58320 + }, + { + "epoch": 0.504319028802172, + "grad_norm": 16.18711723807522, + "learning_rate": 5.106879930691257e-06, + "loss": 0.08303966522216796, + "step": 58325 + }, + { + "epoch": 0.5043622623237153, + "grad_norm": 0.3427434431550981, + "learning_rate": 5.106734890693545e-06, + "loss": 0.16353530883789064, + "step": 58330 + }, + { + "epoch": 0.5044054958452586, + "grad_norm": 0.614324826449093, + "learning_rate": 5.106589840979759e-06, + "loss": 0.316021728515625, + "step": 58335 + }, + { + "epoch": 0.5044487293668019, + "grad_norm": 5.9208285145734285, + "learning_rate": 5.106444781550567e-06, + "loss": 0.38200531005859373, + "step": 58340 + }, + { + "epoch": 0.5044919628883451, + "grad_norm": 7.61442890748541, + "learning_rate": 5.106299712406637e-06, + "loss": 0.15279693603515626, + "step": 58345 + }, + { + "epoch": 0.5045351964098884, + "grad_norm": 2.5050070883044637, + "learning_rate": 5.106154633548639e-06, + "loss": 0.21417999267578125, + "step": 58350 + }, + { + "epoch": 0.5045784299314316, + "grad_norm": 5.179098813759307, + "learning_rate": 5.106009544977242e-06, + "loss": 0.120758056640625, + "step": 58355 + }, + { + "epoch": 0.5046216634529749, + "grad_norm": 24.27585311477738, + "learning_rate": 5.105864446693116e-06, + "loss": 0.22369384765625, + "step": 58360 + }, + { + "epoch": 0.5046648969745182, + "grad_norm": 6.326276457225325, + "learning_rate": 5.105719338696927e-06, + "loss": 0.39768524169921876, + "step": 58365 + }, + { + "epoch": 0.5047081304960614, + "grad_norm": 0.9083646360855644, + "learning_rate": 5.105574220989349e-06, + "loss": 0.1221435546875, + "step": 58370 + }, + { + "epoch": 0.5047513640176047, + "grad_norm": 19.885784224071873, + "learning_rate": 5.105429093571047e-06, + "loss": 0.345977783203125, + "step": 58375 + }, + { + "epoch": 0.5047945975391479, + "grad_norm": 6.683351863634401, + "learning_rate": 5.105283956442692e-06, + "loss": 0.3087627410888672, + "step": 58380 + }, + { + "epoch": 0.5048378310606912, + "grad_norm": 1.8643146372359651, + "learning_rate": 5.1051388096049544e-06, + "loss": 0.07810134887695312, + "step": 58385 + }, + { + "epoch": 0.5048810645822345, + "grad_norm": 16.362531519318267, + "learning_rate": 5.104993653058502e-06, + "loss": 0.09706954956054688, + "step": 58390 + }, + { + "epoch": 0.5049242981037777, + "grad_norm": 2.5880806151257345, + "learning_rate": 5.104848486804006e-06, + "loss": 0.034488677978515625, + "step": 58395 + }, + { + "epoch": 0.504967531625321, + "grad_norm": 24.766466372410463, + "learning_rate": 5.104703310842134e-06, + "loss": 0.28097076416015626, + "step": 58400 + }, + { + "epoch": 0.5050107651468643, + "grad_norm": 6.009972175660771, + "learning_rate": 5.104558125173556e-06, + "loss": 0.14974365234375, + "step": 58405 + }, + { + "epoch": 0.5050539986684075, + "grad_norm": 20.04970843750249, + "learning_rate": 5.104412929798942e-06, + "loss": 0.33011550903320314, + "step": 58410 + }, + { + "epoch": 0.5050972321899508, + "grad_norm": 6.7689411539896565, + "learning_rate": 5.104267724718961e-06, + "loss": 0.12264556884765625, + "step": 58415 + }, + { + "epoch": 0.5051404657114941, + "grad_norm": 22.48436253593333, + "learning_rate": 5.104122509934284e-06, + "loss": 0.06851425170898437, + "step": 58420 + }, + { + "epoch": 0.5051836992330373, + "grad_norm": 37.833701367590635, + "learning_rate": 5.10397728544558e-06, + "loss": 0.4619598388671875, + "step": 58425 + }, + { + "epoch": 0.5052269327545806, + "grad_norm": 2.4635953386373286, + "learning_rate": 5.103832051253518e-06, + "loss": 0.03226776123046875, + "step": 58430 + }, + { + "epoch": 0.5052701662761239, + "grad_norm": 0.6103709554708504, + "learning_rate": 5.103686807358769e-06, + "loss": 0.0495269775390625, + "step": 58435 + }, + { + "epoch": 0.5053133997976671, + "grad_norm": 4.188157958855802, + "learning_rate": 5.103541553762003e-06, + "loss": 0.02479248046875, + "step": 58440 + }, + { + "epoch": 0.5053566333192104, + "grad_norm": 2.842247905720996, + "learning_rate": 5.103396290463887e-06, + "loss": 0.06822586059570312, + "step": 58445 + }, + { + "epoch": 0.5053998668407537, + "grad_norm": 22.305871941059245, + "learning_rate": 5.103251017465095e-06, + "loss": 0.25584259033203127, + "step": 58450 + }, + { + "epoch": 0.5054431003622969, + "grad_norm": 4.468621334658594, + "learning_rate": 5.103105734766296e-06, + "loss": 0.17350387573242188, + "step": 58455 + }, + { + "epoch": 0.5054863338838401, + "grad_norm": 13.756116856180252, + "learning_rate": 5.102960442368157e-06, + "loss": 0.15797119140625, + "step": 58460 + }, + { + "epoch": 0.5055295674053835, + "grad_norm": 4.043960298942402, + "learning_rate": 5.102815140271352e-06, + "loss": 0.075299072265625, + "step": 58465 + }, + { + "epoch": 0.5055728009269267, + "grad_norm": 21.062377500916103, + "learning_rate": 5.102669828476548e-06, + "loss": 0.16854248046875, + "step": 58470 + }, + { + "epoch": 0.5056160344484699, + "grad_norm": 10.46666604276322, + "learning_rate": 5.102524506984418e-06, + "loss": 0.2656093597412109, + "step": 58475 + }, + { + "epoch": 0.5056592679700133, + "grad_norm": 1.4947179200900549, + "learning_rate": 5.102379175795629e-06, + "loss": 0.10502395629882813, + "step": 58480 + }, + { + "epoch": 0.5057025014915565, + "grad_norm": 25.269286611571943, + "learning_rate": 5.102233834910853e-06, + "loss": 0.6532150268554687, + "step": 58485 + }, + { + "epoch": 0.5057457350130997, + "grad_norm": 7.199495661709095, + "learning_rate": 5.102088484330762e-06, + "loss": 0.1030843734741211, + "step": 58490 + }, + { + "epoch": 0.5057889685346431, + "grad_norm": 0.5694076294788929, + "learning_rate": 5.101943124056024e-06, + "loss": 0.16009674072265626, + "step": 58495 + }, + { + "epoch": 0.5058322020561863, + "grad_norm": 3.5924360263972046, + "learning_rate": 5.10179775408731e-06, + "loss": 0.0553680419921875, + "step": 58500 + }, + { + "epoch": 0.5058754355777295, + "grad_norm": 22.060948886546292, + "learning_rate": 5.10165237442529e-06, + "loss": 0.20927734375, + "step": 58505 + }, + { + "epoch": 0.5059186690992729, + "grad_norm": 3.2205793103306037, + "learning_rate": 5.101506985070635e-06, + "loss": 0.07080307006835937, + "step": 58510 + }, + { + "epoch": 0.5059619026208161, + "grad_norm": 5.524930635475329, + "learning_rate": 5.101361586024016e-06, + "loss": 0.11331787109375, + "step": 58515 + }, + { + "epoch": 0.5060051361423593, + "grad_norm": 4.886268608600875, + "learning_rate": 5.101216177286103e-06, + "loss": 0.137408447265625, + "step": 58520 + }, + { + "epoch": 0.5060483696639027, + "grad_norm": 0.18749083258902666, + "learning_rate": 5.101070758857567e-06, + "loss": 0.38136444091796873, + "step": 58525 + }, + { + "epoch": 0.5060916031854459, + "grad_norm": 1.459257000549839, + "learning_rate": 5.100925330739077e-06, + "loss": 0.28376922607421873, + "step": 58530 + }, + { + "epoch": 0.5061348367069891, + "grad_norm": 6.700514114203435, + "learning_rate": 5.100779892931307e-06, + "loss": 0.16392478942871094, + "step": 58535 + }, + { + "epoch": 0.5061780702285323, + "grad_norm": 24.70479567129934, + "learning_rate": 5.100634445434924e-06, + "loss": 0.16506175994873046, + "step": 58540 + }, + { + "epoch": 0.5062213037500757, + "grad_norm": 28.549619074468787, + "learning_rate": 5.100488988250601e-06, + "loss": 0.1369333267211914, + "step": 58545 + }, + { + "epoch": 0.5062645372716189, + "grad_norm": 0.9221089541494399, + "learning_rate": 5.100343521379009e-06, + "loss": 0.20063323974609376, + "step": 58550 + }, + { + "epoch": 0.5063077707931621, + "grad_norm": 12.288190515160794, + "learning_rate": 5.100198044820817e-06, + "loss": 0.037371826171875, + "step": 58555 + }, + { + "epoch": 0.5063510043147055, + "grad_norm": 16.377898091838535, + "learning_rate": 5.100052558576698e-06, + "loss": 0.0926116943359375, + "step": 58560 + }, + { + "epoch": 0.5063942378362487, + "grad_norm": 7.9505958693353636, + "learning_rate": 5.0999070626473226e-06, + "loss": 0.055889892578125, + "step": 58565 + }, + { + "epoch": 0.5064374713577919, + "grad_norm": 35.89469358547965, + "learning_rate": 5.0997615570333605e-06, + "loss": 0.21138458251953124, + "step": 58570 + }, + { + "epoch": 0.5064807048793353, + "grad_norm": 25.475602280382542, + "learning_rate": 5.099616041735484e-06, + "loss": 0.082757568359375, + "step": 58575 + }, + { + "epoch": 0.5065239384008785, + "grad_norm": 5.296214802328262, + "learning_rate": 5.099470516754364e-06, + "loss": 0.40057525634765623, + "step": 58580 + }, + { + "epoch": 0.5065671719224217, + "grad_norm": 5.102919159794628, + "learning_rate": 5.099324982090671e-06, + "loss": 0.3072509765625, + "step": 58585 + }, + { + "epoch": 0.5066104054439651, + "grad_norm": 3.5530869167239945, + "learning_rate": 5.099179437745077e-06, + "loss": 0.09729461669921875, + "step": 58590 + }, + { + "epoch": 0.5066536389655083, + "grad_norm": 4.280234558801269, + "learning_rate": 5.0990338837182534e-06, + "loss": 0.1264495849609375, + "step": 58595 + }, + { + "epoch": 0.5066968724870515, + "grad_norm": 33.373989174025404, + "learning_rate": 5.098888320010871e-06, + "loss": 0.4437126159667969, + "step": 58600 + }, + { + "epoch": 0.5067401060085949, + "grad_norm": 4.67301590524691, + "learning_rate": 5.0987427466236e-06, + "loss": 0.11646728515625, + "step": 58605 + }, + { + "epoch": 0.5067833395301381, + "grad_norm": 5.543277125297988, + "learning_rate": 5.098597163557114e-06, + "loss": 0.07039642333984375, + "step": 58610 + }, + { + "epoch": 0.5068265730516813, + "grad_norm": 0.5556420454391626, + "learning_rate": 5.0984515708120825e-06, + "loss": 0.10761260986328125, + "step": 58615 + }, + { + "epoch": 0.5068698065732247, + "grad_norm": 0.9052945941667316, + "learning_rate": 5.098305968389179e-06, + "loss": 0.14166107177734374, + "step": 58620 + }, + { + "epoch": 0.5069130400947679, + "grad_norm": 7.504740575395125, + "learning_rate": 5.098160356289072e-06, + "loss": 0.15557022094726564, + "step": 58625 + }, + { + "epoch": 0.5069562736163111, + "grad_norm": 50.31233149078877, + "learning_rate": 5.098014734512437e-06, + "loss": 0.46276702880859377, + "step": 58630 + }, + { + "epoch": 0.5069995071378544, + "grad_norm": 1.1611046051279832, + "learning_rate": 5.097869103059942e-06, + "loss": 0.21932449340820312, + "step": 58635 + }, + { + "epoch": 0.5070427406593977, + "grad_norm": 3.7508815487721234, + "learning_rate": 5.0977234619322595e-06, + "loss": 0.033217239379882815, + "step": 58640 + }, + { + "epoch": 0.5070859741809409, + "grad_norm": 33.70322363118192, + "learning_rate": 5.097577811130064e-06, + "loss": 0.4752197265625, + "step": 58645 + }, + { + "epoch": 0.5071292077024842, + "grad_norm": 0.08321564650164344, + "learning_rate": 5.0974321506540226e-06, + "loss": 0.0904632568359375, + "step": 58650 + }, + { + "epoch": 0.5071724412240275, + "grad_norm": 2.4744869083617274, + "learning_rate": 5.09728648050481e-06, + "loss": 0.2097320556640625, + "step": 58655 + }, + { + "epoch": 0.5072156747455707, + "grad_norm": 1.3151383835892925, + "learning_rate": 5.097140800683099e-06, + "loss": 0.0593658447265625, + "step": 58660 + }, + { + "epoch": 0.507258908267114, + "grad_norm": 4.679476563582301, + "learning_rate": 5.096995111189559e-06, + "loss": 0.10181884765625, + "step": 58665 + }, + { + "epoch": 0.5073021417886573, + "grad_norm": 0.7106470552490414, + "learning_rate": 5.096849412024862e-06, + "loss": 0.2187591552734375, + "step": 58670 + }, + { + "epoch": 0.5073453753102005, + "grad_norm": 1.730552761199492, + "learning_rate": 5.096703703189682e-06, + "loss": 0.16534271240234374, + "step": 58675 + }, + { + "epoch": 0.5073886088317437, + "grad_norm": 4.741715596713764, + "learning_rate": 5.096557984684689e-06, + "loss": 0.20013504028320311, + "step": 58680 + }, + { + "epoch": 0.5074318423532871, + "grad_norm": 7.720441326934642, + "learning_rate": 5.0964122565105555e-06, + "loss": 0.20128173828125, + "step": 58685 + }, + { + "epoch": 0.5074750758748303, + "grad_norm": 0.6846634854935398, + "learning_rate": 5.096266518667955e-06, + "loss": 0.1734039306640625, + "step": 58690 + }, + { + "epoch": 0.5075183093963735, + "grad_norm": 9.572774462996378, + "learning_rate": 5.096120771157558e-06, + "loss": 0.649761962890625, + "step": 58695 + }, + { + "epoch": 0.5075615429179169, + "grad_norm": 3.6097644797168082, + "learning_rate": 5.0959750139800365e-06, + "loss": 0.11805992126464844, + "step": 58700 + }, + { + "epoch": 0.5076047764394601, + "grad_norm": 0.26115251175663134, + "learning_rate": 5.095829247136065e-06, + "loss": 0.19698333740234375, + "step": 58705 + }, + { + "epoch": 0.5076480099610033, + "grad_norm": 20.771025831214327, + "learning_rate": 5.095683470626314e-06, + "loss": 0.13390655517578126, + "step": 58710 + }, + { + "epoch": 0.5076912434825466, + "grad_norm": 43.305274286316845, + "learning_rate": 5.095537684451455e-06, + "loss": 0.17657241821289063, + "step": 58715 + }, + { + "epoch": 0.5077344770040899, + "grad_norm": 36.889269441501234, + "learning_rate": 5.0953918886121615e-06, + "loss": 0.4003749847412109, + "step": 58720 + }, + { + "epoch": 0.5077777105256331, + "grad_norm": 9.10772334582518, + "learning_rate": 5.095246083109106e-06, + "loss": 0.19374542236328124, + "step": 58725 + }, + { + "epoch": 0.5078209440471764, + "grad_norm": 31.893392961145487, + "learning_rate": 5.09510026794296e-06, + "loss": 0.154791259765625, + "step": 58730 + }, + { + "epoch": 0.5078641775687197, + "grad_norm": 3.950584077981243, + "learning_rate": 5.094954443114398e-06, + "loss": 0.0616485595703125, + "step": 58735 + }, + { + "epoch": 0.5079074110902629, + "grad_norm": 1.4687318974209007, + "learning_rate": 5.094808608624091e-06, + "loss": 0.10392837524414063, + "step": 58740 + }, + { + "epoch": 0.5079506446118062, + "grad_norm": 9.136558301225344, + "learning_rate": 5.094662764472711e-06, + "loss": 0.25933151245117186, + "step": 58745 + }, + { + "epoch": 0.5079938781333495, + "grad_norm": 10.683130265151275, + "learning_rate": 5.094516910660932e-06, + "loss": 0.160009765625, + "step": 58750 + }, + { + "epoch": 0.5080371116548927, + "grad_norm": 13.720063297472253, + "learning_rate": 5.094371047189426e-06, + "loss": 0.401495361328125, + "step": 58755 + }, + { + "epoch": 0.508080345176436, + "grad_norm": 0.3649364401015869, + "learning_rate": 5.094225174058866e-06, + "loss": 0.12626380920410157, + "step": 58760 + }, + { + "epoch": 0.5081235786979793, + "grad_norm": 0.8185729847270824, + "learning_rate": 5.094079291269924e-06, + "loss": 0.244781494140625, + "step": 58765 + }, + { + "epoch": 0.5081668122195225, + "grad_norm": 40.97059757304982, + "learning_rate": 5.093933398823273e-06, + "loss": 0.22286949157714844, + "step": 58770 + }, + { + "epoch": 0.5082100457410658, + "grad_norm": 1.1890230146814222, + "learning_rate": 5.093787496719587e-06, + "loss": 0.204071044921875, + "step": 58775 + }, + { + "epoch": 0.5082532792626091, + "grad_norm": 8.930572830762483, + "learning_rate": 5.093641584959538e-06, + "loss": 0.27587738037109377, + "step": 58780 + }, + { + "epoch": 0.5082965127841523, + "grad_norm": 26.26806872259497, + "learning_rate": 5.093495663543798e-06, + "loss": 0.14892120361328126, + "step": 58785 + }, + { + "epoch": 0.5083397463056956, + "grad_norm": 0.35713482841810773, + "learning_rate": 5.093349732473043e-06, + "loss": 0.12375946044921875, + "step": 58790 + }, + { + "epoch": 0.5083829798272389, + "grad_norm": 0.3275920903026398, + "learning_rate": 5.0932037917479425e-06, + "loss": 0.096063232421875, + "step": 58795 + }, + { + "epoch": 0.5084262133487821, + "grad_norm": 14.160922056047271, + "learning_rate": 5.093057841369172e-06, + "loss": 0.12682723999023438, + "step": 58800 + }, + { + "epoch": 0.5084694468703254, + "grad_norm": 27.71113388587178, + "learning_rate": 5.092911881337403e-06, + "loss": 0.17957611083984376, + "step": 58805 + }, + { + "epoch": 0.5085126803918686, + "grad_norm": 1.063448537242153, + "learning_rate": 5.092765911653309e-06, + "loss": 0.089617919921875, + "step": 58810 + }, + { + "epoch": 0.5085559139134119, + "grad_norm": 3.8541293300631563, + "learning_rate": 5.0926199323175646e-06, + "loss": 0.12195072174072266, + "step": 58815 + }, + { + "epoch": 0.5085991474349552, + "grad_norm": 15.514965839527125, + "learning_rate": 5.092473943330841e-06, + "loss": 0.09091339111328126, + "step": 58820 + }, + { + "epoch": 0.5086423809564984, + "grad_norm": 2.1756809637258376, + "learning_rate": 5.092327944693813e-06, + "loss": 0.24327850341796875, + "step": 58825 + }, + { + "epoch": 0.5086856144780417, + "grad_norm": 0.09961726022386808, + "learning_rate": 5.092181936407153e-06, + "loss": 0.14097633361816406, + "step": 58830 + }, + { + "epoch": 0.508728847999585, + "grad_norm": 11.570968561683424, + "learning_rate": 5.092035918471535e-06, + "loss": 0.13635101318359374, + "step": 58835 + }, + { + "epoch": 0.5087720815211282, + "grad_norm": 27.151916399079738, + "learning_rate": 5.091889890887632e-06, + "loss": 0.20036544799804687, + "step": 58840 + }, + { + "epoch": 0.5088153150426715, + "grad_norm": 5.224601551324328, + "learning_rate": 5.091743853656118e-06, + "loss": 0.06914215087890625, + "step": 58845 + }, + { + "epoch": 0.5088585485642148, + "grad_norm": 36.14550745018215, + "learning_rate": 5.0915978067776656e-06, + "loss": 0.21033477783203125, + "step": 58850 + }, + { + "epoch": 0.508901782085758, + "grad_norm": 2.552004457065109, + "learning_rate": 5.09145175025295e-06, + "loss": 0.14837493896484374, + "step": 58855 + }, + { + "epoch": 0.5089450156073013, + "grad_norm": 8.99109271375701, + "learning_rate": 5.091305684082643e-06, + "loss": 0.07902145385742188, + "step": 58860 + }, + { + "epoch": 0.5089882491288445, + "grad_norm": 3.002439197861348, + "learning_rate": 5.091159608267419e-06, + "loss": 0.2541351318359375, + "step": 58865 + }, + { + "epoch": 0.5090314826503878, + "grad_norm": 9.569729866276736, + "learning_rate": 5.091013522807952e-06, + "loss": 0.11909637451171876, + "step": 58870 + }, + { + "epoch": 0.5090747161719311, + "grad_norm": 26.62158625986108, + "learning_rate": 5.090867427704916e-06, + "loss": 0.2305389404296875, + "step": 58875 + }, + { + "epoch": 0.5091179496934743, + "grad_norm": 0.12491133911116821, + "learning_rate": 5.090721322958982e-06, + "loss": 0.08560371398925781, + "step": 58880 + }, + { + "epoch": 0.5091611832150176, + "grad_norm": 43.734477545418464, + "learning_rate": 5.090575208570828e-06, + "loss": 0.4970722198486328, + "step": 58885 + }, + { + "epoch": 0.5092044167365608, + "grad_norm": 3.582041867138388, + "learning_rate": 5.090429084541125e-06, + "loss": 0.2341522216796875, + "step": 58890 + }, + { + "epoch": 0.5092476502581041, + "grad_norm": 0.8240288100386229, + "learning_rate": 5.090282950870548e-06, + "loss": 0.05334320068359375, + "step": 58895 + }, + { + "epoch": 0.5092908837796474, + "grad_norm": 8.303507620133505, + "learning_rate": 5.090136807559771e-06, + "loss": 0.147198486328125, + "step": 58900 + }, + { + "epoch": 0.5093341173011906, + "grad_norm": 1.4341193260425498, + "learning_rate": 5.089990654609467e-06, + "loss": 0.18205947875976564, + "step": 58905 + }, + { + "epoch": 0.5093773508227339, + "grad_norm": 0.5525265523259592, + "learning_rate": 5.089844492020311e-06, + "loss": 0.06371040344238281, + "step": 58910 + }, + { + "epoch": 0.5094205843442772, + "grad_norm": 31.441794149854353, + "learning_rate": 5.089698319792978e-06, + "loss": 0.26961669921875, + "step": 58915 + }, + { + "epoch": 0.5094638178658204, + "grad_norm": 2.466362766336231, + "learning_rate": 5.08955213792814e-06, + "loss": 0.13581981658935546, + "step": 58920 + }, + { + "epoch": 0.5095070513873637, + "grad_norm": 4.104287296342296, + "learning_rate": 5.089405946426472e-06, + "loss": 0.2065887451171875, + "step": 58925 + }, + { + "epoch": 0.509550284908907, + "grad_norm": 15.61915526597627, + "learning_rate": 5.089259745288649e-06, + "loss": 0.314697265625, + "step": 58930 + }, + { + "epoch": 0.5095935184304502, + "grad_norm": 24.48360354700155, + "learning_rate": 5.0891135345153445e-06, + "loss": 0.70880126953125, + "step": 58935 + }, + { + "epoch": 0.5096367519519935, + "grad_norm": 0.9140941425845702, + "learning_rate": 5.088967314107232e-06, + "loss": 0.25937576293945314, + "step": 58940 + }, + { + "epoch": 0.5096799854735368, + "grad_norm": 63.90125618236149, + "learning_rate": 5.088821084064988e-06, + "loss": 0.12566680908203126, + "step": 58945 + }, + { + "epoch": 0.50972321899508, + "grad_norm": 3.0149330106920305, + "learning_rate": 5.088674844389285e-06, + "loss": 0.2377471923828125, + "step": 58950 + }, + { + "epoch": 0.5097664525166233, + "grad_norm": 9.484373028791733, + "learning_rate": 5.088528595080799e-06, + "loss": 0.05458984375, + "step": 58955 + }, + { + "epoch": 0.5098096860381666, + "grad_norm": 70.44483692811991, + "learning_rate": 5.088382336140203e-06, + "loss": 0.2149810791015625, + "step": 58960 + }, + { + "epoch": 0.5098529195597098, + "grad_norm": 4.909417827404448, + "learning_rate": 5.088236067568173e-06, + "loss": 0.19305410385131835, + "step": 58965 + }, + { + "epoch": 0.5098961530812531, + "grad_norm": 9.770285645449558, + "learning_rate": 5.088089789365382e-06, + "loss": 0.24738616943359376, + "step": 58970 + }, + { + "epoch": 0.5099393866027964, + "grad_norm": 1.2996827639434076, + "learning_rate": 5.087943501532505e-06, + "loss": 0.03849220275878906, + "step": 58975 + }, + { + "epoch": 0.5099826201243396, + "grad_norm": 0.4539507467991199, + "learning_rate": 5.087797204070218e-06, + "loss": 0.040008544921875, + "step": 58980 + }, + { + "epoch": 0.5100258536458828, + "grad_norm": 22.291340799539054, + "learning_rate": 5.087650896979195e-06, + "loss": 0.3010528564453125, + "step": 58985 + }, + { + "epoch": 0.5100690871674262, + "grad_norm": 43.44855789254043, + "learning_rate": 5.087504580260109e-06, + "loss": 0.2984283447265625, + "step": 58990 + }, + { + "epoch": 0.5101123206889694, + "grad_norm": 34.69304624381231, + "learning_rate": 5.087358253913637e-06, + "loss": 0.25619659423828123, + "step": 58995 + }, + { + "epoch": 0.5101555542105126, + "grad_norm": 6.451268156262606, + "learning_rate": 5.087211917940454e-06, + "loss": 0.12373619079589844, + "step": 59000 + }, + { + "epoch": 0.510198787732056, + "grad_norm": 4.905807088071401, + "learning_rate": 5.087065572341233e-06, + "loss": 0.280950927734375, + "step": 59005 + }, + { + "epoch": 0.5102420212535992, + "grad_norm": 0.31864713301508707, + "learning_rate": 5.086919217116651e-06, + "loss": 0.13026885986328124, + "step": 59010 + }, + { + "epoch": 0.5102852547751424, + "grad_norm": 4.320518625054249, + "learning_rate": 5.0867728522673816e-06, + "loss": 0.180218505859375, + "step": 59015 + }, + { + "epoch": 0.5103284882966858, + "grad_norm": 1.6440194866711038, + "learning_rate": 5.086626477794099e-06, + "loss": 0.0623138427734375, + "step": 59020 + }, + { + "epoch": 0.510371721818229, + "grad_norm": 11.707169610456825, + "learning_rate": 5.086480093697481e-06, + "loss": 0.24969329833984374, + "step": 59025 + }, + { + "epoch": 0.5104149553397722, + "grad_norm": 12.435702158053568, + "learning_rate": 5.0863336999782005e-06, + "loss": 0.4763275146484375, + "step": 59030 + }, + { + "epoch": 0.5104581888613156, + "grad_norm": 16.973918371306507, + "learning_rate": 5.086187296636934e-06, + "loss": 0.11416015625, + "step": 59035 + }, + { + "epoch": 0.5105014223828588, + "grad_norm": 1.384795471630926, + "learning_rate": 5.086040883674355e-06, + "loss": 0.10225677490234375, + "step": 59040 + }, + { + "epoch": 0.510544655904402, + "grad_norm": 2.952233752603848, + "learning_rate": 5.085894461091141e-06, + "loss": 0.39980316162109375, + "step": 59045 + }, + { + "epoch": 0.5105878894259454, + "grad_norm": 2.3353646526535576, + "learning_rate": 5.0857480288879646e-06, + "loss": 0.1435546875, + "step": 59050 + }, + { + "epoch": 0.5106311229474886, + "grad_norm": 0.4290517089143633, + "learning_rate": 5.085601587065504e-06, + "loss": 0.4010883331298828, + "step": 59055 + }, + { + "epoch": 0.5106743564690318, + "grad_norm": 0.34571062976180006, + "learning_rate": 5.085455135624432e-06, + "loss": 0.2302093505859375, + "step": 59060 + }, + { + "epoch": 0.510717589990575, + "grad_norm": 4.068035936556929, + "learning_rate": 5.085308674565426e-06, + "loss": 0.11572265625, + "step": 59065 + }, + { + "epoch": 0.5107608235121184, + "grad_norm": 4.370410570805561, + "learning_rate": 5.085162203889161e-06, + "loss": 0.061289215087890626, + "step": 59070 + }, + { + "epoch": 0.5108040570336616, + "grad_norm": 14.422230353374143, + "learning_rate": 5.085015723596311e-06, + "loss": 0.08495559692382812, + "step": 59075 + }, + { + "epoch": 0.5108472905552048, + "grad_norm": 63.74539300608407, + "learning_rate": 5.0848692336875535e-06, + "loss": 0.250897216796875, + "step": 59080 + }, + { + "epoch": 0.5108905240767482, + "grad_norm": 17.460815059978327, + "learning_rate": 5.084722734163562e-06, + "loss": 0.076043701171875, + "step": 59085 + }, + { + "epoch": 0.5109337575982914, + "grad_norm": 6.789414542087135, + "learning_rate": 5.0845762250250155e-06, + "loss": 0.08145217895507813, + "step": 59090 + }, + { + "epoch": 0.5109769911198346, + "grad_norm": 15.098742839437666, + "learning_rate": 5.084429706272587e-06, + "loss": 0.08673095703125, + "step": 59095 + }, + { + "epoch": 0.511020224641378, + "grad_norm": 26.966245150419148, + "learning_rate": 5.084283177906953e-06, + "loss": 0.3405303955078125, + "step": 59100 + }, + { + "epoch": 0.5110634581629212, + "grad_norm": 2.1311132363821415, + "learning_rate": 5.084136639928788e-06, + "loss": 0.3059967041015625, + "step": 59105 + }, + { + "epoch": 0.5111066916844644, + "grad_norm": 2.2832247537817434, + "learning_rate": 5.08399009233877e-06, + "loss": 0.2314697265625, + "step": 59110 + }, + { + "epoch": 0.5111499252060078, + "grad_norm": 6.173343692137104, + "learning_rate": 5.083843535137574e-06, + "loss": 0.22488784790039062, + "step": 59115 + }, + { + "epoch": 0.511193158727551, + "grad_norm": 13.121079190180476, + "learning_rate": 5.083696968325875e-06, + "loss": 0.2446746826171875, + "step": 59120 + }, + { + "epoch": 0.5112363922490942, + "grad_norm": 5.085688842160829, + "learning_rate": 5.08355039190435e-06, + "loss": 0.25798568725585935, + "step": 59125 + }, + { + "epoch": 0.5112796257706376, + "grad_norm": 1.098994317702414, + "learning_rate": 5.083403805873674e-06, + "loss": 0.204718017578125, + "step": 59130 + }, + { + "epoch": 0.5113228592921808, + "grad_norm": 15.177524639954633, + "learning_rate": 5.0832572102345245e-06, + "loss": 0.6225486755371094, + "step": 59135 + }, + { + "epoch": 0.511366092813724, + "grad_norm": 8.866337322958053, + "learning_rate": 5.083110604987577e-06, + "loss": 0.06755218505859376, + "step": 59140 + }, + { + "epoch": 0.5114093263352674, + "grad_norm": 5.0098223957377, + "learning_rate": 5.082963990133507e-06, + "loss": 0.137548828125, + "step": 59145 + }, + { + "epoch": 0.5114525598568106, + "grad_norm": 8.498283908623394, + "learning_rate": 5.0828173656729915e-06, + "loss": 0.0727386474609375, + "step": 59150 + }, + { + "epoch": 0.5114957933783538, + "grad_norm": 4.744285176591805, + "learning_rate": 5.082670731606706e-06, + "loss": 0.10928192138671874, + "step": 59155 + }, + { + "epoch": 0.511539026899897, + "grad_norm": 0.44281170556190635, + "learning_rate": 5.0825240879353275e-06, + "loss": 0.029619598388671876, + "step": 59160 + }, + { + "epoch": 0.5115822604214404, + "grad_norm": 16.166936198353394, + "learning_rate": 5.082377434659532e-06, + "loss": 0.10601959228515626, + "step": 59165 + }, + { + "epoch": 0.5116254939429836, + "grad_norm": 46.49313758210071, + "learning_rate": 5.0822307717799955e-06, + "loss": 0.192767333984375, + "step": 59170 + }, + { + "epoch": 0.5116687274645269, + "grad_norm": 24.630385528513738, + "learning_rate": 5.082084099297394e-06, + "loss": 0.2679290771484375, + "step": 59175 + }, + { + "epoch": 0.5117119609860702, + "grad_norm": 12.698622254508383, + "learning_rate": 5.0819374172124055e-06, + "loss": 0.2843498229980469, + "step": 59180 + }, + { + "epoch": 0.5117551945076134, + "grad_norm": 14.654585732968759, + "learning_rate": 5.081790725525706e-06, + "loss": 0.10948486328125, + "step": 59185 + }, + { + "epoch": 0.5117984280291566, + "grad_norm": 17.50596905527523, + "learning_rate": 5.081644024237971e-06, + "loss": 0.07970848083496093, + "step": 59190 + }, + { + "epoch": 0.5118416615507, + "grad_norm": 24.855036139434944, + "learning_rate": 5.081497313349877e-06, + "loss": 0.33095703125, + "step": 59195 + }, + { + "epoch": 0.5118848950722432, + "grad_norm": 3.5314842883101947, + "learning_rate": 5.081350592862102e-06, + "loss": 0.157940673828125, + "step": 59200 + }, + { + "epoch": 0.5119281285937864, + "grad_norm": 1.6485033728926168, + "learning_rate": 5.081203862775323e-06, + "loss": 0.19193572998046876, + "step": 59205 + }, + { + "epoch": 0.5119713621153298, + "grad_norm": 3.0444676099532115, + "learning_rate": 5.081057123090214e-06, + "loss": 0.246649169921875, + "step": 59210 + }, + { + "epoch": 0.512014595636873, + "grad_norm": 2.4915320224359006, + "learning_rate": 5.080910373807454e-06, + "loss": 0.0814239501953125, + "step": 59215 + }, + { + "epoch": 0.5120578291584162, + "grad_norm": 4.237347595347943, + "learning_rate": 5.08076361492772e-06, + "loss": 0.0954681396484375, + "step": 59220 + }, + { + "epoch": 0.5121010626799596, + "grad_norm": 14.023788134867871, + "learning_rate": 5.080616846451687e-06, + "loss": 0.18916397094726561, + "step": 59225 + }, + { + "epoch": 0.5121442962015028, + "grad_norm": 8.49159377964977, + "learning_rate": 5.0804700683800325e-06, + "loss": 0.14756126403808595, + "step": 59230 + }, + { + "epoch": 0.512187529723046, + "grad_norm": 0.9891672268513153, + "learning_rate": 5.0803232807134356e-06, + "loss": 0.1327239990234375, + "step": 59235 + }, + { + "epoch": 0.5122307632445893, + "grad_norm": 1.9007459804217106, + "learning_rate": 5.080176483452571e-06, + "loss": 0.064422607421875, + "step": 59240 + }, + { + "epoch": 0.5122739967661326, + "grad_norm": 2.461180449392055, + "learning_rate": 5.0800296765981155e-06, + "loss": 0.25933685302734377, + "step": 59245 + }, + { + "epoch": 0.5123172302876758, + "grad_norm": 27.729081078796863, + "learning_rate": 5.079882860150747e-06, + "loss": 0.13911666870117187, + "step": 59250 + }, + { + "epoch": 0.5123604638092191, + "grad_norm": 12.74039708218306, + "learning_rate": 5.0797360341111425e-06, + "loss": 0.15916748046875, + "step": 59255 + }, + { + "epoch": 0.5124036973307624, + "grad_norm": 1.2771790114374602, + "learning_rate": 5.07958919847998e-06, + "loss": 0.23895606994628907, + "step": 59260 + }, + { + "epoch": 0.5124469308523056, + "grad_norm": 4.891911199872715, + "learning_rate": 5.079442353257936e-06, + "loss": 0.14917678833007814, + "step": 59265 + }, + { + "epoch": 0.5124901643738489, + "grad_norm": 41.097313885220935, + "learning_rate": 5.079295498445686e-06, + "loss": 0.4720344543457031, + "step": 59270 + }, + { + "epoch": 0.5125333978953922, + "grad_norm": 2.6869858240901277, + "learning_rate": 5.0791486340439104e-06, + "loss": 0.48262786865234375, + "step": 59275 + }, + { + "epoch": 0.5125766314169354, + "grad_norm": 8.363615034304427, + "learning_rate": 5.079001760053285e-06, + "loss": 0.309295654296875, + "step": 59280 + }, + { + "epoch": 0.5126198649384787, + "grad_norm": 7.998791874685601, + "learning_rate": 5.078854876474486e-06, + "loss": 0.1290283203125, + "step": 59285 + }, + { + "epoch": 0.512663098460022, + "grad_norm": 25.479433163468304, + "learning_rate": 5.078707983308193e-06, + "loss": 0.14662246704101561, + "step": 59290 + }, + { + "epoch": 0.5127063319815652, + "grad_norm": 2.3003377484152647, + "learning_rate": 5.078561080555082e-06, + "loss": 0.17200260162353515, + "step": 59295 + }, + { + "epoch": 0.5127495655031085, + "grad_norm": 7.9596147772471335, + "learning_rate": 5.078414168215832e-06, + "loss": 0.3182499885559082, + "step": 59300 + }, + { + "epoch": 0.5127927990246518, + "grad_norm": 29.973903255454857, + "learning_rate": 5.078267246291118e-06, + "loss": 0.3337898254394531, + "step": 59305 + }, + { + "epoch": 0.512836032546195, + "grad_norm": 0.13825789107348654, + "learning_rate": 5.07812031478162e-06, + "loss": 0.07801055908203125, + "step": 59310 + }, + { + "epoch": 0.5128792660677383, + "grad_norm": 7.576966636111559, + "learning_rate": 5.0779733736880146e-06, + "loss": 0.167620849609375, + "step": 59315 + }, + { + "epoch": 0.5129224995892816, + "grad_norm": 13.639938541063012, + "learning_rate": 5.077826423010979e-06, + "loss": 0.1040191650390625, + "step": 59320 + }, + { + "epoch": 0.5129657331108248, + "grad_norm": 15.778995023463802, + "learning_rate": 5.077679462751192e-06, + "loss": 0.08518524169921875, + "step": 59325 + }, + { + "epoch": 0.5130089666323681, + "grad_norm": 24.940769431433704, + "learning_rate": 5.077532492909331e-06, + "loss": 0.24249267578125, + "step": 59330 + }, + { + "epoch": 0.5130522001539113, + "grad_norm": 2.4392613267101555, + "learning_rate": 5.077385513486074e-06, + "loss": 0.1479583740234375, + "step": 59335 + }, + { + "epoch": 0.5130954336754546, + "grad_norm": 4.534651754354933, + "learning_rate": 5.077238524482098e-06, + "loss": 0.12086868286132812, + "step": 59340 + }, + { + "epoch": 0.5131386671969979, + "grad_norm": 31.641883941694367, + "learning_rate": 5.0770915258980825e-06, + "loss": 0.127362060546875, + "step": 59345 + }, + { + "epoch": 0.5131819007185411, + "grad_norm": 4.5270608117917215, + "learning_rate": 5.076944517734703e-06, + "loss": 0.0892974853515625, + "step": 59350 + }, + { + "epoch": 0.5132251342400844, + "grad_norm": 1.5248976291696035, + "learning_rate": 5.076797499992639e-06, + "loss": 0.167047119140625, + "step": 59355 + }, + { + "epoch": 0.5132683677616277, + "grad_norm": 1.9269779047546065, + "learning_rate": 5.07665047267257e-06, + "loss": 0.054186248779296876, + "step": 59360 + }, + { + "epoch": 0.5133116012831709, + "grad_norm": 13.996142970397896, + "learning_rate": 5.076503435775171e-06, + "loss": 0.1128692626953125, + "step": 59365 + }, + { + "epoch": 0.5133548348047142, + "grad_norm": 2.4097458183665186, + "learning_rate": 5.076356389301121e-06, + "loss": 0.04903564453125, + "step": 59370 + }, + { + "epoch": 0.5133980683262575, + "grad_norm": 10.460879149542308, + "learning_rate": 5.076209333251101e-06, + "loss": 0.12706756591796875, + "step": 59375 + }, + { + "epoch": 0.5134413018478007, + "grad_norm": 10.210687155413899, + "learning_rate": 5.076062267625786e-06, + "loss": 0.208642578125, + "step": 59380 + }, + { + "epoch": 0.513484535369344, + "grad_norm": 2.543225379835866, + "learning_rate": 5.075915192425855e-06, + "loss": 0.0569610595703125, + "step": 59385 + }, + { + "epoch": 0.5135277688908872, + "grad_norm": 4.045452754340043, + "learning_rate": 5.075768107651987e-06, + "loss": 0.0677642822265625, + "step": 59390 + }, + { + "epoch": 0.5135710024124305, + "grad_norm": 5.439191250503907, + "learning_rate": 5.07562101330486e-06, + "loss": 0.3730316162109375, + "step": 59395 + }, + { + "epoch": 0.5136142359339738, + "grad_norm": 53.54941608052907, + "learning_rate": 5.075473909385151e-06, + "loss": 0.36411056518554685, + "step": 59400 + }, + { + "epoch": 0.513657469455517, + "grad_norm": 2.071579712842746, + "learning_rate": 5.075326795893541e-06, + "loss": 0.4244110107421875, + "step": 59405 + }, + { + "epoch": 0.5137007029770603, + "grad_norm": 1.0960722588567882, + "learning_rate": 5.075179672830707e-06, + "loss": 0.2093578338623047, + "step": 59410 + }, + { + "epoch": 0.5137439364986035, + "grad_norm": 16.12513347223254, + "learning_rate": 5.075032540197328e-06, + "loss": 0.22308502197265626, + "step": 59415 + }, + { + "epoch": 0.5137871700201468, + "grad_norm": 2.445019764226785, + "learning_rate": 5.074885397994082e-06, + "loss": 0.17231521606445313, + "step": 59420 + }, + { + "epoch": 0.5138304035416901, + "grad_norm": 5.109586532174078, + "learning_rate": 5.074738246221647e-06, + "loss": 0.04384260177612305, + "step": 59425 + }, + { + "epoch": 0.5138736370632333, + "grad_norm": 25.83052040950035, + "learning_rate": 5.074591084880704e-06, + "loss": 0.13509521484375, + "step": 59430 + }, + { + "epoch": 0.5139168705847766, + "grad_norm": 0.3506065636654452, + "learning_rate": 5.074443913971929e-06, + "loss": 0.3027740478515625, + "step": 59435 + }, + { + "epoch": 0.5139601041063199, + "grad_norm": 25.043806090069186, + "learning_rate": 5.074296733496003e-06, + "loss": 0.3389869689941406, + "step": 59440 + }, + { + "epoch": 0.5140033376278631, + "grad_norm": 0.3196185687667948, + "learning_rate": 5.074149543453603e-06, + "loss": 0.291400146484375, + "step": 59445 + }, + { + "epoch": 0.5140465711494064, + "grad_norm": 2.9665045796171436, + "learning_rate": 5.074002343845409e-06, + "loss": 0.088641357421875, + "step": 59450 + }, + { + "epoch": 0.5140898046709497, + "grad_norm": 4.3408317875090745, + "learning_rate": 5.073855134672099e-06, + "loss": 0.5882339477539062, + "step": 59455 + }, + { + "epoch": 0.5141330381924929, + "grad_norm": 7.278642390170353, + "learning_rate": 5.073707915934353e-06, + "loss": 0.16670989990234375, + "step": 59460 + }, + { + "epoch": 0.5141762717140362, + "grad_norm": 32.06879627700153, + "learning_rate": 5.073560687632849e-06, + "loss": 0.35320892333984377, + "step": 59465 + }, + { + "epoch": 0.5142195052355795, + "grad_norm": 3.759691404323632, + "learning_rate": 5.073413449768266e-06, + "loss": 0.2673919677734375, + "step": 59470 + }, + { + "epoch": 0.5142627387571227, + "grad_norm": 0.6112345619588178, + "learning_rate": 5.073266202341284e-06, + "loss": 0.44457855224609377, + "step": 59475 + }, + { + "epoch": 0.514305972278666, + "grad_norm": 10.946473214907483, + "learning_rate": 5.073118945352581e-06, + "loss": 0.14841842651367188, + "step": 59480 + }, + { + "epoch": 0.5143492058002093, + "grad_norm": 0.7713587810355039, + "learning_rate": 5.072971678802836e-06, + "loss": 0.17107601165771485, + "step": 59485 + }, + { + "epoch": 0.5143924393217525, + "grad_norm": 3.540388956699685, + "learning_rate": 5.07282440269273e-06, + "loss": 0.1697784423828125, + "step": 59490 + }, + { + "epoch": 0.5144356728432958, + "grad_norm": 0.4464269348637248, + "learning_rate": 5.072677117022941e-06, + "loss": 0.03015899658203125, + "step": 59495 + }, + { + "epoch": 0.5144789063648391, + "grad_norm": 1.2022872847680466, + "learning_rate": 5.072529821794148e-06, + "loss": 0.16376018524169922, + "step": 59500 + }, + { + "epoch": 0.5145221398863823, + "grad_norm": 9.227714814609445, + "learning_rate": 5.07238251700703e-06, + "loss": 0.17711734771728516, + "step": 59505 + }, + { + "epoch": 0.5145653734079255, + "grad_norm": 3.7025363820132737, + "learning_rate": 5.072235202662268e-06, + "loss": 0.1356536865234375, + "step": 59510 + }, + { + "epoch": 0.5146086069294689, + "grad_norm": 15.88292015107869, + "learning_rate": 5.0720878787605386e-06, + "loss": 0.12548828125, + "step": 59515 + }, + { + "epoch": 0.5146518404510121, + "grad_norm": 1.5348200083258732, + "learning_rate": 5.071940545302525e-06, + "loss": 0.0881866455078125, + "step": 59520 + }, + { + "epoch": 0.5146950739725553, + "grad_norm": 12.577339716406023, + "learning_rate": 5.0717932022889034e-06, + "loss": 0.12810287475585938, + "step": 59525 + }, + { + "epoch": 0.5147383074940987, + "grad_norm": 1.250905241154783, + "learning_rate": 5.0716458497203546e-06, + "loss": 0.07296524047851563, + "step": 59530 + }, + { + "epoch": 0.5147815410156419, + "grad_norm": 18.182182231015346, + "learning_rate": 5.0714984875975595e-06, + "loss": 0.16120147705078125, + "step": 59535 + }, + { + "epoch": 0.5148247745371851, + "grad_norm": 7.294802314015349, + "learning_rate": 5.071351115921196e-06, + "loss": 0.18940238952636718, + "step": 59540 + }, + { + "epoch": 0.5148680080587285, + "grad_norm": 4.757615431637147, + "learning_rate": 5.071203734691943e-06, + "loss": 0.06358604431152344, + "step": 59545 + }, + { + "epoch": 0.5149112415802717, + "grad_norm": 19.987317146014288, + "learning_rate": 5.071056343910483e-06, + "loss": 0.5571197509765625, + "step": 59550 + }, + { + "epoch": 0.5149544751018149, + "grad_norm": 27.75025945852767, + "learning_rate": 5.070908943577493e-06, + "loss": 0.40435590744018557, + "step": 59555 + }, + { + "epoch": 0.5149977086233583, + "grad_norm": 0.6187023283166625, + "learning_rate": 5.070761533693654e-06, + "loss": 0.03312835693359375, + "step": 59560 + }, + { + "epoch": 0.5150409421449015, + "grad_norm": 6.913171037381591, + "learning_rate": 5.0706141142596465e-06, + "loss": 0.084356689453125, + "step": 59565 + }, + { + "epoch": 0.5150841756664447, + "grad_norm": 11.846897235089124, + "learning_rate": 5.070466685276149e-06, + "loss": 0.35721282958984374, + "step": 59570 + }, + { + "epoch": 0.515127409187988, + "grad_norm": 1.6349432999569755, + "learning_rate": 5.070319246743842e-06, + "loss": 0.27717132568359376, + "step": 59575 + }, + { + "epoch": 0.5151706427095313, + "grad_norm": 16.399597860410424, + "learning_rate": 5.070171798663406e-06, + "loss": 0.3347747802734375, + "step": 59580 + }, + { + "epoch": 0.5152138762310745, + "grad_norm": 1.3539738735070592, + "learning_rate": 5.07002434103552e-06, + "loss": 0.24452056884765624, + "step": 59585 + }, + { + "epoch": 0.5152571097526177, + "grad_norm": 18.62138867778035, + "learning_rate": 5.069876873860865e-06, + "loss": 0.3415283203125, + "step": 59590 + }, + { + "epoch": 0.5153003432741611, + "grad_norm": 0.1127861881270224, + "learning_rate": 5.069729397140121e-06, + "loss": 0.24848175048828125, + "step": 59595 + }, + { + "epoch": 0.5153435767957043, + "grad_norm": 0.7664117848932787, + "learning_rate": 5.069581910873967e-06, + "loss": 0.14158172607421876, + "step": 59600 + }, + { + "epoch": 0.5153868103172475, + "grad_norm": 1.3526645256785148, + "learning_rate": 5.069434415063086e-06, + "loss": 0.09052848815917969, + "step": 59605 + }, + { + "epoch": 0.5154300438387909, + "grad_norm": 7.478102254792163, + "learning_rate": 5.069286909708154e-06, + "loss": 0.0419464111328125, + "step": 59610 + }, + { + "epoch": 0.5154732773603341, + "grad_norm": 3.936364516770839, + "learning_rate": 5.069139394809855e-06, + "loss": 0.14501953125, + "step": 59615 + }, + { + "epoch": 0.5155165108818773, + "grad_norm": 5.4875826852922565, + "learning_rate": 5.068991870368868e-06, + "loss": 0.109368896484375, + "step": 59620 + }, + { + "epoch": 0.5155597444034207, + "grad_norm": 2.8333969250131097, + "learning_rate": 5.068844336385873e-06, + "loss": 0.07511749267578124, + "step": 59625 + }, + { + "epoch": 0.5156029779249639, + "grad_norm": 11.49839151258366, + "learning_rate": 5.068696792861551e-06, + "loss": 0.156689453125, + "step": 59630 + }, + { + "epoch": 0.5156462114465071, + "grad_norm": 0.08124398864220854, + "learning_rate": 5.068549239796582e-06, + "loss": 0.33462371826171877, + "step": 59635 + }, + { + "epoch": 0.5156894449680505, + "grad_norm": 13.123692186349006, + "learning_rate": 5.068401677191646e-06, + "loss": 0.5741912841796875, + "step": 59640 + }, + { + "epoch": 0.5157326784895937, + "grad_norm": 44.5653763470699, + "learning_rate": 5.0682541050474255e-06, + "loss": 0.30888900756835935, + "step": 59645 + }, + { + "epoch": 0.5157759120111369, + "grad_norm": 0.1676530599044638, + "learning_rate": 5.068106523364598e-06, + "loss": 0.16956939697265624, + "step": 59650 + }, + { + "epoch": 0.5158191455326803, + "grad_norm": 7.630222511925509, + "learning_rate": 5.0679589321438475e-06, + "loss": 0.10035247802734375, + "step": 59655 + }, + { + "epoch": 0.5158623790542235, + "grad_norm": 6.029278848322047, + "learning_rate": 5.067811331385853e-06, + "loss": 0.10216064453125, + "step": 59660 + }, + { + "epoch": 0.5159056125757667, + "grad_norm": 38.68807174651956, + "learning_rate": 5.0676637210912955e-06, + "loss": 0.3706878662109375, + "step": 59665 + }, + { + "epoch": 0.5159488460973101, + "grad_norm": 4.479255177239796, + "learning_rate": 5.067516101260855e-06, + "loss": 0.13076934814453126, + "step": 59670 + }, + { + "epoch": 0.5159920796188533, + "grad_norm": 16.921715102835122, + "learning_rate": 5.0673684718952135e-06, + "loss": 0.43375701904296876, + "step": 59675 + }, + { + "epoch": 0.5160353131403965, + "grad_norm": 12.967853412449067, + "learning_rate": 5.067220832995051e-06, + "loss": 0.11845245361328124, + "step": 59680 + }, + { + "epoch": 0.5160785466619398, + "grad_norm": 25.072507609661688, + "learning_rate": 5.067073184561049e-06, + "loss": 0.15614013671875, + "step": 59685 + }, + { + "epoch": 0.5161217801834831, + "grad_norm": 3.35619692209273, + "learning_rate": 5.066925526593887e-06, + "loss": 0.010178375244140624, + "step": 59690 + }, + { + "epoch": 0.5161650137050263, + "grad_norm": 7.821127468067468, + "learning_rate": 5.066777859094247e-06, + "loss": 0.0968231201171875, + "step": 59695 + }, + { + "epoch": 0.5162082472265695, + "grad_norm": 25.79405555974973, + "learning_rate": 5.066630182062812e-06, + "loss": 0.27145919799804685, + "step": 59700 + }, + { + "epoch": 0.5162514807481129, + "grad_norm": 1.895996809195499, + "learning_rate": 5.0664824955002604e-06, + "loss": 0.04314079284667969, + "step": 59705 + }, + { + "epoch": 0.5162947142696561, + "grad_norm": 34.12417024128441, + "learning_rate": 5.066334799407274e-06, + "loss": 0.3898162841796875, + "step": 59710 + }, + { + "epoch": 0.5163379477911993, + "grad_norm": 8.923769230228631, + "learning_rate": 5.066187093784534e-06, + "loss": 0.23463668823242187, + "step": 59715 + }, + { + "epoch": 0.5163811813127427, + "grad_norm": 19.323831711955794, + "learning_rate": 5.0660393786327215e-06, + "loss": 0.3820930480957031, + "step": 59720 + }, + { + "epoch": 0.5164244148342859, + "grad_norm": 6.059257565245609, + "learning_rate": 5.065891653952518e-06, + "loss": 0.1874053955078125, + "step": 59725 + }, + { + "epoch": 0.5164676483558291, + "grad_norm": 22.59041906501445, + "learning_rate": 5.065743919744605e-06, + "loss": 0.158966064453125, + "step": 59730 + }, + { + "epoch": 0.5165108818773725, + "grad_norm": 4.314678265410695, + "learning_rate": 5.065596176009664e-06, + "loss": 0.21479339599609376, + "step": 59735 + }, + { + "epoch": 0.5165541153989157, + "grad_norm": 27.20893682196532, + "learning_rate": 5.065448422748375e-06, + "loss": 0.37778892517089846, + "step": 59740 + }, + { + "epoch": 0.5165973489204589, + "grad_norm": 0.1774430015535004, + "learning_rate": 5.065300659961421e-06, + "loss": 0.02821693420410156, + "step": 59745 + }, + { + "epoch": 0.5166405824420023, + "grad_norm": 0.12058798203083054, + "learning_rate": 5.065152887649482e-06, + "loss": 0.08638076782226563, + "step": 59750 + }, + { + "epoch": 0.5166838159635455, + "grad_norm": 12.98616267028898, + "learning_rate": 5.065005105813241e-06, + "loss": 0.23478546142578124, + "step": 59755 + }, + { + "epoch": 0.5167270494850887, + "grad_norm": 10.519895245980594, + "learning_rate": 5.06485731445338e-06, + "loss": 0.16499176025390624, + "step": 59760 + }, + { + "epoch": 0.516770283006632, + "grad_norm": 2.826911487219689, + "learning_rate": 5.0647095135705776e-06, + "loss": 0.22363739013671874, + "step": 59765 + }, + { + "epoch": 0.5168135165281753, + "grad_norm": 26.858750891452353, + "learning_rate": 5.064561703165518e-06, + "loss": 0.14317779541015624, + "step": 59770 + }, + { + "epoch": 0.5168567500497185, + "grad_norm": 1.525310477046843, + "learning_rate": 5.0644138832388834e-06, + "loss": 0.14378662109375, + "step": 59775 + }, + { + "epoch": 0.5168999835712618, + "grad_norm": 1.5132447531351554, + "learning_rate": 5.064266053791354e-06, + "loss": 0.387017822265625, + "step": 59780 + }, + { + "epoch": 0.5169432170928051, + "grad_norm": 0.746166397481333, + "learning_rate": 5.0641182148236105e-06, + "loss": 0.0821563720703125, + "step": 59785 + }, + { + "epoch": 0.5169864506143483, + "grad_norm": 6.677243282677224, + "learning_rate": 5.063970366336338e-06, + "loss": 0.24132156372070312, + "step": 59790 + }, + { + "epoch": 0.5170296841358916, + "grad_norm": 2.258925532801409, + "learning_rate": 5.063822508330215e-06, + "loss": 0.07856559753417969, + "step": 59795 + }, + { + "epoch": 0.5170729176574349, + "grad_norm": 2.056545078362444, + "learning_rate": 5.063674640805925e-06, + "loss": 0.038143157958984375, + "step": 59800 + }, + { + "epoch": 0.5171161511789781, + "grad_norm": 1.2824681617640485, + "learning_rate": 5.063526763764151e-06, + "loss": 0.07141780853271484, + "step": 59805 + }, + { + "epoch": 0.5171593847005214, + "grad_norm": 17.86440240370547, + "learning_rate": 5.063378877205574e-06, + "loss": 0.13303070068359374, + "step": 59810 + }, + { + "epoch": 0.5172026182220647, + "grad_norm": 20.832704064863744, + "learning_rate": 5.063230981130875e-06, + "loss": 0.107086181640625, + "step": 59815 + }, + { + "epoch": 0.5172458517436079, + "grad_norm": 0.33960379369291316, + "learning_rate": 5.063083075540737e-06, + "loss": 0.13632354736328126, + "step": 59820 + }, + { + "epoch": 0.5172890852651512, + "grad_norm": 40.8212158916321, + "learning_rate": 5.062935160435843e-06, + "loss": 0.20975341796875, + "step": 59825 + }, + { + "epoch": 0.5173323187866945, + "grad_norm": 15.362157647411081, + "learning_rate": 5.062787235816873e-06, + "loss": 0.130810546875, + "step": 59830 + }, + { + "epoch": 0.5173755523082377, + "grad_norm": 1.546457923045672, + "learning_rate": 5.062639301684512e-06, + "loss": 0.20924072265625, + "step": 59835 + }, + { + "epoch": 0.517418785829781, + "grad_norm": 14.018679352196141, + "learning_rate": 5.06249135803944e-06, + "loss": 0.06518402099609374, + "step": 59840 + }, + { + "epoch": 0.5174620193513242, + "grad_norm": 28.561980564368692, + "learning_rate": 5.062343404882341e-06, + "loss": 0.3486053466796875, + "step": 59845 + }, + { + "epoch": 0.5175052528728675, + "grad_norm": 45.61798997833145, + "learning_rate": 5.062195442213895e-06, + "loss": 0.5012527465820312, + "step": 59850 + }, + { + "epoch": 0.5175484863944108, + "grad_norm": 29.150442214874094, + "learning_rate": 5.062047470034786e-06, + "loss": 0.1537872314453125, + "step": 59855 + }, + { + "epoch": 0.517591719915954, + "grad_norm": 4.774100181367215, + "learning_rate": 5.0618994883456975e-06, + "loss": 0.090887451171875, + "step": 59860 + }, + { + "epoch": 0.5176349534374973, + "grad_norm": 21.326202243993908, + "learning_rate": 5.06175149714731e-06, + "loss": 0.14936981201171876, + "step": 59865 + }, + { + "epoch": 0.5176781869590406, + "grad_norm": 2.6448318265831174, + "learning_rate": 5.061603496440307e-06, + "loss": 0.0982269287109375, + "step": 59870 + }, + { + "epoch": 0.5177214204805838, + "grad_norm": 20.021922069080514, + "learning_rate": 5.061455486225371e-06, + "loss": 0.65511474609375, + "step": 59875 + }, + { + "epoch": 0.5177646540021271, + "grad_norm": 17.024253170290613, + "learning_rate": 5.061307466503184e-06, + "loss": 0.7697662353515625, + "step": 59880 + }, + { + "epoch": 0.5178078875236704, + "grad_norm": 0.395575225520129, + "learning_rate": 5.061159437274429e-06, + "loss": 0.13326492309570312, + "step": 59885 + }, + { + "epoch": 0.5178511210452136, + "grad_norm": 1.7027060123397049, + "learning_rate": 5.061011398539789e-06, + "loss": 0.46235198974609376, + "step": 59890 + }, + { + "epoch": 0.5178943545667569, + "grad_norm": 3.195575767359308, + "learning_rate": 5.060863350299947e-06, + "loss": 0.3417999267578125, + "step": 59895 + }, + { + "epoch": 0.5179375880883001, + "grad_norm": 3.223160900140212, + "learning_rate": 5.060715292555585e-06, + "loss": 0.14192142486572265, + "step": 59900 + }, + { + "epoch": 0.5179808216098434, + "grad_norm": 2.1553901970033467, + "learning_rate": 5.060567225307386e-06, + "loss": 0.21512222290039062, + "step": 59905 + }, + { + "epoch": 0.5180240551313867, + "grad_norm": 18.311422232282382, + "learning_rate": 5.0604191485560325e-06, + "loss": 0.16412887573242188, + "step": 59910 + }, + { + "epoch": 0.51806728865293, + "grad_norm": 0.16611797285472388, + "learning_rate": 5.060271062302209e-06, + "loss": 0.018089675903320314, + "step": 59915 + }, + { + "epoch": 0.5181105221744732, + "grad_norm": 9.511844336430274, + "learning_rate": 5.060122966546597e-06, + "loss": 0.1725849151611328, + "step": 59920 + }, + { + "epoch": 0.5181537556960165, + "grad_norm": 16.92081294689371, + "learning_rate": 5.059974861289879e-06, + "loss": 0.38531494140625, + "step": 59925 + }, + { + "epoch": 0.5181969892175597, + "grad_norm": 3.9979035729053605, + "learning_rate": 5.059826746532741e-06, + "loss": 0.0634521484375, + "step": 59930 + }, + { + "epoch": 0.518240222739103, + "grad_norm": 14.81915187649156, + "learning_rate": 5.059678622275863e-06, + "loss": 0.07991180419921876, + "step": 59935 + }, + { + "epoch": 0.5182834562606462, + "grad_norm": 2.5640258853912234, + "learning_rate": 5.059530488519929e-06, + "loss": 0.063922119140625, + "step": 59940 + }, + { + "epoch": 0.5183266897821895, + "grad_norm": 1.102381338904673, + "learning_rate": 5.059382345265623e-06, + "loss": 0.077301025390625, + "step": 59945 + }, + { + "epoch": 0.5183699233037328, + "grad_norm": 0.22448523048956426, + "learning_rate": 5.059234192513627e-06, + "loss": 0.11273727416992188, + "step": 59950 + }, + { + "epoch": 0.518413156825276, + "grad_norm": 13.620095892461311, + "learning_rate": 5.059086030264625e-06, + "loss": 0.329296875, + "step": 59955 + }, + { + "epoch": 0.5184563903468193, + "grad_norm": 0.47470916290986964, + "learning_rate": 5.0589378585193e-06, + "loss": 0.060840606689453125, + "step": 59960 + }, + { + "epoch": 0.5184996238683626, + "grad_norm": 18.84061531040756, + "learning_rate": 5.058789677278335e-06, + "loss": 0.51143798828125, + "step": 59965 + }, + { + "epoch": 0.5185428573899058, + "grad_norm": 15.07148253839602, + "learning_rate": 5.0586414865424146e-06, + "loss": 0.12668275833129883, + "step": 59970 + }, + { + "epoch": 0.5185860909114491, + "grad_norm": 28.593108091724645, + "learning_rate": 5.058493286312222e-06, + "loss": 0.16686744689941407, + "step": 59975 + }, + { + "epoch": 0.5186293244329924, + "grad_norm": 24.532078203516967, + "learning_rate": 5.05834507658844e-06, + "loss": 0.0998382568359375, + "step": 59980 + }, + { + "epoch": 0.5186725579545356, + "grad_norm": 18.01410324008087, + "learning_rate": 5.058196857371752e-06, + "loss": 0.11941680908203126, + "step": 59985 + }, + { + "epoch": 0.5187157914760789, + "grad_norm": 1.8953776002909495, + "learning_rate": 5.058048628662842e-06, + "loss": 0.06193332672119141, + "step": 59990 + }, + { + "epoch": 0.5187590249976222, + "grad_norm": 6.871014958004035, + "learning_rate": 5.057900390462394e-06, + "loss": 0.06414871215820313, + "step": 59995 + }, + { + "epoch": 0.5188022585191654, + "grad_norm": 14.497801472228517, + "learning_rate": 5.057752142771091e-06, + "loss": 0.18307037353515626, + "step": 60000 + }, + { + "epoch": 0.5188454920407087, + "grad_norm": 4.649432913301617, + "learning_rate": 5.057603885589616e-06, + "loss": 0.155364990234375, + "step": 60005 + }, + { + "epoch": 0.518888725562252, + "grad_norm": 1.240392789910586, + "learning_rate": 5.057455618918655e-06, + "loss": 0.14400863647460938, + "step": 60010 + }, + { + "epoch": 0.5189319590837952, + "grad_norm": 0.20543449812696493, + "learning_rate": 5.05730734275889e-06, + "loss": 0.1834014892578125, + "step": 60015 + }, + { + "epoch": 0.5189751926053384, + "grad_norm": 1.2234442959771932, + "learning_rate": 5.057159057111006e-06, + "loss": 0.5161151885986328, + "step": 60020 + }, + { + "epoch": 0.5190184261268818, + "grad_norm": 0.3705490697131077, + "learning_rate": 5.057010761975685e-06, + "loss": 0.04586029052734375, + "step": 60025 + }, + { + "epoch": 0.519061659648425, + "grad_norm": 1.3329294827050218, + "learning_rate": 5.056862457353613e-06, + "loss": 0.19063262939453124, + "step": 60030 + }, + { + "epoch": 0.5191048931699682, + "grad_norm": 5.360535080822689, + "learning_rate": 5.056714143245473e-06, + "loss": 0.16675949096679688, + "step": 60035 + }, + { + "epoch": 0.5191481266915116, + "grad_norm": 26.334172102353037, + "learning_rate": 5.056565819651948e-06, + "loss": 0.16688194274902343, + "step": 60040 + }, + { + "epoch": 0.5191913602130548, + "grad_norm": 0.5304787369306242, + "learning_rate": 5.056417486573725e-06, + "loss": 0.025719642639160156, + "step": 60045 + }, + { + "epoch": 0.519234593734598, + "grad_norm": 22.871467624680157, + "learning_rate": 5.056269144011485e-06, + "loss": 0.160919189453125, + "step": 60050 + }, + { + "epoch": 0.5192778272561414, + "grad_norm": 1.2999092638516823, + "learning_rate": 5.0561207919659135e-06, + "loss": 0.023087310791015624, + "step": 60055 + }, + { + "epoch": 0.5193210607776846, + "grad_norm": 4.710336289685409, + "learning_rate": 5.0559724304376955e-06, + "loss": 0.15950546264648438, + "step": 60060 + }, + { + "epoch": 0.5193642942992278, + "grad_norm": 1.3853500573355064, + "learning_rate": 5.055824059427513e-06, + "loss": 0.12784423828125, + "step": 60065 + }, + { + "epoch": 0.5194075278207712, + "grad_norm": 22.343743338510095, + "learning_rate": 5.0556756789360525e-06, + "loss": 0.16311187744140626, + "step": 60070 + }, + { + "epoch": 0.5194507613423144, + "grad_norm": 2.1744219528349196, + "learning_rate": 5.055527288963997e-06, + "loss": 0.1153839111328125, + "step": 60075 + }, + { + "epoch": 0.5194939948638576, + "grad_norm": 1.0528612713833425, + "learning_rate": 5.055378889512032e-06, + "loss": 0.02093353271484375, + "step": 60080 + }, + { + "epoch": 0.519537228385401, + "grad_norm": 26.092128265155832, + "learning_rate": 5.055230480580839e-06, + "loss": 0.158465576171875, + "step": 60085 + }, + { + "epoch": 0.5195804619069442, + "grad_norm": 4.561351352379346, + "learning_rate": 5.055082062171107e-06, + "loss": 0.03303680419921875, + "step": 60090 + }, + { + "epoch": 0.5196236954284874, + "grad_norm": 29.06224510969493, + "learning_rate": 5.054933634283518e-06, + "loss": 0.16787853240966796, + "step": 60095 + }, + { + "epoch": 0.5196669289500307, + "grad_norm": 13.784815819364317, + "learning_rate": 5.054785196918756e-06, + "loss": 0.06646194458007812, + "step": 60100 + }, + { + "epoch": 0.519710162471574, + "grad_norm": 37.43252721868929, + "learning_rate": 5.054636750077506e-06, + "loss": 0.267181396484375, + "step": 60105 + }, + { + "epoch": 0.5197533959931172, + "grad_norm": 30.43982671049306, + "learning_rate": 5.054488293760453e-06, + "loss": 0.2052764892578125, + "step": 60110 + }, + { + "epoch": 0.5197966295146604, + "grad_norm": 17.511416191885747, + "learning_rate": 5.054339827968282e-06, + "loss": 0.13010711669921876, + "step": 60115 + }, + { + "epoch": 0.5198398630362038, + "grad_norm": 13.366663872881611, + "learning_rate": 5.054191352701676e-06, + "loss": 0.4038970947265625, + "step": 60120 + }, + { + "epoch": 0.519883096557747, + "grad_norm": 3.1469301648027472, + "learning_rate": 5.054042867961322e-06, + "loss": 0.218402099609375, + "step": 60125 + }, + { + "epoch": 0.5199263300792902, + "grad_norm": 11.64810089733048, + "learning_rate": 5.0538943737479045e-06, + "loss": 0.0816741943359375, + "step": 60130 + }, + { + "epoch": 0.5199695636008336, + "grad_norm": 2.3202666049024967, + "learning_rate": 5.053745870062106e-06, + "loss": 0.3318641662597656, + "step": 60135 + }, + { + "epoch": 0.5200127971223768, + "grad_norm": 0.49333718275474997, + "learning_rate": 5.053597356904614e-06, + "loss": 0.07333908081054688, + "step": 60140 + }, + { + "epoch": 0.52005603064392, + "grad_norm": 1.0325443472425069, + "learning_rate": 5.053448834276113e-06, + "loss": 0.051073455810546876, + "step": 60145 + }, + { + "epoch": 0.5200992641654634, + "grad_norm": 3.5955489069157727, + "learning_rate": 5.053300302177286e-06, + "loss": 0.21627197265625, + "step": 60150 + }, + { + "epoch": 0.5201424976870066, + "grad_norm": 4.506283009788555, + "learning_rate": 5.05315176060882e-06, + "loss": 0.09333953857421876, + "step": 60155 + }, + { + "epoch": 0.5201857312085498, + "grad_norm": 9.702454873683902, + "learning_rate": 5.0530032095714e-06, + "loss": 0.09951171875, + "step": 60160 + }, + { + "epoch": 0.5202289647300932, + "grad_norm": 31.25899728480537, + "learning_rate": 5.05285464906571e-06, + "loss": 0.5737852096557617, + "step": 60165 + }, + { + "epoch": 0.5202721982516364, + "grad_norm": 49.235044472879, + "learning_rate": 5.052706079092436e-06, + "loss": 0.37447509765625, + "step": 60170 + }, + { + "epoch": 0.5203154317731796, + "grad_norm": 1.5129363122572266, + "learning_rate": 5.0525574996522634e-06, + "loss": 0.0668304443359375, + "step": 60175 + }, + { + "epoch": 0.520358665294723, + "grad_norm": 4.406369435349332, + "learning_rate": 5.052408910745876e-06, + "loss": 0.115838623046875, + "step": 60180 + }, + { + "epoch": 0.5204018988162662, + "grad_norm": 0.916101035202919, + "learning_rate": 5.052260312373961e-06, + "loss": 0.2054931640625, + "step": 60185 + }, + { + "epoch": 0.5204451323378094, + "grad_norm": 5.862793009997696, + "learning_rate": 5.052111704537202e-06, + "loss": 0.1236175537109375, + "step": 60190 + }, + { + "epoch": 0.5204883658593527, + "grad_norm": 5.351679317718771, + "learning_rate": 5.0519630872362855e-06, + "loss": 0.095263671875, + "step": 60195 + }, + { + "epoch": 0.520531599380896, + "grad_norm": 8.784508716969954, + "learning_rate": 5.0518144604718965e-06, + "loss": 0.197686767578125, + "step": 60200 + }, + { + "epoch": 0.5205748329024392, + "grad_norm": 4.409610845992689, + "learning_rate": 5.0516658242447205e-06, + "loss": 0.571722412109375, + "step": 60205 + }, + { + "epoch": 0.5206180664239825, + "grad_norm": 18.57249004653862, + "learning_rate": 5.051517178555444e-06, + "loss": 0.127789306640625, + "step": 60210 + }, + { + "epoch": 0.5206612999455258, + "grad_norm": 3.879391998902535, + "learning_rate": 5.0513685234047495e-06, + "loss": 0.14785690307617189, + "step": 60215 + }, + { + "epoch": 0.520704533467069, + "grad_norm": 7.252928180153438, + "learning_rate": 5.051219858793326e-06, + "loss": 0.34532470703125, + "step": 60220 + }, + { + "epoch": 0.5207477669886122, + "grad_norm": 2.2073982842389634, + "learning_rate": 5.051071184721858e-06, + "loss": 0.21720428466796876, + "step": 60225 + }, + { + "epoch": 0.5207910005101556, + "grad_norm": 1.1208522536426342, + "learning_rate": 5.05092250119103e-06, + "loss": 0.102874755859375, + "step": 60230 + }, + { + "epoch": 0.5208342340316988, + "grad_norm": 4.871470923255524, + "learning_rate": 5.050773808201529e-06, + "loss": 0.2171783447265625, + "step": 60235 + }, + { + "epoch": 0.520877467553242, + "grad_norm": 0.32651968335393894, + "learning_rate": 5.05062510575404e-06, + "loss": 0.19432735443115234, + "step": 60240 + }, + { + "epoch": 0.5209207010747854, + "grad_norm": 3.283648709430652, + "learning_rate": 5.05047639384925e-06, + "loss": 0.1200592041015625, + "step": 60245 + }, + { + "epoch": 0.5209639345963286, + "grad_norm": 0.4223627015468828, + "learning_rate": 5.050327672487844e-06, + "loss": 0.18785552978515624, + "step": 60250 + }, + { + "epoch": 0.5210071681178718, + "grad_norm": 4.446713199155556, + "learning_rate": 5.050178941670507e-06, + "loss": 0.06277008056640625, + "step": 60255 + }, + { + "epoch": 0.5210504016394152, + "grad_norm": 21.738068130432477, + "learning_rate": 5.0500302013979264e-06, + "loss": 0.42490615844726565, + "step": 60260 + }, + { + "epoch": 0.5210936351609584, + "grad_norm": 0.3288598197594526, + "learning_rate": 5.0498814516707886e-06, + "loss": 0.36050872802734374, + "step": 60265 + }, + { + "epoch": 0.5211368686825016, + "grad_norm": 6.130055452970898, + "learning_rate": 5.049732692489778e-06, + "loss": 0.07384185791015625, + "step": 60270 + }, + { + "epoch": 0.521180102204045, + "grad_norm": 9.12093919228983, + "learning_rate": 5.0495839238555805e-06, + "loss": 0.15239791870117186, + "step": 60275 + }, + { + "epoch": 0.5212233357255882, + "grad_norm": 44.40059090775936, + "learning_rate": 5.049435145768884e-06, + "loss": 0.330712890625, + "step": 60280 + }, + { + "epoch": 0.5212665692471314, + "grad_norm": 0.13151543735086704, + "learning_rate": 5.049286358230373e-06, + "loss": 0.15080337524414061, + "step": 60285 + }, + { + "epoch": 0.5213098027686747, + "grad_norm": 2.2140587973629127, + "learning_rate": 5.049137561240735e-06, + "loss": 0.0648406982421875, + "step": 60290 + }, + { + "epoch": 0.521353036290218, + "grad_norm": 4.0033204423755135, + "learning_rate": 5.048988754800656e-06, + "loss": 0.1151357650756836, + "step": 60295 + }, + { + "epoch": 0.5213962698117612, + "grad_norm": 6.663228887642941, + "learning_rate": 5.048839938910821e-06, + "loss": 0.2095256805419922, + "step": 60300 + }, + { + "epoch": 0.5214395033333045, + "grad_norm": 2.1582566462450767, + "learning_rate": 5.048691113571918e-06, + "loss": 0.1072998046875, + "step": 60305 + }, + { + "epoch": 0.5214827368548478, + "grad_norm": 1.4490364140867351, + "learning_rate": 5.048542278784632e-06, + "loss": 0.2874885559082031, + "step": 60310 + }, + { + "epoch": 0.521525970376391, + "grad_norm": 23.499953975143555, + "learning_rate": 5.048393434549651e-06, + "loss": 0.279656982421875, + "step": 60315 + }, + { + "epoch": 0.5215692038979343, + "grad_norm": 3.024988752025712, + "learning_rate": 5.04824458086766e-06, + "loss": 0.20453014373779296, + "step": 60320 + }, + { + "epoch": 0.5216124374194776, + "grad_norm": 0.6178622447568987, + "learning_rate": 5.048095717739345e-06, + "loss": 0.032575225830078124, + "step": 60325 + }, + { + "epoch": 0.5216556709410208, + "grad_norm": 6.868860165367864, + "learning_rate": 5.047946845165394e-06, + "loss": 0.10335617065429688, + "step": 60330 + }, + { + "epoch": 0.5216989044625641, + "grad_norm": 3.4854846720156347, + "learning_rate": 5.047797963146494e-06, + "loss": 0.10778427124023438, + "step": 60335 + }, + { + "epoch": 0.5217421379841074, + "grad_norm": 3.7070930093515275, + "learning_rate": 5.04764907168333e-06, + "loss": 0.04321479797363281, + "step": 60340 + }, + { + "epoch": 0.5217853715056506, + "grad_norm": 1.8080757216768506, + "learning_rate": 5.047500170776589e-06, + "loss": 0.541357421875, + "step": 60345 + }, + { + "epoch": 0.5218286050271939, + "grad_norm": 12.597641421580256, + "learning_rate": 5.04735126042696e-06, + "loss": 0.0867950439453125, + "step": 60350 + }, + { + "epoch": 0.5218718385487372, + "grad_norm": 1.2162793148819155, + "learning_rate": 5.047202340635126e-06, + "loss": 0.10424423217773438, + "step": 60355 + }, + { + "epoch": 0.5219150720702804, + "grad_norm": 2.5299779087866217, + "learning_rate": 5.047053411401776e-06, + "loss": 0.11371078491210937, + "step": 60360 + }, + { + "epoch": 0.5219583055918237, + "grad_norm": 0.49179365664297836, + "learning_rate": 5.046904472727597e-06, + "loss": 0.13408164978027343, + "step": 60365 + }, + { + "epoch": 0.5220015391133669, + "grad_norm": 17.37808800279564, + "learning_rate": 5.046755524613275e-06, + "loss": 0.14152297973632813, + "step": 60370 + }, + { + "epoch": 0.5220447726349102, + "grad_norm": 42.81595899960863, + "learning_rate": 5.046606567059497e-06, + "loss": 0.12232589721679688, + "step": 60375 + }, + { + "epoch": 0.5220880061564535, + "grad_norm": 1.7524980234961824, + "learning_rate": 5.046457600066952e-06, + "loss": 0.38608551025390625, + "step": 60380 + }, + { + "epoch": 0.5221312396779967, + "grad_norm": 19.591969863000287, + "learning_rate": 5.046308623636324e-06, + "loss": 0.1486541748046875, + "step": 60385 + }, + { + "epoch": 0.52217447319954, + "grad_norm": 42.397047232023596, + "learning_rate": 5.046159637768301e-06, + "loss": 0.347509765625, + "step": 60390 + }, + { + "epoch": 0.5222177067210833, + "grad_norm": 8.555407290666711, + "learning_rate": 5.046010642463572e-06, + "loss": 0.2381378173828125, + "step": 60395 + }, + { + "epoch": 0.5222609402426265, + "grad_norm": 5.911832180320232, + "learning_rate": 5.045861637722821e-06, + "loss": 0.14569091796875, + "step": 60400 + }, + { + "epoch": 0.5223041737641698, + "grad_norm": 6.4261446978130286, + "learning_rate": 5.045712623546739e-06, + "loss": 0.0608978271484375, + "step": 60405 + }, + { + "epoch": 0.522347407285713, + "grad_norm": 25.75983134141889, + "learning_rate": 5.045563599936011e-06, + "loss": 0.451654052734375, + "step": 60410 + }, + { + "epoch": 0.5223906408072563, + "grad_norm": 1.7685840353734328, + "learning_rate": 5.045414566891322e-06, + "loss": 0.054354095458984376, + "step": 60415 + }, + { + "epoch": 0.5224338743287996, + "grad_norm": 23.724331577509744, + "learning_rate": 5.045265524413364e-06, + "loss": 0.15766220092773436, + "step": 60420 + }, + { + "epoch": 0.5224771078503428, + "grad_norm": 3.2009782484455283, + "learning_rate": 5.0451164725028214e-06, + "loss": 0.14889068603515626, + "step": 60425 + }, + { + "epoch": 0.5225203413718861, + "grad_norm": 8.830860926715705, + "learning_rate": 5.044967411160383e-06, + "loss": 0.3036949157714844, + "step": 60430 + }, + { + "epoch": 0.5225635748934294, + "grad_norm": 4.414352810322143, + "learning_rate": 5.044818340386734e-06, + "loss": 0.11908493041992188, + "step": 60435 + }, + { + "epoch": 0.5226068084149726, + "grad_norm": 14.91516778866376, + "learning_rate": 5.0446692601825644e-06, + "loss": 0.1948406219482422, + "step": 60440 + }, + { + "epoch": 0.5226500419365159, + "grad_norm": 6.334233824061408, + "learning_rate": 5.044520170548561e-06, + "loss": 0.19788055419921874, + "step": 60445 + }, + { + "epoch": 0.5226932754580592, + "grad_norm": 3.9372991907380603, + "learning_rate": 5.044371071485411e-06, + "loss": 0.18736801147460938, + "step": 60450 + }, + { + "epoch": 0.5227365089796024, + "grad_norm": 8.387950496931257, + "learning_rate": 5.044221962993802e-06, + "loss": 0.578985595703125, + "step": 60455 + }, + { + "epoch": 0.5227797425011457, + "grad_norm": 2.0616056918103665, + "learning_rate": 5.044072845074423e-06, + "loss": 0.023897171020507812, + "step": 60460 + }, + { + "epoch": 0.5228229760226889, + "grad_norm": 6.189402320838331, + "learning_rate": 5.04392371772796e-06, + "loss": 0.06170196533203125, + "step": 60465 + }, + { + "epoch": 0.5228662095442322, + "grad_norm": 0.5930222137155648, + "learning_rate": 5.0437745809551e-06, + "loss": 0.21861324310302735, + "step": 60470 + }, + { + "epoch": 0.5229094430657755, + "grad_norm": 9.86976218095447, + "learning_rate": 5.043625434756534e-06, + "loss": 0.16544189453125, + "step": 60475 + }, + { + "epoch": 0.5229526765873187, + "grad_norm": 3.73963216028839, + "learning_rate": 5.043476279132949e-06, + "loss": 0.054524993896484374, + "step": 60480 + }, + { + "epoch": 0.522995910108862, + "grad_norm": 1.089723287516771, + "learning_rate": 5.04332711408503e-06, + "loss": 0.047649383544921875, + "step": 60485 + }, + { + "epoch": 0.5230391436304053, + "grad_norm": 3.2217604562371194, + "learning_rate": 5.043177939613468e-06, + "loss": 0.214886474609375, + "step": 60490 + }, + { + "epoch": 0.5230823771519485, + "grad_norm": 19.554669140360037, + "learning_rate": 5.043028755718949e-06, + "loss": 0.256671142578125, + "step": 60495 + }, + { + "epoch": 0.5231256106734918, + "grad_norm": 0.19084420980079617, + "learning_rate": 5.042879562402162e-06, + "loss": 0.05196075439453125, + "step": 60500 + }, + { + "epoch": 0.5231688441950351, + "grad_norm": 0.27772710653696536, + "learning_rate": 5.042730359663795e-06, + "loss": 0.01855297088623047, + "step": 60505 + }, + { + "epoch": 0.5232120777165783, + "grad_norm": 26.50773396656689, + "learning_rate": 5.042581147504537e-06, + "loss": 0.311175537109375, + "step": 60510 + }, + { + "epoch": 0.5232553112381216, + "grad_norm": 21.43315071910383, + "learning_rate": 5.042431925925074e-06, + "loss": 0.2216949462890625, + "step": 60515 + }, + { + "epoch": 0.5232985447596649, + "grad_norm": 8.670532918786437, + "learning_rate": 5.042282694926097e-06, + "loss": 0.240673828125, + "step": 60520 + }, + { + "epoch": 0.5233417782812081, + "grad_norm": 2.167034930127061, + "learning_rate": 5.042133454508291e-06, + "loss": 0.09323005676269532, + "step": 60525 + }, + { + "epoch": 0.5233850118027514, + "grad_norm": 4.081406081754495, + "learning_rate": 5.041984204672347e-06, + "loss": 0.2584228515625, + "step": 60530 + }, + { + "epoch": 0.5234282453242947, + "grad_norm": 1.0940462923046794, + "learning_rate": 5.0418349454189516e-06, + "loss": 0.10684356689453126, + "step": 60535 + }, + { + "epoch": 0.5234714788458379, + "grad_norm": 3.4122508675863332, + "learning_rate": 5.041685676748794e-06, + "loss": 0.05778961181640625, + "step": 60540 + }, + { + "epoch": 0.5235147123673811, + "grad_norm": 3.0157456776589404, + "learning_rate": 5.041536398662563e-06, + "loss": 0.12821502685546876, + "step": 60545 + }, + { + "epoch": 0.5235579458889245, + "grad_norm": 7.490358859233511, + "learning_rate": 5.041387111160946e-06, + "loss": 0.06133270263671875, + "step": 60550 + }, + { + "epoch": 0.5236011794104677, + "grad_norm": 3.47650798671677, + "learning_rate": 5.041237814244632e-06, + "loss": 0.12303314208984376, + "step": 60555 + }, + { + "epoch": 0.5236444129320109, + "grad_norm": 5.646641267607, + "learning_rate": 5.04108850791431e-06, + "loss": 0.05628509521484375, + "step": 60560 + }, + { + "epoch": 0.5236876464535543, + "grad_norm": 2.722355024486502, + "learning_rate": 5.040939192170667e-06, + "loss": 0.148052978515625, + "step": 60565 + }, + { + "epoch": 0.5237308799750975, + "grad_norm": 0.46636760312256403, + "learning_rate": 5.040789867014394e-06, + "loss": 0.15655975341796874, + "step": 60570 + }, + { + "epoch": 0.5237741134966407, + "grad_norm": 14.542095751047958, + "learning_rate": 5.0406405324461786e-06, + "loss": 0.1585357666015625, + "step": 60575 + }, + { + "epoch": 0.523817347018184, + "grad_norm": 14.71961578631313, + "learning_rate": 5.0404911884667085e-06, + "loss": 0.377398681640625, + "step": 60580 + }, + { + "epoch": 0.5238605805397273, + "grad_norm": 4.573260510335551, + "learning_rate": 5.040341835076674e-06, + "loss": 0.07494659423828125, + "step": 60585 + }, + { + "epoch": 0.5239038140612705, + "grad_norm": 1.1897644747677139, + "learning_rate": 5.040192472276764e-06, + "loss": 0.113555908203125, + "step": 60590 + }, + { + "epoch": 0.5239470475828139, + "grad_norm": 3.4098129741935086, + "learning_rate": 5.0400431000676645e-06, + "loss": 0.04595794677734375, + "step": 60595 + }, + { + "epoch": 0.5239902811043571, + "grad_norm": 3.3820577149296933, + "learning_rate": 5.039893718450068e-06, + "loss": 0.07518272399902344, + "step": 60600 + }, + { + "epoch": 0.5240335146259003, + "grad_norm": 1.336172772799598, + "learning_rate": 5.039744327424661e-06, + "loss": 0.13888931274414062, + "step": 60605 + }, + { + "epoch": 0.5240767481474436, + "grad_norm": 1.0745816323879882, + "learning_rate": 5.039594926992134e-06, + "loss": 0.18732452392578125, + "step": 60610 + }, + { + "epoch": 0.5241199816689869, + "grad_norm": 0.445953323297024, + "learning_rate": 5.039445517153175e-06, + "loss": 0.07272911071777344, + "step": 60615 + }, + { + "epoch": 0.5241632151905301, + "grad_norm": 11.508217888684673, + "learning_rate": 5.039296097908475e-06, + "loss": 0.02939453125, + "step": 60620 + }, + { + "epoch": 0.5242064487120734, + "grad_norm": 10.695255497135975, + "learning_rate": 5.039146669258721e-06, + "loss": 0.06516532897949219, + "step": 60625 + }, + { + "epoch": 0.5242496822336167, + "grad_norm": 21.71462837314281, + "learning_rate": 5.038997231204601e-06, + "loss": 0.1693267822265625, + "step": 60630 + }, + { + "epoch": 0.5242929157551599, + "grad_norm": 5.818313687466517, + "learning_rate": 5.038847783746807e-06, + "loss": 0.28237152099609375, + "step": 60635 + }, + { + "epoch": 0.5243361492767031, + "grad_norm": 27.68406073399555, + "learning_rate": 5.038698326886028e-06, + "loss": 0.15374164581298827, + "step": 60640 + }, + { + "epoch": 0.5243793827982465, + "grad_norm": 12.967722992933842, + "learning_rate": 5.038548860622951e-06, + "loss": 0.1215087890625, + "step": 60645 + }, + { + "epoch": 0.5244226163197897, + "grad_norm": 4.852837120208411, + "learning_rate": 5.038399384958267e-06, + "loss": 0.09550247192382813, + "step": 60650 + }, + { + "epoch": 0.5244658498413329, + "grad_norm": 10.991595937727862, + "learning_rate": 5.038249899892667e-06, + "loss": 0.16199798583984376, + "step": 60655 + }, + { + "epoch": 0.5245090833628763, + "grad_norm": 28.825554676276226, + "learning_rate": 5.038100405426837e-06, + "loss": 0.3841888427734375, + "step": 60660 + }, + { + "epoch": 0.5245523168844195, + "grad_norm": 32.806775044087956, + "learning_rate": 5.037950901561467e-06, + "loss": 0.15276947021484374, + "step": 60665 + }, + { + "epoch": 0.5245955504059627, + "grad_norm": 53.43740458389931, + "learning_rate": 5.03780138829725e-06, + "loss": 0.60460205078125, + "step": 60670 + }, + { + "epoch": 0.5246387839275061, + "grad_norm": 6.101269449797264, + "learning_rate": 5.037651865634871e-06, + "loss": 0.082952880859375, + "step": 60675 + }, + { + "epoch": 0.5246820174490493, + "grad_norm": 1.2695263296346202, + "learning_rate": 5.037502333575023e-06, + "loss": 0.118072509765625, + "step": 60680 + }, + { + "epoch": 0.5247252509705925, + "grad_norm": 10.129845400492057, + "learning_rate": 5.037352792118393e-06, + "loss": 0.23987941741943358, + "step": 60685 + }, + { + "epoch": 0.5247684844921359, + "grad_norm": 19.302336967561345, + "learning_rate": 5.037203241265673e-06, + "loss": 0.125543212890625, + "step": 60690 + }, + { + "epoch": 0.5248117180136791, + "grad_norm": 3.047851176404399, + "learning_rate": 5.037053681017551e-06, + "loss": 0.29411659240722654, + "step": 60695 + }, + { + "epoch": 0.5248549515352223, + "grad_norm": 19.515782524366085, + "learning_rate": 5.036904111374718e-06, + "loss": 0.16043014526367189, + "step": 60700 + }, + { + "epoch": 0.5248981850567657, + "grad_norm": 22.721577497940743, + "learning_rate": 5.036754532337863e-06, + "loss": 0.12007904052734375, + "step": 60705 + }, + { + "epoch": 0.5249414185783089, + "grad_norm": 8.14015321839501, + "learning_rate": 5.036604943907676e-06, + "loss": 0.2078948974609375, + "step": 60710 + }, + { + "epoch": 0.5249846520998521, + "grad_norm": 26.74976130989947, + "learning_rate": 5.036455346084848e-06, + "loss": 0.18668365478515625, + "step": 60715 + }, + { + "epoch": 0.5250278856213954, + "grad_norm": 11.503132012782515, + "learning_rate": 5.036305738870066e-06, + "loss": 0.06389884948730469, + "step": 60720 + }, + { + "epoch": 0.5250711191429387, + "grad_norm": 18.330894517476548, + "learning_rate": 5.036156122264023e-06, + "loss": 0.2260711669921875, + "step": 60725 + }, + { + "epoch": 0.5251143526644819, + "grad_norm": 3.1089979275070965, + "learning_rate": 5.036006496267408e-06, + "loss": 0.0804107666015625, + "step": 60730 + }, + { + "epoch": 0.5251575861860251, + "grad_norm": 0.31710303461709516, + "learning_rate": 5.03585686088091e-06, + "loss": 0.1166534423828125, + "step": 60735 + }, + { + "epoch": 0.5252008197075685, + "grad_norm": 10.55217433211065, + "learning_rate": 5.035707216105221e-06, + "loss": 0.07460174560546876, + "step": 60740 + }, + { + "epoch": 0.5252440532291117, + "grad_norm": 1.821438582676043, + "learning_rate": 5.03555756194103e-06, + "loss": 0.3294189453125, + "step": 60745 + }, + { + "epoch": 0.525287286750655, + "grad_norm": 32.20265615252781, + "learning_rate": 5.035407898389027e-06, + "loss": 0.22791213989257814, + "step": 60750 + }, + { + "epoch": 0.5253305202721983, + "grad_norm": 2.7796204118212544, + "learning_rate": 5.035258225449903e-06, + "loss": 0.0443511962890625, + "step": 60755 + }, + { + "epoch": 0.5253737537937415, + "grad_norm": 2.951804700704762, + "learning_rate": 5.0351085431243474e-06, + "loss": 0.402545166015625, + "step": 60760 + }, + { + "epoch": 0.5254169873152847, + "grad_norm": 2.0749965310615734, + "learning_rate": 5.034958851413051e-06, + "loss": 0.0235748291015625, + "step": 60765 + }, + { + "epoch": 0.5254602208368281, + "grad_norm": 32.67301227484279, + "learning_rate": 5.034809150316705e-06, + "loss": 0.53636474609375, + "step": 60770 + }, + { + "epoch": 0.5255034543583713, + "grad_norm": 32.45827627352755, + "learning_rate": 5.034659439835998e-06, + "loss": 0.09457244873046874, + "step": 60775 + }, + { + "epoch": 0.5255466878799145, + "grad_norm": 4.7491526163379785, + "learning_rate": 5.034509719971623e-06, + "loss": 0.03565521240234375, + "step": 60780 + }, + { + "epoch": 0.5255899214014579, + "grad_norm": 10.701915216425252, + "learning_rate": 5.034359990724268e-06, + "loss": 0.06509857177734375, + "step": 60785 + }, + { + "epoch": 0.5256331549230011, + "grad_norm": 3.305101609824262, + "learning_rate": 5.034210252094624e-06, + "loss": 0.2739845275878906, + "step": 60790 + }, + { + "epoch": 0.5256763884445443, + "grad_norm": 16.019271292392794, + "learning_rate": 5.0340605040833824e-06, + "loss": 0.5115463256835937, + "step": 60795 + }, + { + "epoch": 0.5257196219660877, + "grad_norm": 2.221972468721752, + "learning_rate": 5.033910746691233e-06, + "loss": 0.3431270599365234, + "step": 60800 + }, + { + "epoch": 0.5257628554876309, + "grad_norm": 13.830949092441944, + "learning_rate": 5.033760979918868e-06, + "loss": 0.06351814270019532, + "step": 60805 + }, + { + "epoch": 0.5258060890091741, + "grad_norm": 0.7688603238242145, + "learning_rate": 5.033611203766977e-06, + "loss": 0.0472259521484375, + "step": 60810 + }, + { + "epoch": 0.5258493225307174, + "grad_norm": 1.8793647546273202, + "learning_rate": 5.03346141823625e-06, + "loss": 0.20050048828125, + "step": 60815 + }, + { + "epoch": 0.5258925560522607, + "grad_norm": 7.231522347339771, + "learning_rate": 5.033311623327379e-06, + "loss": 0.11746978759765625, + "step": 60820 + }, + { + "epoch": 0.5259357895738039, + "grad_norm": 6.103602990650352, + "learning_rate": 5.033161819041055e-06, + "loss": 0.07427520751953125, + "step": 60825 + }, + { + "epoch": 0.5259790230953472, + "grad_norm": 0.11638185081073527, + "learning_rate": 5.0330120053779674e-06, + "loss": 0.35734806060791013, + "step": 60830 + }, + { + "epoch": 0.5260222566168905, + "grad_norm": 7.263304446236815, + "learning_rate": 5.032862182338809e-06, + "loss": 0.0685272216796875, + "step": 60835 + }, + { + "epoch": 0.5260654901384337, + "grad_norm": 3.5244644265684784, + "learning_rate": 5.0327123499242696e-06, + "loss": 0.180517578125, + "step": 60840 + }, + { + "epoch": 0.526108723659977, + "grad_norm": 15.188258824391006, + "learning_rate": 5.03256250813504e-06, + "loss": 0.164691162109375, + "step": 60845 + }, + { + "epoch": 0.5261519571815203, + "grad_norm": 1.8015166162127736, + "learning_rate": 5.0324126569718115e-06, + "loss": 0.08157501220703126, + "step": 60850 + }, + { + "epoch": 0.5261951907030635, + "grad_norm": 40.53097416169756, + "learning_rate": 5.032262796435276e-06, + "loss": 0.2256591796875, + "step": 60855 + }, + { + "epoch": 0.5262384242246068, + "grad_norm": 14.38011320821747, + "learning_rate": 5.032112926526124e-06, + "loss": 0.265283203125, + "step": 60860 + }, + { + "epoch": 0.5262816577461501, + "grad_norm": 6.654963478775008, + "learning_rate": 5.031963047245046e-06, + "loss": 0.18665084838867188, + "step": 60865 + }, + { + "epoch": 0.5263248912676933, + "grad_norm": 37.67668135795326, + "learning_rate": 5.031813158592735e-06, + "loss": 0.469891357421875, + "step": 60870 + }, + { + "epoch": 0.5263681247892366, + "grad_norm": 5.72820794984049, + "learning_rate": 5.031663260569881e-06, + "loss": 0.08554153442382813, + "step": 60875 + }, + { + "epoch": 0.5264113583107799, + "grad_norm": 14.9196915398, + "learning_rate": 5.031513353177175e-06, + "loss": 0.272564697265625, + "step": 60880 + }, + { + "epoch": 0.5264545918323231, + "grad_norm": 2.919283728259726, + "learning_rate": 5.031363436415309e-06, + "loss": 0.049587249755859375, + "step": 60885 + }, + { + "epoch": 0.5264978253538664, + "grad_norm": 0.0879024670451879, + "learning_rate": 5.031213510284975e-06, + "loss": 0.1061859130859375, + "step": 60890 + }, + { + "epoch": 0.5265410588754096, + "grad_norm": 3.7440037278473746, + "learning_rate": 5.031063574786864e-06, + "loss": 0.06773681640625, + "step": 60895 + }, + { + "epoch": 0.5265842923969529, + "grad_norm": 1.3690297261561521, + "learning_rate": 5.0309136299216665e-06, + "loss": 0.02097015380859375, + "step": 60900 + }, + { + "epoch": 0.5266275259184962, + "grad_norm": 0.25481145005276695, + "learning_rate": 5.030763675690075e-06, + "loss": 0.022124576568603515, + "step": 60905 + }, + { + "epoch": 0.5266707594400394, + "grad_norm": 3.5490700797256594, + "learning_rate": 5.030613712092781e-06, + "loss": 0.134027099609375, + "step": 60910 + }, + { + "epoch": 0.5267139929615827, + "grad_norm": 36.25340321957265, + "learning_rate": 5.0304637391304755e-06, + "loss": 0.6069664001464844, + "step": 60915 + }, + { + "epoch": 0.526757226483126, + "grad_norm": 13.287437712054286, + "learning_rate": 5.0303137568038515e-06, + "loss": 0.2134033203125, + "step": 60920 + }, + { + "epoch": 0.5268004600046692, + "grad_norm": 0.4777949581220546, + "learning_rate": 5.030163765113599e-06, + "loss": 0.1701873779296875, + "step": 60925 + }, + { + "epoch": 0.5268436935262125, + "grad_norm": 2.7803119051002363, + "learning_rate": 5.030013764060411e-06, + "loss": 0.122686767578125, + "step": 60930 + }, + { + "epoch": 0.5268869270477557, + "grad_norm": 25.78367040752776, + "learning_rate": 5.029863753644978e-06, + "loss": 0.08052444458007812, + "step": 60935 + }, + { + "epoch": 0.526930160569299, + "grad_norm": 18.295358550405922, + "learning_rate": 5.0297137338679945e-06, + "loss": 0.06348800659179688, + "step": 60940 + }, + { + "epoch": 0.5269733940908423, + "grad_norm": 14.513022709628828, + "learning_rate": 5.02956370473015e-06, + "loss": 0.17505645751953125, + "step": 60945 + }, + { + "epoch": 0.5270166276123855, + "grad_norm": 1.8214003357850275, + "learning_rate": 5.029413666232136e-06, + "loss": 0.15538177490234376, + "step": 60950 + }, + { + "epoch": 0.5270598611339288, + "grad_norm": 2.998116412964843, + "learning_rate": 5.029263618374647e-06, + "loss": 0.3200077056884766, + "step": 60955 + }, + { + "epoch": 0.5271030946554721, + "grad_norm": 1.600055423756045, + "learning_rate": 5.029113561158372e-06, + "loss": 0.07519073486328125, + "step": 60960 + }, + { + "epoch": 0.5271463281770153, + "grad_norm": 29.472065035080497, + "learning_rate": 5.028963494584005e-06, + "loss": 0.4765625, + "step": 60965 + }, + { + "epoch": 0.5271895616985586, + "grad_norm": 20.16515625413148, + "learning_rate": 5.0288134186522385e-06, + "loss": 0.27558364868164065, + "step": 60970 + }, + { + "epoch": 0.5272327952201019, + "grad_norm": 7.165872985932902, + "learning_rate": 5.028663333363763e-06, + "loss": 0.290130615234375, + "step": 60975 + }, + { + "epoch": 0.5272760287416451, + "grad_norm": 0.580488323447724, + "learning_rate": 5.028513238719272e-06, + "loss": 0.2592803955078125, + "step": 60980 + }, + { + "epoch": 0.5273192622631884, + "grad_norm": 23.60831910650052, + "learning_rate": 5.0283631347194565e-06, + "loss": 0.1360443115234375, + "step": 60985 + }, + { + "epoch": 0.5273624957847316, + "grad_norm": 33.79792749119868, + "learning_rate": 5.0282130213650095e-06, + "loss": 0.19941558837890624, + "step": 60990 + }, + { + "epoch": 0.5274057293062749, + "grad_norm": 8.110240610141897, + "learning_rate": 5.028062898656623e-06, + "loss": 0.0629852294921875, + "step": 60995 + }, + { + "epoch": 0.5274489628278182, + "grad_norm": 2.0053061117823776, + "learning_rate": 5.027912766594991e-06, + "loss": 0.16453475952148439, + "step": 61000 + }, + { + "epoch": 0.5274921963493614, + "grad_norm": 27.522972599763985, + "learning_rate": 5.027762625180804e-06, + "loss": 0.2882080078125, + "step": 61005 + }, + { + "epoch": 0.5275354298709047, + "grad_norm": 7.831487141152584, + "learning_rate": 5.027612474414754e-06, + "loss": 0.11655120849609375, + "step": 61010 + }, + { + "epoch": 0.527578663392448, + "grad_norm": 6.688232706876347, + "learning_rate": 5.027462314297536e-06, + "loss": 0.102008056640625, + "step": 61015 + }, + { + "epoch": 0.5276218969139912, + "grad_norm": 56.70614373249409, + "learning_rate": 5.027312144829839e-06, + "loss": 0.500970458984375, + "step": 61020 + }, + { + "epoch": 0.5276651304355345, + "grad_norm": 14.361106069454047, + "learning_rate": 5.0271619660123584e-06, + "loss": 0.18739471435546876, + "step": 61025 + }, + { + "epoch": 0.5277083639570778, + "grad_norm": 33.79775245189958, + "learning_rate": 5.027011777845786e-06, + "loss": 0.381494140625, + "step": 61030 + }, + { + "epoch": 0.527751597478621, + "grad_norm": 0.15959945321263733, + "learning_rate": 5.0268615803308156e-06, + "loss": 0.10991058349609376, + "step": 61035 + }, + { + "epoch": 0.5277948310001643, + "grad_norm": 8.60277086711335, + "learning_rate": 5.026711373468137e-06, + "loss": 0.15377349853515626, + "step": 61040 + }, + { + "epoch": 0.5278380645217076, + "grad_norm": 28.853202313818603, + "learning_rate": 5.026561157258446e-06, + "loss": 0.22263336181640625, + "step": 61045 + }, + { + "epoch": 0.5278812980432508, + "grad_norm": 6.526309377945949, + "learning_rate": 5.026410931702432e-06, + "loss": 0.11348495483398438, + "step": 61050 + }, + { + "epoch": 0.5279245315647941, + "grad_norm": 6.779818304430491, + "learning_rate": 5.026260696800792e-06, + "loss": 0.07586669921875, + "step": 61055 + }, + { + "epoch": 0.5279677650863374, + "grad_norm": 10.120759241442414, + "learning_rate": 5.026110452554216e-06, + "loss": 0.22371368408203124, + "step": 61060 + }, + { + "epoch": 0.5280109986078806, + "grad_norm": 4.475843982876067, + "learning_rate": 5.025960198963397e-06, + "loss": 0.09685173034667968, + "step": 61065 + }, + { + "epoch": 0.5280542321294238, + "grad_norm": 3.7332538159369095, + "learning_rate": 5.0258099360290285e-06, + "loss": 0.2834918975830078, + "step": 61070 + }, + { + "epoch": 0.5280974656509672, + "grad_norm": 1.2406434282555368, + "learning_rate": 5.025659663751804e-06, + "loss": 0.08550071716308594, + "step": 61075 + }, + { + "epoch": 0.5281406991725104, + "grad_norm": 1.34207846354995, + "learning_rate": 5.025509382132417e-06, + "loss": 0.19530792236328126, + "step": 61080 + }, + { + "epoch": 0.5281839326940536, + "grad_norm": 29.307141146309444, + "learning_rate": 5.025359091171558e-06, + "loss": 0.1988494873046875, + "step": 61085 + }, + { + "epoch": 0.528227166215597, + "grad_norm": 24.294374244836302, + "learning_rate": 5.025208790869922e-06, + "loss": 0.15991668701171874, + "step": 61090 + }, + { + "epoch": 0.5282703997371402, + "grad_norm": 1.7844017942628914, + "learning_rate": 5.025058481228203e-06, + "loss": 0.0557037353515625, + "step": 61095 + }, + { + "epoch": 0.5283136332586834, + "grad_norm": 7.089182152459032, + "learning_rate": 5.024908162247093e-06, + "loss": 0.0692840576171875, + "step": 61100 + }, + { + "epoch": 0.5283568667802268, + "grad_norm": 2.9168460214761027, + "learning_rate": 5.0247578339272855e-06, + "loss": 0.07988643646240234, + "step": 61105 + }, + { + "epoch": 0.52840010030177, + "grad_norm": 1.9608516621596934, + "learning_rate": 5.0246074962694744e-06, + "loss": 0.1361083984375, + "step": 61110 + }, + { + "epoch": 0.5284433338233132, + "grad_norm": 1.4524862973975647, + "learning_rate": 5.024457149274351e-06, + "loss": 0.08771495819091797, + "step": 61115 + }, + { + "epoch": 0.5284865673448566, + "grad_norm": 14.841441345860439, + "learning_rate": 5.024306792942611e-06, + "loss": 0.266766357421875, + "step": 61120 + }, + { + "epoch": 0.5285298008663998, + "grad_norm": 0.3736205021997576, + "learning_rate": 5.024156427274947e-06, + "loss": 0.07989301681518554, + "step": 61125 + }, + { + "epoch": 0.528573034387943, + "grad_norm": 18.55503629638657, + "learning_rate": 5.024006052272052e-06, + "loss": 0.11578521728515626, + "step": 61130 + }, + { + "epoch": 0.5286162679094863, + "grad_norm": 4.02978844141272, + "learning_rate": 5.02385566793462e-06, + "loss": 0.06352462768554687, + "step": 61135 + }, + { + "epoch": 0.5286595014310296, + "grad_norm": 16.184601882244415, + "learning_rate": 5.023705274263344e-06, + "loss": 0.12215576171875, + "step": 61140 + }, + { + "epoch": 0.5287027349525728, + "grad_norm": 6.581640879067845, + "learning_rate": 5.023554871258919e-06, + "loss": 0.15230560302734375, + "step": 61145 + }, + { + "epoch": 0.5287459684741161, + "grad_norm": 2.191180581070319, + "learning_rate": 5.023404458922038e-06, + "loss": 0.0508087158203125, + "step": 61150 + }, + { + "epoch": 0.5287892019956594, + "grad_norm": 1.9305288073252551, + "learning_rate": 5.023254037253393e-06, + "loss": 0.04097061157226563, + "step": 61155 + }, + { + "epoch": 0.5288324355172026, + "grad_norm": 14.188247224026941, + "learning_rate": 5.02310360625368e-06, + "loss": 0.1585235595703125, + "step": 61160 + }, + { + "epoch": 0.5288756690387458, + "grad_norm": 34.531117973176926, + "learning_rate": 5.022953165923592e-06, + "loss": 0.1431915283203125, + "step": 61165 + }, + { + "epoch": 0.5289189025602892, + "grad_norm": 15.135295560775669, + "learning_rate": 5.022802716263823e-06, + "loss": 0.145684814453125, + "step": 61170 + }, + { + "epoch": 0.5289621360818324, + "grad_norm": 7.447297818953242, + "learning_rate": 5.0226522572750665e-06, + "loss": 0.34654693603515624, + "step": 61175 + }, + { + "epoch": 0.5290053696033756, + "grad_norm": 7.630251526335181, + "learning_rate": 5.022501788958016e-06, + "loss": 0.18269996643066405, + "step": 61180 + }, + { + "epoch": 0.529048603124919, + "grad_norm": 39.08097314478949, + "learning_rate": 5.022351311313367e-06, + "loss": 0.24363555908203124, + "step": 61185 + }, + { + "epoch": 0.5290918366464622, + "grad_norm": 2.3738476457903555, + "learning_rate": 5.02220082434181e-06, + "loss": 0.061102294921875, + "step": 61190 + }, + { + "epoch": 0.5291350701680054, + "grad_norm": 0.9089330939776693, + "learning_rate": 5.022050328044044e-06, + "loss": 0.07621917724609376, + "step": 61195 + }, + { + "epoch": 0.5291783036895488, + "grad_norm": 0.1388733520523295, + "learning_rate": 5.02189982242076e-06, + "loss": 0.09812660217285156, + "step": 61200 + }, + { + "epoch": 0.529221537211092, + "grad_norm": 0.7527847095412671, + "learning_rate": 5.021749307472652e-06, + "loss": 0.06634712219238281, + "step": 61205 + }, + { + "epoch": 0.5292647707326352, + "grad_norm": 13.230889840226194, + "learning_rate": 5.021598783200416e-06, + "loss": 0.11480064392089843, + "step": 61210 + }, + { + "epoch": 0.5293080042541786, + "grad_norm": 0.3986872684683004, + "learning_rate": 5.021448249604743e-06, + "loss": 0.30268325805664065, + "step": 61215 + }, + { + "epoch": 0.5293512377757218, + "grad_norm": 2.120813316466086, + "learning_rate": 5.021297706686332e-06, + "loss": 0.0866119384765625, + "step": 61220 + }, + { + "epoch": 0.529394471297265, + "grad_norm": 7.195061857064411, + "learning_rate": 5.021147154445873e-06, + "loss": 0.2574485778808594, + "step": 61225 + }, + { + "epoch": 0.5294377048188084, + "grad_norm": 2.9972985563002426, + "learning_rate": 5.020996592884062e-06, + "loss": 0.4555511474609375, + "step": 61230 + }, + { + "epoch": 0.5294809383403516, + "grad_norm": 1.9395235598009388, + "learning_rate": 5.020846022001593e-06, + "loss": 0.04322357177734375, + "step": 61235 + }, + { + "epoch": 0.5295241718618948, + "grad_norm": 27.84664251171026, + "learning_rate": 5.020695441799161e-06, + "loss": 0.1722391128540039, + "step": 61240 + }, + { + "epoch": 0.529567405383438, + "grad_norm": 0.9323281762672613, + "learning_rate": 5.020544852277461e-06, + "loss": 0.1074737548828125, + "step": 61245 + }, + { + "epoch": 0.5296106389049814, + "grad_norm": 0.3721635691281676, + "learning_rate": 5.0203942534371855e-06, + "loss": 0.02720470428466797, + "step": 61250 + }, + { + "epoch": 0.5296538724265246, + "grad_norm": 19.212944626523427, + "learning_rate": 5.020243645279031e-06, + "loss": 0.08323707580566406, + "step": 61255 + }, + { + "epoch": 0.5296971059480678, + "grad_norm": 4.67770062571488, + "learning_rate": 5.020093027803691e-06, + "loss": 0.1597259521484375, + "step": 61260 + }, + { + "epoch": 0.5297403394696112, + "grad_norm": 10.37059256396749, + "learning_rate": 5.01994240101186e-06, + "loss": 0.07787017822265625, + "step": 61265 + }, + { + "epoch": 0.5297835729911544, + "grad_norm": 4.246415300069037, + "learning_rate": 5.019791764904234e-06, + "loss": 0.059088134765625, + "step": 61270 + }, + { + "epoch": 0.5298268065126976, + "grad_norm": 19.628919843077338, + "learning_rate": 5.019641119481507e-06, + "loss": 0.5797752380371094, + "step": 61275 + }, + { + "epoch": 0.529870040034241, + "grad_norm": 27.88663398469511, + "learning_rate": 5.019490464744372e-06, + "loss": 0.183416748046875, + "step": 61280 + }, + { + "epoch": 0.5299132735557842, + "grad_norm": 9.139899627465626, + "learning_rate": 5.019339800693527e-06, + "loss": 0.0350006103515625, + "step": 61285 + }, + { + "epoch": 0.5299565070773274, + "grad_norm": 2.1894991273818007, + "learning_rate": 5.019189127329665e-06, + "loss": 0.112762451171875, + "step": 61290 + }, + { + "epoch": 0.5299997405988708, + "grad_norm": 5.259079891347302, + "learning_rate": 5.019038444653481e-06, + "loss": 0.09023971557617187, + "step": 61295 + }, + { + "epoch": 0.530042974120414, + "grad_norm": 34.32424948585085, + "learning_rate": 5.01888775266567e-06, + "loss": 0.22734909057617186, + "step": 61300 + }, + { + "epoch": 0.5300862076419572, + "grad_norm": 4.019337207144552, + "learning_rate": 5.018737051366926e-06, + "loss": 0.06844863891601563, + "step": 61305 + }, + { + "epoch": 0.5301294411635006, + "grad_norm": 20.106007059366025, + "learning_rate": 5.018586340757947e-06, + "loss": 0.206036376953125, + "step": 61310 + }, + { + "epoch": 0.5301726746850438, + "grad_norm": 2.2766763493888367, + "learning_rate": 5.018435620839425e-06, + "loss": 0.4473297119140625, + "step": 61315 + }, + { + "epoch": 0.530215908206587, + "grad_norm": 6.241570096441954, + "learning_rate": 5.018284891612057e-06, + "loss": 0.46949462890625, + "step": 61320 + }, + { + "epoch": 0.5302591417281303, + "grad_norm": 16.688533370499172, + "learning_rate": 5.018134153076537e-06, + "loss": 0.15410003662109376, + "step": 61325 + }, + { + "epoch": 0.5303023752496736, + "grad_norm": 6.059781155889411, + "learning_rate": 5.017983405233561e-06, + "loss": 0.306561279296875, + "step": 61330 + }, + { + "epoch": 0.5303456087712168, + "grad_norm": 0.8116933092448254, + "learning_rate": 5.017832648083823e-06, + "loss": 0.02363243103027344, + "step": 61335 + }, + { + "epoch": 0.5303888422927601, + "grad_norm": 6.68977994703583, + "learning_rate": 5.01768188162802e-06, + "loss": 0.10112457275390625, + "step": 61340 + }, + { + "epoch": 0.5304320758143034, + "grad_norm": 14.702153543222625, + "learning_rate": 5.017531105866845e-06, + "loss": 0.14365158081054688, + "step": 61345 + }, + { + "epoch": 0.5304753093358466, + "grad_norm": 0.3378727526748961, + "learning_rate": 5.017380320800997e-06, + "loss": 0.13987960815429687, + "step": 61350 + }, + { + "epoch": 0.5305185428573899, + "grad_norm": 7.40804845419581, + "learning_rate": 5.0172295264311675e-06, + "loss": 0.07111129760742188, + "step": 61355 + }, + { + "epoch": 0.5305617763789332, + "grad_norm": 3.295585540708523, + "learning_rate": 5.017078722758054e-06, + "loss": 0.17892723083496093, + "step": 61360 + }, + { + "epoch": 0.5306050099004764, + "grad_norm": 8.42262379505072, + "learning_rate": 5.016927909782352e-06, + "loss": 0.35478515625, + "step": 61365 + }, + { + "epoch": 0.5306482434220197, + "grad_norm": 13.873504382848777, + "learning_rate": 5.016777087504757e-06, + "loss": 0.23727569580078126, + "step": 61370 + }, + { + "epoch": 0.530691476943563, + "grad_norm": 11.650166090164294, + "learning_rate": 5.016626255925963e-06, + "loss": 0.05111770629882813, + "step": 61375 + }, + { + "epoch": 0.5307347104651062, + "grad_norm": 10.24480543343127, + "learning_rate": 5.016475415046668e-06, + "loss": 0.19844970703125, + "step": 61380 + }, + { + "epoch": 0.5307779439866495, + "grad_norm": 9.586752619604308, + "learning_rate": 5.016324564867567e-06, + "loss": 0.4253795623779297, + "step": 61385 + }, + { + "epoch": 0.5308211775081928, + "grad_norm": 36.746794459923215, + "learning_rate": 5.016173705389354e-06, + "loss": 0.21456565856933593, + "step": 61390 + }, + { + "epoch": 0.530864411029736, + "grad_norm": 16.904256885081566, + "learning_rate": 5.016022836612727e-06, + "loss": 0.1432699203491211, + "step": 61395 + }, + { + "epoch": 0.5309076445512793, + "grad_norm": 41.46318318911471, + "learning_rate": 5.015871958538381e-06, + "loss": 0.4136444091796875, + "step": 61400 + }, + { + "epoch": 0.5309508780728226, + "grad_norm": 0.4367351917359215, + "learning_rate": 5.01572107116701e-06, + "loss": 0.24252243041992189, + "step": 61405 + }, + { + "epoch": 0.5309941115943658, + "grad_norm": 1.6423191181525625, + "learning_rate": 5.0155701744993135e-06, + "loss": 0.1977325439453125, + "step": 61410 + }, + { + "epoch": 0.531037345115909, + "grad_norm": 8.93346517422219, + "learning_rate": 5.015419268535985e-06, + "loss": 0.04122238159179688, + "step": 61415 + }, + { + "epoch": 0.5310805786374523, + "grad_norm": 1.9349797692314479, + "learning_rate": 5.01526835327772e-06, + "loss": 0.10662841796875, + "step": 61420 + }, + { + "epoch": 0.5311238121589956, + "grad_norm": 7.735807163519915, + "learning_rate": 5.015117428725217e-06, + "loss": 0.1288055419921875, + "step": 61425 + }, + { + "epoch": 0.5311670456805389, + "grad_norm": 28.55256385900191, + "learning_rate": 5.014966494879169e-06, + "loss": 0.3568838119506836, + "step": 61430 + }, + { + "epoch": 0.5312102792020821, + "grad_norm": 29.872553701073656, + "learning_rate": 5.014815551740274e-06, + "loss": 0.2221710205078125, + "step": 61435 + }, + { + "epoch": 0.5312535127236254, + "grad_norm": 11.990704498358806, + "learning_rate": 5.014664599309228e-06, + "loss": 0.09202651977539063, + "step": 61440 + }, + { + "epoch": 0.5312967462451686, + "grad_norm": 3.3998934199864053, + "learning_rate": 5.0145136375867256e-06, + "loss": 0.1007965087890625, + "step": 61445 + }, + { + "epoch": 0.5313399797667119, + "grad_norm": 5.045240923080389, + "learning_rate": 5.0143626665734656e-06, + "loss": 0.072808837890625, + "step": 61450 + }, + { + "epoch": 0.5313832132882552, + "grad_norm": 20.20998168348255, + "learning_rate": 5.014211686270143e-06, + "loss": 0.08595657348632812, + "step": 61455 + }, + { + "epoch": 0.5314264468097984, + "grad_norm": 1.4197517845546819, + "learning_rate": 5.014060696677453e-06, + "loss": 0.09164581298828126, + "step": 61460 + }, + { + "epoch": 0.5314696803313417, + "grad_norm": 19.008653587264114, + "learning_rate": 5.013909697796094e-06, + "loss": 0.0848541259765625, + "step": 61465 + }, + { + "epoch": 0.531512913852885, + "grad_norm": 0.06414600511641495, + "learning_rate": 5.013758689626761e-06, + "loss": 0.17332763671875, + "step": 61470 + }, + { + "epoch": 0.5315561473744282, + "grad_norm": 9.55375123824681, + "learning_rate": 5.013607672170152e-06, + "loss": 0.385113525390625, + "step": 61475 + }, + { + "epoch": 0.5315993808959715, + "grad_norm": 1.261397103342932, + "learning_rate": 5.01345664542696e-06, + "loss": 0.0743682861328125, + "step": 61480 + }, + { + "epoch": 0.5316426144175148, + "grad_norm": 3.8914204014310156, + "learning_rate": 5.013305609397886e-06, + "loss": 0.1075897216796875, + "step": 61485 + }, + { + "epoch": 0.531685847939058, + "grad_norm": 1.2861483972403025, + "learning_rate": 5.013154564083624e-06, + "loss": 0.0884307861328125, + "step": 61490 + }, + { + "epoch": 0.5317290814606013, + "grad_norm": 14.229453582451443, + "learning_rate": 5.013003509484871e-06, + "loss": 0.22603607177734375, + "step": 61495 + }, + { + "epoch": 0.5317723149821445, + "grad_norm": 0.38697514572334735, + "learning_rate": 5.0128524456023235e-06, + "loss": 0.14812889099121093, + "step": 61500 + }, + { + "epoch": 0.5318155485036878, + "grad_norm": 14.148298636188496, + "learning_rate": 5.012701372436678e-06, + "loss": 0.30843353271484375, + "step": 61505 + }, + { + "epoch": 0.5318587820252311, + "grad_norm": 9.546249427841069, + "learning_rate": 5.012550289988633e-06, + "loss": 0.0716522216796875, + "step": 61510 + }, + { + "epoch": 0.5319020155467743, + "grad_norm": 5.60820451791156, + "learning_rate": 5.012399198258882e-06, + "loss": 0.144329833984375, + "step": 61515 + }, + { + "epoch": 0.5319452490683176, + "grad_norm": 0.3558527638858737, + "learning_rate": 5.012248097248124e-06, + "loss": 0.3224067687988281, + "step": 61520 + }, + { + "epoch": 0.5319884825898609, + "grad_norm": 16.558364310679124, + "learning_rate": 5.012096986957056e-06, + "loss": 0.10224838256835937, + "step": 61525 + }, + { + "epoch": 0.5320317161114041, + "grad_norm": 11.852594750334287, + "learning_rate": 5.011945867386375e-06, + "loss": 0.1215423583984375, + "step": 61530 + }, + { + "epoch": 0.5320749496329474, + "grad_norm": 32.46843788566901, + "learning_rate": 5.011794738536778e-06, + "loss": 0.32906494140625, + "step": 61535 + }, + { + "epoch": 0.5321181831544907, + "grad_norm": 2.6481909257854346, + "learning_rate": 5.011643600408959e-06, + "loss": 0.1917144775390625, + "step": 61540 + }, + { + "epoch": 0.5321614166760339, + "grad_norm": 0.5895001297438618, + "learning_rate": 5.011492453003619e-06, + "loss": 0.39024200439453127, + "step": 61545 + }, + { + "epoch": 0.5322046501975772, + "grad_norm": 5.240525816325731, + "learning_rate": 5.011341296321454e-06, + "loss": 0.1384105682373047, + "step": 61550 + }, + { + "epoch": 0.5322478837191205, + "grad_norm": 17.426591261329957, + "learning_rate": 5.01119013036316e-06, + "loss": 0.15027694702148436, + "step": 61555 + }, + { + "epoch": 0.5322911172406637, + "grad_norm": 7.193070103010632, + "learning_rate": 5.011038955129435e-06, + "loss": 0.120941162109375, + "step": 61560 + }, + { + "epoch": 0.532334350762207, + "grad_norm": 7.831683360315983, + "learning_rate": 5.010887770620976e-06, + "loss": 0.1637969970703125, + "step": 61565 + }, + { + "epoch": 0.5323775842837503, + "grad_norm": 10.906199491855627, + "learning_rate": 5.0107365768384795e-06, + "loss": 0.230828857421875, + "step": 61570 + }, + { + "epoch": 0.5324208178052935, + "grad_norm": 6.679333548317462, + "learning_rate": 5.010585373782645e-06, + "loss": 0.121954345703125, + "step": 61575 + }, + { + "epoch": 0.5324640513268368, + "grad_norm": 1.8840416767635453, + "learning_rate": 5.010434161454168e-06, + "loss": 0.3274658203125, + "step": 61580 + }, + { + "epoch": 0.5325072848483801, + "grad_norm": 53.53469181047533, + "learning_rate": 5.010282939853746e-06, + "loss": 0.17048492431640624, + "step": 61585 + }, + { + "epoch": 0.5325505183699233, + "grad_norm": 22.06267729735373, + "learning_rate": 5.010131708982077e-06, + "loss": 0.17246551513671876, + "step": 61590 + }, + { + "epoch": 0.5325937518914665, + "grad_norm": 47.984129911116305, + "learning_rate": 5.009980468839858e-06, + "loss": 0.19869461059570312, + "step": 61595 + }, + { + "epoch": 0.5326369854130099, + "grad_norm": 1.9143912400003276, + "learning_rate": 5.0098292194277865e-06, + "loss": 0.33382415771484375, + "step": 61600 + }, + { + "epoch": 0.5326802189345531, + "grad_norm": 4.7637415552549545, + "learning_rate": 5.009677960746561e-06, + "loss": 0.0970916748046875, + "step": 61605 + }, + { + "epoch": 0.5327234524560963, + "grad_norm": 19.80349831526252, + "learning_rate": 5.0095266927968775e-06, + "loss": 0.24241943359375, + "step": 61610 + }, + { + "epoch": 0.5327666859776397, + "grad_norm": 4.528312450819428, + "learning_rate": 5.009375415579434e-06, + "loss": 0.1043670654296875, + "step": 61615 + }, + { + "epoch": 0.5328099194991829, + "grad_norm": 34.61610819892928, + "learning_rate": 5.00922412909493e-06, + "loss": 0.2777740478515625, + "step": 61620 + }, + { + "epoch": 0.5328531530207261, + "grad_norm": 23.828248587956757, + "learning_rate": 5.0090728333440615e-06, + "loss": 0.21449432373046876, + "step": 61625 + }, + { + "epoch": 0.5328963865422695, + "grad_norm": 14.487574972796326, + "learning_rate": 5.008921528327525e-06, + "loss": 0.13088951110839844, + "step": 61630 + }, + { + "epoch": 0.5329396200638127, + "grad_norm": 0.33945850627606233, + "learning_rate": 5.008770214046022e-06, + "loss": 0.06642799377441407, + "step": 61635 + }, + { + "epoch": 0.5329828535853559, + "grad_norm": 10.57444485880408, + "learning_rate": 5.008618890500248e-06, + "loss": 0.2793769836425781, + "step": 61640 + }, + { + "epoch": 0.5330260871068992, + "grad_norm": 25.025280138807663, + "learning_rate": 5.0084675576909006e-06, + "loss": 0.10722808837890625, + "step": 61645 + }, + { + "epoch": 0.5330693206284425, + "grad_norm": 0.18378223057727489, + "learning_rate": 5.008316215618678e-06, + "loss": 0.2255523681640625, + "step": 61650 + }, + { + "epoch": 0.5331125541499857, + "grad_norm": 2.1038853496897136, + "learning_rate": 5.008164864284279e-06, + "loss": 0.0334014892578125, + "step": 61655 + }, + { + "epoch": 0.533155787671529, + "grad_norm": 9.203339954659556, + "learning_rate": 5.0080135036884e-06, + "loss": 0.1480804443359375, + "step": 61660 + }, + { + "epoch": 0.5331990211930723, + "grad_norm": 3.2405654838306637, + "learning_rate": 5.007862133831741e-06, + "loss": 0.051019287109375, + "step": 61665 + }, + { + "epoch": 0.5332422547146155, + "grad_norm": 40.41329893648348, + "learning_rate": 5.007710754714999e-06, + "loss": 0.27520599365234377, + "step": 61670 + }, + { + "epoch": 0.5332854882361587, + "grad_norm": 0.4256255180001555, + "learning_rate": 5.007559366338874e-06, + "loss": 0.16578521728515624, + "step": 61675 + }, + { + "epoch": 0.5333287217577021, + "grad_norm": 3.8271469448676423, + "learning_rate": 5.007407968704061e-06, + "loss": 0.11541328430175782, + "step": 61680 + }, + { + "epoch": 0.5333719552792453, + "grad_norm": 9.583784872157189, + "learning_rate": 5.00725656181126e-06, + "loss": 0.06396331787109374, + "step": 61685 + }, + { + "epoch": 0.5334151888007885, + "grad_norm": 2.8387202972769385, + "learning_rate": 5.00710514566117e-06, + "loss": 0.06010589599609375, + "step": 61690 + }, + { + "epoch": 0.5334584223223319, + "grad_norm": 36.039265993173565, + "learning_rate": 5.006953720254487e-06, + "loss": 0.3034454345703125, + "step": 61695 + }, + { + "epoch": 0.5335016558438751, + "grad_norm": 8.003377074753033, + "learning_rate": 5.0068022855919115e-06, + "loss": 0.460675048828125, + "step": 61700 + }, + { + "epoch": 0.5335448893654183, + "grad_norm": 1.1666553805929596, + "learning_rate": 5.006650841674141e-06, + "loss": 0.175439453125, + "step": 61705 + }, + { + "epoch": 0.5335881228869617, + "grad_norm": 45.350517224581125, + "learning_rate": 5.006499388501874e-06, + "loss": 0.31366119384765623, + "step": 61710 + }, + { + "epoch": 0.5336313564085049, + "grad_norm": 1.8819662881914028, + "learning_rate": 5.00634792607581e-06, + "loss": 0.051569366455078126, + "step": 61715 + }, + { + "epoch": 0.5336745899300481, + "grad_norm": 33.00067609190403, + "learning_rate": 5.006196454396646e-06, + "loss": 0.2245410919189453, + "step": 61720 + }, + { + "epoch": 0.5337178234515915, + "grad_norm": 7.630342312884826, + "learning_rate": 5.006044973465081e-06, + "loss": 0.1376667022705078, + "step": 61725 + }, + { + "epoch": 0.5337610569731347, + "grad_norm": 0.5808265423103668, + "learning_rate": 5.005893483281813e-06, + "loss": 0.07956657409667969, + "step": 61730 + }, + { + "epoch": 0.5338042904946779, + "grad_norm": 1.7711563763983162, + "learning_rate": 5.005741983847543e-06, + "loss": 0.09769058227539062, + "step": 61735 + }, + { + "epoch": 0.5338475240162213, + "grad_norm": 1.102338328625633, + "learning_rate": 5.005590475162968e-06, + "loss": 0.09283218383789063, + "step": 61740 + }, + { + "epoch": 0.5338907575377645, + "grad_norm": 5.7629074059307, + "learning_rate": 5.005438957228786e-06, + "loss": 0.1125152587890625, + "step": 61745 + }, + { + "epoch": 0.5339339910593077, + "grad_norm": 9.99574404923431, + "learning_rate": 5.005287430045698e-06, + "loss": 0.08738479614257813, + "step": 61750 + }, + { + "epoch": 0.5339772245808511, + "grad_norm": 7.740973481761607, + "learning_rate": 5.005135893614401e-06, + "loss": 0.0744049072265625, + "step": 61755 + }, + { + "epoch": 0.5340204581023943, + "grad_norm": 1.54653918741715, + "learning_rate": 5.004984347935595e-06, + "loss": 0.422528076171875, + "step": 61760 + }, + { + "epoch": 0.5340636916239375, + "grad_norm": 22.167164337975798, + "learning_rate": 5.004832793009977e-06, + "loss": 0.18421173095703125, + "step": 61765 + }, + { + "epoch": 0.5341069251454807, + "grad_norm": 6.388965589443219, + "learning_rate": 5.004681228838249e-06, + "loss": 0.2003631591796875, + "step": 61770 + }, + { + "epoch": 0.5341501586670241, + "grad_norm": 10.984856796530316, + "learning_rate": 5.0045296554211065e-06, + "loss": 0.19011993408203126, + "step": 61775 + }, + { + "epoch": 0.5341933921885673, + "grad_norm": 2.7393553639481287, + "learning_rate": 5.004378072759252e-06, + "loss": 0.030712890625, + "step": 61780 + }, + { + "epoch": 0.5342366257101105, + "grad_norm": 18.579243359749334, + "learning_rate": 5.004226480853382e-06, + "loss": 0.079888916015625, + "step": 61785 + }, + { + "epoch": 0.5342798592316539, + "grad_norm": 4.439534158207288, + "learning_rate": 5.004074879704196e-06, + "loss": 0.21446380615234376, + "step": 61790 + }, + { + "epoch": 0.5343230927531971, + "grad_norm": 7.054786278435126, + "learning_rate": 5.003923269312395e-06, + "loss": 0.4635406494140625, + "step": 61795 + }, + { + "epoch": 0.5343663262747403, + "grad_norm": 1.7736230340301795, + "learning_rate": 5.003771649678677e-06, + "loss": 0.06491241455078126, + "step": 61800 + }, + { + "epoch": 0.5344095597962837, + "grad_norm": 22.11903232215577, + "learning_rate": 5.00362002080374e-06, + "loss": 0.24793243408203125, + "step": 61805 + }, + { + "epoch": 0.5344527933178269, + "grad_norm": 27.037932562184448, + "learning_rate": 5.003468382688286e-06, + "loss": 0.255780029296875, + "step": 61810 + }, + { + "epoch": 0.5344960268393701, + "grad_norm": 1.2153065569443575, + "learning_rate": 5.003316735333011e-06, + "loss": 0.16551780700683594, + "step": 61815 + }, + { + "epoch": 0.5345392603609135, + "grad_norm": 5.696461144726234, + "learning_rate": 5.003165078738618e-06, + "loss": 0.12307891845703126, + "step": 61820 + }, + { + "epoch": 0.5345824938824567, + "grad_norm": 23.02948801000081, + "learning_rate": 5.003013412905804e-06, + "loss": 0.10767974853515624, + "step": 61825 + }, + { + "epoch": 0.5346257274039999, + "grad_norm": 5.40263019033317, + "learning_rate": 5.002861737835269e-06, + "loss": 0.20289306640625, + "step": 61830 + }, + { + "epoch": 0.5346689609255433, + "grad_norm": 0.7460474629118491, + "learning_rate": 5.002710053527712e-06, + "loss": 0.2199127197265625, + "step": 61835 + }, + { + "epoch": 0.5347121944470865, + "grad_norm": 0.5752279048962938, + "learning_rate": 5.002558359983834e-06, + "loss": 0.20742855072021485, + "step": 61840 + }, + { + "epoch": 0.5347554279686297, + "grad_norm": 0.8357307815231886, + "learning_rate": 5.002406657204334e-06, + "loss": 0.1013336181640625, + "step": 61845 + }, + { + "epoch": 0.534798661490173, + "grad_norm": 1.1415106445281356, + "learning_rate": 5.002254945189911e-06, + "loss": 0.20628738403320312, + "step": 61850 + }, + { + "epoch": 0.5348418950117163, + "grad_norm": 3.045408996075672, + "learning_rate": 5.002103223941265e-06, + "loss": 0.14630126953125, + "step": 61855 + }, + { + "epoch": 0.5348851285332595, + "grad_norm": 0.30150955465650325, + "learning_rate": 5.001951493459097e-06, + "loss": 0.29913330078125, + "step": 61860 + }, + { + "epoch": 0.5349283620548028, + "grad_norm": 3.9771824252628414, + "learning_rate": 5.001799753744104e-06, + "loss": 0.15744552612304688, + "step": 61865 + }, + { + "epoch": 0.5349715955763461, + "grad_norm": 8.426802832882593, + "learning_rate": 5.0016480047969875e-06, + "loss": 0.13282127380371095, + "step": 61870 + }, + { + "epoch": 0.5350148290978893, + "grad_norm": 53.79290244346885, + "learning_rate": 5.001496246618448e-06, + "loss": 0.101678466796875, + "step": 61875 + }, + { + "epoch": 0.5350580626194326, + "grad_norm": 17.985870823561893, + "learning_rate": 5.001344479209186e-06, + "loss": 0.13920822143554687, + "step": 61880 + }, + { + "epoch": 0.5351012961409759, + "grad_norm": 3.0024626604824047, + "learning_rate": 5.001192702569897e-06, + "loss": 0.173480224609375, + "step": 61885 + }, + { + "epoch": 0.5351445296625191, + "grad_norm": 9.943303840819734, + "learning_rate": 5.001040916701286e-06, + "loss": 0.0510009765625, + "step": 61890 + }, + { + "epoch": 0.5351877631840624, + "grad_norm": 1.8587619054382416, + "learning_rate": 5.000889121604051e-06, + "loss": 0.04316253662109375, + "step": 61895 + }, + { + "epoch": 0.5352309967056057, + "grad_norm": 34.60993714861635, + "learning_rate": 5.000737317278892e-06, + "loss": 0.3241233825683594, + "step": 61900 + }, + { + "epoch": 0.5352742302271489, + "grad_norm": 0.27512490621733604, + "learning_rate": 5.000585503726509e-06, + "loss": 0.09819717407226562, + "step": 61905 + }, + { + "epoch": 0.5353174637486922, + "grad_norm": 2.310572260306316, + "learning_rate": 5.000433680947603e-06, + "loss": 0.06484222412109375, + "step": 61910 + }, + { + "epoch": 0.5353606972702355, + "grad_norm": 6.151099651902213, + "learning_rate": 5.000281848942873e-06, + "loss": 0.2369140625, + "step": 61915 + }, + { + "epoch": 0.5354039307917787, + "grad_norm": 12.471722111297066, + "learning_rate": 5.00013000771302e-06, + "loss": 0.1627948760986328, + "step": 61920 + }, + { + "epoch": 0.535447164313322, + "grad_norm": 11.856822011273092, + "learning_rate": 4.999978157258745e-06, + "loss": 0.4499176025390625, + "step": 61925 + }, + { + "epoch": 0.5354903978348653, + "grad_norm": 3.094734562868667, + "learning_rate": 4.999826297580746e-06, + "loss": 0.08703079223632812, + "step": 61930 + }, + { + "epoch": 0.5355336313564085, + "grad_norm": 5.090562535909471, + "learning_rate": 4.999674428679726e-06, + "loss": 0.15683441162109374, + "step": 61935 + }, + { + "epoch": 0.5355768648779518, + "grad_norm": 10.320401417912693, + "learning_rate": 4.999522550556384e-06, + "loss": 0.08795051574707032, + "step": 61940 + }, + { + "epoch": 0.535620098399495, + "grad_norm": 20.347446530692455, + "learning_rate": 4.999370663211421e-06, + "loss": 0.21405715942382814, + "step": 61945 + }, + { + "epoch": 0.5356633319210383, + "grad_norm": 2.4810242791129467, + "learning_rate": 4.999218766645536e-06, + "loss": 0.39979400634765627, + "step": 61950 + }, + { + "epoch": 0.5357065654425816, + "grad_norm": 15.154141756177335, + "learning_rate": 4.999066860859432e-06, + "loss": 0.13156890869140625, + "step": 61955 + }, + { + "epoch": 0.5357497989641248, + "grad_norm": 2.6860505808498356, + "learning_rate": 4.998914945853807e-06, + "loss": 0.0283721923828125, + "step": 61960 + }, + { + "epoch": 0.5357930324856681, + "grad_norm": 1.830066415464161, + "learning_rate": 4.998763021629364e-06, + "loss": 0.2475616455078125, + "step": 61965 + }, + { + "epoch": 0.5358362660072113, + "grad_norm": 18.322192829495624, + "learning_rate": 4.9986110881868015e-06, + "loss": 0.11736907958984374, + "step": 61970 + }, + { + "epoch": 0.5358794995287546, + "grad_norm": 7.106734945062029, + "learning_rate": 4.998459145526822e-06, + "loss": 0.16661376953125, + "step": 61975 + }, + { + "epoch": 0.5359227330502979, + "grad_norm": 19.471946544668047, + "learning_rate": 4.998307193650125e-06, + "loss": 0.3442352294921875, + "step": 61980 + }, + { + "epoch": 0.5359659665718411, + "grad_norm": 33.65810153528926, + "learning_rate": 4.998155232557412e-06, + "loss": 0.2804130554199219, + "step": 61985 + }, + { + "epoch": 0.5360092000933844, + "grad_norm": 22.67848646880795, + "learning_rate": 4.998003262249385e-06, + "loss": 0.4632904052734375, + "step": 61990 + }, + { + "epoch": 0.5360524336149277, + "grad_norm": 37.011995749832835, + "learning_rate": 4.997851282726741e-06, + "loss": 0.44466094970703124, + "step": 61995 + }, + { + "epoch": 0.5360956671364709, + "grad_norm": 8.708852316395193, + "learning_rate": 4.997699293990186e-06, + "loss": 0.0514556884765625, + "step": 62000 + }, + { + "epoch": 0.5361389006580142, + "grad_norm": 0.5321740879213046, + "learning_rate": 4.997547296040417e-06, + "loss": 0.06713104248046875, + "step": 62005 + }, + { + "epoch": 0.5361821341795575, + "grad_norm": 5.124506476791877, + "learning_rate": 4.997395288878136e-06, + "loss": 0.307958984375, + "step": 62010 + }, + { + "epoch": 0.5362253677011007, + "grad_norm": 10.030022576999402, + "learning_rate": 4.997243272504045e-06, + "loss": 0.1623382568359375, + "step": 62015 + }, + { + "epoch": 0.536268601222644, + "grad_norm": 17.428371020594, + "learning_rate": 4.997091246918845e-06, + "loss": 0.17445907592773438, + "step": 62020 + }, + { + "epoch": 0.5363118347441872, + "grad_norm": 1.1734869606913543, + "learning_rate": 4.9969392121232356e-06, + "loss": 0.3726104736328125, + "step": 62025 + }, + { + "epoch": 0.5363550682657305, + "grad_norm": 1.0055214461262807, + "learning_rate": 4.996787168117919e-06, + "loss": 0.12009849548339843, + "step": 62030 + }, + { + "epoch": 0.5363983017872738, + "grad_norm": 18.265794747178173, + "learning_rate": 4.996635114903597e-06, + "loss": 0.14145240783691407, + "step": 62035 + }, + { + "epoch": 0.536441535308817, + "grad_norm": 2.6200391825409244, + "learning_rate": 4.996483052480971e-06, + "loss": 0.28477783203125, + "step": 62040 + }, + { + "epoch": 0.5364847688303603, + "grad_norm": 23.451291415187338, + "learning_rate": 4.9963309808507405e-06, + "loss": 0.3263153076171875, + "step": 62045 + }, + { + "epoch": 0.5365280023519036, + "grad_norm": 30.345394616849056, + "learning_rate": 4.996178900013607e-06, + "loss": 0.2162200927734375, + "step": 62050 + }, + { + "epoch": 0.5365712358734468, + "grad_norm": 5.692101426321659, + "learning_rate": 4.996026809970275e-06, + "loss": 0.15313720703125, + "step": 62055 + }, + { + "epoch": 0.5366144693949901, + "grad_norm": 18.707016277905762, + "learning_rate": 4.995874710721442e-06, + "loss": 0.22582244873046875, + "step": 62060 + }, + { + "epoch": 0.5366577029165334, + "grad_norm": 43.23444743973761, + "learning_rate": 4.995722602267811e-06, + "loss": 0.20889892578125, + "step": 62065 + }, + { + "epoch": 0.5367009364380766, + "grad_norm": 42.13169963828679, + "learning_rate": 4.995570484610085e-06, + "loss": 0.22325439453125, + "step": 62070 + }, + { + "epoch": 0.5367441699596199, + "grad_norm": 26.11389660413531, + "learning_rate": 4.9954183577489635e-06, + "loss": 0.2374725341796875, + "step": 62075 + }, + { + "epoch": 0.5367874034811632, + "grad_norm": 3.6288488179061007, + "learning_rate": 4.99526622168515e-06, + "loss": 0.249493408203125, + "step": 62080 + }, + { + "epoch": 0.5368306370027064, + "grad_norm": 12.78041047800833, + "learning_rate": 4.9951140764193445e-06, + "loss": 0.1171234130859375, + "step": 62085 + }, + { + "epoch": 0.5368738705242497, + "grad_norm": 3.6922639168349036, + "learning_rate": 4.994961921952248e-06, + "loss": 0.23956298828125, + "step": 62090 + }, + { + "epoch": 0.536917104045793, + "grad_norm": 5.366856928011813, + "learning_rate": 4.994809758284565e-06, + "loss": 0.0364837646484375, + "step": 62095 + }, + { + "epoch": 0.5369603375673362, + "grad_norm": 4.391014446033003, + "learning_rate": 4.994657585416995e-06, + "loss": 0.029193878173828125, + "step": 62100 + }, + { + "epoch": 0.5370035710888795, + "grad_norm": 0.20404519975688876, + "learning_rate": 4.994505403350239e-06, + "loss": 0.0959197998046875, + "step": 62105 + }, + { + "epoch": 0.5370468046104228, + "grad_norm": 0.7295833137152301, + "learning_rate": 4.9943532120850015e-06, + "loss": 0.05809478759765625, + "step": 62110 + }, + { + "epoch": 0.537090038131966, + "grad_norm": 1.7860519468902827, + "learning_rate": 4.994201011621985e-06, + "loss": 0.07546157836914062, + "step": 62115 + }, + { + "epoch": 0.5371332716535092, + "grad_norm": 0.2321726143580119, + "learning_rate": 4.9940488019618875e-06, + "loss": 0.0764862060546875, + "step": 62120 + }, + { + "epoch": 0.5371765051750526, + "grad_norm": 4.109280749647618, + "learning_rate": 4.993896583105414e-06, + "loss": 0.13754920959472655, + "step": 62125 + }, + { + "epoch": 0.5372197386965958, + "grad_norm": 6.261705900369135, + "learning_rate": 4.993744355053265e-06, + "loss": 0.0324371337890625, + "step": 62130 + }, + { + "epoch": 0.537262972218139, + "grad_norm": 0.988452411507858, + "learning_rate": 4.9935921178061425e-06, + "loss": 0.05429840087890625, + "step": 62135 + }, + { + "epoch": 0.5373062057396824, + "grad_norm": 17.117548213661355, + "learning_rate": 4.993439871364751e-06, + "loss": 0.2397705078125, + "step": 62140 + }, + { + "epoch": 0.5373494392612256, + "grad_norm": 97.97955065473442, + "learning_rate": 4.993287615729791e-06, + "loss": 0.19125213623046874, + "step": 62145 + }, + { + "epoch": 0.5373926727827688, + "grad_norm": 25.120350221829703, + "learning_rate": 4.993135350901962e-06, + "loss": 0.23453369140625, + "step": 62150 + }, + { + "epoch": 0.5374359063043121, + "grad_norm": 31.762047140895454, + "learning_rate": 4.992983076881972e-06, + "loss": 0.10420951843261719, + "step": 62155 + }, + { + "epoch": 0.5374791398258554, + "grad_norm": 10.346091040676425, + "learning_rate": 4.992830793670519e-06, + "loss": 0.19626846313476562, + "step": 62160 + }, + { + "epoch": 0.5375223733473986, + "grad_norm": 20.45151866680968, + "learning_rate": 4.9926785012683065e-06, + "loss": 0.12545623779296874, + "step": 62165 + }, + { + "epoch": 0.537565606868942, + "grad_norm": 8.809211990301682, + "learning_rate": 4.992526199676036e-06, + "loss": 0.1150421142578125, + "step": 62170 + }, + { + "epoch": 0.5376088403904852, + "grad_norm": 12.375690046776292, + "learning_rate": 4.992373888894412e-06, + "loss": 0.10975799560546876, + "step": 62175 + }, + { + "epoch": 0.5376520739120284, + "grad_norm": 13.18270658494109, + "learning_rate": 4.9922215689241355e-06, + "loss": 0.3920562744140625, + "step": 62180 + }, + { + "epoch": 0.5376953074335717, + "grad_norm": 4.9054192470714755, + "learning_rate": 4.992069239765908e-06, + "loss": 0.49599151611328124, + "step": 62185 + }, + { + "epoch": 0.537738540955115, + "grad_norm": 2.91470262154007, + "learning_rate": 4.991916901420434e-06, + "loss": 0.1333251953125, + "step": 62190 + }, + { + "epoch": 0.5377817744766582, + "grad_norm": 7.917048582598327, + "learning_rate": 4.9917645538884154e-06, + "loss": 0.0591644287109375, + "step": 62195 + }, + { + "epoch": 0.5378250079982014, + "grad_norm": 27.365995166521166, + "learning_rate": 4.991612197170554e-06, + "loss": 0.3071311950683594, + "step": 62200 + }, + { + "epoch": 0.5378682415197448, + "grad_norm": 20.798338869513074, + "learning_rate": 4.991459831267554e-06, + "loss": 0.09171371459960938, + "step": 62205 + }, + { + "epoch": 0.537911475041288, + "grad_norm": 13.693727818199374, + "learning_rate": 4.991307456180117e-06, + "loss": 0.1520923614501953, + "step": 62210 + }, + { + "epoch": 0.5379547085628312, + "grad_norm": 1.3264341028124302, + "learning_rate": 4.991155071908946e-06, + "loss": 0.3759635925292969, + "step": 62215 + }, + { + "epoch": 0.5379979420843746, + "grad_norm": 57.744065966365596, + "learning_rate": 4.991002678454742e-06, + "loss": 0.23402481079101561, + "step": 62220 + }, + { + "epoch": 0.5380411756059178, + "grad_norm": 4.974120524223735, + "learning_rate": 4.990850275818212e-06, + "loss": 0.22563858032226564, + "step": 62225 + }, + { + "epoch": 0.538084409127461, + "grad_norm": 8.179191511622106, + "learning_rate": 4.990697864000056e-06, + "loss": 0.1073028564453125, + "step": 62230 + }, + { + "epoch": 0.5381276426490044, + "grad_norm": 11.591352253208981, + "learning_rate": 4.990545443000977e-06, + "loss": 0.264202880859375, + "step": 62235 + }, + { + "epoch": 0.5381708761705476, + "grad_norm": 0.6428615030367566, + "learning_rate": 4.990393012821678e-06, + "loss": 0.04548425674438476, + "step": 62240 + }, + { + "epoch": 0.5382141096920908, + "grad_norm": 13.782037157048943, + "learning_rate": 4.990240573462863e-06, + "loss": 0.136083984375, + "step": 62245 + }, + { + "epoch": 0.5382573432136342, + "grad_norm": 0.2652564194046113, + "learning_rate": 4.990088124925233e-06, + "loss": 0.124383544921875, + "step": 62250 + }, + { + "epoch": 0.5383005767351774, + "grad_norm": 20.76722105305847, + "learning_rate": 4.989935667209494e-06, + "loss": 0.24971237182617187, + "step": 62255 + }, + { + "epoch": 0.5383438102567206, + "grad_norm": 1.302026391157295, + "learning_rate": 4.989783200316347e-06, + "loss": 0.10714941024780274, + "step": 62260 + }, + { + "epoch": 0.538387043778264, + "grad_norm": 3.3233301972926883, + "learning_rate": 4.989630724246495e-06, + "loss": 0.24939422607421874, + "step": 62265 + }, + { + "epoch": 0.5384302772998072, + "grad_norm": 1.7457838134102213, + "learning_rate": 4.989478239000643e-06, + "loss": 0.13114089965820314, + "step": 62270 + }, + { + "epoch": 0.5384735108213504, + "grad_norm": 27.5084559660309, + "learning_rate": 4.9893257445794925e-06, + "loss": 0.1884441375732422, + "step": 62275 + }, + { + "epoch": 0.5385167443428938, + "grad_norm": 23.604832785924998, + "learning_rate": 4.9891732409837465e-06, + "loss": 0.41022567749023436, + "step": 62280 + }, + { + "epoch": 0.538559977864437, + "grad_norm": 2.3844994386264453, + "learning_rate": 4.989020728214111e-06, + "loss": 0.298187255859375, + "step": 62285 + }, + { + "epoch": 0.5386032113859802, + "grad_norm": 8.76228536146049, + "learning_rate": 4.988868206271287e-06, + "loss": 0.30767822265625, + "step": 62290 + }, + { + "epoch": 0.5386464449075234, + "grad_norm": 10.106874130508988, + "learning_rate": 4.988715675155979e-06, + "loss": 0.11443367004394531, + "step": 62295 + }, + { + "epoch": 0.5386896784290668, + "grad_norm": 4.471899319532895, + "learning_rate": 4.988563134868889e-06, + "loss": 0.17056198120117189, + "step": 62300 + }, + { + "epoch": 0.53873291195061, + "grad_norm": 9.340795460573318, + "learning_rate": 4.988410585410722e-06, + "loss": 0.21633453369140626, + "step": 62305 + }, + { + "epoch": 0.5387761454721532, + "grad_norm": 10.689459077325708, + "learning_rate": 4.988258026782182e-06, + "loss": 0.137322998046875, + "step": 62310 + }, + { + "epoch": 0.5388193789936966, + "grad_norm": 22.755213991718904, + "learning_rate": 4.98810545898397e-06, + "loss": 0.3515468597412109, + "step": 62315 + }, + { + "epoch": 0.5388626125152398, + "grad_norm": 5.614846548331474, + "learning_rate": 4.987952882016793e-06, + "loss": 0.1970855712890625, + "step": 62320 + }, + { + "epoch": 0.538905846036783, + "grad_norm": 93.02338853562833, + "learning_rate": 4.987800295881351e-06, + "loss": 0.6518936157226562, + "step": 62325 + }, + { + "epoch": 0.5389490795583264, + "grad_norm": 2.3383943731992733, + "learning_rate": 4.987647700578351e-06, + "loss": 0.07755584716796875, + "step": 62330 + }, + { + "epoch": 0.5389923130798696, + "grad_norm": 22.65646355110409, + "learning_rate": 4.987495096108496e-06, + "loss": 0.321685791015625, + "step": 62335 + }, + { + "epoch": 0.5390355466014128, + "grad_norm": 33.257557385235586, + "learning_rate": 4.987342482472488e-06, + "loss": 0.13148212432861328, + "step": 62340 + }, + { + "epoch": 0.5390787801229562, + "grad_norm": 13.446993106651222, + "learning_rate": 4.987189859671032e-06, + "loss": 0.130230712890625, + "step": 62345 + }, + { + "epoch": 0.5391220136444994, + "grad_norm": 1.708694954637702, + "learning_rate": 4.9870372277048325e-06, + "loss": 0.037412261962890624, + "step": 62350 + }, + { + "epoch": 0.5391652471660426, + "grad_norm": 1.9350041166012057, + "learning_rate": 4.9868845865745935e-06, + "loss": 0.0475738525390625, + "step": 62355 + }, + { + "epoch": 0.539208480687586, + "grad_norm": 2.793515582123683, + "learning_rate": 4.9867319362810174e-06, + "loss": 0.058905029296875, + "step": 62360 + }, + { + "epoch": 0.5392517142091292, + "grad_norm": 33.55339688565548, + "learning_rate": 4.986579276824809e-06, + "loss": 0.08532485961914063, + "step": 62365 + }, + { + "epoch": 0.5392949477306724, + "grad_norm": 5.697546992949083, + "learning_rate": 4.986426608206674e-06, + "loss": 0.1273895263671875, + "step": 62370 + }, + { + "epoch": 0.5393381812522157, + "grad_norm": 32.58334597368, + "learning_rate": 4.986273930427313e-06, + "loss": 0.3612548828125, + "step": 62375 + }, + { + "epoch": 0.539381414773759, + "grad_norm": 0.5149425416167862, + "learning_rate": 4.986121243487434e-06, + "loss": 0.2203369140625, + "step": 62380 + }, + { + "epoch": 0.5394246482953022, + "grad_norm": 28.06693417592731, + "learning_rate": 4.985968547387738e-06, + "loss": 0.45821590423583985, + "step": 62385 + }, + { + "epoch": 0.5394678818168455, + "grad_norm": 1.8518698781453309, + "learning_rate": 4.985815842128931e-06, + "loss": 0.07980060577392578, + "step": 62390 + }, + { + "epoch": 0.5395111153383888, + "grad_norm": 17.941627908062216, + "learning_rate": 4.985663127711716e-06, + "loss": 0.428533935546875, + "step": 62395 + }, + { + "epoch": 0.539554348859932, + "grad_norm": 0.21178439210940136, + "learning_rate": 4.985510404136799e-06, + "loss": 0.103485107421875, + "step": 62400 + }, + { + "epoch": 0.5395975823814753, + "grad_norm": 9.341119338566713, + "learning_rate": 4.985357671404884e-06, + "loss": 0.1859375, + "step": 62405 + }, + { + "epoch": 0.5396408159030186, + "grad_norm": 16.290699442512413, + "learning_rate": 4.9852049295166746e-06, + "loss": 0.2903293609619141, + "step": 62410 + }, + { + "epoch": 0.5396840494245618, + "grad_norm": 0.8675057557044608, + "learning_rate": 4.985052178472875e-06, + "loss": 0.2836006164550781, + "step": 62415 + }, + { + "epoch": 0.5397272829461051, + "grad_norm": 7.566008157006689, + "learning_rate": 4.98489941827419e-06, + "loss": 0.10753936767578125, + "step": 62420 + }, + { + "epoch": 0.5397705164676484, + "grad_norm": 1.7251003992412424, + "learning_rate": 4.984746648921324e-06, + "loss": 0.14370880126953126, + "step": 62425 + }, + { + "epoch": 0.5398137499891916, + "grad_norm": 1.2310718324135173, + "learning_rate": 4.984593870414983e-06, + "loss": 0.07596206665039062, + "step": 62430 + }, + { + "epoch": 0.5398569835107349, + "grad_norm": 8.855694471008338, + "learning_rate": 4.98444108275587e-06, + "loss": 0.1758331298828125, + "step": 62435 + }, + { + "epoch": 0.5399002170322782, + "grad_norm": 4.1150956694947824, + "learning_rate": 4.9842882859446895e-06, + "loss": 0.11917190551757813, + "step": 62440 + }, + { + "epoch": 0.5399434505538214, + "grad_norm": 29.050241176307964, + "learning_rate": 4.984135479982147e-06, + "loss": 0.1512939453125, + "step": 62445 + }, + { + "epoch": 0.5399866840753647, + "grad_norm": 1.2676882756671906, + "learning_rate": 4.983982664868947e-06, + "loss": 0.016647720336914064, + "step": 62450 + }, + { + "epoch": 0.540029917596908, + "grad_norm": 52.591832705431045, + "learning_rate": 4.983829840605795e-06, + "loss": 0.24900970458984376, + "step": 62455 + }, + { + "epoch": 0.5400731511184512, + "grad_norm": 24.356546468633766, + "learning_rate": 4.983677007193394e-06, + "loss": 0.25626220703125, + "step": 62460 + }, + { + "epoch": 0.5401163846399945, + "grad_norm": 20.24625231028196, + "learning_rate": 4.9835241646324505e-06, + "loss": 0.10371627807617187, + "step": 62465 + }, + { + "epoch": 0.5401596181615377, + "grad_norm": 0.297869831943087, + "learning_rate": 4.983371312923669e-06, + "loss": 0.030820465087890624, + "step": 62470 + }, + { + "epoch": 0.540202851683081, + "grad_norm": 9.2521877149157, + "learning_rate": 4.983218452067754e-06, + "loss": 1.0760787963867187, + "step": 62475 + }, + { + "epoch": 0.5402460852046242, + "grad_norm": 1.8429931344110757, + "learning_rate": 4.983065582065411e-06, + "loss": 0.27274551391601565, + "step": 62480 + }, + { + "epoch": 0.5402893187261675, + "grad_norm": 31.919188928109136, + "learning_rate": 4.982912702917344e-06, + "loss": 0.6957763671875, + "step": 62485 + }, + { + "epoch": 0.5403325522477108, + "grad_norm": 2.5059819873533566, + "learning_rate": 4.982759814624258e-06, + "loss": 0.053003692626953126, + "step": 62490 + }, + { + "epoch": 0.540375785769254, + "grad_norm": 1.7458292983050199, + "learning_rate": 4.982606917186861e-06, + "loss": 0.0634765625, + "step": 62495 + }, + { + "epoch": 0.5404190192907973, + "grad_norm": 4.358465929207919, + "learning_rate": 4.982454010605856e-06, + "loss": 0.16764392852783203, + "step": 62500 + }, + { + "epoch": 0.5404622528123406, + "grad_norm": 8.592844738286994, + "learning_rate": 4.982301094881947e-06, + "loss": 0.242266845703125, + "step": 62505 + }, + { + "epoch": 0.5405054863338838, + "grad_norm": 35.90284471419332, + "learning_rate": 4.982148170015841e-06, + "loss": 0.5561294555664062, + "step": 62510 + }, + { + "epoch": 0.5405487198554271, + "grad_norm": 41.66393764358975, + "learning_rate": 4.981995236008243e-06, + "loss": 0.5066009521484375, + "step": 62515 + }, + { + "epoch": 0.5405919533769704, + "grad_norm": 5.55830746470856, + "learning_rate": 4.981842292859858e-06, + "loss": 0.266015625, + "step": 62520 + }, + { + "epoch": 0.5406351868985136, + "grad_norm": 7.851011067587743, + "learning_rate": 4.981689340571392e-06, + "loss": 0.09356536865234374, + "step": 62525 + }, + { + "epoch": 0.5406784204200569, + "grad_norm": 26.023748036233766, + "learning_rate": 4.98153637914355e-06, + "loss": 0.1885711669921875, + "step": 62530 + }, + { + "epoch": 0.5407216539416002, + "grad_norm": 15.287049898971247, + "learning_rate": 4.9813834085770355e-06, + "loss": 0.18172607421875, + "step": 62535 + }, + { + "epoch": 0.5407648874631434, + "grad_norm": 9.85399836397549, + "learning_rate": 4.981230428872557e-06, + "loss": 0.0674102783203125, + "step": 62540 + }, + { + "epoch": 0.5408081209846867, + "grad_norm": 36.55191086475514, + "learning_rate": 4.981077440030819e-06, + "loss": 0.16417236328125, + "step": 62545 + }, + { + "epoch": 0.5408513545062299, + "grad_norm": 22.305966905282624, + "learning_rate": 4.980924442052527e-06, + "loss": 0.6950302124023438, + "step": 62550 + }, + { + "epoch": 0.5408945880277732, + "grad_norm": 0.7225088625456735, + "learning_rate": 4.980771434938386e-06, + "loss": 0.027409744262695313, + "step": 62555 + }, + { + "epoch": 0.5409378215493165, + "grad_norm": 60.6097444547725, + "learning_rate": 4.980618418689102e-06, + "loss": 0.31286087036132815, + "step": 62560 + }, + { + "epoch": 0.5409810550708597, + "grad_norm": 3.0343316160261553, + "learning_rate": 4.980465393305381e-06, + "loss": 0.61416015625, + "step": 62565 + }, + { + "epoch": 0.541024288592403, + "grad_norm": 22.138683955343176, + "learning_rate": 4.980312358787929e-06, + "loss": 0.12869224548339844, + "step": 62570 + }, + { + "epoch": 0.5410675221139463, + "grad_norm": 1.097244252224793, + "learning_rate": 4.980159315137451e-06, + "loss": 0.0570281982421875, + "step": 62575 + }, + { + "epoch": 0.5411107556354895, + "grad_norm": 1.8488919227009781, + "learning_rate": 4.980006262354653e-06, + "loss": 0.09125747680664062, + "step": 62580 + }, + { + "epoch": 0.5411539891570328, + "grad_norm": 6.757696523247361, + "learning_rate": 4.979853200440242e-06, + "loss": 0.059357833862304685, + "step": 62585 + }, + { + "epoch": 0.5411972226785761, + "grad_norm": 0.396932170839413, + "learning_rate": 4.979700129394922e-06, + "loss": 0.021489715576171874, + "step": 62590 + }, + { + "epoch": 0.5412404562001193, + "grad_norm": 14.075387207138771, + "learning_rate": 4.9795470492194e-06, + "loss": 0.2210296630859375, + "step": 62595 + }, + { + "epoch": 0.5412836897216626, + "grad_norm": 1.6044671961100538, + "learning_rate": 4.979393959914382e-06, + "loss": 0.07033157348632812, + "step": 62600 + }, + { + "epoch": 0.5413269232432059, + "grad_norm": 27.259662178986616, + "learning_rate": 4.979240861480575e-06, + "loss": 0.42185401916503906, + "step": 62605 + }, + { + "epoch": 0.5413701567647491, + "grad_norm": 31.738112047329807, + "learning_rate": 4.979087753918683e-06, + "loss": 0.5085433959960938, + "step": 62610 + }, + { + "epoch": 0.5414133902862924, + "grad_norm": 4.464698735147008, + "learning_rate": 4.978934637229413e-06, + "loss": 0.21676177978515626, + "step": 62615 + }, + { + "epoch": 0.5414566238078357, + "grad_norm": 5.9982394769653276, + "learning_rate": 4.978781511413472e-06, + "loss": 0.18484649658203126, + "step": 62620 + }, + { + "epoch": 0.5414998573293789, + "grad_norm": 3.1137124171819104, + "learning_rate": 4.978628376471565e-06, + "loss": 0.2676511764526367, + "step": 62625 + }, + { + "epoch": 0.5415430908509222, + "grad_norm": 6.181203178408766, + "learning_rate": 4.978475232404399e-06, + "loss": 0.1842620849609375, + "step": 62630 + }, + { + "epoch": 0.5415863243724655, + "grad_norm": 4.88167524570093, + "learning_rate": 4.978322079212681e-06, + "loss": 0.0520904541015625, + "step": 62635 + }, + { + "epoch": 0.5416295578940087, + "grad_norm": 10.879588682415932, + "learning_rate": 4.978168916897114e-06, + "loss": 0.11636962890625, + "step": 62640 + }, + { + "epoch": 0.5416727914155519, + "grad_norm": 2.8180923272379395, + "learning_rate": 4.9780157454584094e-06, + "loss": 0.20685272216796874, + "step": 62645 + }, + { + "epoch": 0.5417160249370953, + "grad_norm": 10.779933467415962, + "learning_rate": 4.977862564897269e-06, + "loss": 0.068060302734375, + "step": 62650 + }, + { + "epoch": 0.5417592584586385, + "grad_norm": 6.406559757566243, + "learning_rate": 4.977709375214402e-06, + "loss": 0.48529205322265623, + "step": 62655 + }, + { + "epoch": 0.5418024919801817, + "grad_norm": 22.589788973752178, + "learning_rate": 4.977556176410515e-06, + "loss": 0.1569122314453125, + "step": 62660 + }, + { + "epoch": 0.541845725501725, + "grad_norm": 7.14611296174679, + "learning_rate": 4.977402968486312e-06, + "loss": 0.28492431640625, + "step": 62665 + }, + { + "epoch": 0.5418889590232683, + "grad_norm": 0.8186488469588341, + "learning_rate": 4.977249751442503e-06, + "loss": 0.25410003662109376, + "step": 62670 + }, + { + "epoch": 0.5419321925448115, + "grad_norm": 2.3616700280527696, + "learning_rate": 4.977096525279791e-06, + "loss": 0.18049049377441406, + "step": 62675 + }, + { + "epoch": 0.5419754260663548, + "grad_norm": 15.099533719143357, + "learning_rate": 4.976943289998886e-06, + "loss": 0.055844879150390624, + "step": 62680 + }, + { + "epoch": 0.5420186595878981, + "grad_norm": 0.19278022243317755, + "learning_rate": 4.976790045600492e-06, + "loss": 0.32527999877929686, + "step": 62685 + }, + { + "epoch": 0.5420618931094413, + "grad_norm": 23.09468596813384, + "learning_rate": 4.976636792085318e-06, + "loss": 0.2158477783203125, + "step": 62690 + }, + { + "epoch": 0.5421051266309846, + "grad_norm": 42.15041217030848, + "learning_rate": 4.976483529454069e-06, + "loss": 0.16599502563476562, + "step": 62695 + }, + { + "epoch": 0.5421483601525279, + "grad_norm": 31.74650916900967, + "learning_rate": 4.976330257707453e-06, + "loss": 0.22301025390625, + "step": 62700 + }, + { + "epoch": 0.5421915936740711, + "grad_norm": 0.6387721199544085, + "learning_rate": 4.976176976846176e-06, + "loss": 0.08256988525390625, + "step": 62705 + }, + { + "epoch": 0.5422348271956144, + "grad_norm": 0.06356291807134826, + "learning_rate": 4.9760236868709456e-06, + "loss": 0.19658393859863282, + "step": 62710 + }, + { + "epoch": 0.5422780607171577, + "grad_norm": 0.40773502043470583, + "learning_rate": 4.975870387782469e-06, + "loss": 0.01632843017578125, + "step": 62715 + }, + { + "epoch": 0.5423212942387009, + "grad_norm": 33.7980448516257, + "learning_rate": 4.975717079581454e-06, + "loss": 0.3091703414916992, + "step": 62720 + }, + { + "epoch": 0.5423645277602441, + "grad_norm": 1.5147671895412762, + "learning_rate": 4.975563762268604e-06, + "loss": 0.2662483215332031, + "step": 62725 + }, + { + "epoch": 0.5424077612817875, + "grad_norm": 14.423302452110576, + "learning_rate": 4.97541043584463e-06, + "loss": 0.0832977294921875, + "step": 62730 + }, + { + "epoch": 0.5424509948033307, + "grad_norm": 2.686540954769561, + "learning_rate": 4.975257100310236e-06, + "loss": 0.44765777587890626, + "step": 62735 + }, + { + "epoch": 0.5424942283248739, + "grad_norm": 18.236974919046148, + "learning_rate": 4.975103755666132e-06, + "loss": 0.21531982421875, + "step": 62740 + }, + { + "epoch": 0.5425374618464173, + "grad_norm": 10.934952848413623, + "learning_rate": 4.974950401913023e-06, + "loss": 0.17136154174804688, + "step": 62745 + }, + { + "epoch": 0.5425806953679605, + "grad_norm": 34.158265766513125, + "learning_rate": 4.974797039051619e-06, + "loss": 0.18226242065429688, + "step": 62750 + }, + { + "epoch": 0.5426239288895037, + "grad_norm": 3.652693614060662, + "learning_rate": 4.9746436670826246e-06, + "loss": 0.03426361083984375, + "step": 62755 + }, + { + "epoch": 0.5426671624110471, + "grad_norm": 0.853807317758858, + "learning_rate": 4.974490286006748e-06, + "loss": 0.10775585174560547, + "step": 62760 + }, + { + "epoch": 0.5427103959325903, + "grad_norm": 0.10297451366572336, + "learning_rate": 4.974336895824697e-06, + "loss": 0.07254180908203126, + "step": 62765 + }, + { + "epoch": 0.5427536294541335, + "grad_norm": 3.7629004110735322, + "learning_rate": 4.974183496537179e-06, + "loss": 0.08939743041992188, + "step": 62770 + }, + { + "epoch": 0.5427968629756769, + "grad_norm": 10.681799873600701, + "learning_rate": 4.974030088144901e-06, + "loss": 0.22850265502929687, + "step": 62775 + }, + { + "epoch": 0.5428400964972201, + "grad_norm": 43.71482424236227, + "learning_rate": 4.97387667064857e-06, + "loss": 0.20648193359375, + "step": 62780 + }, + { + "epoch": 0.5428833300187633, + "grad_norm": 13.45384824541788, + "learning_rate": 4.9737232440488945e-06, + "loss": 0.124853515625, + "step": 62785 + }, + { + "epoch": 0.5429265635403067, + "grad_norm": 1.0970668284348721, + "learning_rate": 4.973569808346581e-06, + "loss": 0.15101852416992187, + "step": 62790 + }, + { + "epoch": 0.5429697970618499, + "grad_norm": 6.651087713996622, + "learning_rate": 4.97341636354234e-06, + "loss": 0.2827606201171875, + "step": 62795 + }, + { + "epoch": 0.5430130305833931, + "grad_norm": 9.55816968862363, + "learning_rate": 4.973262909636876e-06, + "loss": 0.064727783203125, + "step": 62800 + }, + { + "epoch": 0.5430562641049363, + "grad_norm": 18.588107179664725, + "learning_rate": 4.973109446630898e-06, + "loss": 0.0790802001953125, + "step": 62805 + }, + { + "epoch": 0.5430994976264797, + "grad_norm": 13.897028059314732, + "learning_rate": 4.972955974525112e-06, + "loss": 0.07901725769042969, + "step": 62810 + }, + { + "epoch": 0.5431427311480229, + "grad_norm": 1.339210744487122, + "learning_rate": 4.972802493320229e-06, + "loss": 0.3228515625, + "step": 62815 + }, + { + "epoch": 0.5431859646695661, + "grad_norm": 12.854785450308496, + "learning_rate": 4.972649003016955e-06, + "loss": 0.424346923828125, + "step": 62820 + }, + { + "epoch": 0.5432291981911095, + "grad_norm": 1.0948498968386324, + "learning_rate": 4.9724955036159985e-06, + "loss": 0.06905784606933593, + "step": 62825 + }, + { + "epoch": 0.5432724317126527, + "grad_norm": 17.029147048386793, + "learning_rate": 4.972341995118066e-06, + "loss": 0.2300628662109375, + "step": 62830 + }, + { + "epoch": 0.5433156652341959, + "grad_norm": 12.08278132636943, + "learning_rate": 4.972188477523867e-06, + "loss": 0.15459060668945312, + "step": 62835 + }, + { + "epoch": 0.5433588987557393, + "grad_norm": 2.080132733335746, + "learning_rate": 4.972034950834109e-06, + "loss": 0.2243663787841797, + "step": 62840 + }, + { + "epoch": 0.5434021322772825, + "grad_norm": 0.777726594909214, + "learning_rate": 4.9718814150495e-06, + "loss": 0.19114990234375, + "step": 62845 + }, + { + "epoch": 0.5434453657988257, + "grad_norm": 26.852374116722654, + "learning_rate": 4.971727870170748e-06, + "loss": 0.136517333984375, + "step": 62850 + }, + { + "epoch": 0.5434885993203691, + "grad_norm": 14.563141796436213, + "learning_rate": 4.971574316198562e-06, + "loss": 0.19165191650390626, + "step": 62855 + }, + { + "epoch": 0.5435318328419123, + "grad_norm": 3.706508417975262, + "learning_rate": 4.971420753133649e-06, + "loss": 0.15220317840576172, + "step": 62860 + }, + { + "epoch": 0.5435750663634555, + "grad_norm": 1.0540191084624133, + "learning_rate": 4.971267180976718e-06, + "loss": 0.33673553466796874, + "step": 62865 + }, + { + "epoch": 0.5436182998849989, + "grad_norm": 15.537050655903926, + "learning_rate": 4.971113599728476e-06, + "loss": 0.07860755920410156, + "step": 62870 + }, + { + "epoch": 0.5436615334065421, + "grad_norm": 1.0078527279037324, + "learning_rate": 4.970960009389633e-06, + "loss": 0.017123031616210937, + "step": 62875 + }, + { + "epoch": 0.5437047669280853, + "grad_norm": 14.281112765111832, + "learning_rate": 4.970806409960897e-06, + "loss": 0.13095169067382811, + "step": 62880 + }, + { + "epoch": 0.5437480004496287, + "grad_norm": 2.212725556010016, + "learning_rate": 4.970652801442975e-06, + "loss": 0.1513427734375, + "step": 62885 + }, + { + "epoch": 0.5437912339711719, + "grad_norm": 31.976433185282517, + "learning_rate": 4.9704991838365765e-06, + "loss": 0.08205795288085938, + "step": 62890 + }, + { + "epoch": 0.5438344674927151, + "grad_norm": 16.675135547161695, + "learning_rate": 4.9703455571424104e-06, + "loss": 0.1482513427734375, + "step": 62895 + }, + { + "epoch": 0.5438777010142584, + "grad_norm": 1.222012765173669, + "learning_rate": 4.970191921361185e-06, + "loss": 0.1058013916015625, + "step": 62900 + }, + { + "epoch": 0.5439209345358017, + "grad_norm": 1.5383243729848168, + "learning_rate": 4.970038276493608e-06, + "loss": 0.08751068115234376, + "step": 62905 + }, + { + "epoch": 0.5439641680573449, + "grad_norm": 20.03863043181459, + "learning_rate": 4.969884622540388e-06, + "loss": 0.08498764038085938, + "step": 62910 + }, + { + "epoch": 0.5440074015788882, + "grad_norm": 13.16820474045554, + "learning_rate": 4.969730959502235e-06, + "loss": 0.212872314453125, + "step": 62915 + }, + { + "epoch": 0.5440506351004315, + "grad_norm": 2.064220772461373, + "learning_rate": 4.969577287379857e-06, + "loss": 0.15272178649902343, + "step": 62920 + }, + { + "epoch": 0.5440938686219747, + "grad_norm": 10.283944517607702, + "learning_rate": 4.969423606173962e-06, + "loss": 0.1422698974609375, + "step": 62925 + }, + { + "epoch": 0.544137102143518, + "grad_norm": 17.121529467820043, + "learning_rate": 4.96926991588526e-06, + "loss": 0.07326507568359375, + "step": 62930 + }, + { + "epoch": 0.5441803356650613, + "grad_norm": 3.558341730305562, + "learning_rate": 4.969116216514458e-06, + "loss": 0.20645751953125, + "step": 62935 + }, + { + "epoch": 0.5442235691866045, + "grad_norm": 17.506013671024856, + "learning_rate": 4.968962508062267e-06, + "loss": 0.26924762725830076, + "step": 62940 + }, + { + "epoch": 0.5442668027081478, + "grad_norm": 15.580620706846597, + "learning_rate": 4.968808790529395e-06, + "loss": 0.30260009765625, + "step": 62945 + }, + { + "epoch": 0.5443100362296911, + "grad_norm": 23.700905968523347, + "learning_rate": 4.96865506391655e-06, + "loss": 0.4227436065673828, + "step": 62950 + }, + { + "epoch": 0.5443532697512343, + "grad_norm": 12.186694182877169, + "learning_rate": 4.968501328224442e-06, + "loss": 0.1295745849609375, + "step": 62955 + }, + { + "epoch": 0.5443965032727776, + "grad_norm": 6.570533165481122, + "learning_rate": 4.968347583453781e-06, + "loss": 0.19963302612304687, + "step": 62960 + }, + { + "epoch": 0.5444397367943209, + "grad_norm": 2.2212678321591346, + "learning_rate": 4.968193829605273e-06, + "loss": 0.6379318237304688, + "step": 62965 + }, + { + "epoch": 0.5444829703158641, + "grad_norm": 2.4448882819922093, + "learning_rate": 4.96804006667963e-06, + "loss": 0.17780532836914062, + "step": 62970 + }, + { + "epoch": 0.5445262038374074, + "grad_norm": 3.7968296269107835, + "learning_rate": 4.96788629467756e-06, + "loss": 0.3319831848144531, + "step": 62975 + }, + { + "epoch": 0.5445694373589506, + "grad_norm": 0.12269359827834837, + "learning_rate": 4.9677325135997734e-06, + "loss": 0.4581935882568359, + "step": 62980 + }, + { + "epoch": 0.5446126708804939, + "grad_norm": 1.8695321468803219, + "learning_rate": 4.967578723446976e-06, + "loss": 0.031878662109375, + "step": 62985 + }, + { + "epoch": 0.5446559044020371, + "grad_norm": 0.16125871271157616, + "learning_rate": 4.9674249242198816e-06, + "loss": 0.17078704833984376, + "step": 62990 + }, + { + "epoch": 0.5446991379235804, + "grad_norm": 26.772631612554346, + "learning_rate": 4.967271115919196e-06, + "loss": 0.5803943634033203, + "step": 62995 + }, + { + "epoch": 0.5447423714451237, + "grad_norm": 3.371554083618623, + "learning_rate": 4.967117298545631e-06, + "loss": 0.6954376220703125, + "step": 63000 + }, + { + "epoch": 0.544785604966667, + "grad_norm": 7.5034035845048805, + "learning_rate": 4.966963472099894e-06, + "loss": 0.03372650146484375, + "step": 63005 + }, + { + "epoch": 0.5448288384882102, + "grad_norm": 1.3750943068292518, + "learning_rate": 4.9668096365826946e-06, + "loss": 0.028191375732421874, + "step": 63010 + }, + { + "epoch": 0.5448720720097535, + "grad_norm": 5.022053276142194, + "learning_rate": 4.966655791994743e-06, + "loss": 0.07970962524414063, + "step": 63015 + }, + { + "epoch": 0.5449153055312967, + "grad_norm": 22.317208208900254, + "learning_rate": 4.96650193833675e-06, + "loss": 0.18458404541015624, + "step": 63020 + }, + { + "epoch": 0.54495853905284, + "grad_norm": 11.337836003100257, + "learning_rate": 4.966348075609423e-06, + "loss": 0.07074127197265626, + "step": 63025 + }, + { + "epoch": 0.5450017725743833, + "grad_norm": 3.92913197890516, + "learning_rate": 4.966194203813473e-06, + "loss": 0.09093780517578125, + "step": 63030 + }, + { + "epoch": 0.5450450060959265, + "grad_norm": 1.1121895839690186, + "learning_rate": 4.966040322949608e-06, + "loss": 0.12630386352539064, + "step": 63035 + }, + { + "epoch": 0.5450882396174698, + "grad_norm": 2.3278494337968434, + "learning_rate": 4.965886433018539e-06, + "loss": 0.09703826904296875, + "step": 63040 + }, + { + "epoch": 0.5451314731390131, + "grad_norm": 13.366857782839384, + "learning_rate": 4.965732534020976e-06, + "loss": 0.11288604736328126, + "step": 63045 + }, + { + "epoch": 0.5451747066605563, + "grad_norm": 18.44766301749058, + "learning_rate": 4.965578625957628e-06, + "loss": 0.257696533203125, + "step": 63050 + }, + { + "epoch": 0.5452179401820996, + "grad_norm": 36.01099913960632, + "learning_rate": 4.965424708829206e-06, + "loss": 0.19802703857421874, + "step": 63055 + }, + { + "epoch": 0.5452611737036429, + "grad_norm": 18.779852881812126, + "learning_rate": 4.965270782636417e-06, + "loss": 0.22333889007568358, + "step": 63060 + }, + { + "epoch": 0.5453044072251861, + "grad_norm": 23.767879878584186, + "learning_rate": 4.965116847379974e-06, + "loss": 0.14580230712890624, + "step": 63065 + }, + { + "epoch": 0.5453476407467294, + "grad_norm": 7.427650574163851, + "learning_rate": 4.964962903060586e-06, + "loss": 0.124365234375, + "step": 63070 + }, + { + "epoch": 0.5453908742682726, + "grad_norm": 1.046489921110981, + "learning_rate": 4.964808949678963e-06, + "loss": 0.06816635131835938, + "step": 63075 + }, + { + "epoch": 0.5454341077898159, + "grad_norm": 2.7904716736816604, + "learning_rate": 4.9646549872358135e-06, + "loss": 0.17053680419921874, + "step": 63080 + }, + { + "epoch": 0.5454773413113592, + "grad_norm": 3.0579281145977397, + "learning_rate": 4.96450101573185e-06, + "loss": 0.13739662170410155, + "step": 63085 + }, + { + "epoch": 0.5455205748329024, + "grad_norm": 20.6735220077465, + "learning_rate": 4.96434703516778e-06, + "loss": 0.2129974365234375, + "step": 63090 + }, + { + "epoch": 0.5455638083544457, + "grad_norm": 0.9248116335343461, + "learning_rate": 4.964193045544316e-06, + "loss": 0.13956985473632813, + "step": 63095 + }, + { + "epoch": 0.545607041875989, + "grad_norm": 11.248193441391976, + "learning_rate": 4.9640390468621675e-06, + "loss": 0.16613922119140626, + "step": 63100 + }, + { + "epoch": 0.5456502753975322, + "grad_norm": 3.5726986003332684, + "learning_rate": 4.9638850391220435e-06, + "loss": 0.04984893798828125, + "step": 63105 + }, + { + "epoch": 0.5456935089190755, + "grad_norm": 7.394254802713638, + "learning_rate": 4.963731022324655e-06, + "loss": 0.17826690673828124, + "step": 63110 + }, + { + "epoch": 0.5457367424406188, + "grad_norm": 1.0908276291066958, + "learning_rate": 4.963576996470714e-06, + "loss": 0.1369293212890625, + "step": 63115 + }, + { + "epoch": 0.545779975962162, + "grad_norm": 68.31552870774941, + "learning_rate": 4.9634229615609285e-06, + "loss": 0.2800506591796875, + "step": 63120 + }, + { + "epoch": 0.5458232094837053, + "grad_norm": 1.068199042451712, + "learning_rate": 4.96326891759601e-06, + "loss": 0.110894775390625, + "step": 63125 + }, + { + "epoch": 0.5458664430052486, + "grad_norm": 19.252224130777574, + "learning_rate": 4.963114864576669e-06, + "loss": 0.3202392578125, + "step": 63130 + }, + { + "epoch": 0.5459096765267918, + "grad_norm": 1.065053080832001, + "learning_rate": 4.962960802503615e-06, + "loss": 0.2922523498535156, + "step": 63135 + }, + { + "epoch": 0.5459529100483351, + "grad_norm": 49.26276379545063, + "learning_rate": 4.962806731377559e-06, + "loss": 0.45194091796875, + "step": 63140 + }, + { + "epoch": 0.5459961435698784, + "grad_norm": 7.724206353202386, + "learning_rate": 4.962652651199213e-06, + "loss": 0.19860992431640626, + "step": 63145 + }, + { + "epoch": 0.5460393770914216, + "grad_norm": 0.13134435649788284, + "learning_rate": 4.962498561969286e-06, + "loss": 0.40167884826660155, + "step": 63150 + }, + { + "epoch": 0.5460826106129648, + "grad_norm": 6.58187911466336, + "learning_rate": 4.962344463688489e-06, + "loss": 0.097503662109375, + "step": 63155 + }, + { + "epoch": 0.5461258441345082, + "grad_norm": 15.9417641127438, + "learning_rate": 4.9621903563575325e-06, + "loss": 0.24794387817382812, + "step": 63160 + }, + { + "epoch": 0.5461690776560514, + "grad_norm": 17.218714745892665, + "learning_rate": 4.962036239977127e-06, + "loss": 0.26620635986328123, + "step": 63165 + }, + { + "epoch": 0.5462123111775946, + "grad_norm": 2.777823154153115, + "learning_rate": 4.9618821145479854e-06, + "loss": 0.21563796997070311, + "step": 63170 + }, + { + "epoch": 0.546255544699138, + "grad_norm": 29.064597587666114, + "learning_rate": 4.961727980070815e-06, + "loss": 0.26450119018554685, + "step": 63175 + }, + { + "epoch": 0.5462987782206812, + "grad_norm": 24.258673707855003, + "learning_rate": 4.961573836546329e-06, + "loss": 0.14971160888671875, + "step": 63180 + }, + { + "epoch": 0.5463420117422244, + "grad_norm": 11.401562668703566, + "learning_rate": 4.961419683975238e-06, + "loss": 0.1674053192138672, + "step": 63185 + }, + { + "epoch": 0.5463852452637677, + "grad_norm": 20.946065700536817, + "learning_rate": 4.961265522358252e-06, + "loss": 0.2120682716369629, + "step": 63190 + }, + { + "epoch": 0.546428478785311, + "grad_norm": 3.049950675638369, + "learning_rate": 4.961111351696084e-06, + "loss": 0.40660858154296875, + "step": 63195 + }, + { + "epoch": 0.5464717123068542, + "grad_norm": 94.18869148037636, + "learning_rate": 4.960957171989443e-06, + "loss": 0.27032470703125, + "step": 63200 + }, + { + "epoch": 0.5465149458283975, + "grad_norm": 0.264652142510099, + "learning_rate": 4.9608029832390415e-06, + "loss": 0.07553977966308593, + "step": 63205 + }, + { + "epoch": 0.5465581793499408, + "grad_norm": 5.17141678324712, + "learning_rate": 4.960648785445589e-06, + "loss": 0.233538818359375, + "step": 63210 + }, + { + "epoch": 0.546601412871484, + "grad_norm": 2.1725218164626106, + "learning_rate": 4.960494578609798e-06, + "loss": 0.018574905395507813, + "step": 63215 + }, + { + "epoch": 0.5466446463930273, + "grad_norm": 4.5078544068496145, + "learning_rate": 4.960340362732379e-06, + "loss": 0.026215362548828124, + "step": 63220 + }, + { + "epoch": 0.5466878799145706, + "grad_norm": 3.457225668153734, + "learning_rate": 4.960186137814044e-06, + "loss": 0.42821807861328126, + "step": 63225 + }, + { + "epoch": 0.5467311134361138, + "grad_norm": 18.103125107989268, + "learning_rate": 4.9600319038555034e-06, + "loss": 0.312567138671875, + "step": 63230 + }, + { + "epoch": 0.5467743469576571, + "grad_norm": 27.431534554158475, + "learning_rate": 4.959877660857468e-06, + "loss": 0.3121063232421875, + "step": 63235 + }, + { + "epoch": 0.5468175804792004, + "grad_norm": 5.553461252319264, + "learning_rate": 4.959723408820652e-06, + "loss": 0.6870689392089844, + "step": 63240 + }, + { + "epoch": 0.5468608140007436, + "grad_norm": 0.8569401579172933, + "learning_rate": 4.959569147745763e-06, + "loss": 0.04816131591796875, + "step": 63245 + }, + { + "epoch": 0.5469040475222868, + "grad_norm": 1.2442955000271985, + "learning_rate": 4.9594148776335145e-06, + "loss": 0.1670379638671875, + "step": 63250 + }, + { + "epoch": 0.5469472810438302, + "grad_norm": 3.927716129798256, + "learning_rate": 4.9592605984846186e-06, + "loss": 0.08089599609375, + "step": 63255 + }, + { + "epoch": 0.5469905145653734, + "grad_norm": 7.624051527973244, + "learning_rate": 4.959106310299785e-06, + "loss": 0.10861968994140625, + "step": 63260 + }, + { + "epoch": 0.5470337480869166, + "grad_norm": 51.791260573953394, + "learning_rate": 4.958952013079727e-06, + "loss": 0.47835693359375, + "step": 63265 + }, + { + "epoch": 0.54707698160846, + "grad_norm": 6.086982789184343, + "learning_rate": 4.958797706825155e-06, + "loss": 0.09106216430664063, + "step": 63270 + }, + { + "epoch": 0.5471202151300032, + "grad_norm": 8.220710287445163, + "learning_rate": 4.9586433915367815e-06, + "loss": 0.184710693359375, + "step": 63275 + }, + { + "epoch": 0.5471634486515464, + "grad_norm": 24.433085187154397, + "learning_rate": 4.958489067215317e-06, + "loss": 0.38686065673828124, + "step": 63280 + }, + { + "epoch": 0.5472066821730898, + "grad_norm": 1.6188595810327373, + "learning_rate": 4.958334733861474e-06, + "loss": 0.11289482116699219, + "step": 63285 + }, + { + "epoch": 0.547249915694633, + "grad_norm": 3.0643296813064804, + "learning_rate": 4.9581803914759655e-06, + "loss": 0.3055908203125, + "step": 63290 + }, + { + "epoch": 0.5472931492161762, + "grad_norm": 2.5961250096755872, + "learning_rate": 4.958026040059501e-06, + "loss": 0.08429794311523438, + "step": 63295 + }, + { + "epoch": 0.5473363827377196, + "grad_norm": 0.2883121377758635, + "learning_rate": 4.957871679612793e-06, + "loss": 0.0773895263671875, + "step": 63300 + }, + { + "epoch": 0.5473796162592628, + "grad_norm": 0.9388272165382071, + "learning_rate": 4.957717310136555e-06, + "loss": 0.0712127685546875, + "step": 63305 + }, + { + "epoch": 0.547422849780806, + "grad_norm": 2.315243185026376, + "learning_rate": 4.957562931631496e-06, + "loss": 0.050336456298828124, + "step": 63310 + }, + { + "epoch": 0.5474660833023494, + "grad_norm": 17.16038957582936, + "learning_rate": 4.957408544098331e-06, + "loss": 0.2815132141113281, + "step": 63315 + }, + { + "epoch": 0.5475093168238926, + "grad_norm": 4.278576217840107, + "learning_rate": 4.957254147537771e-06, + "loss": 0.060104751586914064, + "step": 63320 + }, + { + "epoch": 0.5475525503454358, + "grad_norm": 12.53738860021597, + "learning_rate": 4.957099741950527e-06, + "loss": 0.1420360565185547, + "step": 63325 + }, + { + "epoch": 0.547595783866979, + "grad_norm": 21.202487096582736, + "learning_rate": 4.956945327337313e-06, + "loss": 0.1857666015625, + "step": 63330 + }, + { + "epoch": 0.5476390173885224, + "grad_norm": 1.1183184724995079, + "learning_rate": 4.956790903698839e-06, + "loss": 0.2114501953125, + "step": 63335 + }, + { + "epoch": 0.5476822509100656, + "grad_norm": 19.98915068395306, + "learning_rate": 4.9566364710358184e-06, + "loss": 0.28203277587890624, + "step": 63340 + }, + { + "epoch": 0.5477254844316088, + "grad_norm": 7.889919933173626, + "learning_rate": 4.956482029348964e-06, + "loss": 0.0799591064453125, + "step": 63345 + }, + { + "epoch": 0.5477687179531522, + "grad_norm": 3.110441039532483, + "learning_rate": 4.956327578638987e-06, + "loss": 0.20758056640625, + "step": 63350 + }, + { + "epoch": 0.5478119514746954, + "grad_norm": 1.3808310000330046, + "learning_rate": 4.9561731189066e-06, + "loss": 0.14508514404296874, + "step": 63355 + }, + { + "epoch": 0.5478551849962386, + "grad_norm": 5.600060086665769, + "learning_rate": 4.956018650152515e-06, + "loss": 0.43604736328125, + "step": 63360 + }, + { + "epoch": 0.547898418517782, + "grad_norm": 65.1623864968369, + "learning_rate": 4.955864172377445e-06, + "loss": 0.47533378601074217, + "step": 63365 + }, + { + "epoch": 0.5479416520393252, + "grad_norm": 5.460006639066618, + "learning_rate": 4.955709685582103e-06, + "loss": 0.10802764892578125, + "step": 63370 + }, + { + "epoch": 0.5479848855608684, + "grad_norm": 18.412203011584687, + "learning_rate": 4.9555551897672005e-06, + "loss": 0.16691131591796876, + "step": 63375 + }, + { + "epoch": 0.5480281190824118, + "grad_norm": 1.9141872867501513, + "learning_rate": 4.955400684933449e-06, + "loss": 0.17135772705078126, + "step": 63380 + }, + { + "epoch": 0.548071352603955, + "grad_norm": 4.35585715856134, + "learning_rate": 4.955246171081564e-06, + "loss": 0.05980224609375, + "step": 63385 + }, + { + "epoch": 0.5481145861254982, + "grad_norm": 9.370758710674039, + "learning_rate": 4.955091648212256e-06, + "loss": 0.15159912109375, + "step": 63390 + }, + { + "epoch": 0.5481578196470416, + "grad_norm": 21.17023860383011, + "learning_rate": 4.9549371163262374e-06, + "loss": 0.3137481689453125, + "step": 63395 + }, + { + "epoch": 0.5482010531685848, + "grad_norm": 12.935164013942188, + "learning_rate": 4.9547825754242225e-06, + "loss": 0.10931625366210937, + "step": 63400 + }, + { + "epoch": 0.548244286690128, + "grad_norm": 14.935819912909675, + "learning_rate": 4.954628025506922e-06, + "loss": 0.15094146728515626, + "step": 63405 + }, + { + "epoch": 0.5482875202116714, + "grad_norm": 61.58682410930661, + "learning_rate": 4.9544734665750505e-06, + "loss": 0.6516983032226562, + "step": 63410 + }, + { + "epoch": 0.5483307537332146, + "grad_norm": 10.036110663925413, + "learning_rate": 4.95431889862932e-06, + "loss": 0.13997802734375, + "step": 63415 + }, + { + "epoch": 0.5483739872547578, + "grad_norm": 15.516773415304492, + "learning_rate": 4.954164321670443e-06, + "loss": 0.2723114013671875, + "step": 63420 + }, + { + "epoch": 0.5484172207763011, + "grad_norm": 6.6407157753950194, + "learning_rate": 4.954009735699133e-06, + "loss": 0.04305610656738281, + "step": 63425 + }, + { + "epoch": 0.5484604542978444, + "grad_norm": 0.3128899603065627, + "learning_rate": 4.953855140716103e-06, + "loss": 0.06319198608398438, + "step": 63430 + }, + { + "epoch": 0.5485036878193876, + "grad_norm": 39.95366565970146, + "learning_rate": 4.953700536722066e-06, + "loss": 0.3909912109375, + "step": 63435 + }, + { + "epoch": 0.5485469213409309, + "grad_norm": 5.0752311606700165, + "learning_rate": 4.953545923717734e-06, + "loss": 0.26591796875, + "step": 63440 + }, + { + "epoch": 0.5485901548624742, + "grad_norm": 9.638799085717068, + "learning_rate": 4.953391301703821e-06, + "loss": 0.309295654296875, + "step": 63445 + }, + { + "epoch": 0.5486333883840174, + "grad_norm": 26.762492322743864, + "learning_rate": 4.9532366706810404e-06, + "loss": 0.0995880126953125, + "step": 63450 + }, + { + "epoch": 0.5486766219055607, + "grad_norm": 1.398800371487656, + "learning_rate": 4.953082030650105e-06, + "loss": 0.0487762451171875, + "step": 63455 + }, + { + "epoch": 0.548719855427104, + "grad_norm": 1.444621189914032, + "learning_rate": 4.952927381611727e-06, + "loss": 0.364569091796875, + "step": 63460 + }, + { + "epoch": 0.5487630889486472, + "grad_norm": 3.2461455806796065, + "learning_rate": 4.952772723566621e-06, + "loss": 0.2274566650390625, + "step": 63465 + }, + { + "epoch": 0.5488063224701905, + "grad_norm": 0.415895396971579, + "learning_rate": 4.9526180565155e-06, + "loss": 0.22926559448242187, + "step": 63470 + }, + { + "epoch": 0.5488495559917338, + "grad_norm": 0.9838433068638673, + "learning_rate": 4.952463380459076e-06, + "loss": 0.03140830993652344, + "step": 63475 + }, + { + "epoch": 0.548892789513277, + "grad_norm": 2.94252763304863, + "learning_rate": 4.952308695398065e-06, + "loss": 0.2010162353515625, + "step": 63480 + }, + { + "epoch": 0.5489360230348203, + "grad_norm": 16.6278961450564, + "learning_rate": 4.952154001333177e-06, + "loss": 0.228717041015625, + "step": 63485 + }, + { + "epoch": 0.5489792565563636, + "grad_norm": 0.7967045894467377, + "learning_rate": 4.951999298265128e-06, + "loss": 0.05481719970703125, + "step": 63490 + }, + { + "epoch": 0.5490224900779068, + "grad_norm": 7.537613446215508, + "learning_rate": 4.9518445861946316e-06, + "loss": 0.087744140625, + "step": 63495 + }, + { + "epoch": 0.54906572359945, + "grad_norm": 128.56736432123128, + "learning_rate": 4.951689865122399e-06, + "loss": 0.2836029052734375, + "step": 63500 + }, + { + "epoch": 0.5491089571209933, + "grad_norm": 12.360524106152337, + "learning_rate": 4.9515351350491466e-06, + "loss": 0.3676250457763672, + "step": 63505 + }, + { + "epoch": 0.5491521906425366, + "grad_norm": 0.3556085575561065, + "learning_rate": 4.951380395975586e-06, + "loss": 0.09072418212890625, + "step": 63510 + }, + { + "epoch": 0.5491954241640798, + "grad_norm": 31.916105973567937, + "learning_rate": 4.951225647902431e-06, + "loss": 0.3564777374267578, + "step": 63515 + }, + { + "epoch": 0.5492386576856231, + "grad_norm": 9.904315791472262, + "learning_rate": 4.951070890830396e-06, + "loss": 0.10263519287109375, + "step": 63520 + }, + { + "epoch": 0.5492818912071664, + "grad_norm": 25.179845456398468, + "learning_rate": 4.950916124760195e-06, + "loss": 0.18747482299804688, + "step": 63525 + }, + { + "epoch": 0.5493251247287096, + "grad_norm": 31.74900965584002, + "learning_rate": 4.9507613496925405e-06, + "loss": 0.147222900390625, + "step": 63530 + }, + { + "epoch": 0.5493683582502529, + "grad_norm": 4.069981955502241, + "learning_rate": 4.9506065656281474e-06, + "loss": 0.09751644134521484, + "step": 63535 + }, + { + "epoch": 0.5494115917717962, + "grad_norm": 17.694474133022332, + "learning_rate": 4.950451772567728e-06, + "loss": 0.0994293212890625, + "step": 63540 + }, + { + "epoch": 0.5494548252933394, + "grad_norm": 6.999660452132123, + "learning_rate": 4.950296970512e-06, + "loss": 0.205126953125, + "step": 63545 + }, + { + "epoch": 0.5494980588148827, + "grad_norm": 3.669380949256623, + "learning_rate": 4.950142159461673e-06, + "loss": 0.21876220703125, + "step": 63550 + }, + { + "epoch": 0.549541292336426, + "grad_norm": 11.219492644812762, + "learning_rate": 4.949987339417463e-06, + "loss": 0.1431488037109375, + "step": 63555 + }, + { + "epoch": 0.5495845258579692, + "grad_norm": 32.52073323734785, + "learning_rate": 4.949832510380083e-06, + "loss": 0.33892669677734377, + "step": 63560 + }, + { + "epoch": 0.5496277593795125, + "grad_norm": 26.086671244200872, + "learning_rate": 4.949677672350249e-06, + "loss": 0.477825927734375, + "step": 63565 + }, + { + "epoch": 0.5496709929010558, + "grad_norm": 28.06310063089184, + "learning_rate": 4.949522825328672e-06, + "loss": 0.236663818359375, + "step": 63570 + }, + { + "epoch": 0.549714226422599, + "grad_norm": 0.14725433290788828, + "learning_rate": 4.949367969316069e-06, + "loss": 0.2099334716796875, + "step": 63575 + }, + { + "epoch": 0.5497574599441423, + "grad_norm": 19.164975844917173, + "learning_rate": 4.949213104313155e-06, + "loss": 0.466571044921875, + "step": 63580 + }, + { + "epoch": 0.5498006934656856, + "grad_norm": 2.4115539161200354, + "learning_rate": 4.94905823032064e-06, + "loss": 0.12330322265625, + "step": 63585 + }, + { + "epoch": 0.5498439269872288, + "grad_norm": 2.529484403264169, + "learning_rate": 4.9489033473392415e-06, + "loss": 0.12895278930664061, + "step": 63590 + }, + { + "epoch": 0.5498871605087721, + "grad_norm": 0.5160447114126417, + "learning_rate": 4.948748455369672e-06, + "loss": 0.3008598327636719, + "step": 63595 + }, + { + "epoch": 0.5499303940303153, + "grad_norm": 16.759303585769132, + "learning_rate": 4.948593554412648e-06, + "loss": 0.14520111083984374, + "step": 63600 + }, + { + "epoch": 0.5499736275518586, + "grad_norm": 29.243856154782243, + "learning_rate": 4.948438644468883e-06, + "loss": 0.19669113159179688, + "step": 63605 + }, + { + "epoch": 0.5500168610734019, + "grad_norm": 12.414883626576232, + "learning_rate": 4.94828372553909e-06, + "loss": 0.4201618194580078, + "step": 63610 + }, + { + "epoch": 0.5500600945949451, + "grad_norm": 26.308541509661804, + "learning_rate": 4.948128797623985e-06, + "loss": 0.3399782180786133, + "step": 63615 + }, + { + "epoch": 0.5501033281164884, + "grad_norm": 20.02303057288452, + "learning_rate": 4.947973860724282e-06, + "loss": 0.535748291015625, + "step": 63620 + }, + { + "epoch": 0.5501465616380317, + "grad_norm": 25.24745829339774, + "learning_rate": 4.947818914840696e-06, + "loss": 0.30631179809570314, + "step": 63625 + }, + { + "epoch": 0.5501897951595749, + "grad_norm": 1.1939318212912842, + "learning_rate": 4.947663959973942e-06, + "loss": 0.14481658935546876, + "step": 63630 + }, + { + "epoch": 0.5502330286811182, + "grad_norm": 6.150816890652966, + "learning_rate": 4.947508996124733e-06, + "loss": 0.05680084228515625, + "step": 63635 + }, + { + "epoch": 0.5502762622026615, + "grad_norm": 31.739121574540345, + "learning_rate": 4.947354023293784e-06, + "loss": 0.2346435546875, + "step": 63640 + }, + { + "epoch": 0.5503194957242047, + "grad_norm": 25.519976159694522, + "learning_rate": 4.947199041481811e-06, + "loss": 0.1926300048828125, + "step": 63645 + }, + { + "epoch": 0.550362729245748, + "grad_norm": 8.12450464406765, + "learning_rate": 4.947044050689529e-06, + "loss": 0.21979217529296874, + "step": 63650 + }, + { + "epoch": 0.5504059627672913, + "grad_norm": 17.644623930981968, + "learning_rate": 4.94688905091765e-06, + "loss": 0.1301513671875, + "step": 63655 + }, + { + "epoch": 0.5504491962888345, + "grad_norm": 12.335802778024275, + "learning_rate": 4.946734042166892e-06, + "loss": 0.07162399291992187, + "step": 63660 + }, + { + "epoch": 0.5504924298103778, + "grad_norm": 14.319945000819164, + "learning_rate": 4.946579024437967e-06, + "loss": 0.06774444580078125, + "step": 63665 + }, + { + "epoch": 0.550535663331921, + "grad_norm": 0.8112386425908578, + "learning_rate": 4.9464239977315945e-06, + "loss": 0.04023284912109375, + "step": 63670 + }, + { + "epoch": 0.5505788968534643, + "grad_norm": 7.225776576972544, + "learning_rate": 4.946268962048484e-06, + "loss": 0.37596817016601564, + "step": 63675 + }, + { + "epoch": 0.5506221303750075, + "grad_norm": 28.39172120712706, + "learning_rate": 4.946113917389354e-06, + "loss": 0.187188720703125, + "step": 63680 + }, + { + "epoch": 0.5506653638965509, + "grad_norm": 24.101233336255365, + "learning_rate": 4.945958863754918e-06, + "loss": 0.3891582489013672, + "step": 63685 + }, + { + "epoch": 0.5507085974180941, + "grad_norm": 11.367685149710654, + "learning_rate": 4.945803801145893e-06, + "loss": 0.0878488540649414, + "step": 63690 + }, + { + "epoch": 0.5507518309396373, + "grad_norm": 16.26594172536211, + "learning_rate": 4.945648729562992e-06, + "loss": 0.278076171875, + "step": 63695 + }, + { + "epoch": 0.5507950644611807, + "grad_norm": 13.528794908764198, + "learning_rate": 4.94549364900693e-06, + "loss": 0.234295654296875, + "step": 63700 + }, + { + "epoch": 0.5508382979827239, + "grad_norm": 1.586281391283713, + "learning_rate": 4.945338559478424e-06, + "loss": 0.08461456298828125, + "step": 63705 + }, + { + "epoch": 0.5508815315042671, + "grad_norm": 0.2522536982859769, + "learning_rate": 4.9451834609781895e-06, + "loss": 0.11059112548828125, + "step": 63710 + }, + { + "epoch": 0.5509247650258104, + "grad_norm": 46.65384695946864, + "learning_rate": 4.945028353506939e-06, + "loss": 0.3195507049560547, + "step": 63715 + }, + { + "epoch": 0.5509679985473537, + "grad_norm": 4.6823922559432445, + "learning_rate": 4.944873237065391e-06, + "loss": 0.16708755493164062, + "step": 63720 + }, + { + "epoch": 0.5510112320688969, + "grad_norm": 2.7848822540610394, + "learning_rate": 4.944718111654259e-06, + "loss": 0.2027618408203125, + "step": 63725 + }, + { + "epoch": 0.5510544655904402, + "grad_norm": 1.921754399834776, + "learning_rate": 4.944562977274258e-06, + "loss": 0.316796875, + "step": 63730 + }, + { + "epoch": 0.5510976991119835, + "grad_norm": 32.26734591702538, + "learning_rate": 4.9444078339261056e-06, + "loss": 0.21163330078125, + "step": 63735 + }, + { + "epoch": 0.5511409326335267, + "grad_norm": 18.74450871987047, + "learning_rate": 4.944252681610516e-06, + "loss": 0.22316207885742187, + "step": 63740 + }, + { + "epoch": 0.55118416615507, + "grad_norm": 10.513905412195633, + "learning_rate": 4.944097520328204e-06, + "loss": 0.371246337890625, + "step": 63745 + }, + { + "epoch": 0.5512273996766133, + "grad_norm": 4.6408457417154265, + "learning_rate": 4.943942350079887e-06, + "loss": 0.10367431640625, + "step": 63750 + }, + { + "epoch": 0.5512706331981565, + "grad_norm": 31.699498410456297, + "learning_rate": 4.943787170866278e-06, + "loss": 0.2211700439453125, + "step": 63755 + }, + { + "epoch": 0.5513138667196998, + "grad_norm": 1.557020285318394, + "learning_rate": 4.9436319826880954e-06, + "loss": 0.2316356658935547, + "step": 63760 + }, + { + "epoch": 0.5513571002412431, + "grad_norm": 1.7962088944394952, + "learning_rate": 4.943476785546054e-06, + "loss": 0.23180999755859374, + "step": 63765 + }, + { + "epoch": 0.5514003337627863, + "grad_norm": 0.14645829258494966, + "learning_rate": 4.94332157944087e-06, + "loss": 0.32658977508544923, + "step": 63770 + }, + { + "epoch": 0.5514435672843295, + "grad_norm": 38.481991069366934, + "learning_rate": 4.943166364373258e-06, + "loss": 0.1485198974609375, + "step": 63775 + }, + { + "epoch": 0.5514868008058729, + "grad_norm": 0.15652356238380485, + "learning_rate": 4.9430111403439345e-06, + "loss": 0.06547660827636718, + "step": 63780 + }, + { + "epoch": 0.5515300343274161, + "grad_norm": 7.874660224239283, + "learning_rate": 4.942855907353615e-06, + "loss": 0.05215187072753906, + "step": 63785 + }, + { + "epoch": 0.5515732678489593, + "grad_norm": 7.772140127896184, + "learning_rate": 4.942700665403017e-06, + "loss": 0.35374298095703127, + "step": 63790 + }, + { + "epoch": 0.5516165013705027, + "grad_norm": 21.27286904673641, + "learning_rate": 4.942545414492853e-06, + "loss": 0.3637298583984375, + "step": 63795 + }, + { + "epoch": 0.5516597348920459, + "grad_norm": 4.029474597149719, + "learning_rate": 4.942390154623842e-06, + "loss": 0.09335556030273437, + "step": 63800 + }, + { + "epoch": 0.5517029684135891, + "grad_norm": 16.37501202646185, + "learning_rate": 4.9422348857967e-06, + "loss": 0.24947662353515626, + "step": 63805 + }, + { + "epoch": 0.5517462019351325, + "grad_norm": 39.43733947690524, + "learning_rate": 4.942079608012142e-06, + "loss": 0.35012969970703123, + "step": 63810 + }, + { + "epoch": 0.5517894354566757, + "grad_norm": 17.357932165053782, + "learning_rate": 4.941924321270884e-06, + "loss": 0.19313507080078124, + "step": 63815 + }, + { + "epoch": 0.5518326689782189, + "grad_norm": 11.890895624093494, + "learning_rate": 4.941769025573643e-06, + "loss": 0.136041259765625, + "step": 63820 + }, + { + "epoch": 0.5518759024997623, + "grad_norm": 15.48527733507027, + "learning_rate": 4.941613720921135e-06, + "loss": 0.11638031005859376, + "step": 63825 + }, + { + "epoch": 0.5519191360213055, + "grad_norm": 4.370270282160991, + "learning_rate": 4.941458407314075e-06, + "loss": 0.13992538452148437, + "step": 63830 + }, + { + "epoch": 0.5519623695428487, + "grad_norm": 1.1868906991932162, + "learning_rate": 4.941303084753182e-06, + "loss": 0.05508270263671875, + "step": 63835 + }, + { + "epoch": 0.5520056030643921, + "grad_norm": 9.077468677211119, + "learning_rate": 4.941147753239169e-06, + "loss": 0.0527679443359375, + "step": 63840 + }, + { + "epoch": 0.5520488365859353, + "grad_norm": 2.2623753356372442, + "learning_rate": 4.940992412772755e-06, + "loss": 0.051399612426757814, + "step": 63845 + }, + { + "epoch": 0.5520920701074785, + "grad_norm": 0.7785723961820038, + "learning_rate": 4.940837063354656e-06, + "loss": 0.03383941650390625, + "step": 63850 + }, + { + "epoch": 0.5521353036290217, + "grad_norm": 5.798970780973608, + "learning_rate": 4.940681704985588e-06, + "loss": 0.22915802001953126, + "step": 63855 + }, + { + "epoch": 0.5521785371505651, + "grad_norm": 10.906785881786899, + "learning_rate": 4.940526337666266e-06, + "loss": 0.2466033935546875, + "step": 63860 + }, + { + "epoch": 0.5522217706721083, + "grad_norm": 1.4331884500881111, + "learning_rate": 4.940370961397409e-06, + "loss": 0.1528900146484375, + "step": 63865 + }, + { + "epoch": 0.5522650041936515, + "grad_norm": 10.190105161976184, + "learning_rate": 4.940215576179732e-06, + "loss": 0.03507537841796875, + "step": 63870 + }, + { + "epoch": 0.5523082377151949, + "grad_norm": 10.590053233156532, + "learning_rate": 4.940060182013953e-06, + "loss": 0.09295921325683594, + "step": 63875 + }, + { + "epoch": 0.5523514712367381, + "grad_norm": 8.169281706653896, + "learning_rate": 4.9399047789007874e-06, + "loss": 0.12366409301757812, + "step": 63880 + }, + { + "epoch": 0.5523947047582813, + "grad_norm": 10.103165862201907, + "learning_rate": 4.939749366840953e-06, + "loss": 0.4764259338378906, + "step": 63885 + }, + { + "epoch": 0.5524379382798247, + "grad_norm": 33.6749657768642, + "learning_rate": 4.939593945835165e-06, + "loss": 0.14141159057617186, + "step": 63890 + }, + { + "epoch": 0.5524811718013679, + "grad_norm": 87.5076978767378, + "learning_rate": 4.939438515884142e-06, + "loss": 0.18324203491210939, + "step": 63895 + }, + { + "epoch": 0.5525244053229111, + "grad_norm": 15.579327701605107, + "learning_rate": 4.939283076988599e-06, + "loss": 0.10371780395507812, + "step": 63900 + }, + { + "epoch": 0.5525676388444545, + "grad_norm": 31.90300065166613, + "learning_rate": 4.939127629149254e-06, + "loss": 0.3205596923828125, + "step": 63905 + }, + { + "epoch": 0.5526108723659977, + "grad_norm": 3.1147757292577167, + "learning_rate": 4.9389721723668245e-06, + "loss": 0.15931930541992187, + "step": 63910 + }, + { + "epoch": 0.5526541058875409, + "grad_norm": 0.8072037645157075, + "learning_rate": 4.938816706642026e-06, + "loss": 0.6019241333007812, + "step": 63915 + }, + { + "epoch": 0.5526973394090843, + "grad_norm": 6.816731715414492, + "learning_rate": 4.9386612319755765e-06, + "loss": 0.10760498046875, + "step": 63920 + }, + { + "epoch": 0.5527405729306275, + "grad_norm": 5.890834706447313, + "learning_rate": 4.938505748368193e-06, + "loss": 0.1958587646484375, + "step": 63925 + }, + { + "epoch": 0.5527838064521707, + "grad_norm": 20.928900618855973, + "learning_rate": 4.938350255820592e-06, + "loss": 0.12377548217773438, + "step": 63930 + }, + { + "epoch": 0.5528270399737141, + "grad_norm": 3.5344601051229048, + "learning_rate": 4.93819475433349e-06, + "loss": 0.26632843017578123, + "step": 63935 + }, + { + "epoch": 0.5528702734952573, + "grad_norm": 40.69401657529324, + "learning_rate": 4.938039243907606e-06, + "loss": 0.20706787109375, + "step": 63940 + }, + { + "epoch": 0.5529135070168005, + "grad_norm": 9.236393914590721, + "learning_rate": 4.937883724543656e-06, + "loss": 0.26380462646484376, + "step": 63945 + }, + { + "epoch": 0.5529567405383438, + "grad_norm": 24.93233039527676, + "learning_rate": 4.937728196242358e-06, + "loss": 0.21067962646484376, + "step": 63950 + }, + { + "epoch": 0.5529999740598871, + "grad_norm": 30.744890303250223, + "learning_rate": 4.9375726590044274e-06, + "loss": 0.1840179443359375, + "step": 63955 + }, + { + "epoch": 0.5530432075814303, + "grad_norm": 5.256422844708782, + "learning_rate": 4.937417112830584e-06, + "loss": 0.052679443359375, + "step": 63960 + }, + { + "epoch": 0.5530864411029736, + "grad_norm": 0.7624719886561172, + "learning_rate": 4.937261557721544e-06, + "loss": 0.11634979248046876, + "step": 63965 + }, + { + "epoch": 0.5531296746245169, + "grad_norm": 3.2980217068651183, + "learning_rate": 4.937105993678024e-06, + "loss": 0.145355224609375, + "step": 63970 + }, + { + "epoch": 0.5531729081460601, + "grad_norm": 2.382817165348892, + "learning_rate": 4.936950420700744e-06, + "loss": 0.05747299194335938, + "step": 63975 + }, + { + "epoch": 0.5532161416676034, + "grad_norm": 9.941040929889843, + "learning_rate": 4.936794838790419e-06, + "loss": 0.194866943359375, + "step": 63980 + }, + { + "epoch": 0.5532593751891467, + "grad_norm": 6.758349941608321, + "learning_rate": 4.936639247947766e-06, + "loss": 0.07703857421875, + "step": 63985 + }, + { + "epoch": 0.5533026087106899, + "grad_norm": 4.446979377313447, + "learning_rate": 4.9364836481735054e-06, + "loss": 0.066937255859375, + "step": 63990 + }, + { + "epoch": 0.5533458422322332, + "grad_norm": 18.146891663909166, + "learning_rate": 4.9363280394683536e-06, + "loss": 0.28404998779296875, + "step": 63995 + }, + { + "epoch": 0.5533890757537765, + "grad_norm": 18.296163690175522, + "learning_rate": 4.936172421833027e-06, + "loss": 0.06783733367919922, + "step": 64000 + }, + { + "epoch": 0.5534323092753197, + "grad_norm": 6.110610641542058, + "learning_rate": 4.936016795268246e-06, + "loss": 0.3171363830566406, + "step": 64005 + }, + { + "epoch": 0.553475542796863, + "grad_norm": 3.3358320834222375, + "learning_rate": 4.935861159774725e-06, + "loss": 0.11667633056640625, + "step": 64010 + }, + { + "epoch": 0.5535187763184063, + "grad_norm": 6.2066540829405, + "learning_rate": 4.935705515353183e-06, + "loss": 0.15128402709960936, + "step": 64015 + }, + { + "epoch": 0.5535620098399495, + "grad_norm": 2.105792323856726, + "learning_rate": 4.935549862004339e-06, + "loss": 0.7000968933105469, + "step": 64020 + }, + { + "epoch": 0.5536052433614927, + "grad_norm": 14.507273454949644, + "learning_rate": 4.93539419972891e-06, + "loss": 0.237060546875, + "step": 64025 + }, + { + "epoch": 0.553648476883036, + "grad_norm": 2.0277048706836176, + "learning_rate": 4.935238528527614e-06, + "loss": 0.02520599365234375, + "step": 64030 + }, + { + "epoch": 0.5536917104045793, + "grad_norm": 67.77672854991621, + "learning_rate": 4.935082848401169e-06, + "loss": 0.35106697082519533, + "step": 64035 + }, + { + "epoch": 0.5537349439261225, + "grad_norm": 2.5139052154573553, + "learning_rate": 4.934927159350292e-06, + "loss": 0.2444427490234375, + "step": 64040 + }, + { + "epoch": 0.5537781774476658, + "grad_norm": 15.336783008475393, + "learning_rate": 4.9347714613757035e-06, + "loss": 0.12431106567382813, + "step": 64045 + }, + { + "epoch": 0.5538214109692091, + "grad_norm": 20.498819544392695, + "learning_rate": 4.934615754478118e-06, + "loss": 0.21707763671875, + "step": 64050 + }, + { + "epoch": 0.5538646444907523, + "grad_norm": 7.048506828562369, + "learning_rate": 4.934460038658257e-06, + "loss": 0.396380615234375, + "step": 64055 + }, + { + "epoch": 0.5539078780122956, + "grad_norm": 38.58616292529129, + "learning_rate": 4.934304313916838e-06, + "loss": 0.2073577880859375, + "step": 64060 + }, + { + "epoch": 0.5539511115338389, + "grad_norm": 6.873947686310068, + "learning_rate": 4.934148580254577e-06, + "loss": 0.13981170654296876, + "step": 64065 + }, + { + "epoch": 0.5539943450553821, + "grad_norm": 3.194979367152315, + "learning_rate": 4.933992837672193e-06, + "loss": 0.12966766357421874, + "step": 64070 + }, + { + "epoch": 0.5540375785769254, + "grad_norm": 34.925159285552816, + "learning_rate": 4.933837086170407e-06, + "loss": 0.23170166015625, + "step": 64075 + }, + { + "epoch": 0.5540808120984687, + "grad_norm": 7.676256129710436, + "learning_rate": 4.933681325749933e-06, + "loss": 0.10100631713867188, + "step": 64080 + }, + { + "epoch": 0.5541240456200119, + "grad_norm": 2.0794474200782873, + "learning_rate": 4.933525556411493e-06, + "loss": 0.10705299377441406, + "step": 64085 + }, + { + "epoch": 0.5541672791415552, + "grad_norm": 3.4138505234631373, + "learning_rate": 4.9333697781558044e-06, + "loss": 0.03908729553222656, + "step": 64090 + }, + { + "epoch": 0.5542105126630985, + "grad_norm": 1.7604080541919365, + "learning_rate": 4.933213990983584e-06, + "loss": 0.12395858764648438, + "step": 64095 + }, + { + "epoch": 0.5542537461846417, + "grad_norm": 4.854071982413523, + "learning_rate": 4.933058194895552e-06, + "loss": 0.4145225524902344, + "step": 64100 + }, + { + "epoch": 0.554296979706185, + "grad_norm": 16.393350428832203, + "learning_rate": 4.932902389892427e-06, + "loss": 0.16568984985351562, + "step": 64105 + }, + { + "epoch": 0.5543402132277283, + "grad_norm": 8.01428080934992, + "learning_rate": 4.932746575974926e-06, + "loss": 0.2429412841796875, + "step": 64110 + }, + { + "epoch": 0.5543834467492715, + "grad_norm": 8.872677949695674, + "learning_rate": 4.932590753143769e-06, + "loss": 0.053537940979003905, + "step": 64115 + }, + { + "epoch": 0.5544266802708148, + "grad_norm": 17.39033484129503, + "learning_rate": 4.932434921399675e-06, + "loss": 0.1573486328125, + "step": 64120 + }, + { + "epoch": 0.554469913792358, + "grad_norm": 45.75689735217389, + "learning_rate": 4.932279080743361e-06, + "loss": 0.11469268798828125, + "step": 64125 + }, + { + "epoch": 0.5545131473139013, + "grad_norm": 20.871353427621383, + "learning_rate": 4.9321232311755474e-06, + "loss": 0.122906494140625, + "step": 64130 + }, + { + "epoch": 0.5545563808354446, + "grad_norm": 0.8579279131775625, + "learning_rate": 4.931967372696951e-06, + "loss": 0.11465301513671874, + "step": 64135 + }, + { + "epoch": 0.5545996143569878, + "grad_norm": 12.406435031491114, + "learning_rate": 4.931811505308292e-06, + "loss": 0.23943710327148438, + "step": 64140 + }, + { + "epoch": 0.5546428478785311, + "grad_norm": 2.301356710814878, + "learning_rate": 4.931655629010289e-06, + "loss": 0.056545448303222653, + "step": 64145 + }, + { + "epoch": 0.5546860814000744, + "grad_norm": 3.753128141229569, + "learning_rate": 4.931499743803662e-06, + "loss": 0.171337890625, + "step": 64150 + }, + { + "epoch": 0.5547293149216176, + "grad_norm": 0.1294315063029083, + "learning_rate": 4.931343849689128e-06, + "loss": 0.44498291015625, + "step": 64155 + }, + { + "epoch": 0.5547725484431609, + "grad_norm": 2.956180624390118, + "learning_rate": 4.9311879466674065e-06, + "loss": 0.03000640869140625, + "step": 64160 + }, + { + "epoch": 0.5548157819647042, + "grad_norm": 0.49377903608951373, + "learning_rate": 4.931032034739218e-06, + "loss": 0.08708953857421875, + "step": 64165 + }, + { + "epoch": 0.5548590154862474, + "grad_norm": 147.93019243127685, + "learning_rate": 4.930876113905279e-06, + "loss": 0.29728240966796876, + "step": 64170 + }, + { + "epoch": 0.5549022490077907, + "grad_norm": 0.5670919668140447, + "learning_rate": 4.93072018416631e-06, + "loss": 0.010934829711914062, + "step": 64175 + }, + { + "epoch": 0.554945482529334, + "grad_norm": 39.44717269681122, + "learning_rate": 4.9305642455230315e-06, + "loss": 0.131561279296875, + "step": 64180 + }, + { + "epoch": 0.5549887160508772, + "grad_norm": 21.919374221425034, + "learning_rate": 4.930408297976161e-06, + "loss": 0.36069526672363283, + "step": 64185 + }, + { + "epoch": 0.5550319495724205, + "grad_norm": 0.640779232096657, + "learning_rate": 4.930252341526417e-06, + "loss": 0.16117210388183595, + "step": 64190 + }, + { + "epoch": 0.5550751830939638, + "grad_norm": 0.696868035294177, + "learning_rate": 4.93009637617452e-06, + "loss": 0.0375244140625, + "step": 64195 + }, + { + "epoch": 0.555118416615507, + "grad_norm": 1.2522346502381414, + "learning_rate": 4.92994040192119e-06, + "loss": 0.2446308135986328, + "step": 64200 + }, + { + "epoch": 0.5551616501370502, + "grad_norm": 10.846781873401083, + "learning_rate": 4.929784418767145e-06, + "loss": 0.25307159423828124, + "step": 64205 + }, + { + "epoch": 0.5552048836585936, + "grad_norm": 0.35045918670836684, + "learning_rate": 4.929628426713104e-06, + "loss": 0.23640823364257812, + "step": 64210 + }, + { + "epoch": 0.5552481171801368, + "grad_norm": 1.4679911948609756, + "learning_rate": 4.929472425759788e-06, + "loss": 0.0466888427734375, + "step": 64215 + }, + { + "epoch": 0.55529135070168, + "grad_norm": 5.711846466164028, + "learning_rate": 4.9293164159079155e-06, + "loss": 0.048223876953125, + "step": 64220 + }, + { + "epoch": 0.5553345842232233, + "grad_norm": 5.269590029290739, + "learning_rate": 4.929160397158206e-06, + "loss": 0.1429290771484375, + "step": 64225 + }, + { + "epoch": 0.5553778177447666, + "grad_norm": 14.220751088355653, + "learning_rate": 4.929004369511379e-06, + "loss": 0.09864501953125, + "step": 64230 + }, + { + "epoch": 0.5554210512663098, + "grad_norm": 6.611764627540555, + "learning_rate": 4.928848332968155e-06, + "loss": 0.0929656982421875, + "step": 64235 + }, + { + "epoch": 0.5554642847878531, + "grad_norm": 18.626325152642675, + "learning_rate": 4.928692287529252e-06, + "loss": 0.16351356506347656, + "step": 64240 + }, + { + "epoch": 0.5555075183093964, + "grad_norm": 7.03149984397072, + "learning_rate": 4.928536233195391e-06, + "loss": 0.12046051025390625, + "step": 64245 + }, + { + "epoch": 0.5555507518309396, + "grad_norm": 0.08163197750529336, + "learning_rate": 4.928380169967292e-06, + "loss": 0.08212509155273437, + "step": 64250 + }, + { + "epoch": 0.5555939853524829, + "grad_norm": 10.799514596569514, + "learning_rate": 4.928224097845673e-06, + "loss": 0.442236328125, + "step": 64255 + }, + { + "epoch": 0.5556372188740262, + "grad_norm": 28.757925726923112, + "learning_rate": 4.928068016831256e-06, + "loss": 0.10123538970947266, + "step": 64260 + }, + { + "epoch": 0.5556804523955694, + "grad_norm": 5.613253188797798, + "learning_rate": 4.9279119269247585e-06, + "loss": 0.0802581787109375, + "step": 64265 + }, + { + "epoch": 0.5557236859171127, + "grad_norm": 1.1748026293178813, + "learning_rate": 4.9277558281269016e-06, + "loss": 0.1518157958984375, + "step": 64270 + }, + { + "epoch": 0.555766919438656, + "grad_norm": 7.58502952959344, + "learning_rate": 4.9275997204384055e-06, + "loss": 0.20487403869628906, + "step": 64275 + }, + { + "epoch": 0.5558101529601992, + "grad_norm": 0.19825528279384413, + "learning_rate": 4.92744360385999e-06, + "loss": 0.06813430786132812, + "step": 64280 + }, + { + "epoch": 0.5558533864817424, + "grad_norm": 0.42412027233671573, + "learning_rate": 4.927287478392375e-06, + "loss": 0.2793975830078125, + "step": 64285 + }, + { + "epoch": 0.5558966200032858, + "grad_norm": 0.9244281609983381, + "learning_rate": 4.92713134403628e-06, + "loss": 0.20822372436523437, + "step": 64290 + }, + { + "epoch": 0.555939853524829, + "grad_norm": 4.840767290641476, + "learning_rate": 4.926975200792426e-06, + "loss": 0.0297454833984375, + "step": 64295 + }, + { + "epoch": 0.5559830870463722, + "grad_norm": 25.218712832286542, + "learning_rate": 4.926819048661533e-06, + "loss": 0.2789794921875, + "step": 64300 + }, + { + "epoch": 0.5560263205679156, + "grad_norm": 2.518343290817628, + "learning_rate": 4.92666288764432e-06, + "loss": 0.15789260864257812, + "step": 64305 + }, + { + "epoch": 0.5560695540894588, + "grad_norm": 22.408631694736602, + "learning_rate": 4.926506717741508e-06, + "loss": 0.12556991577148438, + "step": 64310 + }, + { + "epoch": 0.556112787611002, + "grad_norm": 1.5756670112958426, + "learning_rate": 4.9263505389538174e-06, + "loss": 0.09760284423828125, + "step": 64315 + }, + { + "epoch": 0.5561560211325454, + "grad_norm": 0.24823281493900806, + "learning_rate": 4.926194351281968e-06, + "loss": 0.09253482818603516, + "step": 64320 + }, + { + "epoch": 0.5561992546540886, + "grad_norm": 14.188743968674359, + "learning_rate": 4.9260381547266815e-06, + "loss": 0.30385818481445315, + "step": 64325 + }, + { + "epoch": 0.5562424881756318, + "grad_norm": 6.306334771870855, + "learning_rate": 4.925881949288676e-06, + "loss": 0.1425994873046875, + "step": 64330 + }, + { + "epoch": 0.5562857216971752, + "grad_norm": 0.2788664015566465, + "learning_rate": 4.925725734968674e-06, + "loss": 0.14137115478515624, + "step": 64335 + }, + { + "epoch": 0.5563289552187184, + "grad_norm": 4.671271504199601, + "learning_rate": 4.925569511767395e-06, + "loss": 0.08037567138671875, + "step": 64340 + }, + { + "epoch": 0.5563721887402616, + "grad_norm": 0.9003387383241723, + "learning_rate": 4.925413279685559e-06, + "loss": 0.0423828125, + "step": 64345 + }, + { + "epoch": 0.556415422261805, + "grad_norm": 28.15166843122734, + "learning_rate": 4.925257038723887e-06, + "loss": 0.19541015625, + "step": 64350 + }, + { + "epoch": 0.5564586557833482, + "grad_norm": 5.845714093272009, + "learning_rate": 4.925100788883101e-06, + "loss": 0.15661582946777344, + "step": 64355 + }, + { + "epoch": 0.5565018893048914, + "grad_norm": 2.5028627093436895, + "learning_rate": 4.924944530163919e-06, + "loss": 0.2341278076171875, + "step": 64360 + }, + { + "epoch": 0.5565451228264348, + "grad_norm": 8.707951012668586, + "learning_rate": 4.924788262567062e-06, + "loss": 0.10905590057373046, + "step": 64365 + }, + { + "epoch": 0.556588356347978, + "grad_norm": 14.018566415055236, + "learning_rate": 4.924631986093254e-06, + "loss": 0.4510833740234375, + "step": 64370 + }, + { + "epoch": 0.5566315898695212, + "grad_norm": 7.886753180541905, + "learning_rate": 4.924475700743212e-06, + "loss": 0.18632354736328124, + "step": 64375 + }, + { + "epoch": 0.5566748233910644, + "grad_norm": 20.396954806615643, + "learning_rate": 4.924319406517659e-06, + "loss": 0.20503387451171876, + "step": 64380 + }, + { + "epoch": 0.5567180569126078, + "grad_norm": 12.76318268796108, + "learning_rate": 4.9241631034173135e-06, + "loss": 0.2260578155517578, + "step": 64385 + }, + { + "epoch": 0.556761290434151, + "grad_norm": 6.901045510081799, + "learning_rate": 4.924006791442899e-06, + "loss": 0.14240570068359376, + "step": 64390 + }, + { + "epoch": 0.5568045239556942, + "grad_norm": 1.7783112990036352, + "learning_rate": 4.923850470595134e-06, + "loss": 0.06625518798828126, + "step": 64395 + }, + { + "epoch": 0.5568477574772376, + "grad_norm": 3.4733228524679163, + "learning_rate": 4.923694140874741e-06, + "loss": 0.134979248046875, + "step": 64400 + }, + { + "epoch": 0.5568909909987808, + "grad_norm": 1.549436866482796, + "learning_rate": 4.9235378022824415e-06, + "loss": 0.16241455078125, + "step": 64405 + }, + { + "epoch": 0.556934224520324, + "grad_norm": 1.615592705225635, + "learning_rate": 4.9233814548189555e-06, + "loss": 0.10533676147460938, + "step": 64410 + }, + { + "epoch": 0.5569774580418674, + "grad_norm": 2.5011364563265768, + "learning_rate": 4.923225098485004e-06, + "loss": 0.1165863037109375, + "step": 64415 + }, + { + "epoch": 0.5570206915634106, + "grad_norm": 7.79653688836088, + "learning_rate": 4.923068733281308e-06, + "loss": 0.04331817626953125, + "step": 64420 + }, + { + "epoch": 0.5570639250849538, + "grad_norm": 12.21014481972256, + "learning_rate": 4.922912359208589e-06, + "loss": 0.13551025390625, + "step": 64425 + }, + { + "epoch": 0.5571071586064972, + "grad_norm": 9.272346179963044, + "learning_rate": 4.922755976267568e-06, + "loss": 0.12784271240234374, + "step": 64430 + }, + { + "epoch": 0.5571503921280404, + "grad_norm": 1.2205740651539394, + "learning_rate": 4.922599584458966e-06, + "loss": 0.04429779052734375, + "step": 64435 + }, + { + "epoch": 0.5571936256495836, + "grad_norm": 4.93743862160503, + "learning_rate": 4.922443183783506e-06, + "loss": 0.295343017578125, + "step": 64440 + }, + { + "epoch": 0.557236859171127, + "grad_norm": 40.238993919385464, + "learning_rate": 4.922286774241906e-06, + "loss": 0.336871337890625, + "step": 64445 + }, + { + "epoch": 0.5572800926926702, + "grad_norm": 5.4578279745747595, + "learning_rate": 4.92213035583489e-06, + "loss": 0.1510498046875, + "step": 64450 + }, + { + "epoch": 0.5573233262142134, + "grad_norm": 5.260166040458014, + "learning_rate": 4.921973928563179e-06, + "loss": 0.20890579223632813, + "step": 64455 + }, + { + "epoch": 0.5573665597357567, + "grad_norm": 5.99792893296789, + "learning_rate": 4.921817492427494e-06, + "loss": 0.14068527221679689, + "step": 64460 + }, + { + "epoch": 0.5574097932573, + "grad_norm": 3.1437137580842207, + "learning_rate": 4.921661047428556e-06, + "loss": 0.50489501953125, + "step": 64465 + }, + { + "epoch": 0.5574530267788432, + "grad_norm": 0.8770256385777191, + "learning_rate": 4.921504593567088e-06, + "loss": 0.07677268981933594, + "step": 64470 + }, + { + "epoch": 0.5574962603003865, + "grad_norm": 4.878732105692007, + "learning_rate": 4.9213481308438105e-06, + "loss": 0.15565872192382812, + "step": 64475 + }, + { + "epoch": 0.5575394938219298, + "grad_norm": 9.870506304233443, + "learning_rate": 4.9211916592594436e-06, + "loss": 0.04000091552734375, + "step": 64480 + }, + { + "epoch": 0.557582727343473, + "grad_norm": 3.5326485777558037, + "learning_rate": 4.921035178814712e-06, + "loss": 0.0588165283203125, + "step": 64485 + }, + { + "epoch": 0.5576259608650163, + "grad_norm": 13.692582374628842, + "learning_rate": 4.920878689510336e-06, + "loss": 0.1697509765625, + "step": 64490 + }, + { + "epoch": 0.5576691943865596, + "grad_norm": 0.49255156996525273, + "learning_rate": 4.920722191347036e-06, + "loss": 0.0357879638671875, + "step": 64495 + }, + { + "epoch": 0.5577124279081028, + "grad_norm": 36.11418866208971, + "learning_rate": 4.920565684325536e-06, + "loss": 0.358843994140625, + "step": 64500 + }, + { + "epoch": 0.557755661429646, + "grad_norm": 10.37626993898693, + "learning_rate": 4.920409168446557e-06, + "loss": 0.11043243408203125, + "step": 64505 + }, + { + "epoch": 0.5577988949511894, + "grad_norm": 1.8519700110685724, + "learning_rate": 4.9202526437108194e-06, + "loss": 0.0633636474609375, + "step": 64510 + }, + { + "epoch": 0.5578421284727326, + "grad_norm": 0.703147899164198, + "learning_rate": 4.920096110119048e-06, + "loss": 0.11213226318359375, + "step": 64515 + }, + { + "epoch": 0.5578853619942759, + "grad_norm": 4.724798372472479, + "learning_rate": 4.9199395676719616e-06, + "loss": 0.02426490783691406, + "step": 64520 + }, + { + "epoch": 0.5579285955158192, + "grad_norm": 2.7619008230333355, + "learning_rate": 4.919783016370284e-06, + "loss": 0.2710662841796875, + "step": 64525 + }, + { + "epoch": 0.5579718290373624, + "grad_norm": 24.768113491853043, + "learning_rate": 4.919626456214737e-06, + "loss": 0.2301025390625, + "step": 64530 + }, + { + "epoch": 0.5580150625589057, + "grad_norm": 4.311646640652846, + "learning_rate": 4.919469887206041e-06, + "loss": 0.1370098114013672, + "step": 64535 + }, + { + "epoch": 0.558058296080449, + "grad_norm": 0.5259803193025437, + "learning_rate": 4.9193133093449215e-06, + "loss": 0.14666061401367186, + "step": 64540 + }, + { + "epoch": 0.5581015296019922, + "grad_norm": 7.282578774225484, + "learning_rate": 4.919156722632098e-06, + "loss": 0.10312833786010742, + "step": 64545 + }, + { + "epoch": 0.5581447631235354, + "grad_norm": 11.509823697442858, + "learning_rate": 4.919000127068293e-06, + "loss": 0.2929370880126953, + "step": 64550 + }, + { + "epoch": 0.5581879966450787, + "grad_norm": 5.064927800065358, + "learning_rate": 4.918843522654229e-06, + "loss": 0.12768783569335937, + "step": 64555 + }, + { + "epoch": 0.558231230166622, + "grad_norm": 31.999717970599523, + "learning_rate": 4.918686909390628e-06, + "loss": 0.1400848388671875, + "step": 64560 + }, + { + "epoch": 0.5582744636881652, + "grad_norm": 0.7191299244804392, + "learning_rate": 4.918530287278214e-06, + "loss": 0.03527488708496094, + "step": 64565 + }, + { + "epoch": 0.5583176972097085, + "grad_norm": 5.218899869263055, + "learning_rate": 4.9183736563177055e-06, + "loss": 0.099755859375, + "step": 64570 + }, + { + "epoch": 0.5583609307312518, + "grad_norm": 16.79108538279851, + "learning_rate": 4.918217016509828e-06, + "loss": 0.32051239013671873, + "step": 64575 + }, + { + "epoch": 0.558404164252795, + "grad_norm": 4.873846569236935, + "learning_rate": 4.918060367855304e-06, + "loss": 0.046665191650390625, + "step": 64580 + }, + { + "epoch": 0.5584473977743383, + "grad_norm": 10.732624676671142, + "learning_rate": 4.917903710354854e-06, + "loss": 0.17674674987792968, + "step": 64585 + }, + { + "epoch": 0.5584906312958816, + "grad_norm": 3.8826927422183455, + "learning_rate": 4.917747044009202e-06, + "loss": 0.088311767578125, + "step": 64590 + }, + { + "epoch": 0.5585338648174248, + "grad_norm": 5.370266309133408, + "learning_rate": 4.917590368819071e-06, + "loss": 0.16388874053955077, + "step": 64595 + }, + { + "epoch": 0.5585770983389681, + "grad_norm": 13.528125236445067, + "learning_rate": 4.917433684785181e-06, + "loss": 0.21986083984375, + "step": 64600 + }, + { + "epoch": 0.5586203318605114, + "grad_norm": 10.855341540102106, + "learning_rate": 4.917276991908257e-06, + "loss": 0.02635345458984375, + "step": 64605 + }, + { + "epoch": 0.5586635653820546, + "grad_norm": 6.416954270057677, + "learning_rate": 4.917120290189021e-06, + "loss": 0.0838958740234375, + "step": 64610 + }, + { + "epoch": 0.5587067989035979, + "grad_norm": 2.5658951226272366, + "learning_rate": 4.916963579628196e-06, + "loss": 0.0888153076171875, + "step": 64615 + }, + { + "epoch": 0.5587500324251412, + "grad_norm": 41.47540464420656, + "learning_rate": 4.916806860226503e-06, + "loss": 0.1702972412109375, + "step": 64620 + }, + { + "epoch": 0.5587932659466844, + "grad_norm": 9.160592609556131, + "learning_rate": 4.916650131984668e-06, + "loss": 0.2317626953125, + "step": 64625 + }, + { + "epoch": 0.5588364994682277, + "grad_norm": 14.740252751133427, + "learning_rate": 4.9164933949034104e-06, + "loss": 0.1726715087890625, + "step": 64630 + }, + { + "epoch": 0.5588797329897709, + "grad_norm": 7.860668142982011, + "learning_rate": 4.916336648983456e-06, + "loss": 0.11837615966796874, + "step": 64635 + }, + { + "epoch": 0.5589229665113142, + "grad_norm": 5.884380198223803, + "learning_rate": 4.916179894225525e-06, + "loss": 0.06550788879394531, + "step": 64640 + }, + { + "epoch": 0.5589662000328575, + "grad_norm": 0.7892302616222288, + "learning_rate": 4.916023130630342e-06, + "loss": 0.12815704345703124, + "step": 64645 + }, + { + "epoch": 0.5590094335544007, + "grad_norm": 26.294632873036292, + "learning_rate": 4.91586635819863e-06, + "loss": 0.2384552001953125, + "step": 64650 + }, + { + "epoch": 0.559052667075944, + "grad_norm": 1.0384698855818981, + "learning_rate": 4.915709576931112e-06, + "loss": 0.30055809020996094, + "step": 64655 + }, + { + "epoch": 0.5590959005974873, + "grad_norm": 22.28880984074767, + "learning_rate": 4.91555278682851e-06, + "loss": 0.13173370361328124, + "step": 64660 + }, + { + "epoch": 0.5591391341190305, + "grad_norm": 49.445566739009664, + "learning_rate": 4.915395987891548e-06, + "loss": 0.104296875, + "step": 64665 + }, + { + "epoch": 0.5591823676405738, + "grad_norm": 5.801549652788616, + "learning_rate": 4.915239180120949e-06, + "loss": 0.4674842834472656, + "step": 64670 + }, + { + "epoch": 0.5592256011621171, + "grad_norm": 14.049487863765131, + "learning_rate": 4.915082363517436e-06, + "loss": 0.38155364990234375, + "step": 64675 + }, + { + "epoch": 0.5592688346836603, + "grad_norm": 3.2669377807926625, + "learning_rate": 4.914925538081732e-06, + "loss": 0.0567352294921875, + "step": 64680 + }, + { + "epoch": 0.5593120682052036, + "grad_norm": 4.7442096238145, + "learning_rate": 4.9147687038145615e-06, + "loss": 0.09154815673828125, + "step": 64685 + }, + { + "epoch": 0.5593553017267469, + "grad_norm": 1.5437206446323384, + "learning_rate": 4.9146118607166455e-06, + "loss": 0.21395111083984375, + "step": 64690 + }, + { + "epoch": 0.5593985352482901, + "grad_norm": 0.9038638994162909, + "learning_rate": 4.914455008788711e-06, + "loss": 0.15476417541503906, + "step": 64695 + }, + { + "epoch": 0.5594417687698334, + "grad_norm": 25.683631602201274, + "learning_rate": 4.914298148031478e-06, + "loss": 0.25954132080078124, + "step": 64700 + }, + { + "epoch": 0.5594850022913767, + "grad_norm": 12.704070300697621, + "learning_rate": 4.91414127844567e-06, + "loss": 0.12939224243164063, + "step": 64705 + }, + { + "epoch": 0.5595282358129199, + "grad_norm": 11.423033476528644, + "learning_rate": 4.913984400032013e-06, + "loss": 0.3798736572265625, + "step": 64710 + }, + { + "epoch": 0.5595714693344632, + "grad_norm": 5.048588269965394, + "learning_rate": 4.913827512791229e-06, + "loss": 0.02824859619140625, + "step": 64715 + }, + { + "epoch": 0.5596147028560065, + "grad_norm": 12.706501292787703, + "learning_rate": 4.913670616724041e-06, + "loss": 0.036777496337890625, + "step": 64720 + }, + { + "epoch": 0.5596579363775497, + "grad_norm": 7.754624984285275, + "learning_rate": 4.913513711831174e-06, + "loss": 0.10718231201171875, + "step": 64725 + }, + { + "epoch": 0.5597011698990929, + "grad_norm": 2.43168970691101, + "learning_rate": 4.91335679811335e-06, + "loss": 0.30088958740234373, + "step": 64730 + }, + { + "epoch": 0.5597444034206362, + "grad_norm": 5.8722248011890885, + "learning_rate": 4.9131998755712935e-06, + "loss": 0.16454925537109374, + "step": 64735 + }, + { + "epoch": 0.5597876369421795, + "grad_norm": 2.2283867474328716, + "learning_rate": 4.913042944205728e-06, + "loss": 0.06299705505371093, + "step": 64740 + }, + { + "epoch": 0.5598308704637227, + "grad_norm": 1.2140271173598147, + "learning_rate": 4.912886004017378e-06, + "loss": 0.07529335021972657, + "step": 64745 + }, + { + "epoch": 0.559874103985266, + "grad_norm": 1.052640941474728, + "learning_rate": 4.912729055006967e-06, + "loss": 0.0468292236328125, + "step": 64750 + }, + { + "epoch": 0.5599173375068093, + "grad_norm": 4.489833934524629, + "learning_rate": 4.912572097175219e-06, + "loss": 0.30818023681640627, + "step": 64755 + }, + { + "epoch": 0.5599605710283525, + "grad_norm": 0.6743971423183068, + "learning_rate": 4.9124151305228576e-06, + "loss": 0.0958404541015625, + "step": 64760 + }, + { + "epoch": 0.5600038045498958, + "grad_norm": 0.31749982761040185, + "learning_rate": 4.9122581550506055e-06, + "loss": 0.14597625732421876, + "step": 64765 + }, + { + "epoch": 0.5600470380714391, + "grad_norm": 0.24804884947747116, + "learning_rate": 4.912101170759188e-06, + "loss": 0.18132858276367186, + "step": 64770 + }, + { + "epoch": 0.5600902715929823, + "grad_norm": 7.192879648976281, + "learning_rate": 4.911944177649329e-06, + "loss": 0.2100677490234375, + "step": 64775 + }, + { + "epoch": 0.5601335051145256, + "grad_norm": 1.1789109771791901, + "learning_rate": 4.911787175721752e-06, + "loss": 0.0942901611328125, + "step": 64780 + }, + { + "epoch": 0.5601767386360689, + "grad_norm": 0.47854177298109646, + "learning_rate": 4.9116301649771815e-06, + "loss": 0.201885986328125, + "step": 64785 + }, + { + "epoch": 0.5602199721576121, + "grad_norm": 14.493528917539901, + "learning_rate": 4.911473145416343e-06, + "loss": 0.18787155151367188, + "step": 64790 + }, + { + "epoch": 0.5602632056791554, + "grad_norm": 25.568315055273615, + "learning_rate": 4.911316117039958e-06, + "loss": 0.526837158203125, + "step": 64795 + }, + { + "epoch": 0.5603064392006987, + "grad_norm": 0.4005421727036246, + "learning_rate": 4.911159079848752e-06, + "loss": 0.13326568603515626, + "step": 64800 + }, + { + "epoch": 0.5603496727222419, + "grad_norm": 3.7178136530168264, + "learning_rate": 4.91100203384345e-06, + "loss": 0.04969215393066406, + "step": 64805 + }, + { + "epoch": 0.5603929062437851, + "grad_norm": 23.797828584160147, + "learning_rate": 4.910844979024774e-06, + "loss": 0.31846160888671876, + "step": 64810 + }, + { + "epoch": 0.5604361397653285, + "grad_norm": 0.8479882325049576, + "learning_rate": 4.910687915393451e-06, + "loss": 0.1334247589111328, + "step": 64815 + }, + { + "epoch": 0.5604793732868717, + "grad_norm": 1.0598993945320159, + "learning_rate": 4.910530842950204e-06, + "loss": 0.1898040771484375, + "step": 64820 + }, + { + "epoch": 0.5605226068084149, + "grad_norm": 0.2947698795029786, + "learning_rate": 4.9103737616957576e-06, + "loss": 0.08523101806640625, + "step": 64825 + }, + { + "epoch": 0.5605658403299583, + "grad_norm": 0.37400470363673405, + "learning_rate": 4.9102166716308354e-06, + "loss": 0.06418590545654297, + "step": 64830 + }, + { + "epoch": 0.5606090738515015, + "grad_norm": 14.388573481614436, + "learning_rate": 4.910059572756163e-06, + "loss": 0.169384765625, + "step": 64835 + }, + { + "epoch": 0.5606523073730447, + "grad_norm": 30.714183077538955, + "learning_rate": 4.9099024650724655e-06, + "loss": 0.067230224609375, + "step": 64840 + }, + { + "epoch": 0.5606955408945881, + "grad_norm": 0.09261983418702734, + "learning_rate": 4.909745348580466e-06, + "loss": 0.16261138916015624, + "step": 64845 + }, + { + "epoch": 0.5607387744161313, + "grad_norm": 8.535537769690235, + "learning_rate": 4.909588223280889e-06, + "loss": 0.1169952392578125, + "step": 64850 + }, + { + "epoch": 0.5607820079376745, + "grad_norm": 1.9407923125133222, + "learning_rate": 4.909431089174462e-06, + "loss": 0.3448169708251953, + "step": 64855 + }, + { + "epoch": 0.5608252414592179, + "grad_norm": 4.755103760576633, + "learning_rate": 4.909273946261905e-06, + "loss": 0.24154434204101563, + "step": 64860 + }, + { + "epoch": 0.5608684749807611, + "grad_norm": 3.1897134810515, + "learning_rate": 4.9091167945439475e-06, + "loss": 0.233221435546875, + "step": 64865 + }, + { + "epoch": 0.5609117085023043, + "grad_norm": 0.7499869056349351, + "learning_rate": 4.90895963402131e-06, + "loss": 0.1243438720703125, + "step": 64870 + }, + { + "epoch": 0.5609549420238477, + "grad_norm": 0.21404206181244403, + "learning_rate": 4.908802464694721e-06, + "loss": 0.042928314208984374, + "step": 64875 + }, + { + "epoch": 0.5609981755453909, + "grad_norm": 44.47125140731723, + "learning_rate": 4.908645286564902e-06, + "loss": 0.17690620422363282, + "step": 64880 + }, + { + "epoch": 0.5610414090669341, + "grad_norm": 26.64898274687512, + "learning_rate": 4.908488099632581e-06, + "loss": 0.229974365234375, + "step": 64885 + }, + { + "epoch": 0.5610846425884775, + "grad_norm": 20.75557206742936, + "learning_rate": 4.908330903898482e-06, + "loss": 0.165313720703125, + "step": 64890 + }, + { + "epoch": 0.5611278761100207, + "grad_norm": 0.6406266164167935, + "learning_rate": 4.908173699363328e-06, + "loss": 0.09054031372070312, + "step": 64895 + }, + { + "epoch": 0.5611711096315639, + "grad_norm": 0.7722413615329127, + "learning_rate": 4.908016486027846e-06, + "loss": 0.06290817260742188, + "step": 64900 + }, + { + "epoch": 0.5612143431531071, + "grad_norm": 5.08899456914983, + "learning_rate": 4.90785926389276e-06, + "loss": 0.13249244689941406, + "step": 64905 + }, + { + "epoch": 0.5612575766746505, + "grad_norm": 15.242114319785053, + "learning_rate": 4.9077020329587975e-06, + "loss": 0.18124542236328126, + "step": 64910 + }, + { + "epoch": 0.5613008101961937, + "grad_norm": 1.119687396080866, + "learning_rate": 4.90754479322668e-06, + "loss": 0.0504241943359375, + "step": 64915 + }, + { + "epoch": 0.5613440437177369, + "grad_norm": 9.128844764651433, + "learning_rate": 4.907387544697136e-06, + "loss": 0.0566925048828125, + "step": 64920 + }, + { + "epoch": 0.5613872772392803, + "grad_norm": 20.722235667314507, + "learning_rate": 4.907230287370887e-06, + "loss": 0.1716156005859375, + "step": 64925 + }, + { + "epoch": 0.5614305107608235, + "grad_norm": 4.263905764167684, + "learning_rate": 4.907073021248662e-06, + "loss": 0.07347984313964843, + "step": 64930 + }, + { + "epoch": 0.5614737442823667, + "grad_norm": 8.64656133983265, + "learning_rate": 4.9069157463311845e-06, + "loss": 0.08393707275390624, + "step": 64935 + }, + { + "epoch": 0.5615169778039101, + "grad_norm": 19.18237928074101, + "learning_rate": 4.906758462619179e-06, + "loss": 0.1560612678527832, + "step": 64940 + }, + { + "epoch": 0.5615602113254533, + "grad_norm": 3.671984520616383, + "learning_rate": 4.906601170113373e-06, + "loss": 0.11699676513671875, + "step": 64945 + }, + { + "epoch": 0.5616034448469965, + "grad_norm": 12.585116255586318, + "learning_rate": 4.906443868814491e-06, + "loss": 0.10054550170898438, + "step": 64950 + }, + { + "epoch": 0.5616466783685399, + "grad_norm": 2.8736726231940213, + "learning_rate": 4.906286558723258e-06, + "loss": 0.051602935791015624, + "step": 64955 + }, + { + "epoch": 0.5616899118900831, + "grad_norm": 11.912782499118968, + "learning_rate": 4.906129239840401e-06, + "loss": 0.20882339477539064, + "step": 64960 + }, + { + "epoch": 0.5617331454116263, + "grad_norm": 52.00016392606325, + "learning_rate": 4.905971912166643e-06, + "loss": 0.3606201171875, + "step": 64965 + }, + { + "epoch": 0.5617763789331697, + "grad_norm": 74.46875426429017, + "learning_rate": 4.905814575702711e-06, + "loss": 0.32291336059570314, + "step": 64970 + }, + { + "epoch": 0.5618196124547129, + "grad_norm": 0.16375920151714263, + "learning_rate": 4.905657230449331e-06, + "loss": 0.060174560546875, + "step": 64975 + }, + { + "epoch": 0.5618628459762561, + "grad_norm": 7.845312790500728, + "learning_rate": 4.905499876407228e-06, + "loss": 0.09848613739013672, + "step": 64980 + }, + { + "epoch": 0.5619060794977994, + "grad_norm": 1.6499504904936126, + "learning_rate": 4.905342513577128e-06, + "loss": 0.3321226119995117, + "step": 64985 + }, + { + "epoch": 0.5619493130193427, + "grad_norm": 3.7797583144135216, + "learning_rate": 4.905185141959757e-06, + "loss": 0.30327301025390624, + "step": 64990 + }, + { + "epoch": 0.5619925465408859, + "grad_norm": 4.445143816718667, + "learning_rate": 4.905027761555841e-06, + "loss": 0.2752349853515625, + "step": 64995 + }, + { + "epoch": 0.5620357800624292, + "grad_norm": 30.44504209235308, + "learning_rate": 4.904870372366104e-06, + "loss": 0.26395187377929685, + "step": 65000 + }, + { + "epoch": 0.5620790135839725, + "grad_norm": 1.1689877862069264, + "learning_rate": 4.904712974391274e-06, + "loss": 0.05289878845214844, + "step": 65005 + }, + { + "epoch": 0.5621222471055157, + "grad_norm": 1.6020619674692453, + "learning_rate": 4.904555567632075e-06, + "loss": 0.1529693603515625, + "step": 65010 + }, + { + "epoch": 0.562165480627059, + "grad_norm": 0.7021000562083642, + "learning_rate": 4.904398152089235e-06, + "loss": 0.11346817016601562, + "step": 65015 + }, + { + "epoch": 0.5622087141486023, + "grad_norm": 3.4898369209183153, + "learning_rate": 4.904240727763479e-06, + "loss": 0.059326171875, + "step": 65020 + }, + { + "epoch": 0.5622519476701455, + "grad_norm": 12.794748588651608, + "learning_rate": 4.904083294655532e-06, + "loss": 0.14284820556640626, + "step": 65025 + }, + { + "epoch": 0.5622951811916888, + "grad_norm": 12.730408863670114, + "learning_rate": 4.903925852766123e-06, + "loss": 0.20186920166015626, + "step": 65030 + }, + { + "epoch": 0.5623384147132321, + "grad_norm": 31.775513960698806, + "learning_rate": 4.903768402095975e-06, + "loss": 0.09557723999023438, + "step": 65035 + }, + { + "epoch": 0.5623816482347753, + "grad_norm": 6.712814306258559, + "learning_rate": 4.903610942645814e-06, + "loss": 0.30420379638671874, + "step": 65040 + }, + { + "epoch": 0.5624248817563186, + "grad_norm": 11.722760377309767, + "learning_rate": 4.90345347441637e-06, + "loss": 0.17526702880859374, + "step": 65045 + }, + { + "epoch": 0.5624681152778619, + "grad_norm": 8.593165703048141, + "learning_rate": 4.9032959974083655e-06, + "loss": 0.182763671875, + "step": 65050 + }, + { + "epoch": 0.5625113487994051, + "grad_norm": 19.562762650292665, + "learning_rate": 4.903138511622529e-06, + "loss": 0.13987159729003906, + "step": 65055 + }, + { + "epoch": 0.5625545823209483, + "grad_norm": 6.840107570993744, + "learning_rate": 4.902981017059584e-06, + "loss": 0.1765056610107422, + "step": 65060 + }, + { + "epoch": 0.5625978158424917, + "grad_norm": 1.2479945936857144, + "learning_rate": 4.90282351372026e-06, + "loss": 0.063629150390625, + "step": 65065 + }, + { + "epoch": 0.5626410493640349, + "grad_norm": 13.582534668587623, + "learning_rate": 4.9026660016052824e-06, + "loss": 0.2254425048828125, + "step": 65070 + }, + { + "epoch": 0.5626842828855781, + "grad_norm": 0.4007205060318491, + "learning_rate": 4.902508480715377e-06, + "loss": 0.029329299926757812, + "step": 65075 + }, + { + "epoch": 0.5627275164071214, + "grad_norm": 19.216635083411198, + "learning_rate": 4.90235095105127e-06, + "loss": 0.4155517578125, + "step": 65080 + }, + { + "epoch": 0.5627707499286647, + "grad_norm": 1.6351181178302319, + "learning_rate": 4.902193412613689e-06, + "loss": 0.10004768371582032, + "step": 65085 + }, + { + "epoch": 0.5628139834502079, + "grad_norm": 1.9317993683150916, + "learning_rate": 4.90203586540336e-06, + "loss": 0.523126220703125, + "step": 65090 + }, + { + "epoch": 0.5628572169717512, + "grad_norm": 20.816687448395157, + "learning_rate": 4.90187830942101e-06, + "loss": 0.10073394775390625, + "step": 65095 + }, + { + "epoch": 0.5629004504932945, + "grad_norm": 3.9871977609644413, + "learning_rate": 4.9017207446673644e-06, + "loss": 0.20686492919921876, + "step": 65100 + }, + { + "epoch": 0.5629436840148377, + "grad_norm": 24.334899399962275, + "learning_rate": 4.901563171143152e-06, + "loss": 0.2813507080078125, + "step": 65105 + }, + { + "epoch": 0.562986917536381, + "grad_norm": 5.5670888532540665, + "learning_rate": 4.901405588849097e-06, + "loss": 0.1900177001953125, + "step": 65110 + }, + { + "epoch": 0.5630301510579243, + "grad_norm": 2.595485586351629, + "learning_rate": 4.901247997785928e-06, + "loss": 0.0992767333984375, + "step": 65115 + }, + { + "epoch": 0.5630733845794675, + "grad_norm": 12.69826806302756, + "learning_rate": 4.901090397954372e-06, + "loss": 0.12848358154296874, + "step": 65120 + }, + { + "epoch": 0.5631166181010108, + "grad_norm": 25.221158520773663, + "learning_rate": 4.9009327893551535e-06, + "loss": 0.0678131103515625, + "step": 65125 + }, + { + "epoch": 0.5631598516225541, + "grad_norm": 1.3456482111086836, + "learning_rate": 4.900775171989002e-06, + "loss": 0.20782012939453126, + "step": 65130 + }, + { + "epoch": 0.5632030851440973, + "grad_norm": 1.0780596955669428, + "learning_rate": 4.900617545856644e-06, + "loss": 0.180230712890625, + "step": 65135 + }, + { + "epoch": 0.5632463186656406, + "grad_norm": 43.48433318296306, + "learning_rate": 4.900459910958804e-06, + "loss": 0.3644195556640625, + "step": 65140 + }, + { + "epoch": 0.5632895521871839, + "grad_norm": 0.6300274495648889, + "learning_rate": 4.900302267296212e-06, + "loss": 0.024904251098632812, + "step": 65145 + }, + { + "epoch": 0.5633327857087271, + "grad_norm": 5.660897144132779, + "learning_rate": 4.900144614869593e-06, + "loss": 0.059869384765625, + "step": 65150 + }, + { + "epoch": 0.5633760192302704, + "grad_norm": 36.540181141528066, + "learning_rate": 4.899986953679676e-06, + "loss": 0.08835296630859375, + "step": 65155 + }, + { + "epoch": 0.5634192527518136, + "grad_norm": 1.1278350473678111, + "learning_rate": 4.899829283727187e-06, + "loss": 0.3002288818359375, + "step": 65160 + }, + { + "epoch": 0.5634624862733569, + "grad_norm": 33.74230553927536, + "learning_rate": 4.899671605012852e-06, + "loss": 0.2177276611328125, + "step": 65165 + }, + { + "epoch": 0.5635057197949002, + "grad_norm": 0.791300001488574, + "learning_rate": 4.899513917537401e-06, + "loss": 0.18719406127929689, + "step": 65170 + }, + { + "epoch": 0.5635489533164434, + "grad_norm": 0.1385600049805472, + "learning_rate": 4.8993562213015585e-06, + "loss": 0.05164413452148438, + "step": 65175 + }, + { + "epoch": 0.5635921868379867, + "grad_norm": 5.4636670708327735, + "learning_rate": 4.899198516306053e-06, + "loss": 0.48396453857421873, + "step": 65180 + }, + { + "epoch": 0.56363542035953, + "grad_norm": 1.0360998722406494, + "learning_rate": 4.899040802551612e-06, + "loss": 0.439306640625, + "step": 65185 + }, + { + "epoch": 0.5636786538810732, + "grad_norm": 0.11877576921563492, + "learning_rate": 4.898883080038963e-06, + "loss": 0.2372974395751953, + "step": 65190 + }, + { + "epoch": 0.5637218874026165, + "grad_norm": 30.84107842208073, + "learning_rate": 4.898725348768833e-06, + "loss": 0.5297821044921875, + "step": 65195 + }, + { + "epoch": 0.5637651209241598, + "grad_norm": 4.021436676387727, + "learning_rate": 4.898567608741949e-06, + "loss": 0.15638961791992187, + "step": 65200 + }, + { + "epoch": 0.563808354445703, + "grad_norm": 5.948736134746515, + "learning_rate": 4.8984098599590385e-06, + "loss": 0.08878555297851562, + "step": 65205 + }, + { + "epoch": 0.5638515879672463, + "grad_norm": 37.28343424722485, + "learning_rate": 4.89825210242083e-06, + "loss": 0.262738037109375, + "step": 65210 + }, + { + "epoch": 0.5638948214887896, + "grad_norm": 1.9516856742463555, + "learning_rate": 4.8980943361280514e-06, + "loss": 0.05612678527832031, + "step": 65215 + }, + { + "epoch": 0.5639380550103328, + "grad_norm": 3.9549836253506334, + "learning_rate": 4.897936561081429e-06, + "loss": 0.1465576171875, + "step": 65220 + }, + { + "epoch": 0.5639812885318761, + "grad_norm": 35.63527846061976, + "learning_rate": 4.89777877728169e-06, + "loss": 0.13986778259277344, + "step": 65225 + }, + { + "epoch": 0.5640245220534194, + "grad_norm": 2.1433665345138997, + "learning_rate": 4.897620984729563e-06, + "loss": 0.1541778564453125, + "step": 65230 + }, + { + "epoch": 0.5640677555749626, + "grad_norm": 20.015980573971586, + "learning_rate": 4.897463183425777e-06, + "loss": 0.3974945068359375, + "step": 65235 + }, + { + "epoch": 0.5641109890965059, + "grad_norm": 63.01540938732532, + "learning_rate": 4.897305373371057e-06, + "loss": 0.33819122314453126, + "step": 65240 + }, + { + "epoch": 0.5641542226180492, + "grad_norm": 20.010280966165475, + "learning_rate": 4.8971475545661335e-06, + "loss": 0.30675811767578126, + "step": 65245 + }, + { + "epoch": 0.5641974561395924, + "grad_norm": 14.782583582871407, + "learning_rate": 4.896989727011733e-06, + "loss": 0.15394363403320313, + "step": 65250 + }, + { + "epoch": 0.5642406896611356, + "grad_norm": 0.2631253147022526, + "learning_rate": 4.896831890708583e-06, + "loss": 0.17311019897460939, + "step": 65255 + }, + { + "epoch": 0.564283923182679, + "grad_norm": 0.3679858563014709, + "learning_rate": 4.896674045657412e-06, + "loss": 0.17109375, + "step": 65260 + }, + { + "epoch": 0.5643271567042222, + "grad_norm": 0.7936314063684343, + "learning_rate": 4.896516191858948e-06, + "loss": 0.13899917602539064, + "step": 65265 + }, + { + "epoch": 0.5643703902257654, + "grad_norm": 3.7389525999781035, + "learning_rate": 4.896358329313919e-06, + "loss": 0.26709136962890623, + "step": 65270 + }, + { + "epoch": 0.5644136237473087, + "grad_norm": 0.08906093191377683, + "learning_rate": 4.8962004580230516e-06, + "loss": 0.14929542541503907, + "step": 65275 + }, + { + "epoch": 0.564456857268852, + "grad_norm": 1.9382455401169414, + "learning_rate": 4.896042577987077e-06, + "loss": 0.228118896484375, + "step": 65280 + }, + { + "epoch": 0.5645000907903952, + "grad_norm": 17.33006823099776, + "learning_rate": 4.895884689206722e-06, + "loss": 0.16098442077636718, + "step": 65285 + }, + { + "epoch": 0.5645433243119385, + "grad_norm": 17.0489929508274, + "learning_rate": 4.895726791682712e-06, + "loss": 0.1827850341796875, + "step": 65290 + }, + { + "epoch": 0.5645865578334818, + "grad_norm": 7.339063426610509, + "learning_rate": 4.8955688854157795e-06, + "loss": 0.4391048431396484, + "step": 65295 + }, + { + "epoch": 0.564629791355025, + "grad_norm": 0.4835587262448179, + "learning_rate": 4.895410970406651e-06, + "loss": 0.02783355712890625, + "step": 65300 + }, + { + "epoch": 0.5646730248765683, + "grad_norm": 13.696841529337153, + "learning_rate": 4.895253046656052e-06, + "loss": 0.324371337890625, + "step": 65305 + }, + { + "epoch": 0.5647162583981116, + "grad_norm": 6.278394564521722, + "learning_rate": 4.8950951141647155e-06, + "loss": 0.2373382568359375, + "step": 65310 + }, + { + "epoch": 0.5647594919196548, + "grad_norm": 23.475424674244252, + "learning_rate": 4.894937172933368e-06, + "loss": 0.09224090576171876, + "step": 65315 + }, + { + "epoch": 0.5648027254411981, + "grad_norm": 0.10112710236452278, + "learning_rate": 4.894779222962736e-06, + "loss": 0.060749053955078125, + "step": 65320 + }, + { + "epoch": 0.5648459589627414, + "grad_norm": 0.5507085516087162, + "learning_rate": 4.894621264253552e-06, + "loss": 0.15070343017578125, + "step": 65325 + }, + { + "epoch": 0.5648891924842846, + "grad_norm": 16.147766514822692, + "learning_rate": 4.894463296806541e-06, + "loss": 0.46512374877929685, + "step": 65330 + }, + { + "epoch": 0.5649324260058278, + "grad_norm": 3.1383811216620985, + "learning_rate": 4.894305320622432e-06, + "loss": 0.39209728240966796, + "step": 65335 + }, + { + "epoch": 0.5649756595273712, + "grad_norm": 3.673261659534606, + "learning_rate": 4.8941473357019546e-06, + "loss": 0.226300048828125, + "step": 65340 + }, + { + "epoch": 0.5650188930489144, + "grad_norm": 49.95404656439289, + "learning_rate": 4.893989342045838e-06, + "loss": 0.2024566650390625, + "step": 65345 + }, + { + "epoch": 0.5650621265704576, + "grad_norm": 1.7563964405905177, + "learning_rate": 4.89383133965481e-06, + "loss": 0.14505615234375, + "step": 65350 + }, + { + "epoch": 0.565105360092001, + "grad_norm": 0.7718491791178953, + "learning_rate": 4.893673328529597e-06, + "loss": 0.58919677734375, + "step": 65355 + }, + { + "epoch": 0.5651485936135442, + "grad_norm": 28.05439820539546, + "learning_rate": 4.893515308670931e-06, + "loss": 0.11379852294921874, + "step": 65360 + }, + { + "epoch": 0.5651918271350874, + "grad_norm": 3.4861006326994004, + "learning_rate": 4.893357280079541e-06, + "loss": 0.06575927734375, + "step": 65365 + }, + { + "epoch": 0.5652350606566308, + "grad_norm": 0.8167253367765609, + "learning_rate": 4.8931992427561535e-06, + "loss": 0.3474334716796875, + "step": 65370 + }, + { + "epoch": 0.565278294178174, + "grad_norm": 32.847965267736754, + "learning_rate": 4.893041196701498e-06, + "loss": 0.416021728515625, + "step": 65375 + }, + { + "epoch": 0.5653215276997172, + "grad_norm": 27.768928057550056, + "learning_rate": 4.892883141916304e-06, + "loss": 0.39298553466796876, + "step": 65380 + }, + { + "epoch": 0.5653647612212606, + "grad_norm": 13.421925751987677, + "learning_rate": 4.892725078401301e-06, + "loss": 0.1255462646484375, + "step": 65385 + }, + { + "epoch": 0.5654079947428038, + "grad_norm": 1.7549928704998976, + "learning_rate": 4.8925670061572164e-06, + "loss": 0.07931747436523437, + "step": 65390 + }, + { + "epoch": 0.565451228264347, + "grad_norm": 0.2169849082951757, + "learning_rate": 4.89240892518478e-06, + "loss": 0.20577621459960938, + "step": 65395 + }, + { + "epoch": 0.5654944617858904, + "grad_norm": 5.420870936313204, + "learning_rate": 4.892250835484722e-06, + "loss": 0.08486289978027343, + "step": 65400 + }, + { + "epoch": 0.5655376953074336, + "grad_norm": 4.125114606853287, + "learning_rate": 4.892092737057769e-06, + "loss": 0.09609375, + "step": 65405 + }, + { + "epoch": 0.5655809288289768, + "grad_norm": 14.884182049440183, + "learning_rate": 4.891934629904652e-06, + "loss": 0.14915695190429687, + "step": 65410 + }, + { + "epoch": 0.5656241623505202, + "grad_norm": 20.8553041884599, + "learning_rate": 4.891776514026099e-06, + "loss": 0.35513916015625, + "step": 65415 + }, + { + "epoch": 0.5656673958720634, + "grad_norm": 0.750896070146707, + "learning_rate": 4.89161838942284e-06, + "loss": 0.03958206176757813, + "step": 65420 + }, + { + "epoch": 0.5657106293936066, + "grad_norm": 24.316910775774392, + "learning_rate": 4.891460256095605e-06, + "loss": 0.12277679443359375, + "step": 65425 + }, + { + "epoch": 0.5657538629151498, + "grad_norm": 5.126506678267677, + "learning_rate": 4.891302114045122e-06, + "loss": 0.10013446807861329, + "step": 65430 + }, + { + "epoch": 0.5657970964366932, + "grad_norm": 0.3115423729039779, + "learning_rate": 4.89114396327212e-06, + "loss": 0.20799560546875, + "step": 65435 + }, + { + "epoch": 0.5658403299582364, + "grad_norm": 0.6358011224144735, + "learning_rate": 4.890985803777331e-06, + "loss": 0.13975372314453124, + "step": 65440 + }, + { + "epoch": 0.5658835634797796, + "grad_norm": 0.16667935797373654, + "learning_rate": 4.8908276355614814e-06, + "loss": 0.046558380126953125, + "step": 65445 + }, + { + "epoch": 0.565926797001323, + "grad_norm": 26.10148565346131, + "learning_rate": 4.890669458625302e-06, + "loss": 0.428271484375, + "step": 65450 + }, + { + "epoch": 0.5659700305228662, + "grad_norm": 7.858499857338581, + "learning_rate": 4.890511272969523e-06, + "loss": 0.06304855346679687, + "step": 65455 + }, + { + "epoch": 0.5660132640444094, + "grad_norm": 3.929803010316092, + "learning_rate": 4.8903530785948704e-06, + "loss": 0.06257972717285157, + "step": 65460 + }, + { + "epoch": 0.5660564975659528, + "grad_norm": 51.4752755803223, + "learning_rate": 4.890194875502078e-06, + "loss": 0.47146759033203123, + "step": 65465 + }, + { + "epoch": 0.566099731087496, + "grad_norm": 2.4061758889776423, + "learning_rate": 4.890036663691875e-06, + "loss": 0.3352535247802734, + "step": 65470 + }, + { + "epoch": 0.5661429646090392, + "grad_norm": 35.09528587592999, + "learning_rate": 4.889878443164989e-06, + "loss": 0.30382537841796875, + "step": 65475 + }, + { + "epoch": 0.5661861981305826, + "grad_norm": 0.7407204555596795, + "learning_rate": 4.88972021392215e-06, + "loss": 0.06801605224609375, + "step": 65480 + }, + { + "epoch": 0.5662294316521258, + "grad_norm": 15.412695754537738, + "learning_rate": 4.889561975964088e-06, + "loss": 0.2881317138671875, + "step": 65485 + }, + { + "epoch": 0.566272665173669, + "grad_norm": 14.908542864303755, + "learning_rate": 4.889403729291535e-06, + "loss": 0.29915542602539064, + "step": 65490 + }, + { + "epoch": 0.5663158986952124, + "grad_norm": 0.4712448308330638, + "learning_rate": 4.889245473905217e-06, + "loss": 0.1977874755859375, + "step": 65495 + }, + { + "epoch": 0.5663591322167556, + "grad_norm": 14.751740192286345, + "learning_rate": 4.889087209805867e-06, + "loss": 0.1466583251953125, + "step": 65500 + }, + { + "epoch": 0.5664023657382988, + "grad_norm": 19.80823707989805, + "learning_rate": 4.888928936994213e-06, + "loss": 0.0924835205078125, + "step": 65505 + }, + { + "epoch": 0.5664455992598421, + "grad_norm": 31.990734489376162, + "learning_rate": 4.888770655470987e-06, + "loss": 0.3321699142456055, + "step": 65510 + }, + { + "epoch": 0.5664888327813854, + "grad_norm": 5.60712768258993, + "learning_rate": 4.888612365236917e-06, + "loss": 0.33980712890625, + "step": 65515 + }, + { + "epoch": 0.5665320663029286, + "grad_norm": 6.818396498969793, + "learning_rate": 4.888454066292734e-06, + "loss": 0.2175159454345703, + "step": 65520 + }, + { + "epoch": 0.5665752998244719, + "grad_norm": 2.978206087735023, + "learning_rate": 4.888295758639166e-06, + "loss": 0.03640956878662109, + "step": 65525 + }, + { + "epoch": 0.5666185333460152, + "grad_norm": 0.6456176383078736, + "learning_rate": 4.888137442276947e-06, + "loss": 0.0818939208984375, + "step": 65530 + }, + { + "epoch": 0.5666617668675584, + "grad_norm": 27.678768536821945, + "learning_rate": 4.887979117206803e-06, + "loss": 0.12258377075195312, + "step": 65535 + }, + { + "epoch": 0.5667050003891017, + "grad_norm": 33.99308694185566, + "learning_rate": 4.887820783429467e-06, + "loss": 0.0470916748046875, + "step": 65540 + }, + { + "epoch": 0.566748233910645, + "grad_norm": 40.27441570659988, + "learning_rate": 4.887662440945668e-06, + "loss": 0.65430908203125, + "step": 65545 + }, + { + "epoch": 0.5667914674321882, + "grad_norm": 11.326213047348299, + "learning_rate": 4.887504089756137e-06, + "loss": 0.24276123046875, + "step": 65550 + }, + { + "epoch": 0.5668347009537315, + "grad_norm": 0.2755478026025756, + "learning_rate": 4.887345729861604e-06, + "loss": 0.07221908569335937, + "step": 65555 + }, + { + "epoch": 0.5668779344752748, + "grad_norm": 0.08037029737392515, + "learning_rate": 4.887187361262799e-06, + "loss": 0.058075904846191406, + "step": 65560 + }, + { + "epoch": 0.566921167996818, + "grad_norm": 2.083547962556401, + "learning_rate": 4.887028983960453e-06, + "loss": 0.321258544921875, + "step": 65565 + }, + { + "epoch": 0.5669644015183612, + "grad_norm": 0.383931878848138, + "learning_rate": 4.886870597955296e-06, + "loss": 0.03707809448242187, + "step": 65570 + }, + { + "epoch": 0.5670076350399046, + "grad_norm": 7.182559710564235, + "learning_rate": 4.886712203248058e-06, + "loss": 0.18217048645019532, + "step": 65575 + }, + { + "epoch": 0.5670508685614478, + "grad_norm": 2.2248965662888103, + "learning_rate": 4.886553799839471e-06, + "loss": 0.11397247314453125, + "step": 65580 + }, + { + "epoch": 0.567094102082991, + "grad_norm": 1.0029251544460185, + "learning_rate": 4.886395387730263e-06, + "loss": 0.14185333251953125, + "step": 65585 + }, + { + "epoch": 0.5671373356045344, + "grad_norm": 109.7865859349594, + "learning_rate": 4.886236966921167e-06, + "loss": 0.07536773681640625, + "step": 65590 + }, + { + "epoch": 0.5671805691260776, + "grad_norm": 12.130369198895167, + "learning_rate": 4.886078537412913e-06, + "loss": 0.06112594604492187, + "step": 65595 + }, + { + "epoch": 0.5672238026476208, + "grad_norm": 2.2105000622560276, + "learning_rate": 4.8859200992062325e-06, + "loss": 0.080230712890625, + "step": 65600 + }, + { + "epoch": 0.5672670361691641, + "grad_norm": 29.581744998677266, + "learning_rate": 4.885761652301854e-06, + "loss": 0.23456993103027343, + "step": 65605 + }, + { + "epoch": 0.5673102696907074, + "grad_norm": 5.039317796020561, + "learning_rate": 4.8856031967005085e-06, + "loss": 0.247088623046875, + "step": 65610 + }, + { + "epoch": 0.5673535032122506, + "grad_norm": 26.16007516582612, + "learning_rate": 4.885444732402928e-06, + "loss": 0.135791015625, + "step": 65615 + }, + { + "epoch": 0.5673967367337939, + "grad_norm": 4.3152488637153805, + "learning_rate": 4.885286259409844e-06, + "loss": 0.15570068359375, + "step": 65620 + }, + { + "epoch": 0.5674399702553372, + "grad_norm": 13.17104501398107, + "learning_rate": 4.885127777721987e-06, + "loss": 0.12635498046875, + "step": 65625 + }, + { + "epoch": 0.5674832037768804, + "grad_norm": 31.15736490823683, + "learning_rate": 4.884969287340086e-06, + "loss": 0.3069366455078125, + "step": 65630 + }, + { + "epoch": 0.5675264372984237, + "grad_norm": 2.9316327682606693, + "learning_rate": 4.884810788264874e-06, + "loss": 0.1988811492919922, + "step": 65635 + }, + { + "epoch": 0.567569670819967, + "grad_norm": 5.1515384387997, + "learning_rate": 4.88465228049708e-06, + "loss": 0.1013275146484375, + "step": 65640 + }, + { + "epoch": 0.5676129043415102, + "grad_norm": 1.938775997386321, + "learning_rate": 4.884493764037436e-06, + "loss": 0.35029296875, + "step": 65645 + }, + { + "epoch": 0.5676561378630535, + "grad_norm": 4.99550800104695, + "learning_rate": 4.884335238886675e-06, + "loss": 0.13166732788085939, + "step": 65650 + }, + { + "epoch": 0.5676993713845968, + "grad_norm": 17.945461919116205, + "learning_rate": 4.884176705045525e-06, + "loss": 0.33098297119140624, + "step": 65655 + }, + { + "epoch": 0.56774260490614, + "grad_norm": 7.869992337423117, + "learning_rate": 4.88401816251472e-06, + "loss": 0.2227783203125, + "step": 65660 + }, + { + "epoch": 0.5677858384276833, + "grad_norm": 0.3841278650947174, + "learning_rate": 4.883859611294988e-06, + "loss": 0.041900634765625, + "step": 65665 + }, + { + "epoch": 0.5678290719492266, + "grad_norm": 5.115718502712095, + "learning_rate": 4.883701051387063e-06, + "loss": 0.5284698486328125, + "step": 65670 + }, + { + "epoch": 0.5678723054707698, + "grad_norm": 32.21176388071177, + "learning_rate": 4.883542482791675e-06, + "loss": 0.09288330078125, + "step": 65675 + }, + { + "epoch": 0.5679155389923131, + "grad_norm": 0.5274973441316291, + "learning_rate": 4.883383905509555e-06, + "loss": 0.17181777954101562, + "step": 65680 + }, + { + "epoch": 0.5679587725138563, + "grad_norm": 30.982575844420776, + "learning_rate": 4.883225319541435e-06, + "loss": 0.4179473876953125, + "step": 65685 + }, + { + "epoch": 0.5680020060353996, + "grad_norm": 22.189145495196446, + "learning_rate": 4.883066724888046e-06, + "loss": 0.234344482421875, + "step": 65690 + }, + { + "epoch": 0.5680452395569429, + "grad_norm": 4.712422900131775, + "learning_rate": 4.88290812155012e-06, + "loss": 0.09375152587890626, + "step": 65695 + }, + { + "epoch": 0.5680884730784861, + "grad_norm": 17.87422662370857, + "learning_rate": 4.882749509528389e-06, + "loss": 0.2577659606933594, + "step": 65700 + }, + { + "epoch": 0.5681317066000294, + "grad_norm": 9.252703483382396, + "learning_rate": 4.882590888823582e-06, + "loss": 0.052044677734375, + "step": 65705 + }, + { + "epoch": 0.5681749401215727, + "grad_norm": 0.5592255135219172, + "learning_rate": 4.882432259436433e-06, + "loss": 0.41710205078125, + "step": 65710 + }, + { + "epoch": 0.5682181736431159, + "grad_norm": 0.10166811361641845, + "learning_rate": 4.882273621367674e-06, + "loss": 0.23425140380859374, + "step": 65715 + }, + { + "epoch": 0.5682614071646592, + "grad_norm": 20.71122336380128, + "learning_rate": 4.882114974618033e-06, + "loss": 0.2999359130859375, + "step": 65720 + }, + { + "epoch": 0.5683046406862025, + "grad_norm": 15.90079496727666, + "learning_rate": 4.881956319188245e-06, + "loss": 0.17846221923828126, + "step": 65725 + }, + { + "epoch": 0.5683478742077457, + "grad_norm": 24.97996925931549, + "learning_rate": 4.881797655079041e-06, + "loss": 0.1216339111328125, + "step": 65730 + }, + { + "epoch": 0.568391107729289, + "grad_norm": 17.525994969686156, + "learning_rate": 4.8816389822911525e-06, + "loss": 0.17980728149414063, + "step": 65735 + }, + { + "epoch": 0.5684343412508323, + "grad_norm": 17.67886328360771, + "learning_rate": 4.8814803008253115e-06, + "loss": 0.17362289428710936, + "step": 65740 + }, + { + "epoch": 0.5684775747723755, + "grad_norm": 0.4012688885870515, + "learning_rate": 4.881321610682249e-06, + "loss": 0.04201812744140625, + "step": 65745 + }, + { + "epoch": 0.5685208082939188, + "grad_norm": 9.00319120448008, + "learning_rate": 4.881162911862698e-06, + "loss": 0.040106582641601565, + "step": 65750 + }, + { + "epoch": 0.568564041815462, + "grad_norm": 4.149319119431506, + "learning_rate": 4.8810042043673895e-06, + "loss": 0.2330913543701172, + "step": 65755 + }, + { + "epoch": 0.5686072753370053, + "grad_norm": 0.5284486473837496, + "learning_rate": 4.880845488197056e-06, + "loss": 0.11422119140625, + "step": 65760 + }, + { + "epoch": 0.5686505088585485, + "grad_norm": 33.9696685304325, + "learning_rate": 4.8806867633524285e-06, + "loss": 0.18281021118164062, + "step": 65765 + }, + { + "epoch": 0.5686937423800918, + "grad_norm": 32.01385491532755, + "learning_rate": 4.880528029834241e-06, + "loss": 0.2577301025390625, + "step": 65770 + }, + { + "epoch": 0.5687369759016351, + "grad_norm": 2.8372289983190897, + "learning_rate": 4.880369287643224e-06, + "loss": 0.3134674072265625, + "step": 65775 + }, + { + "epoch": 0.5687802094231783, + "grad_norm": 9.620802978868802, + "learning_rate": 4.880210536780111e-06, + "loss": 0.4912841796875, + "step": 65780 + }, + { + "epoch": 0.5688234429447216, + "grad_norm": 0.13428946269696942, + "learning_rate": 4.88005177724563e-06, + "loss": 0.07977447509765626, + "step": 65785 + }, + { + "epoch": 0.5688666764662649, + "grad_norm": 2.758390566471936, + "learning_rate": 4.8798930090405195e-06, + "loss": 0.14024658203125, + "step": 65790 + }, + { + "epoch": 0.5689099099878081, + "grad_norm": 2.4094396327747574, + "learning_rate": 4.879734232165508e-06, + "loss": 0.20911026000976562, + "step": 65795 + }, + { + "epoch": 0.5689531435093514, + "grad_norm": 6.618528577391952, + "learning_rate": 4.8795754466213265e-06, + "loss": 0.13583831787109374, + "step": 65800 + }, + { + "epoch": 0.5689963770308947, + "grad_norm": 1.3549770066764422, + "learning_rate": 4.879416652408711e-06, + "loss": 0.24957275390625, + "step": 65805 + }, + { + "epoch": 0.5690396105524379, + "grad_norm": 39.236991282081384, + "learning_rate": 4.879257849528391e-06, + "loss": 0.26181640625, + "step": 65810 + }, + { + "epoch": 0.5690828440739812, + "grad_norm": 7.232956087906325, + "learning_rate": 4.879099037981101e-06, + "loss": 0.45601806640625, + "step": 65815 + }, + { + "epoch": 0.5691260775955245, + "grad_norm": 6.479759847738085, + "learning_rate": 4.878940217767572e-06, + "loss": 0.06396484375, + "step": 65820 + }, + { + "epoch": 0.5691693111170677, + "grad_norm": 9.21071414247146, + "learning_rate": 4.8787813888885355e-06, + "loss": 0.17162704467773438, + "step": 65825 + }, + { + "epoch": 0.569212544638611, + "grad_norm": 1.502694772561975, + "learning_rate": 4.878622551344726e-06, + "loss": 0.1648193359375, + "step": 65830 + }, + { + "epoch": 0.5692557781601543, + "grad_norm": 36.532915727663884, + "learning_rate": 4.8784637051368745e-06, + "loss": 0.1527557373046875, + "step": 65835 + }, + { + "epoch": 0.5692990116816975, + "grad_norm": 18.95799423752386, + "learning_rate": 4.878304850265716e-06, + "loss": 0.08082504272460937, + "step": 65840 + }, + { + "epoch": 0.5693422452032408, + "grad_norm": 1.867356616153351, + "learning_rate": 4.87814598673198e-06, + "loss": 0.0555633544921875, + "step": 65845 + }, + { + "epoch": 0.5693854787247841, + "grad_norm": 3.4402064024058103, + "learning_rate": 4.877987114536402e-06, + "loss": 0.15961761474609376, + "step": 65850 + }, + { + "epoch": 0.5694287122463273, + "grad_norm": 3.061201819676881, + "learning_rate": 4.877828233679713e-06, + "loss": 0.36481552124023436, + "step": 65855 + }, + { + "epoch": 0.5694719457678705, + "grad_norm": 2.4171600955884927, + "learning_rate": 4.877669344162645e-06, + "loss": 0.13906784057617189, + "step": 65860 + }, + { + "epoch": 0.5695151792894139, + "grad_norm": 10.698236222502368, + "learning_rate": 4.877510445985933e-06, + "loss": 0.12930908203125, + "step": 65865 + }, + { + "epoch": 0.5695584128109571, + "grad_norm": 31.663340371282334, + "learning_rate": 4.877351539150308e-06, + "loss": 0.30041961669921874, + "step": 65870 + }, + { + "epoch": 0.5696016463325003, + "grad_norm": 2.2410625695543285, + "learning_rate": 4.8771926236565044e-06, + "loss": 0.16346435546875, + "step": 65875 + }, + { + "epoch": 0.5696448798540437, + "grad_norm": 0.5639292618033422, + "learning_rate": 4.877033699505254e-06, + "loss": 0.1267563819885254, + "step": 65880 + }, + { + "epoch": 0.5696881133755869, + "grad_norm": 23.448312717665242, + "learning_rate": 4.87687476669729e-06, + "loss": 0.10360565185546874, + "step": 65885 + }, + { + "epoch": 0.5697313468971301, + "grad_norm": 0.5780493698748592, + "learning_rate": 4.876715825233346e-06, + "loss": 0.024161529541015626, + "step": 65890 + }, + { + "epoch": 0.5697745804186735, + "grad_norm": 0.14950289356627502, + "learning_rate": 4.876556875114153e-06, + "loss": 0.23624267578125, + "step": 65895 + }, + { + "epoch": 0.5698178139402167, + "grad_norm": 12.487733838783512, + "learning_rate": 4.876397916340446e-06, + "loss": 0.05966339111328125, + "step": 65900 + }, + { + "epoch": 0.5698610474617599, + "grad_norm": 0.27195995760967917, + "learning_rate": 4.876238948912959e-06, + "loss": 0.41686038970947265, + "step": 65905 + }, + { + "epoch": 0.5699042809833033, + "grad_norm": 5.805051779350772, + "learning_rate": 4.876079972832422e-06, + "loss": 0.11847915649414062, + "step": 65910 + }, + { + "epoch": 0.5699475145048465, + "grad_norm": 10.636215760124534, + "learning_rate": 4.87592098809957e-06, + "loss": 0.130322265625, + "step": 65915 + }, + { + "epoch": 0.5699907480263897, + "grad_norm": 5.25961628618465, + "learning_rate": 4.875761994715137e-06, + "loss": 0.14060325622558595, + "step": 65920 + }, + { + "epoch": 0.570033981547933, + "grad_norm": 27.71560300406579, + "learning_rate": 4.875602992679855e-06, + "loss": 0.23859405517578125, + "step": 65925 + }, + { + "epoch": 0.5700772150694763, + "grad_norm": 5.151145117703025, + "learning_rate": 4.875443981994458e-06, + "loss": 0.022119903564453126, + "step": 65930 + }, + { + "epoch": 0.5701204485910195, + "grad_norm": 4.420430173166472, + "learning_rate": 4.8752849626596794e-06, + "loss": 0.0277130126953125, + "step": 65935 + }, + { + "epoch": 0.5701636821125627, + "grad_norm": 12.451468347532023, + "learning_rate": 4.875125934676252e-06, + "loss": 0.1098297119140625, + "step": 65940 + }, + { + "epoch": 0.5702069156341061, + "grad_norm": 10.923428123575263, + "learning_rate": 4.874966898044909e-06, + "loss": 0.17710342407226562, + "step": 65945 + }, + { + "epoch": 0.5702501491556493, + "grad_norm": 1.2761200188425808, + "learning_rate": 4.874807852766386e-06, + "loss": 0.0459259033203125, + "step": 65950 + }, + { + "epoch": 0.5702933826771925, + "grad_norm": 1.0316195447120386, + "learning_rate": 4.874648798841413e-06, + "loss": 0.07979621887207031, + "step": 65955 + }, + { + "epoch": 0.5703366161987359, + "grad_norm": 20.72562872983377, + "learning_rate": 4.874489736270725e-06, + "loss": 0.2018585205078125, + "step": 65960 + }, + { + "epoch": 0.5703798497202791, + "grad_norm": 20.439419949493878, + "learning_rate": 4.874330665055058e-06, + "loss": 0.6409591674804688, + "step": 65965 + }, + { + "epoch": 0.5704230832418223, + "grad_norm": 21.284373688845466, + "learning_rate": 4.874171585195142e-06, + "loss": 0.1267822265625, + "step": 65970 + }, + { + "epoch": 0.5704663167633657, + "grad_norm": 29.768679908969755, + "learning_rate": 4.874012496691712e-06, + "loss": 0.27596607208251955, + "step": 65975 + }, + { + "epoch": 0.5705095502849089, + "grad_norm": 1.8621014182042923, + "learning_rate": 4.873853399545503e-06, + "loss": 0.020701217651367187, + "step": 65980 + }, + { + "epoch": 0.5705527838064521, + "grad_norm": 26.46730021167508, + "learning_rate": 4.873694293757248e-06, + "loss": 0.2354339599609375, + "step": 65985 + }, + { + "epoch": 0.5705960173279955, + "grad_norm": 18.206544497081595, + "learning_rate": 4.87353517932768e-06, + "loss": 0.303106689453125, + "step": 65990 + }, + { + "epoch": 0.5706392508495387, + "grad_norm": 21.628817214277984, + "learning_rate": 4.873376056257532e-06, + "loss": 0.15836944580078124, + "step": 65995 + }, + { + "epoch": 0.5706824843710819, + "grad_norm": 2.5208813200297997, + "learning_rate": 4.873216924547541e-06, + "loss": 0.2788816452026367, + "step": 66000 + }, + { + "epoch": 0.5707257178926253, + "grad_norm": 0.035357050781247894, + "learning_rate": 4.873057784198438e-06, + "loss": 0.10513687133789062, + "step": 66005 + }, + { + "epoch": 0.5707689514141685, + "grad_norm": 5.41515264458363, + "learning_rate": 4.8728986352109576e-06, + "loss": 0.03526382446289063, + "step": 66010 + }, + { + "epoch": 0.5708121849357117, + "grad_norm": 4.576187826361284, + "learning_rate": 4.872739477585836e-06, + "loss": 0.06002388000488281, + "step": 66015 + }, + { + "epoch": 0.5708554184572551, + "grad_norm": 23.037831629982943, + "learning_rate": 4.872580311323805e-06, + "loss": 0.4678318023681641, + "step": 66020 + }, + { + "epoch": 0.5708986519787983, + "grad_norm": 6.801393665244901, + "learning_rate": 4.872421136425597e-06, + "loss": 0.1608306884765625, + "step": 66025 + }, + { + "epoch": 0.5709418855003415, + "grad_norm": 1.9413567780688294, + "learning_rate": 4.87226195289195e-06, + "loss": 0.11425018310546875, + "step": 66030 + }, + { + "epoch": 0.5709851190218848, + "grad_norm": 9.489776288009514, + "learning_rate": 4.872102760723596e-06, + "loss": 0.1061309814453125, + "step": 66035 + }, + { + "epoch": 0.5710283525434281, + "grad_norm": 7.084205282193627, + "learning_rate": 4.8719435599212695e-06, + "loss": 0.07563056945800781, + "step": 66040 + }, + { + "epoch": 0.5710715860649713, + "grad_norm": 33.25340148322494, + "learning_rate": 4.871784350485705e-06, + "loss": 0.158197021484375, + "step": 66045 + }, + { + "epoch": 0.5711148195865146, + "grad_norm": 8.254818181985176, + "learning_rate": 4.871625132417636e-06, + "loss": 0.06467742919921875, + "step": 66050 + }, + { + "epoch": 0.5711580531080579, + "grad_norm": 18.089199677453095, + "learning_rate": 4.871465905717797e-06, + "loss": 0.13300018310546874, + "step": 66055 + }, + { + "epoch": 0.5712012866296011, + "grad_norm": 41.99093025774287, + "learning_rate": 4.871306670386923e-06, + "loss": 0.3260154724121094, + "step": 66060 + }, + { + "epoch": 0.5712445201511444, + "grad_norm": 6.3770026213983835, + "learning_rate": 4.871147426425748e-06, + "loss": 0.0874176025390625, + "step": 66065 + }, + { + "epoch": 0.5712877536726877, + "grad_norm": 39.84884415960908, + "learning_rate": 4.8709881738350066e-06, + "loss": 0.301348876953125, + "step": 66070 + }, + { + "epoch": 0.5713309871942309, + "grad_norm": 3.4378395030356668, + "learning_rate": 4.870828912615433e-06, + "loss": 0.1115234375, + "step": 66075 + }, + { + "epoch": 0.5713742207157742, + "grad_norm": 53.4703273984332, + "learning_rate": 4.870669642767761e-06, + "loss": 0.16097030639648438, + "step": 66080 + }, + { + "epoch": 0.5714174542373175, + "grad_norm": 6.78630524922212, + "learning_rate": 4.870510364292727e-06, + "loss": 0.16769866943359374, + "step": 66085 + }, + { + "epoch": 0.5714606877588607, + "grad_norm": 1.7997520671733311, + "learning_rate": 4.870351077191063e-06, + "loss": 0.3275413513183594, + "step": 66090 + }, + { + "epoch": 0.571503921280404, + "grad_norm": 0.6632381798777259, + "learning_rate": 4.870191781463507e-06, + "loss": 0.09729232788085937, + "step": 66095 + }, + { + "epoch": 0.5715471548019473, + "grad_norm": 2.543193701918116, + "learning_rate": 4.870032477110791e-06, + "loss": 0.19478073120117187, + "step": 66100 + }, + { + "epoch": 0.5715903883234905, + "grad_norm": 10.81225755758137, + "learning_rate": 4.869873164133649e-06, + "loss": 0.25399150848388674, + "step": 66105 + }, + { + "epoch": 0.5716336218450337, + "grad_norm": 1.9059372342351573, + "learning_rate": 4.8697138425328195e-06, + "loss": 0.09694175720214844, + "step": 66110 + }, + { + "epoch": 0.571676855366577, + "grad_norm": 9.568926108062984, + "learning_rate": 4.869554512309034e-06, + "loss": 0.256341552734375, + "step": 66115 + }, + { + "epoch": 0.5717200888881203, + "grad_norm": 0.5419045108533991, + "learning_rate": 4.869395173463028e-06, + "loss": 0.11040401458740234, + "step": 66120 + }, + { + "epoch": 0.5717633224096635, + "grad_norm": 2.1564679406766882, + "learning_rate": 4.869235825995537e-06, + "loss": 0.134075927734375, + "step": 66125 + }, + { + "epoch": 0.5718065559312068, + "grad_norm": 0.6565119337116949, + "learning_rate": 4.869076469907295e-06, + "loss": 0.1679210662841797, + "step": 66130 + }, + { + "epoch": 0.5718497894527501, + "grad_norm": 12.364294722580691, + "learning_rate": 4.868917105199039e-06, + "loss": 0.2413177490234375, + "step": 66135 + }, + { + "epoch": 0.5718930229742933, + "grad_norm": 0.2474279122752451, + "learning_rate": 4.868757731871501e-06, + "loss": 0.1324859619140625, + "step": 66140 + }, + { + "epoch": 0.5719362564958366, + "grad_norm": 4.240569578324095, + "learning_rate": 4.8685983499254175e-06, + "loss": 0.18542327880859374, + "step": 66145 + }, + { + "epoch": 0.5719794900173799, + "grad_norm": 6.7757122880900695, + "learning_rate": 4.868438959361524e-06, + "loss": 0.18175086975097657, + "step": 66150 + }, + { + "epoch": 0.5720227235389231, + "grad_norm": 2.4216126039206136, + "learning_rate": 4.868279560180554e-06, + "loss": 0.1360870361328125, + "step": 66155 + }, + { + "epoch": 0.5720659570604664, + "grad_norm": 0.2975811655558153, + "learning_rate": 4.8681201523832454e-06, + "loss": 0.08255081176757813, + "step": 66160 + }, + { + "epoch": 0.5721091905820097, + "grad_norm": 0.8091974497218765, + "learning_rate": 4.867960735970331e-06, + "loss": 0.2941619873046875, + "step": 66165 + }, + { + "epoch": 0.5721524241035529, + "grad_norm": 24.65970731456708, + "learning_rate": 4.867801310942547e-06, + "loss": 0.2967254638671875, + "step": 66170 + }, + { + "epoch": 0.5721956576250962, + "grad_norm": 3.0167964226302013, + "learning_rate": 4.867641877300628e-06, + "loss": 0.27906646728515627, + "step": 66175 + }, + { + "epoch": 0.5722388911466395, + "grad_norm": 4.852337021949996, + "learning_rate": 4.86748243504531e-06, + "loss": 0.30352249145507815, + "step": 66180 + }, + { + "epoch": 0.5722821246681827, + "grad_norm": 30.601237087541683, + "learning_rate": 4.8673229841773275e-06, + "loss": 0.4820648193359375, + "step": 66185 + }, + { + "epoch": 0.572325358189726, + "grad_norm": 1.34587823108161, + "learning_rate": 4.867163524697417e-06, + "loss": 0.10732154846191407, + "step": 66190 + }, + { + "epoch": 0.5723685917112693, + "grad_norm": 27.42733250335868, + "learning_rate": 4.867004056606313e-06, + "loss": 0.221832275390625, + "step": 66195 + }, + { + "epoch": 0.5724118252328125, + "grad_norm": 21.43088758242358, + "learning_rate": 4.866844579904751e-06, + "loss": 0.33946075439453127, + "step": 66200 + }, + { + "epoch": 0.5724550587543558, + "grad_norm": 0.1457453977431617, + "learning_rate": 4.8666850945934676e-06, + "loss": 0.056101226806640626, + "step": 66205 + }, + { + "epoch": 0.572498292275899, + "grad_norm": 0.8956812270333125, + "learning_rate": 4.866525600673197e-06, + "loss": 0.04538726806640625, + "step": 66210 + }, + { + "epoch": 0.5725415257974423, + "grad_norm": 14.52017898035653, + "learning_rate": 4.866366098144675e-06, + "loss": 0.23856143951416015, + "step": 66215 + }, + { + "epoch": 0.5725847593189856, + "grad_norm": 1.0432266941207378, + "learning_rate": 4.8662065870086376e-06, + "loss": 0.12390365600585937, + "step": 66220 + }, + { + "epoch": 0.5726279928405288, + "grad_norm": 10.39680077461756, + "learning_rate": 4.86604706726582e-06, + "loss": 0.133880615234375, + "step": 66225 + }, + { + "epoch": 0.5726712263620721, + "grad_norm": 21.168114820331567, + "learning_rate": 4.865887538916959e-06, + "loss": 0.26340255737304685, + "step": 66230 + }, + { + "epoch": 0.5727144598836154, + "grad_norm": 1.5016723675457988, + "learning_rate": 4.865728001962789e-06, + "loss": 0.09731597900390625, + "step": 66235 + }, + { + "epoch": 0.5727576934051586, + "grad_norm": 4.807794023738379, + "learning_rate": 4.865568456404047e-06, + "loss": 0.103546142578125, + "step": 66240 + }, + { + "epoch": 0.5728009269267019, + "grad_norm": 0.6592125173065908, + "learning_rate": 4.8654089022414674e-06, + "loss": 0.19545021057128906, + "step": 66245 + }, + { + "epoch": 0.5728441604482452, + "grad_norm": 12.228522794467189, + "learning_rate": 4.865249339475787e-06, + "loss": 0.15462989807128907, + "step": 66250 + }, + { + "epoch": 0.5728873939697884, + "grad_norm": 6.836094392012096, + "learning_rate": 4.865089768107742e-06, + "loss": 0.2413482666015625, + "step": 66255 + }, + { + "epoch": 0.5729306274913317, + "grad_norm": 55.027409146393914, + "learning_rate": 4.864930188138067e-06, + "loss": 0.1686248779296875, + "step": 66260 + }, + { + "epoch": 0.572973861012875, + "grad_norm": 15.673785640152165, + "learning_rate": 4.864770599567499e-06, + "loss": 0.1217041015625, + "step": 66265 + }, + { + "epoch": 0.5730170945344182, + "grad_norm": 2.353534243782813, + "learning_rate": 4.864611002396774e-06, + "loss": 0.37728271484375, + "step": 66270 + }, + { + "epoch": 0.5730603280559615, + "grad_norm": 2.4311625204089995, + "learning_rate": 4.8644513966266266e-06, + "loss": 0.099798583984375, + "step": 66275 + }, + { + "epoch": 0.5731035615775047, + "grad_norm": 5.830623764269659, + "learning_rate": 4.8642917822577955e-06, + "loss": 0.06820068359375, + "step": 66280 + }, + { + "epoch": 0.573146795099048, + "grad_norm": 31.95886616548637, + "learning_rate": 4.864132159291015e-06, + "loss": 0.2577507019042969, + "step": 66285 + }, + { + "epoch": 0.5731900286205912, + "grad_norm": 0.3414861735185852, + "learning_rate": 4.863972527727022e-06, + "loss": 0.03541374206542969, + "step": 66290 + }, + { + "epoch": 0.5732332621421345, + "grad_norm": 8.256492126237074, + "learning_rate": 4.863812887566552e-06, + "loss": 0.39189834594726564, + "step": 66295 + }, + { + "epoch": 0.5732764956636778, + "grad_norm": 6.000742455220808, + "learning_rate": 4.863653238810342e-06, + "loss": 0.03899192810058594, + "step": 66300 + }, + { + "epoch": 0.573319729185221, + "grad_norm": 23.44683296782039, + "learning_rate": 4.863493581459127e-06, + "loss": 0.22678756713867188, + "step": 66305 + }, + { + "epoch": 0.5733629627067643, + "grad_norm": 28.318465679411645, + "learning_rate": 4.863333915513645e-06, + "loss": 0.13802490234375, + "step": 66310 + }, + { + "epoch": 0.5734061962283076, + "grad_norm": 10.283999075864362, + "learning_rate": 4.863174240974632e-06, + "loss": 0.12643585205078126, + "step": 66315 + }, + { + "epoch": 0.5734494297498508, + "grad_norm": 1.1986014577360935, + "learning_rate": 4.863014557842823e-06, + "loss": 0.26478729248046873, + "step": 66320 + }, + { + "epoch": 0.5734926632713941, + "grad_norm": 2.0454335450881698, + "learning_rate": 4.8628548661189565e-06, + "loss": 0.15404052734375, + "step": 66325 + }, + { + "epoch": 0.5735358967929374, + "grad_norm": 12.369772388933288, + "learning_rate": 4.862695165803768e-06, + "loss": 0.23100738525390624, + "step": 66330 + }, + { + "epoch": 0.5735791303144806, + "grad_norm": 1.4927524481219265, + "learning_rate": 4.862535456897993e-06, + "loss": 0.04600963592529297, + "step": 66335 + }, + { + "epoch": 0.5736223638360239, + "grad_norm": 11.991262382985006, + "learning_rate": 4.86237573940237e-06, + "loss": 0.247705078125, + "step": 66340 + }, + { + "epoch": 0.5736655973575672, + "grad_norm": 39.78505898524652, + "learning_rate": 4.862216013317635e-06, + "loss": 0.6302230834960938, + "step": 66345 + }, + { + "epoch": 0.5737088308791104, + "grad_norm": 24.963409424546082, + "learning_rate": 4.862056278644524e-06, + "loss": 0.2971435546875, + "step": 66350 + }, + { + "epoch": 0.5737520644006537, + "grad_norm": 40.609081743008716, + "learning_rate": 4.861896535383773e-06, + "loss": 0.498480224609375, + "step": 66355 + }, + { + "epoch": 0.573795297922197, + "grad_norm": 8.08166349606824, + "learning_rate": 4.86173678353612e-06, + "loss": 0.05720672607421875, + "step": 66360 + }, + { + "epoch": 0.5738385314437402, + "grad_norm": 36.68479517234999, + "learning_rate": 4.861577023102301e-06, + "loss": 0.31283111572265626, + "step": 66365 + }, + { + "epoch": 0.5738817649652835, + "grad_norm": 2.1550138461151707, + "learning_rate": 4.861417254083055e-06, + "loss": 0.19157867431640624, + "step": 66370 + }, + { + "epoch": 0.5739249984868268, + "grad_norm": 0.5944534170016158, + "learning_rate": 4.861257476479115e-06, + "loss": 0.26298828125, + "step": 66375 + }, + { + "epoch": 0.57396823200837, + "grad_norm": 1.1663717938504894, + "learning_rate": 4.861097690291222e-06, + "loss": 0.08441429138183594, + "step": 66380 + }, + { + "epoch": 0.5740114655299132, + "grad_norm": 5.395919358165921, + "learning_rate": 4.86093789552011e-06, + "loss": 0.05845489501953125, + "step": 66385 + }, + { + "epoch": 0.5740546990514566, + "grad_norm": 14.169721675431898, + "learning_rate": 4.860778092166516e-06, + "loss": 0.15943679809570313, + "step": 66390 + }, + { + "epoch": 0.5740979325729998, + "grad_norm": 0.5958668746583793, + "learning_rate": 4.860618280231178e-06, + "loss": 0.10190277099609375, + "step": 66395 + }, + { + "epoch": 0.574141166094543, + "grad_norm": 38.2057406638617, + "learning_rate": 4.8604584597148345e-06, + "loss": 0.23291015625, + "step": 66400 + }, + { + "epoch": 0.5741843996160864, + "grad_norm": 29.9117313196762, + "learning_rate": 4.86029863061822e-06, + "loss": 0.19216423034667968, + "step": 66405 + }, + { + "epoch": 0.5742276331376296, + "grad_norm": 4.1074132663205, + "learning_rate": 4.860138792942073e-06, + "loss": 0.09814300537109374, + "step": 66410 + }, + { + "epoch": 0.5742708666591728, + "grad_norm": 7.653803581637909, + "learning_rate": 4.85997894668713e-06, + "loss": 0.4917510986328125, + "step": 66415 + }, + { + "epoch": 0.5743141001807162, + "grad_norm": 13.52977004209655, + "learning_rate": 4.859819091854127e-06, + "loss": 0.130279541015625, + "step": 66420 + }, + { + "epoch": 0.5743573337022594, + "grad_norm": 1.5556027607242409, + "learning_rate": 4.859659228443805e-06, + "loss": 0.25293731689453125, + "step": 66425 + }, + { + "epoch": 0.5744005672238026, + "grad_norm": 0.8752976058345289, + "learning_rate": 4.8594993564568985e-06, + "loss": 0.14078369140625, + "step": 66430 + }, + { + "epoch": 0.574443800745346, + "grad_norm": 3.383285608956857, + "learning_rate": 4.8593394758941444e-06, + "loss": 0.09734420776367188, + "step": 66435 + }, + { + "epoch": 0.5744870342668892, + "grad_norm": 1.104268732289973, + "learning_rate": 4.859179586756281e-06, + "loss": 0.16138916015625, + "step": 66440 + }, + { + "epoch": 0.5745302677884324, + "grad_norm": 9.559029204108912, + "learning_rate": 4.859019689044047e-06, + "loss": 0.38560791015625, + "step": 66445 + }, + { + "epoch": 0.5745735013099758, + "grad_norm": 26.247715437372943, + "learning_rate": 4.858859782758177e-06, + "loss": 0.11858062744140625, + "step": 66450 + }, + { + "epoch": 0.574616734831519, + "grad_norm": 19.45134028639503, + "learning_rate": 4.8586998678994094e-06, + "loss": 0.136456298828125, + "step": 66455 + }, + { + "epoch": 0.5746599683530622, + "grad_norm": 23.773454731380237, + "learning_rate": 4.858539944468483e-06, + "loss": 0.12910842895507812, + "step": 66460 + }, + { + "epoch": 0.5747032018746054, + "grad_norm": 67.7795967279608, + "learning_rate": 4.858380012466136e-06, + "loss": 0.2047607421875, + "step": 66465 + }, + { + "epoch": 0.5747464353961488, + "grad_norm": 0.15441810303728312, + "learning_rate": 4.858220071893103e-06, + "loss": 0.04077072143554687, + "step": 66470 + }, + { + "epoch": 0.574789668917692, + "grad_norm": 16.30544310915518, + "learning_rate": 4.858060122750123e-06, + "loss": 0.08554229736328126, + "step": 66475 + }, + { + "epoch": 0.5748329024392352, + "grad_norm": 3.057004894443389, + "learning_rate": 4.8579001650379345e-06, + "loss": 0.15498771667480468, + "step": 66480 + }, + { + "epoch": 0.5748761359607786, + "grad_norm": 11.106569615189516, + "learning_rate": 4.857740198757275e-06, + "loss": 0.09385910034179687, + "step": 66485 + }, + { + "epoch": 0.5749193694823218, + "grad_norm": 15.171101524993354, + "learning_rate": 4.857580223908881e-06, + "loss": 0.1446746826171875, + "step": 66490 + }, + { + "epoch": 0.574962603003865, + "grad_norm": 1.7049291018522634, + "learning_rate": 4.8574202404934916e-06, + "loss": 0.024407958984375, + "step": 66495 + }, + { + "epoch": 0.5750058365254084, + "grad_norm": 10.41508524492608, + "learning_rate": 4.857260248511844e-06, + "loss": 0.19467391967773437, + "step": 66500 + }, + { + "epoch": 0.5750490700469516, + "grad_norm": 3.522403558668042, + "learning_rate": 4.857100247964677e-06, + "loss": 0.0760080337524414, + "step": 66505 + }, + { + "epoch": 0.5750923035684948, + "grad_norm": 9.586502945749686, + "learning_rate": 4.856940238852727e-06, + "loss": 0.096063232421875, + "step": 66510 + }, + { + "epoch": 0.5751355370900382, + "grad_norm": 16.89802120927867, + "learning_rate": 4.8567802211767325e-06, + "loss": 0.1983062744140625, + "step": 66515 + }, + { + "epoch": 0.5751787706115814, + "grad_norm": 15.1472890295204, + "learning_rate": 4.856620194937432e-06, + "loss": 0.06826286315917969, + "step": 66520 + }, + { + "epoch": 0.5752220041331246, + "grad_norm": 4.523254281067961, + "learning_rate": 4.856460160135564e-06, + "loss": 0.4527679443359375, + "step": 66525 + }, + { + "epoch": 0.575265237654668, + "grad_norm": 64.4589247156301, + "learning_rate": 4.856300116771864e-06, + "loss": 0.4653942108154297, + "step": 66530 + }, + { + "epoch": 0.5753084711762112, + "grad_norm": 5.69098080152511, + "learning_rate": 4.856140064847073e-06, + "loss": 0.020179939270019532, + "step": 66535 + }, + { + "epoch": 0.5753517046977544, + "grad_norm": 3.0811131880030835, + "learning_rate": 4.855980004361928e-06, + "loss": 0.04701080322265625, + "step": 66540 + }, + { + "epoch": 0.5753949382192978, + "grad_norm": 33.04077615076834, + "learning_rate": 4.855819935317167e-06, + "loss": 0.126019287109375, + "step": 66545 + }, + { + "epoch": 0.575438171740841, + "grad_norm": 25.81473982672866, + "learning_rate": 4.855659857713529e-06, + "loss": 0.0937225341796875, + "step": 66550 + }, + { + "epoch": 0.5754814052623842, + "grad_norm": 4.187460853929747, + "learning_rate": 4.855499771551751e-06, + "loss": 0.26153411865234377, + "step": 66555 + }, + { + "epoch": 0.5755246387839275, + "grad_norm": 62.609924889264526, + "learning_rate": 4.855339676832573e-06, + "loss": 0.5876068115234375, + "step": 66560 + }, + { + "epoch": 0.5755678723054708, + "grad_norm": 0.8138088868740971, + "learning_rate": 4.855179573556731e-06, + "loss": 0.36439857482910154, + "step": 66565 + }, + { + "epoch": 0.575611105827014, + "grad_norm": 3.084345343141221, + "learning_rate": 4.855019461724966e-06, + "loss": 0.373883056640625, + "step": 66570 + }, + { + "epoch": 0.5756543393485573, + "grad_norm": 7.671536767071792, + "learning_rate": 4.854859341338014e-06, + "loss": 0.25308380126953123, + "step": 66575 + }, + { + "epoch": 0.5756975728701006, + "grad_norm": 2.814745896041502, + "learning_rate": 4.854699212396615e-06, + "loss": 0.09287872314453124, + "step": 66580 + }, + { + "epoch": 0.5757408063916438, + "grad_norm": 0.6338703931152421, + "learning_rate": 4.854539074901508e-06, + "loss": 0.21243133544921874, + "step": 66585 + }, + { + "epoch": 0.575784039913187, + "grad_norm": 9.194320037176833, + "learning_rate": 4.854378928853429e-06, + "loss": 0.1239898681640625, + "step": 66590 + }, + { + "epoch": 0.5758272734347304, + "grad_norm": 6.798403841774779, + "learning_rate": 4.85421877425312e-06, + "loss": 0.12111892700195312, + "step": 66595 + }, + { + "epoch": 0.5758705069562736, + "grad_norm": 4.4149733315515185, + "learning_rate": 4.854058611101317e-06, + "loss": 0.26103363037109373, + "step": 66600 + }, + { + "epoch": 0.5759137404778168, + "grad_norm": 13.56314893141936, + "learning_rate": 4.8538984393987596e-06, + "loss": 0.12509841918945314, + "step": 66605 + }, + { + "epoch": 0.5759569739993602, + "grad_norm": 68.36527188833462, + "learning_rate": 4.853738259146187e-06, + "loss": 0.3091888427734375, + "step": 66610 + }, + { + "epoch": 0.5760002075209034, + "grad_norm": 0.404492759181855, + "learning_rate": 4.853578070344336e-06, + "loss": 0.1392120361328125, + "step": 66615 + }, + { + "epoch": 0.5760434410424466, + "grad_norm": 0.047019204689688066, + "learning_rate": 4.853417872993947e-06, + "loss": 0.027142143249511717, + "step": 66620 + }, + { + "epoch": 0.57608667456399, + "grad_norm": 6.041500825866051, + "learning_rate": 4.853257667095759e-06, + "loss": 0.16309967041015624, + "step": 66625 + }, + { + "epoch": 0.5761299080855332, + "grad_norm": 33.56779872154489, + "learning_rate": 4.85309745265051e-06, + "loss": 0.13006439208984374, + "step": 66630 + }, + { + "epoch": 0.5761731416070764, + "grad_norm": 0.6378246312161797, + "learning_rate": 4.85293722965894e-06, + "loss": 0.11570405960083008, + "step": 66635 + }, + { + "epoch": 0.5762163751286197, + "grad_norm": 1.379811519322165, + "learning_rate": 4.852776998121787e-06, + "loss": 0.06495285034179688, + "step": 66640 + }, + { + "epoch": 0.576259608650163, + "grad_norm": 49.26486618708413, + "learning_rate": 4.85261675803979e-06, + "loss": 0.3981952667236328, + "step": 66645 + }, + { + "epoch": 0.5763028421717062, + "grad_norm": 4.089632473637785, + "learning_rate": 4.852456509413689e-06, + "loss": 0.0758270263671875, + "step": 66650 + }, + { + "epoch": 0.5763460756932495, + "grad_norm": 6.91851264463163, + "learning_rate": 4.852296252244221e-06, + "loss": 0.07196884155273438, + "step": 66655 + }, + { + "epoch": 0.5763893092147928, + "grad_norm": 1.8884805754638343, + "learning_rate": 4.852135986532127e-06, + "loss": 0.0769683837890625, + "step": 66660 + }, + { + "epoch": 0.576432542736336, + "grad_norm": 6.555748993524218, + "learning_rate": 4.851975712278145e-06, + "loss": 0.150518798828125, + "step": 66665 + }, + { + "epoch": 0.5764757762578793, + "grad_norm": 4.084005194201709, + "learning_rate": 4.851815429483015e-06, + "loss": 0.07235908508300781, + "step": 66670 + }, + { + "epoch": 0.5765190097794226, + "grad_norm": 8.87640780578882, + "learning_rate": 4.851655138147476e-06, + "loss": 0.3318046569824219, + "step": 66675 + }, + { + "epoch": 0.5765622433009658, + "grad_norm": 41.712240729904586, + "learning_rate": 4.851494838272267e-06, + "loss": 0.25406036376953123, + "step": 66680 + }, + { + "epoch": 0.5766054768225091, + "grad_norm": 12.253798346669601, + "learning_rate": 4.851334529858128e-06, + "loss": 0.2076202392578125, + "step": 66685 + }, + { + "epoch": 0.5766487103440524, + "grad_norm": 3.6676010704078914, + "learning_rate": 4.851174212905797e-06, + "loss": 0.019304656982421876, + "step": 66690 + }, + { + "epoch": 0.5766919438655956, + "grad_norm": 1.8195349872463276, + "learning_rate": 4.851013887416014e-06, + "loss": 0.22443046569824218, + "step": 66695 + }, + { + "epoch": 0.5767351773871389, + "grad_norm": 3.051466953663614, + "learning_rate": 4.850853553389519e-06, + "loss": 0.057642555236816405, + "step": 66700 + }, + { + "epoch": 0.5767784109086822, + "grad_norm": 13.807596276486864, + "learning_rate": 4.85069321082705e-06, + "loss": 0.09557075500488281, + "step": 66705 + }, + { + "epoch": 0.5768216444302254, + "grad_norm": 26.85707201337166, + "learning_rate": 4.850532859729349e-06, + "loss": 0.3605663299560547, + "step": 66710 + }, + { + "epoch": 0.5768648779517687, + "grad_norm": 21.750337110471904, + "learning_rate": 4.850372500097153e-06, + "loss": 0.1145782470703125, + "step": 66715 + }, + { + "epoch": 0.576908111473312, + "grad_norm": 2.4235283045238263, + "learning_rate": 4.850212131931202e-06, + "loss": 0.08171844482421875, + "step": 66720 + }, + { + "epoch": 0.5769513449948552, + "grad_norm": 51.9302083141871, + "learning_rate": 4.850051755232238e-06, + "loss": 0.6972930908203125, + "step": 66725 + }, + { + "epoch": 0.5769945785163985, + "grad_norm": 0.7999198242228254, + "learning_rate": 4.8498913700009976e-06, + "loss": 0.026352310180664064, + "step": 66730 + }, + { + "epoch": 0.5770378120379417, + "grad_norm": 15.246945140834047, + "learning_rate": 4.849730976238221e-06, + "loss": 0.61893310546875, + "step": 66735 + }, + { + "epoch": 0.577081045559485, + "grad_norm": 13.360738525811344, + "learning_rate": 4.849570573944649e-06, + "loss": 0.08382720947265625, + "step": 66740 + }, + { + "epoch": 0.5771242790810283, + "grad_norm": 30.819418790091227, + "learning_rate": 4.849410163121022e-06, + "loss": 0.26650238037109375, + "step": 66745 + }, + { + "epoch": 0.5771675126025715, + "grad_norm": 0.3613194623258989, + "learning_rate": 4.849249743768078e-06, + "loss": 0.08194160461425781, + "step": 66750 + }, + { + "epoch": 0.5772107461241148, + "grad_norm": 21.614606176610703, + "learning_rate": 4.849089315886557e-06, + "loss": 0.2651634216308594, + "step": 66755 + }, + { + "epoch": 0.577253979645658, + "grad_norm": 1.8361899986918047, + "learning_rate": 4.8489288794772e-06, + "loss": 0.10917396545410156, + "step": 66760 + }, + { + "epoch": 0.5772972131672013, + "grad_norm": 41.551758582047476, + "learning_rate": 4.848768434540746e-06, + "loss": 0.7755441665649414, + "step": 66765 + }, + { + "epoch": 0.5773404466887446, + "grad_norm": 2.1871463102765847, + "learning_rate": 4.848607981077936e-06, + "loss": 0.1332866668701172, + "step": 66770 + }, + { + "epoch": 0.5773836802102879, + "grad_norm": 4.317579006255888, + "learning_rate": 4.848447519089509e-06, + "loss": 0.4091644287109375, + "step": 66775 + }, + { + "epoch": 0.5774269137318311, + "grad_norm": 27.256711422126966, + "learning_rate": 4.8482870485762054e-06, + "loss": 0.08588829040527343, + "step": 66780 + }, + { + "epoch": 0.5774701472533744, + "grad_norm": 18.00695793886408, + "learning_rate": 4.848126569538766e-06, + "loss": 0.24847564697265626, + "step": 66785 + }, + { + "epoch": 0.5775133807749177, + "grad_norm": 18.900860357833576, + "learning_rate": 4.8479660819779284e-06, + "loss": 0.43438262939453126, + "step": 66790 + }, + { + "epoch": 0.5775566142964609, + "grad_norm": 5.947909116398094, + "learning_rate": 4.847805585894436e-06, + "loss": 0.178607177734375, + "step": 66795 + }, + { + "epoch": 0.5775998478180042, + "grad_norm": 9.17888556839383, + "learning_rate": 4.847645081289027e-06, + "loss": 0.11300773620605468, + "step": 66800 + }, + { + "epoch": 0.5776430813395474, + "grad_norm": 20.40380294797158, + "learning_rate": 4.847484568162443e-06, + "loss": 0.1802978515625, + "step": 66805 + }, + { + "epoch": 0.5776863148610907, + "grad_norm": 8.635927684857762, + "learning_rate": 4.8473240465154224e-06, + "loss": 0.205682373046875, + "step": 66810 + }, + { + "epoch": 0.5777295483826339, + "grad_norm": 7.964839521503184, + "learning_rate": 4.847163516348707e-06, + "loss": 0.1458465576171875, + "step": 66815 + }, + { + "epoch": 0.5777727819041772, + "grad_norm": 0.7764505746567536, + "learning_rate": 4.847002977663035e-06, + "loss": 0.1788177490234375, + "step": 66820 + }, + { + "epoch": 0.5778160154257205, + "grad_norm": 12.145584824094808, + "learning_rate": 4.846842430459151e-06, + "loss": 0.24563751220703126, + "step": 66825 + }, + { + "epoch": 0.5778592489472637, + "grad_norm": 1.751949297153846, + "learning_rate": 4.846681874737792e-06, + "loss": 0.16043930053710936, + "step": 66830 + }, + { + "epoch": 0.577902482468807, + "grad_norm": 24.30511420274073, + "learning_rate": 4.846521310499698e-06, + "loss": 0.1560272216796875, + "step": 66835 + }, + { + "epoch": 0.5779457159903503, + "grad_norm": 13.400646672830385, + "learning_rate": 4.846360737745613e-06, + "loss": 0.18732986450195313, + "step": 66840 + }, + { + "epoch": 0.5779889495118935, + "grad_norm": 8.51976736211836, + "learning_rate": 4.8462001564762735e-06, + "loss": 0.049003219604492186, + "step": 66845 + }, + { + "epoch": 0.5780321830334368, + "grad_norm": 4.615527248846457, + "learning_rate": 4.846039566692423e-06, + "loss": 0.4658496856689453, + "step": 66850 + }, + { + "epoch": 0.5780754165549801, + "grad_norm": 2.1389291236508003, + "learning_rate": 4.845878968394802e-06, + "loss": 0.10320510864257812, + "step": 66855 + }, + { + "epoch": 0.5781186500765233, + "grad_norm": 0.9352824466558246, + "learning_rate": 4.8457183615841495e-06, + "loss": 0.10180282592773438, + "step": 66860 + }, + { + "epoch": 0.5781618835980666, + "grad_norm": 12.466995570322936, + "learning_rate": 4.845557746261206e-06, + "loss": 0.3115234375, + "step": 66865 + }, + { + "epoch": 0.5782051171196099, + "grad_norm": 7.560826966830415, + "learning_rate": 4.845397122426715e-06, + "loss": 0.10204391479492188, + "step": 66870 + }, + { + "epoch": 0.5782483506411531, + "grad_norm": 8.830894446772842, + "learning_rate": 4.845236490081415e-06, + "loss": 0.0567108154296875, + "step": 66875 + }, + { + "epoch": 0.5782915841626964, + "grad_norm": 518.2368480034353, + "learning_rate": 4.845075849226046e-06, + "loss": 0.6011566162109375, + "step": 66880 + }, + { + "epoch": 0.5783348176842397, + "grad_norm": 6.786363540092992, + "learning_rate": 4.844915199861352e-06, + "loss": 0.22809410095214844, + "step": 66885 + }, + { + "epoch": 0.5783780512057829, + "grad_norm": 10.645287162606875, + "learning_rate": 4.8447545419880715e-06, + "loss": 0.08801727294921875, + "step": 66890 + }, + { + "epoch": 0.5784212847273262, + "grad_norm": 3.4772303192360354, + "learning_rate": 4.844593875606946e-06, + "loss": 0.0212493896484375, + "step": 66895 + }, + { + "epoch": 0.5784645182488695, + "grad_norm": 17.808132014659638, + "learning_rate": 4.844433200718716e-06, + "loss": 0.1953125, + "step": 66900 + }, + { + "epoch": 0.5785077517704127, + "grad_norm": 0.03846833253661281, + "learning_rate": 4.844272517324124e-06, + "loss": 0.4445489883422852, + "step": 66905 + }, + { + "epoch": 0.5785509852919559, + "grad_norm": 3.1650334454475173, + "learning_rate": 4.8441118254239094e-06, + "loss": 0.0227813720703125, + "step": 66910 + }, + { + "epoch": 0.5785942188134993, + "grad_norm": 24.13346302240496, + "learning_rate": 4.843951125018814e-06, + "loss": 0.18605194091796876, + "step": 66915 + }, + { + "epoch": 0.5786374523350425, + "grad_norm": 12.742940458219685, + "learning_rate": 4.84379041610958e-06, + "loss": 0.267034912109375, + "step": 66920 + }, + { + "epoch": 0.5786806858565857, + "grad_norm": 16.375483118842766, + "learning_rate": 4.843629698696947e-06, + "loss": 0.26515045166015627, + "step": 66925 + }, + { + "epoch": 0.5787239193781291, + "grad_norm": 2.170802565589702, + "learning_rate": 4.843468972781655e-06, + "loss": 0.10019378662109375, + "step": 66930 + }, + { + "epoch": 0.5787671528996723, + "grad_norm": 22.666267236597452, + "learning_rate": 4.84330823836445e-06, + "loss": 0.250299072265625, + "step": 66935 + }, + { + "epoch": 0.5788103864212155, + "grad_norm": 41.30869719735503, + "learning_rate": 4.843147495446067e-06, + "loss": 0.338055419921875, + "step": 66940 + }, + { + "epoch": 0.5788536199427589, + "grad_norm": 34.20656342208562, + "learning_rate": 4.842986744027253e-06, + "loss": 0.07806243896484374, + "step": 66945 + }, + { + "epoch": 0.5788968534643021, + "grad_norm": 5.704500231086579, + "learning_rate": 4.842825984108747e-06, + "loss": 0.036226654052734376, + "step": 66950 + }, + { + "epoch": 0.5789400869858453, + "grad_norm": 1.680637273958884, + "learning_rate": 4.84266521569129e-06, + "loss": 0.1847991943359375, + "step": 66955 + }, + { + "epoch": 0.5789833205073887, + "grad_norm": 12.836413045378853, + "learning_rate": 4.842504438775622e-06, + "loss": 0.03267974853515625, + "step": 66960 + }, + { + "epoch": 0.5790265540289319, + "grad_norm": 2.5979658568977118, + "learning_rate": 4.8423436533624895e-06, + "loss": 0.145556640625, + "step": 66965 + }, + { + "epoch": 0.5790697875504751, + "grad_norm": 9.937148909319594, + "learning_rate": 4.842182859452629e-06, + "loss": 0.06267013549804687, + "step": 66970 + }, + { + "epoch": 0.5791130210720185, + "grad_norm": 7.839270236167119, + "learning_rate": 4.842022057046784e-06, + "loss": 0.10478515625, + "step": 66975 + }, + { + "epoch": 0.5791562545935617, + "grad_norm": 3.3411481072482623, + "learning_rate": 4.841861246145696e-06, + "loss": 0.2006011962890625, + "step": 66980 + }, + { + "epoch": 0.5791994881151049, + "grad_norm": 4.0834033014326065, + "learning_rate": 4.841700426750107e-06, + "loss": 0.299273681640625, + "step": 66985 + }, + { + "epoch": 0.5792427216366481, + "grad_norm": 14.172772807277067, + "learning_rate": 4.841539598860759e-06, + "loss": 0.03190193176269531, + "step": 66990 + }, + { + "epoch": 0.5792859551581915, + "grad_norm": 2.9998034559057936, + "learning_rate": 4.841378762478392e-06, + "loss": 0.13143157958984375, + "step": 66995 + }, + { + "epoch": 0.5793291886797347, + "grad_norm": 1.873492530726049, + "learning_rate": 4.841217917603748e-06, + "loss": 0.09952392578125, + "step": 67000 + }, + { + "epoch": 0.5793724222012779, + "grad_norm": 20.107824856130215, + "learning_rate": 4.841057064237573e-06, + "loss": 0.4010658264160156, + "step": 67005 + }, + { + "epoch": 0.5794156557228213, + "grad_norm": 12.377455970579641, + "learning_rate": 4.8408962023806025e-06, + "loss": 0.16404647827148439, + "step": 67010 + }, + { + "epoch": 0.5794588892443645, + "grad_norm": 7.537383651458083, + "learning_rate": 4.840735332033582e-06, + "loss": 0.0363037109375, + "step": 67015 + }, + { + "epoch": 0.5795021227659077, + "grad_norm": 9.337777524553337, + "learning_rate": 4.840574453197253e-06, + "loss": 0.14627838134765625, + "step": 67020 + }, + { + "epoch": 0.5795453562874511, + "grad_norm": 9.096139498670748, + "learning_rate": 4.840413565872358e-06, + "loss": 0.11754608154296875, + "step": 67025 + }, + { + "epoch": 0.5795885898089943, + "grad_norm": 2.703643977986583, + "learning_rate": 4.840252670059637e-06, + "loss": 0.09030914306640625, + "step": 67030 + }, + { + "epoch": 0.5796318233305375, + "grad_norm": 17.37716284701815, + "learning_rate": 4.840091765759834e-06, + "loss": 0.1751007080078125, + "step": 67035 + }, + { + "epoch": 0.5796750568520809, + "grad_norm": 10.974583210828659, + "learning_rate": 4.8399308529736895e-06, + "loss": 0.4068267822265625, + "step": 67040 + }, + { + "epoch": 0.5797182903736241, + "grad_norm": 5.1944978951539245, + "learning_rate": 4.839769931701947e-06, + "loss": 0.21645421981811525, + "step": 67045 + }, + { + "epoch": 0.5797615238951673, + "grad_norm": 4.511387835624076, + "learning_rate": 4.839609001945349e-06, + "loss": 0.4465728759765625, + "step": 67050 + }, + { + "epoch": 0.5798047574167107, + "grad_norm": 0.8168473594012197, + "learning_rate": 4.839448063704637e-06, + "loss": 0.051168632507324216, + "step": 67055 + }, + { + "epoch": 0.5798479909382539, + "grad_norm": 0.2981830276228831, + "learning_rate": 4.8392871169805514e-06, + "loss": 0.33873672485351564, + "step": 67060 + }, + { + "epoch": 0.5798912244597971, + "grad_norm": 2.942877694330209, + "learning_rate": 4.839126161773838e-06, + "loss": 0.10891571044921874, + "step": 67065 + }, + { + "epoch": 0.5799344579813404, + "grad_norm": 32.579589561709106, + "learning_rate": 4.838965198085235e-06, + "loss": 0.24508056640625, + "step": 67070 + }, + { + "epoch": 0.5799776915028837, + "grad_norm": 2.7286587288616473, + "learning_rate": 4.838804225915488e-06, + "loss": 0.5118896484375, + "step": 67075 + }, + { + "epoch": 0.5800209250244269, + "grad_norm": 6.6719677934980295, + "learning_rate": 4.83864324526534e-06, + "loss": 0.17737236022949218, + "step": 67080 + }, + { + "epoch": 0.5800641585459702, + "grad_norm": 34.1305868405533, + "learning_rate": 4.83848225613553e-06, + "loss": 0.18742904663085938, + "step": 67085 + }, + { + "epoch": 0.5801073920675135, + "grad_norm": 15.145407159733349, + "learning_rate": 4.8383212585268026e-06, + "loss": 0.04588394165039063, + "step": 67090 + }, + { + "epoch": 0.5801506255890567, + "grad_norm": 8.713509859493666, + "learning_rate": 4.8381602524399006e-06, + "loss": 0.2888206481933594, + "step": 67095 + }, + { + "epoch": 0.5801938591106, + "grad_norm": 44.582155543313974, + "learning_rate": 4.837999237875565e-06, + "loss": 0.20941696166992188, + "step": 67100 + }, + { + "epoch": 0.5802370926321433, + "grad_norm": 20.81269690536081, + "learning_rate": 4.83783821483454e-06, + "loss": 0.15538330078125, + "step": 67105 + }, + { + "epoch": 0.5802803261536865, + "grad_norm": 1.2959654371393734, + "learning_rate": 4.837677183317566e-06, + "loss": 0.22408599853515626, + "step": 67110 + }, + { + "epoch": 0.5803235596752297, + "grad_norm": 1.3716638450896605, + "learning_rate": 4.83751614332539e-06, + "loss": 0.05204010009765625, + "step": 67115 + }, + { + "epoch": 0.5803667931967731, + "grad_norm": 23.11439632528397, + "learning_rate": 4.83735509485875e-06, + "loss": 0.26904096603393557, + "step": 67120 + }, + { + "epoch": 0.5804100267183163, + "grad_norm": 12.933330331393933, + "learning_rate": 4.8371940379183916e-06, + "loss": 0.13435516357421876, + "step": 67125 + }, + { + "epoch": 0.5804532602398595, + "grad_norm": 0.30259264326082, + "learning_rate": 4.837032972505056e-06, + "loss": 0.19851303100585938, + "step": 67130 + }, + { + "epoch": 0.5804964937614029, + "grad_norm": 26.558659071964694, + "learning_rate": 4.836871898619487e-06, + "loss": 0.264019775390625, + "step": 67135 + }, + { + "epoch": 0.5805397272829461, + "grad_norm": 0.38891938777018736, + "learning_rate": 4.836710816262427e-06, + "loss": 0.18189163208007814, + "step": 67140 + }, + { + "epoch": 0.5805829608044893, + "grad_norm": 0.3912275921371205, + "learning_rate": 4.836549725434619e-06, + "loss": 0.11836929321289062, + "step": 67145 + }, + { + "epoch": 0.5806261943260327, + "grad_norm": 3.466238839894084, + "learning_rate": 4.836388626136805e-06, + "loss": 0.08263397216796875, + "step": 67150 + }, + { + "epoch": 0.5806694278475759, + "grad_norm": 4.004240193402089, + "learning_rate": 4.83622751836973e-06, + "loss": 0.12963790893554689, + "step": 67155 + }, + { + "epoch": 0.5807126613691191, + "grad_norm": 1.0096114396270572, + "learning_rate": 4.836066402134136e-06, + "loss": 0.23604888916015626, + "step": 67160 + }, + { + "epoch": 0.5807558948906624, + "grad_norm": 5.761158137679552, + "learning_rate": 4.8359052774307655e-06, + "loss": 0.06847152709960938, + "step": 67165 + }, + { + "epoch": 0.5807991284122057, + "grad_norm": 9.347165123877739, + "learning_rate": 4.835744144260363e-06, + "loss": 0.1374603271484375, + "step": 67170 + }, + { + "epoch": 0.5808423619337489, + "grad_norm": 1.1709529344421123, + "learning_rate": 4.83558300262367e-06, + "loss": 0.06464767456054688, + "step": 67175 + }, + { + "epoch": 0.5808855954552922, + "grad_norm": 23.35844009025923, + "learning_rate": 4.8354218525214305e-06, + "loss": 0.16263427734375, + "step": 67180 + }, + { + "epoch": 0.5809288289768355, + "grad_norm": 19.988562768256365, + "learning_rate": 4.8352606939543865e-06, + "loss": 0.509637451171875, + "step": 67185 + }, + { + "epoch": 0.5809720624983787, + "grad_norm": 9.261986348306156, + "learning_rate": 4.835099526923284e-06, + "loss": 0.15886154174804687, + "step": 67190 + }, + { + "epoch": 0.581015296019922, + "grad_norm": 0.43467978811812197, + "learning_rate": 4.834938351428865e-06, + "loss": 0.0436187744140625, + "step": 67195 + }, + { + "epoch": 0.5810585295414653, + "grad_norm": 10.525876417830375, + "learning_rate": 4.834777167471871e-06, + "loss": 0.14957351684570314, + "step": 67200 + }, + { + "epoch": 0.5811017630630085, + "grad_norm": 1.5805321555268796, + "learning_rate": 4.834615975053047e-06, + "loss": 0.05897445678710937, + "step": 67205 + }, + { + "epoch": 0.5811449965845518, + "grad_norm": 19.562527199589553, + "learning_rate": 4.834454774173137e-06, + "loss": 0.1868988037109375, + "step": 67210 + }, + { + "epoch": 0.5811882301060951, + "grad_norm": 1.3557782860492282, + "learning_rate": 4.834293564832882e-06, + "loss": 0.4784740447998047, + "step": 67215 + }, + { + "epoch": 0.5812314636276383, + "grad_norm": 6.014053669948648, + "learning_rate": 4.83413234703303e-06, + "loss": 0.4773406982421875, + "step": 67220 + }, + { + "epoch": 0.5812746971491816, + "grad_norm": 6.507990753068167, + "learning_rate": 4.8339711207743195e-06, + "loss": 0.34479827880859376, + "step": 67225 + }, + { + "epoch": 0.5813179306707249, + "grad_norm": 21.969559123541472, + "learning_rate": 4.833809886057496e-06, + "loss": 0.27245254516601564, + "step": 67230 + }, + { + "epoch": 0.5813611641922681, + "grad_norm": 4.107759420833343, + "learning_rate": 4.833648642883304e-06, + "loss": 0.1722137451171875, + "step": 67235 + }, + { + "epoch": 0.5814043977138114, + "grad_norm": 34.20220738362551, + "learning_rate": 4.833487391252487e-06, + "loss": 0.1392822265625, + "step": 67240 + }, + { + "epoch": 0.5814476312353546, + "grad_norm": 4.497278285826183, + "learning_rate": 4.833326131165788e-06, + "loss": 0.11944656372070313, + "step": 67245 + }, + { + "epoch": 0.5814908647568979, + "grad_norm": 0.5380940495572135, + "learning_rate": 4.83316486262395e-06, + "loss": 0.03674774169921875, + "step": 67250 + }, + { + "epoch": 0.5815340982784412, + "grad_norm": 16.772302941470965, + "learning_rate": 4.833003585627718e-06, + "loss": 0.14784641265869142, + "step": 67255 + }, + { + "epoch": 0.5815773317999844, + "grad_norm": 0.3674949528373205, + "learning_rate": 4.8328423001778365e-06, + "loss": 0.1650665283203125, + "step": 67260 + }, + { + "epoch": 0.5816205653215277, + "grad_norm": 0.4486583108079008, + "learning_rate": 4.832681006275047e-06, + "loss": 0.095440673828125, + "step": 67265 + }, + { + "epoch": 0.581663798843071, + "grad_norm": 5.82736481698983, + "learning_rate": 4.832519703920095e-06, + "loss": 0.06958236694335937, + "step": 67270 + }, + { + "epoch": 0.5817070323646142, + "grad_norm": 4.534477733308752, + "learning_rate": 4.832358393113724e-06, + "loss": 0.29764862060546876, + "step": 67275 + }, + { + "epoch": 0.5817502658861575, + "grad_norm": 1.2149568770144707, + "learning_rate": 4.832197073856679e-06, + "loss": 0.035797119140625, + "step": 67280 + }, + { + "epoch": 0.5817934994077008, + "grad_norm": 22.342581634631934, + "learning_rate": 4.8320357461497016e-06, + "loss": 0.3735504150390625, + "step": 67285 + }, + { + "epoch": 0.581836732929244, + "grad_norm": 0.19267097909582526, + "learning_rate": 4.831874409993538e-06, + "loss": 0.20778388977050782, + "step": 67290 + }, + { + "epoch": 0.5818799664507873, + "grad_norm": 2.2240340100565725, + "learning_rate": 4.831713065388931e-06, + "loss": 0.12318115234375, + "step": 67295 + }, + { + "epoch": 0.5819231999723306, + "grad_norm": 1.1008669595867753, + "learning_rate": 4.831551712336626e-06, + "loss": 0.14797210693359375, + "step": 67300 + }, + { + "epoch": 0.5819664334938738, + "grad_norm": 7.40068125555138, + "learning_rate": 4.831390350837366e-06, + "loss": 0.077130126953125, + "step": 67305 + }, + { + "epoch": 0.5820096670154171, + "grad_norm": 0.9542271902421298, + "learning_rate": 4.8312289808918945e-06, + "loss": 0.0320068359375, + "step": 67310 + }, + { + "epoch": 0.5820529005369603, + "grad_norm": 3.725948103018298, + "learning_rate": 4.831067602500958e-06, + "loss": 0.08316650390625, + "step": 67315 + }, + { + "epoch": 0.5820961340585036, + "grad_norm": 0.1646289715553298, + "learning_rate": 4.830906215665299e-06, + "loss": 0.0712982177734375, + "step": 67320 + }, + { + "epoch": 0.5821393675800469, + "grad_norm": 8.181654763701575, + "learning_rate": 4.830744820385662e-06, + "loss": 0.09814605712890626, + "step": 67325 + }, + { + "epoch": 0.5821826011015901, + "grad_norm": 4.5181482441306535, + "learning_rate": 4.830583416662792e-06, + "loss": 0.6600929260253906, + "step": 67330 + }, + { + "epoch": 0.5822258346231334, + "grad_norm": 1.8910229806261594, + "learning_rate": 4.830422004497433e-06, + "loss": 0.053918075561523435, + "step": 67335 + }, + { + "epoch": 0.5822690681446766, + "grad_norm": 7.843730926819512, + "learning_rate": 4.830260583890331e-06, + "loss": 0.2451324462890625, + "step": 67340 + }, + { + "epoch": 0.58231230166622, + "grad_norm": 0.09549510815546255, + "learning_rate": 4.830099154842227e-06, + "loss": 0.021083831787109375, + "step": 67345 + }, + { + "epoch": 0.5823555351877632, + "grad_norm": 4.38376069618974, + "learning_rate": 4.829937717353867e-06, + "loss": 0.21013641357421875, + "step": 67350 + }, + { + "epoch": 0.5823987687093064, + "grad_norm": 4.726832806454038, + "learning_rate": 4.829776271425997e-06, + "loss": 0.13478546142578124, + "step": 67355 + }, + { + "epoch": 0.5824420022308497, + "grad_norm": 2.008906299948712, + "learning_rate": 4.82961481705936e-06, + "loss": 0.2545978546142578, + "step": 67360 + }, + { + "epoch": 0.582485235752393, + "grad_norm": 4.017458664849088, + "learning_rate": 4.829453354254702e-06, + "loss": 0.07281036376953125, + "step": 67365 + }, + { + "epoch": 0.5825284692739362, + "grad_norm": 0.23678158373196695, + "learning_rate": 4.8292918830127664e-06, + "loss": 0.11483917236328126, + "step": 67370 + }, + { + "epoch": 0.5825717027954795, + "grad_norm": 4.314401243851493, + "learning_rate": 4.829130403334298e-06, + "loss": 0.10200424194335937, + "step": 67375 + }, + { + "epoch": 0.5826149363170228, + "grad_norm": 1.3704411906660103, + "learning_rate": 4.828968915220042e-06, + "loss": 0.14821014404296876, + "step": 67380 + }, + { + "epoch": 0.582658169838566, + "grad_norm": 24.609903991844355, + "learning_rate": 4.828807418670743e-06, + "loss": 0.09273033142089844, + "step": 67385 + }, + { + "epoch": 0.5827014033601093, + "grad_norm": 33.834046883822076, + "learning_rate": 4.828645913687145e-06, + "loss": 0.25982208251953126, + "step": 67390 + }, + { + "epoch": 0.5827446368816526, + "grad_norm": 18.233111365342918, + "learning_rate": 4.828484400269995e-06, + "loss": 0.14713554382324218, + "step": 67395 + }, + { + "epoch": 0.5827878704031958, + "grad_norm": 0.804090104624286, + "learning_rate": 4.828322878420035e-06, + "loss": 0.024176692962646483, + "step": 67400 + }, + { + "epoch": 0.5828311039247391, + "grad_norm": 1.7400195785384693, + "learning_rate": 4.828161348138013e-06, + "loss": 0.10040550231933594, + "step": 67405 + }, + { + "epoch": 0.5828743374462824, + "grad_norm": 10.536649204643894, + "learning_rate": 4.827999809424671e-06, + "loss": 0.0961181640625, + "step": 67410 + }, + { + "epoch": 0.5829175709678256, + "grad_norm": 0.4348451198477376, + "learning_rate": 4.827838262280757e-06, + "loss": 0.047174835205078126, + "step": 67415 + }, + { + "epoch": 0.5829608044893688, + "grad_norm": 1.859413699217844, + "learning_rate": 4.827676706707013e-06, + "loss": 0.3824920654296875, + "step": 67420 + }, + { + "epoch": 0.5830040380109122, + "grad_norm": 2.316204944403846, + "learning_rate": 4.827515142704186e-06, + "loss": 0.059304428100585935, + "step": 67425 + }, + { + "epoch": 0.5830472715324554, + "grad_norm": 2.426139531793118, + "learning_rate": 4.827353570273021e-06, + "loss": 0.06457366943359374, + "step": 67430 + }, + { + "epoch": 0.5830905050539986, + "grad_norm": 29.531799223934506, + "learning_rate": 4.827191989414262e-06, + "loss": 0.15647659301757813, + "step": 67435 + }, + { + "epoch": 0.583133738575542, + "grad_norm": 16.246236602120913, + "learning_rate": 4.827030400128656e-06, + "loss": 0.14083046913146974, + "step": 67440 + }, + { + "epoch": 0.5831769720970852, + "grad_norm": 1.6587085831544837, + "learning_rate": 4.8268688024169465e-06, + "loss": 0.09737777709960938, + "step": 67445 + }, + { + "epoch": 0.5832202056186284, + "grad_norm": 2.621785278211735, + "learning_rate": 4.826707196279879e-06, + "loss": 0.2759521484375, + "step": 67450 + }, + { + "epoch": 0.5832634391401718, + "grad_norm": 0.40219901915865097, + "learning_rate": 4.8265455817182004e-06, + "loss": 0.03768463134765625, + "step": 67455 + }, + { + "epoch": 0.583306672661715, + "grad_norm": 4.732976921382258, + "learning_rate": 4.826383958732655e-06, + "loss": 0.1219390869140625, + "step": 67460 + }, + { + "epoch": 0.5833499061832582, + "grad_norm": 11.83331965401863, + "learning_rate": 4.826222327323988e-06, + "loss": 0.23366851806640626, + "step": 67465 + }, + { + "epoch": 0.5833931397048016, + "grad_norm": 23.897452652971076, + "learning_rate": 4.826060687492945e-06, + "loss": 0.32579765319824217, + "step": 67470 + }, + { + "epoch": 0.5834363732263448, + "grad_norm": 11.289110098705278, + "learning_rate": 4.8258990392402705e-06, + "loss": 0.2185791015625, + "step": 67475 + }, + { + "epoch": 0.583479606747888, + "grad_norm": 12.270961531431132, + "learning_rate": 4.825737382566712e-06, + "loss": 0.177081298828125, + "step": 67480 + }, + { + "epoch": 0.5835228402694314, + "grad_norm": 310.9508211361292, + "learning_rate": 4.825575717473014e-06, + "loss": 0.245452880859375, + "step": 67485 + }, + { + "epoch": 0.5835660737909746, + "grad_norm": 6.932559734037668, + "learning_rate": 4.825414043959921e-06, + "loss": 0.09331283569335938, + "step": 67490 + }, + { + "epoch": 0.5836093073125178, + "grad_norm": 0.2492546445016949, + "learning_rate": 4.825252362028181e-06, + "loss": 0.3196277618408203, + "step": 67495 + }, + { + "epoch": 0.5836525408340612, + "grad_norm": 5.208659100917217, + "learning_rate": 4.825090671678538e-06, + "loss": 0.3822929382324219, + "step": 67500 + }, + { + "epoch": 0.5836957743556044, + "grad_norm": 3.2093022315813937, + "learning_rate": 4.824928972911738e-06, + "loss": 0.07453765869140624, + "step": 67505 + }, + { + "epoch": 0.5837390078771476, + "grad_norm": 1.4435443330997264, + "learning_rate": 4.824767265728527e-06, + "loss": 0.1772380828857422, + "step": 67510 + }, + { + "epoch": 0.5837822413986908, + "grad_norm": 8.964341185207875, + "learning_rate": 4.824605550129651e-06, + "loss": 0.0480010986328125, + "step": 67515 + }, + { + "epoch": 0.5838254749202342, + "grad_norm": 17.568599625339207, + "learning_rate": 4.824443826115854e-06, + "loss": 0.14099807739257814, + "step": 67520 + }, + { + "epoch": 0.5838687084417774, + "grad_norm": 17.10271174390467, + "learning_rate": 4.824282093687884e-06, + "loss": 0.2529388427734375, + "step": 67525 + }, + { + "epoch": 0.5839119419633206, + "grad_norm": 1.9955735277071343, + "learning_rate": 4.824120352846487e-06, + "loss": 0.030099105834960938, + "step": 67530 + }, + { + "epoch": 0.583955175484864, + "grad_norm": 0.3660854487528914, + "learning_rate": 4.823958603592407e-06, + "loss": 0.1618377685546875, + "step": 67535 + }, + { + "epoch": 0.5839984090064072, + "grad_norm": 3.813079291849636, + "learning_rate": 4.823796845926391e-06, + "loss": 0.1060821533203125, + "step": 67540 + }, + { + "epoch": 0.5840416425279504, + "grad_norm": 8.343060748521129, + "learning_rate": 4.823635079849186e-06, + "loss": 0.1858306884765625, + "step": 67545 + }, + { + "epoch": 0.5840848760494938, + "grad_norm": 1.6919261046362786, + "learning_rate": 4.823473305361537e-06, + "loss": 0.13759918212890626, + "step": 67550 + }, + { + "epoch": 0.584128109571037, + "grad_norm": 9.211721134147282, + "learning_rate": 4.82331152246419e-06, + "loss": 0.2184234619140625, + "step": 67555 + }, + { + "epoch": 0.5841713430925802, + "grad_norm": 5.5154023418424245, + "learning_rate": 4.823149731157892e-06, + "loss": 0.204766845703125, + "step": 67560 + }, + { + "epoch": 0.5842145766141236, + "grad_norm": 0.7052790223763659, + "learning_rate": 4.822987931443387e-06, + "loss": 0.03130645751953125, + "step": 67565 + }, + { + "epoch": 0.5842578101356668, + "grad_norm": 20.96905349127665, + "learning_rate": 4.822826123321424e-06, + "loss": 0.14260025024414064, + "step": 67570 + }, + { + "epoch": 0.58430104365721, + "grad_norm": 5.874125539594912, + "learning_rate": 4.822664306792748e-06, + "loss": 0.098974609375, + "step": 67575 + }, + { + "epoch": 0.5843442771787534, + "grad_norm": 6.726451726197113, + "learning_rate": 4.8225024818581045e-06, + "loss": 0.098388671875, + "step": 67580 + }, + { + "epoch": 0.5843875107002966, + "grad_norm": 21.280080077677102, + "learning_rate": 4.8223406485182415e-06, + "loss": 0.23487319946289062, + "step": 67585 + }, + { + "epoch": 0.5844307442218398, + "grad_norm": 6.563195206813839, + "learning_rate": 4.822178806773904e-06, + "loss": 0.117755126953125, + "step": 67590 + }, + { + "epoch": 0.584473977743383, + "grad_norm": 6.830458676172836, + "learning_rate": 4.822016956625839e-06, + "loss": 0.10036201477050781, + "step": 67595 + }, + { + "epoch": 0.5845172112649264, + "grad_norm": 0.3932191336503873, + "learning_rate": 4.821855098074793e-06, + "loss": 0.3391632080078125, + "step": 67600 + }, + { + "epoch": 0.5845604447864696, + "grad_norm": 16.58953513769415, + "learning_rate": 4.821693231121512e-06, + "loss": 0.118798828125, + "step": 67605 + }, + { + "epoch": 0.5846036783080129, + "grad_norm": 0.36137688590092193, + "learning_rate": 4.821531355766742e-06, + "loss": 0.202398681640625, + "step": 67610 + }, + { + "epoch": 0.5846469118295562, + "grad_norm": 19.58143063779705, + "learning_rate": 4.82136947201123e-06, + "loss": 0.14921531677246094, + "step": 67615 + }, + { + "epoch": 0.5846901453510994, + "grad_norm": 12.992852921473535, + "learning_rate": 4.821207579855725e-06, + "loss": 0.23222122192382813, + "step": 67620 + }, + { + "epoch": 0.5847333788726427, + "grad_norm": 0.9053781227066772, + "learning_rate": 4.821045679300971e-06, + "loss": 0.0148468017578125, + "step": 67625 + }, + { + "epoch": 0.584776612394186, + "grad_norm": 1.5974407532416444, + "learning_rate": 4.820883770347714e-06, + "loss": 0.2137054443359375, + "step": 67630 + }, + { + "epoch": 0.5848198459157292, + "grad_norm": 4.9611936154880665, + "learning_rate": 4.820721852996703e-06, + "loss": 0.178863525390625, + "step": 67635 + }, + { + "epoch": 0.5848630794372724, + "grad_norm": 13.746946888425946, + "learning_rate": 4.820559927248684e-06, + "loss": 0.07693862915039062, + "step": 67640 + }, + { + "epoch": 0.5849063129588158, + "grad_norm": 2.362749036184085, + "learning_rate": 4.820397993104402e-06, + "loss": 0.13045806884765626, + "step": 67645 + }, + { + "epoch": 0.584949546480359, + "grad_norm": 2.8904155000696203, + "learning_rate": 4.820236050564606e-06, + "loss": 0.27232666015625, + "step": 67650 + }, + { + "epoch": 0.5849927800019022, + "grad_norm": 22.55131141329689, + "learning_rate": 4.820074099630043e-06, + "loss": 0.07789535522460937, + "step": 67655 + }, + { + "epoch": 0.5850360135234456, + "grad_norm": 6.758578139664903, + "learning_rate": 4.819912140301458e-06, + "loss": 0.187078857421875, + "step": 67660 + }, + { + "epoch": 0.5850792470449888, + "grad_norm": 14.024061206856695, + "learning_rate": 4.819750172579599e-06, + "loss": 0.2666473388671875, + "step": 67665 + }, + { + "epoch": 0.585122480566532, + "grad_norm": 18.615321180268893, + "learning_rate": 4.819588196465214e-06, + "loss": 0.3438560485839844, + "step": 67670 + }, + { + "epoch": 0.5851657140880754, + "grad_norm": 3.394872946876965, + "learning_rate": 4.819426211959048e-06, + "loss": 0.2778228759765625, + "step": 67675 + }, + { + "epoch": 0.5852089476096186, + "grad_norm": 0.8761667181694263, + "learning_rate": 4.819264219061851e-06, + "loss": 0.046117401123046874, + "step": 67680 + }, + { + "epoch": 0.5852521811311618, + "grad_norm": 5.6623007658762425, + "learning_rate": 4.819102217774366e-06, + "loss": 0.08189468383789063, + "step": 67685 + }, + { + "epoch": 0.5852954146527051, + "grad_norm": 0.7138511156404687, + "learning_rate": 4.8189402080973435e-06, + "loss": 0.102557373046875, + "step": 67690 + }, + { + "epoch": 0.5853386481742484, + "grad_norm": 38.607847888116, + "learning_rate": 4.8187781900315285e-06, + "loss": 0.16613006591796875, + "step": 67695 + }, + { + "epoch": 0.5853818816957916, + "grad_norm": 1.0496889255910953, + "learning_rate": 4.818616163577669e-06, + "loss": 0.23133087158203125, + "step": 67700 + }, + { + "epoch": 0.5854251152173349, + "grad_norm": 8.283298942304254, + "learning_rate": 4.818454128736514e-06, + "loss": 0.21907634735107423, + "step": 67705 + }, + { + "epoch": 0.5854683487388782, + "grad_norm": 17.065691316372227, + "learning_rate": 4.8182920855088085e-06, + "loss": 0.39522705078125, + "step": 67710 + }, + { + "epoch": 0.5855115822604214, + "grad_norm": 11.828225167062882, + "learning_rate": 4.818130033895301e-06, + "loss": 0.111004638671875, + "step": 67715 + }, + { + "epoch": 0.5855548157819647, + "grad_norm": 5.805682804304965, + "learning_rate": 4.817967973896737e-06, + "loss": 0.2502784729003906, + "step": 67720 + }, + { + "epoch": 0.585598049303508, + "grad_norm": 10.89927409310192, + "learning_rate": 4.8178059055138665e-06, + "loss": 0.07846946716308593, + "step": 67725 + }, + { + "epoch": 0.5856412828250512, + "grad_norm": 3.9267802480580634, + "learning_rate": 4.817643828747435e-06, + "loss": 0.1134307861328125, + "step": 67730 + }, + { + "epoch": 0.5856845163465945, + "grad_norm": 0.33501558289434763, + "learning_rate": 4.817481743598192e-06, + "loss": 0.07590408325195312, + "step": 67735 + }, + { + "epoch": 0.5857277498681378, + "grad_norm": 14.982295854861112, + "learning_rate": 4.817319650066884e-06, + "loss": 0.493292236328125, + "step": 67740 + }, + { + "epoch": 0.585770983389681, + "grad_norm": 5.323532021258545, + "learning_rate": 4.8171575481542565e-06, + "loss": 0.13105316162109376, + "step": 67745 + }, + { + "epoch": 0.5858142169112243, + "grad_norm": 0.9730543001042798, + "learning_rate": 4.81699543786106e-06, + "loss": 0.02119293212890625, + "step": 67750 + }, + { + "epoch": 0.5858574504327676, + "grad_norm": 24.778801922474333, + "learning_rate": 4.81683331918804e-06, + "loss": 0.12314453125, + "step": 67755 + }, + { + "epoch": 0.5859006839543108, + "grad_norm": 4.156432380007037, + "learning_rate": 4.816671192135948e-06, + "loss": 0.35335540771484375, + "step": 67760 + }, + { + "epoch": 0.5859439174758541, + "grad_norm": 5.397973033380546, + "learning_rate": 4.816509056705527e-06, + "loss": 0.1805908203125, + "step": 67765 + }, + { + "epoch": 0.5859871509973973, + "grad_norm": 4.29092122197055, + "learning_rate": 4.816346912897527e-06, + "loss": 0.1467498779296875, + "step": 67770 + }, + { + "epoch": 0.5860303845189406, + "grad_norm": 22.273402827060632, + "learning_rate": 4.816184760712695e-06, + "loss": 0.143914794921875, + "step": 67775 + }, + { + "epoch": 0.5860736180404839, + "grad_norm": 12.781187360743905, + "learning_rate": 4.816022600151781e-06, + "loss": 0.24139480590820311, + "step": 67780 + }, + { + "epoch": 0.5861168515620271, + "grad_norm": 18.834619531431965, + "learning_rate": 4.815860431215529e-06, + "loss": 0.16426544189453124, + "step": 67785 + }, + { + "epoch": 0.5861600850835704, + "grad_norm": 5.1514419866405685, + "learning_rate": 4.81569825390469e-06, + "loss": 0.04613571166992188, + "step": 67790 + }, + { + "epoch": 0.5862033186051137, + "grad_norm": 0.6380443191960371, + "learning_rate": 4.8155360682200124e-06, + "loss": 0.11127738952636719, + "step": 67795 + }, + { + "epoch": 0.5862465521266569, + "grad_norm": 5.0228694705553085, + "learning_rate": 4.815373874162241e-06, + "loss": 0.1069183349609375, + "step": 67800 + }, + { + "epoch": 0.5862897856482002, + "grad_norm": 0.6189521061446133, + "learning_rate": 4.815211671732127e-06, + "loss": 0.0693115234375, + "step": 67805 + }, + { + "epoch": 0.5863330191697435, + "grad_norm": 13.984851005582874, + "learning_rate": 4.8150494609304165e-06, + "loss": 0.2300943374633789, + "step": 67810 + }, + { + "epoch": 0.5863762526912867, + "grad_norm": 5.433530528194445, + "learning_rate": 4.814887241757859e-06, + "loss": 0.24496726989746093, + "step": 67815 + }, + { + "epoch": 0.58641948621283, + "grad_norm": 0.2498720175135624, + "learning_rate": 4.8147250142152016e-06, + "loss": 0.17049407958984375, + "step": 67820 + }, + { + "epoch": 0.5864627197343733, + "grad_norm": 22.189570918602026, + "learning_rate": 4.814562778303192e-06, + "loss": 0.14916229248046875, + "step": 67825 + }, + { + "epoch": 0.5865059532559165, + "grad_norm": 22.094119388396898, + "learning_rate": 4.81440053402258e-06, + "loss": 0.192120361328125, + "step": 67830 + }, + { + "epoch": 0.5865491867774598, + "grad_norm": 12.380128440172507, + "learning_rate": 4.814238281374112e-06, + "loss": 0.18213882446289062, + "step": 67835 + }, + { + "epoch": 0.586592420299003, + "grad_norm": 2.6251686830123675, + "learning_rate": 4.814076020358539e-06, + "loss": 0.098175048828125, + "step": 67840 + }, + { + "epoch": 0.5866356538205463, + "grad_norm": 8.37798961042405, + "learning_rate": 4.813913750976607e-06, + "loss": 0.134844970703125, + "step": 67845 + }, + { + "epoch": 0.5866788873420896, + "grad_norm": 2.6877132045333907, + "learning_rate": 4.813751473229065e-06, + "loss": 0.066192626953125, + "step": 67850 + }, + { + "epoch": 0.5867221208636328, + "grad_norm": 30.045218461798193, + "learning_rate": 4.813589187116661e-06, + "loss": 0.22899980545043946, + "step": 67855 + }, + { + "epoch": 0.5867653543851761, + "grad_norm": 7.79685418883952, + "learning_rate": 4.813426892640144e-06, + "loss": 0.091912841796875, + "step": 67860 + }, + { + "epoch": 0.5868085879067193, + "grad_norm": 4.0284117150939505, + "learning_rate": 4.8132645898002635e-06, + "loss": 0.037646484375, + "step": 67865 + }, + { + "epoch": 0.5868518214282626, + "grad_norm": 23.682966619488568, + "learning_rate": 4.813102278597766e-06, + "loss": 0.268914794921875, + "step": 67870 + }, + { + "epoch": 0.5868950549498059, + "grad_norm": 2.632782729772258, + "learning_rate": 4.812939959033401e-06, + "loss": 0.05864410400390625, + "step": 67875 + }, + { + "epoch": 0.5869382884713491, + "grad_norm": 24.884545675772443, + "learning_rate": 4.812777631107918e-06, + "loss": 0.36267967224121095, + "step": 67880 + }, + { + "epoch": 0.5869815219928924, + "grad_norm": 45.39947495581798, + "learning_rate": 4.812615294822064e-06, + "loss": 0.2910736083984375, + "step": 67885 + }, + { + "epoch": 0.5870247555144357, + "grad_norm": 12.118281061489474, + "learning_rate": 4.812452950176588e-06, + "loss": 0.23906631469726564, + "step": 67890 + }, + { + "epoch": 0.5870679890359789, + "grad_norm": 10.689828869010297, + "learning_rate": 4.81229059717224e-06, + "loss": 0.14176864624023439, + "step": 67895 + }, + { + "epoch": 0.5871112225575222, + "grad_norm": 4.592658144470107, + "learning_rate": 4.812128235809767e-06, + "loss": 0.08875732421875, + "step": 67900 + }, + { + "epoch": 0.5871544560790655, + "grad_norm": 1.2998399947124295, + "learning_rate": 4.81196586608992e-06, + "loss": 0.07296600341796874, + "step": 67905 + }, + { + "epoch": 0.5871976896006087, + "grad_norm": 18.646022921293476, + "learning_rate": 4.811803488013445e-06, + "loss": 0.20956573486328126, + "step": 67910 + }, + { + "epoch": 0.587240923122152, + "grad_norm": 9.228390036508378, + "learning_rate": 4.811641101581092e-06, + "loss": 0.2390594482421875, + "step": 67915 + }, + { + "epoch": 0.5872841566436953, + "grad_norm": 0.27852065314438207, + "learning_rate": 4.811478706793612e-06, + "loss": 0.13490219116210939, + "step": 67920 + }, + { + "epoch": 0.5873273901652385, + "grad_norm": 16.430907083221033, + "learning_rate": 4.811316303651752e-06, + "loss": 0.1670074462890625, + "step": 67925 + }, + { + "epoch": 0.5873706236867818, + "grad_norm": 1.5183089759607915, + "learning_rate": 4.811153892156261e-06, + "loss": 0.212591552734375, + "step": 67930 + }, + { + "epoch": 0.5874138572083251, + "grad_norm": 39.689752992355515, + "learning_rate": 4.810991472307889e-06, + "loss": 0.3081047058105469, + "step": 67935 + }, + { + "epoch": 0.5874570907298683, + "grad_norm": 1.4984301319047284, + "learning_rate": 4.810829044107384e-06, + "loss": 0.2068634033203125, + "step": 67940 + }, + { + "epoch": 0.5875003242514115, + "grad_norm": 2.934751884830707, + "learning_rate": 4.810666607555494e-06, + "loss": 0.19159488677978515, + "step": 67945 + }, + { + "epoch": 0.5875435577729549, + "grad_norm": 0.07749714766336921, + "learning_rate": 4.8105041626529705e-06, + "loss": 0.37404327392578124, + "step": 67950 + }, + { + "epoch": 0.5875867912944981, + "grad_norm": 0.7187315214614787, + "learning_rate": 4.810341709400563e-06, + "loss": 0.06913681030273437, + "step": 67955 + }, + { + "epoch": 0.5876300248160413, + "grad_norm": 6.909624960929049, + "learning_rate": 4.810179247799018e-06, + "loss": 0.3871002197265625, + "step": 67960 + }, + { + "epoch": 0.5876732583375847, + "grad_norm": 1.7225542707835242, + "learning_rate": 4.810016777849087e-06, + "loss": 0.3042930603027344, + "step": 67965 + }, + { + "epoch": 0.5877164918591279, + "grad_norm": 14.042663488603734, + "learning_rate": 4.809854299551517e-06, + "loss": 0.246136474609375, + "step": 67970 + }, + { + "epoch": 0.5877597253806711, + "grad_norm": 114.59464945412915, + "learning_rate": 4.8096918129070616e-06, + "loss": 0.16564855575561524, + "step": 67975 + }, + { + "epoch": 0.5878029589022145, + "grad_norm": 4.492799958871972, + "learning_rate": 4.809529317916465e-06, + "loss": 0.07642822265625, + "step": 67980 + }, + { + "epoch": 0.5878461924237577, + "grad_norm": 2.8227080215082725, + "learning_rate": 4.80936681458048e-06, + "loss": 0.08167724609375, + "step": 67985 + }, + { + "epoch": 0.5878894259453009, + "grad_norm": 9.749948452697383, + "learning_rate": 4.809204302899856e-06, + "loss": 0.08627777099609375, + "step": 67990 + }, + { + "epoch": 0.5879326594668443, + "grad_norm": 14.550139113477137, + "learning_rate": 4.80904178287534e-06, + "loss": 0.18618621826171874, + "step": 67995 + }, + { + "epoch": 0.5879758929883875, + "grad_norm": 4.903666787095757, + "learning_rate": 4.808879254507685e-06, + "loss": 0.13495712280273436, + "step": 68000 + }, + { + "epoch": 0.5880191265099307, + "grad_norm": 1.6304511828837962, + "learning_rate": 4.808716717797638e-06, + "loss": 0.0943267822265625, + "step": 68005 + }, + { + "epoch": 0.588062360031474, + "grad_norm": 1.6036539520339743, + "learning_rate": 4.808554172745948e-06, + "loss": 0.145806884765625, + "step": 68010 + }, + { + "epoch": 0.5881055935530173, + "grad_norm": 1.0178122346028011, + "learning_rate": 4.8083916193533675e-06, + "loss": 0.10030136108398438, + "step": 68015 + }, + { + "epoch": 0.5881488270745605, + "grad_norm": 0.2529583241438766, + "learning_rate": 4.8082290576206446e-06, + "loss": 0.40447540283203126, + "step": 68020 + }, + { + "epoch": 0.5881920605961038, + "grad_norm": 11.074210313979581, + "learning_rate": 4.808066487548527e-06, + "loss": 0.08814697265625, + "step": 68025 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 17.404531542443163, + "learning_rate": 4.80790390913777e-06, + "loss": 0.321923828125, + "step": 68030 + }, + { + "epoch": 0.5882785276391903, + "grad_norm": 2.3814122969849563, + "learning_rate": 4.807741322389117e-06, + "loss": 0.11618499755859375, + "step": 68035 + }, + { + "epoch": 0.5883217611607335, + "grad_norm": 6.117184379697648, + "learning_rate": 4.8075787273033224e-06, + "loss": 0.2347259521484375, + "step": 68040 + }, + { + "epoch": 0.5883649946822769, + "grad_norm": 1.3331743823375146, + "learning_rate": 4.807416123881134e-06, + "loss": 0.024512481689453126, + "step": 68045 + }, + { + "epoch": 0.5884082282038201, + "grad_norm": 21.52140642732303, + "learning_rate": 4.807253512123301e-06, + "loss": 0.17046661376953126, + "step": 68050 + }, + { + "epoch": 0.5884514617253633, + "grad_norm": 1.0503689125426305, + "learning_rate": 4.807090892030576e-06, + "loss": 0.16347312927246094, + "step": 68055 + }, + { + "epoch": 0.5884946952469067, + "grad_norm": 28.981723347525246, + "learning_rate": 4.806928263603707e-06, + "loss": 0.46384353637695314, + "step": 68060 + }, + { + "epoch": 0.5885379287684499, + "grad_norm": 7.021881106472831, + "learning_rate": 4.806765626843445e-06, + "loss": 0.2713733673095703, + "step": 68065 + }, + { + "epoch": 0.5885811622899931, + "grad_norm": 9.639990843120376, + "learning_rate": 4.8066029817505385e-06, + "loss": 0.14325275421142578, + "step": 68070 + }, + { + "epoch": 0.5886243958115365, + "grad_norm": 27.429916147526942, + "learning_rate": 4.806440328325739e-06, + "loss": 0.17618789672851562, + "step": 68075 + }, + { + "epoch": 0.5886676293330797, + "grad_norm": 6.321370260739955, + "learning_rate": 4.806277666569797e-06, + "loss": 0.33507080078125, + "step": 68080 + }, + { + "epoch": 0.5887108628546229, + "grad_norm": 16.20606293229208, + "learning_rate": 4.806114996483461e-06, + "loss": 0.18614883422851564, + "step": 68085 + }, + { + "epoch": 0.5887540963761663, + "grad_norm": 27.764901620630667, + "learning_rate": 4.8059523180674825e-06, + "loss": 0.25721435546875, + "step": 68090 + }, + { + "epoch": 0.5887973298977095, + "grad_norm": 7.792039233031692, + "learning_rate": 4.805789631322613e-06, + "loss": 0.13586807250976562, + "step": 68095 + }, + { + "epoch": 0.5888405634192527, + "grad_norm": 2.0686106381937535, + "learning_rate": 4.8056269362496e-06, + "loss": 0.1836700439453125, + "step": 68100 + }, + { + "epoch": 0.5888837969407961, + "grad_norm": 1.8750957765605685, + "learning_rate": 4.805464232849194e-06, + "loss": 0.20331497192382814, + "step": 68105 + }, + { + "epoch": 0.5889270304623393, + "grad_norm": 0.3096501874184894, + "learning_rate": 4.805301521122148e-06, + "loss": 0.0607666015625, + "step": 68110 + }, + { + "epoch": 0.5889702639838825, + "grad_norm": 14.235865765659055, + "learning_rate": 4.8051388010692115e-06, + "loss": 0.28296051025390623, + "step": 68115 + }, + { + "epoch": 0.5890134975054258, + "grad_norm": 31.34836340664625, + "learning_rate": 4.804976072691133e-06, + "loss": 0.30401611328125, + "step": 68120 + }, + { + "epoch": 0.5890567310269691, + "grad_norm": 2.7158322449760304, + "learning_rate": 4.804813335988664e-06, + "loss": 0.08563461303710937, + "step": 68125 + }, + { + "epoch": 0.5890999645485123, + "grad_norm": 5.656092450032354, + "learning_rate": 4.804650590962557e-06, + "loss": 0.6698211669921875, + "step": 68130 + }, + { + "epoch": 0.5891431980700556, + "grad_norm": 4.1935443990546455, + "learning_rate": 4.80448783761356e-06, + "loss": 0.0879852294921875, + "step": 68135 + }, + { + "epoch": 0.5891864315915989, + "grad_norm": 0.44070485214386784, + "learning_rate": 4.8043250759424244e-06, + "loss": 0.0638458251953125, + "step": 68140 + }, + { + "epoch": 0.5892296651131421, + "grad_norm": 4.384549887951632, + "learning_rate": 4.804162305949901e-06, + "loss": 0.04441680908203125, + "step": 68145 + }, + { + "epoch": 0.5892728986346853, + "grad_norm": 0.5748495600881571, + "learning_rate": 4.803999527636741e-06, + "loss": 0.03128662109375, + "step": 68150 + }, + { + "epoch": 0.5893161321562287, + "grad_norm": 22.753585121340024, + "learning_rate": 4.803836741003694e-06, + "loss": 0.25360565185546874, + "step": 68155 + }, + { + "epoch": 0.5893593656777719, + "grad_norm": 26.856890230549084, + "learning_rate": 4.803673946051511e-06, + "loss": 0.1880615234375, + "step": 68160 + }, + { + "epoch": 0.5894025991993151, + "grad_norm": 1.5728206125300919, + "learning_rate": 4.803511142780945e-06, + "loss": 0.14239349365234374, + "step": 68165 + }, + { + "epoch": 0.5894458327208585, + "grad_norm": 1.061003814568714, + "learning_rate": 4.8033483311927435e-06, + "loss": 0.2909111022949219, + "step": 68170 + }, + { + "epoch": 0.5894890662424017, + "grad_norm": 12.376774079604758, + "learning_rate": 4.803185511287659e-06, + "loss": 0.15223007202148436, + "step": 68175 + }, + { + "epoch": 0.589532299763945, + "grad_norm": 7.536600842647821, + "learning_rate": 4.803022683066442e-06, + "loss": 0.1856903076171875, + "step": 68180 + }, + { + "epoch": 0.5895755332854883, + "grad_norm": 2.199486592239652, + "learning_rate": 4.802859846529844e-06, + "loss": 0.13545570373535157, + "step": 68185 + }, + { + "epoch": 0.5896187668070315, + "grad_norm": 16.76827952620235, + "learning_rate": 4.802697001678616e-06, + "loss": 0.12938461303710938, + "step": 68190 + }, + { + "epoch": 0.5896620003285747, + "grad_norm": 0.948780652208219, + "learning_rate": 4.802534148513509e-06, + "loss": 0.041163063049316405, + "step": 68195 + }, + { + "epoch": 0.5897052338501181, + "grad_norm": 6.00558856513652, + "learning_rate": 4.802371287035272e-06, + "loss": 0.113226318359375, + "step": 68200 + }, + { + "epoch": 0.5897484673716613, + "grad_norm": 22.836837537925554, + "learning_rate": 4.802208417244659e-06, + "loss": 0.19197998046875, + "step": 68205 + }, + { + "epoch": 0.5897917008932045, + "grad_norm": 6.266021465376554, + "learning_rate": 4.80204553914242e-06, + "loss": 0.24044342041015626, + "step": 68210 + }, + { + "epoch": 0.5898349344147478, + "grad_norm": 0.8740430794077052, + "learning_rate": 4.801882652729307e-06, + "loss": 0.20237579345703124, + "step": 68215 + }, + { + "epoch": 0.5898781679362911, + "grad_norm": 21.40322075967707, + "learning_rate": 4.80171975800607e-06, + "loss": 0.16810073852539062, + "step": 68220 + }, + { + "epoch": 0.5899214014578343, + "grad_norm": 13.013573441619233, + "learning_rate": 4.80155685497346e-06, + "loss": 0.167755126953125, + "step": 68225 + }, + { + "epoch": 0.5899646349793776, + "grad_norm": 14.040012512248044, + "learning_rate": 4.801393943632229e-06, + "loss": 0.138641357421875, + "step": 68230 + }, + { + "epoch": 0.5900078685009209, + "grad_norm": 8.051383625728434, + "learning_rate": 4.8012310239831286e-06, + "loss": 0.15302734375, + "step": 68235 + }, + { + "epoch": 0.5900511020224641, + "grad_norm": 2.5822272567625486, + "learning_rate": 4.80106809602691e-06, + "loss": 0.09203338623046875, + "step": 68240 + }, + { + "epoch": 0.5900943355440074, + "grad_norm": 2.744005912429879, + "learning_rate": 4.800905159764325e-06, + "loss": 0.102239990234375, + "step": 68245 + }, + { + "epoch": 0.5901375690655507, + "grad_norm": 31.334136027109555, + "learning_rate": 4.800742215196124e-06, + "loss": 0.37510833740234373, + "step": 68250 + }, + { + "epoch": 0.5901808025870939, + "grad_norm": 0.9522061062356504, + "learning_rate": 4.800579262323058e-06, + "loss": 0.11056098937988282, + "step": 68255 + }, + { + "epoch": 0.5902240361086372, + "grad_norm": 0.32777777825413223, + "learning_rate": 4.8004163011458815e-06, + "loss": 0.1893768310546875, + "step": 68260 + }, + { + "epoch": 0.5902672696301805, + "grad_norm": 2.5880756328007064, + "learning_rate": 4.800253331665343e-06, + "loss": 0.09258651733398438, + "step": 68265 + }, + { + "epoch": 0.5903105031517237, + "grad_norm": 41.456414527448466, + "learning_rate": 4.800090353882195e-06, + "loss": 0.4226951599121094, + "step": 68270 + }, + { + "epoch": 0.590353736673267, + "grad_norm": 4.984295149982733, + "learning_rate": 4.7999273677971896e-06, + "loss": 0.22054443359375, + "step": 68275 + }, + { + "epoch": 0.5903969701948103, + "grad_norm": 3.459146407200667, + "learning_rate": 4.799764373411079e-06, + "loss": 0.13350753784179686, + "step": 68280 + }, + { + "epoch": 0.5904402037163535, + "grad_norm": 1.4902795349587517, + "learning_rate": 4.799601370724613e-06, + "loss": 0.1248016357421875, + "step": 68285 + }, + { + "epoch": 0.5904834372378968, + "grad_norm": 4.015600981118535, + "learning_rate": 4.799438359738545e-06, + "loss": 0.33665771484375, + "step": 68290 + }, + { + "epoch": 0.59052667075944, + "grad_norm": 11.399834984589589, + "learning_rate": 4.799275340453627e-06, + "loss": 0.0700469970703125, + "step": 68295 + }, + { + "epoch": 0.5905699042809833, + "grad_norm": 2.868920842988121, + "learning_rate": 4.7991123128706085e-06, + "loss": 0.026123046875, + "step": 68300 + }, + { + "epoch": 0.5906131378025266, + "grad_norm": 1.5863276906584758, + "learning_rate": 4.798949276990244e-06, + "loss": 0.4328746795654297, + "step": 68305 + }, + { + "epoch": 0.5906563713240698, + "grad_norm": 1.2563286969491498, + "learning_rate": 4.798786232813284e-06, + "loss": 0.050365447998046875, + "step": 68310 + }, + { + "epoch": 0.5906996048456131, + "grad_norm": 11.241197619865037, + "learning_rate": 4.798623180340481e-06, + "loss": 0.10243511199951172, + "step": 68315 + }, + { + "epoch": 0.5907428383671564, + "grad_norm": 18.08025308298033, + "learning_rate": 4.798460119572587e-06, + "loss": 0.15225906372070314, + "step": 68320 + }, + { + "epoch": 0.5907860718886996, + "grad_norm": 50.55883977135284, + "learning_rate": 4.798297050510354e-06, + "loss": 0.5772140502929688, + "step": 68325 + }, + { + "epoch": 0.5908293054102429, + "grad_norm": 21.67709907266195, + "learning_rate": 4.798133973154534e-06, + "loss": 0.16602020263671874, + "step": 68330 + }, + { + "epoch": 0.5908725389317862, + "grad_norm": 3.218655459212324, + "learning_rate": 4.797970887505878e-06, + "loss": 0.16515960693359374, + "step": 68335 + }, + { + "epoch": 0.5909157724533294, + "grad_norm": 5.409036428142189, + "learning_rate": 4.79780779356514e-06, + "loss": 0.049650955200195315, + "step": 68340 + }, + { + "epoch": 0.5909590059748727, + "grad_norm": 1.7382637867508481, + "learning_rate": 4.797644691333071e-06, + "loss": 0.08615570068359375, + "step": 68345 + }, + { + "epoch": 0.591002239496416, + "grad_norm": 2.225357185312172, + "learning_rate": 4.797481580810423e-06, + "loss": 0.15895767211914064, + "step": 68350 + }, + { + "epoch": 0.5910454730179592, + "grad_norm": 2.9506780818719927, + "learning_rate": 4.797318461997949e-06, + "loss": 0.0154052734375, + "step": 68355 + }, + { + "epoch": 0.5910887065395025, + "grad_norm": 1.1581709668258378, + "learning_rate": 4.797155334896403e-06, + "loss": 0.08292236328125, + "step": 68360 + }, + { + "epoch": 0.5911319400610457, + "grad_norm": 4.331353184397354, + "learning_rate": 4.796992199506533e-06, + "loss": 0.031420135498046876, + "step": 68365 + }, + { + "epoch": 0.591175173582589, + "grad_norm": 5.622604352768235, + "learning_rate": 4.796829055829095e-06, + "loss": 0.31113662719726565, + "step": 68370 + }, + { + "epoch": 0.5912184071041323, + "grad_norm": 25.921972090743527, + "learning_rate": 4.796665903864839e-06, + "loss": 0.2008697509765625, + "step": 68375 + }, + { + "epoch": 0.5912616406256755, + "grad_norm": 5.5153309841604825, + "learning_rate": 4.79650274361452e-06, + "loss": 0.126434326171875, + "step": 68380 + }, + { + "epoch": 0.5913048741472188, + "grad_norm": 26.93338296673136, + "learning_rate": 4.796339575078889e-06, + "loss": 0.19478759765625, + "step": 68385 + }, + { + "epoch": 0.591348107668762, + "grad_norm": 1.4464768553354135, + "learning_rate": 4.7961763982586975e-06, + "loss": 0.251666259765625, + "step": 68390 + }, + { + "epoch": 0.5913913411903053, + "grad_norm": 9.180116008720113, + "learning_rate": 4.7960132131546994e-06, + "loss": 0.23986053466796875, + "step": 68395 + }, + { + "epoch": 0.5914345747118486, + "grad_norm": 0.20919831612343442, + "learning_rate": 4.7958500197676475e-06, + "loss": 0.12333984375, + "step": 68400 + }, + { + "epoch": 0.5914778082333918, + "grad_norm": 4.343675147585023, + "learning_rate": 4.795686818098294e-06, + "loss": 0.249517822265625, + "step": 68405 + }, + { + "epoch": 0.5915210417549351, + "grad_norm": 9.362019806727421, + "learning_rate": 4.7955236081473915e-06, + "loss": 0.0634765625, + "step": 68410 + }, + { + "epoch": 0.5915642752764784, + "grad_norm": 3.5946112501682967, + "learning_rate": 4.795360389915693e-06, + "loss": 0.23197021484375, + "step": 68415 + }, + { + "epoch": 0.5916075087980216, + "grad_norm": 5.1347861581630765, + "learning_rate": 4.795197163403951e-06, + "loss": 0.127423095703125, + "step": 68420 + }, + { + "epoch": 0.5916507423195649, + "grad_norm": 1.3075449137119861, + "learning_rate": 4.795033928612917e-06, + "loss": 0.012544631958007812, + "step": 68425 + }, + { + "epoch": 0.5916939758411082, + "grad_norm": 44.09397823381046, + "learning_rate": 4.794870685543347e-06, + "loss": 0.2494110107421875, + "step": 68430 + }, + { + "epoch": 0.5917372093626514, + "grad_norm": 1.0839235258727828, + "learning_rate": 4.794707434195991e-06, + "loss": 0.05968017578125, + "step": 68435 + }, + { + "epoch": 0.5917804428841947, + "grad_norm": 17.07391807426481, + "learning_rate": 4.794544174571602e-06, + "loss": 0.14072608947753906, + "step": 68440 + }, + { + "epoch": 0.591823676405738, + "grad_norm": 8.281194688721483, + "learning_rate": 4.794380906670936e-06, + "loss": 0.3402740478515625, + "step": 68445 + }, + { + "epoch": 0.5918669099272812, + "grad_norm": 1.1419425589184915, + "learning_rate": 4.794217630494742e-06, + "loss": 0.08699760437011719, + "step": 68450 + }, + { + "epoch": 0.5919101434488245, + "grad_norm": 25.74817388322394, + "learning_rate": 4.794054346043775e-06, + "loss": 0.4000823974609375, + "step": 68455 + }, + { + "epoch": 0.5919533769703678, + "grad_norm": 0.40956848515400307, + "learning_rate": 4.79389105331879e-06, + "loss": 0.093743896484375, + "step": 68460 + }, + { + "epoch": 0.591996610491911, + "grad_norm": 112.35421732459851, + "learning_rate": 4.793727752320536e-06, + "loss": 0.14258956909179688, + "step": 68465 + }, + { + "epoch": 0.5920398440134542, + "grad_norm": 2.604955698759303, + "learning_rate": 4.793564443049769e-06, + "loss": 0.299755859375, + "step": 68470 + }, + { + "epoch": 0.5920830775349976, + "grad_norm": 0.45229994430482584, + "learning_rate": 4.793401125507241e-06, + "loss": 0.43766326904296876, + "step": 68475 + }, + { + "epoch": 0.5921263110565408, + "grad_norm": 0.09718299695726104, + "learning_rate": 4.7932377996937044e-06, + "loss": 0.0638702392578125, + "step": 68480 + }, + { + "epoch": 0.592169544578084, + "grad_norm": 2.778088731450862, + "learning_rate": 4.7930744656099145e-06, + "loss": 0.11284637451171875, + "step": 68485 + }, + { + "epoch": 0.5922127780996274, + "grad_norm": 27.74313677532437, + "learning_rate": 4.792911123256624e-06, + "loss": 0.08092041015625, + "step": 68490 + }, + { + "epoch": 0.5922560116211706, + "grad_norm": 2.5864754012132094, + "learning_rate": 4.792747772634584e-06, + "loss": 0.1013092041015625, + "step": 68495 + }, + { + "epoch": 0.5922992451427138, + "grad_norm": 3.1521598103248394, + "learning_rate": 4.792584413744552e-06, + "loss": 0.0778106689453125, + "step": 68500 + }, + { + "epoch": 0.5923424786642572, + "grad_norm": 2.7323977191575524, + "learning_rate": 4.792421046587278e-06, + "loss": 0.061138916015625, + "step": 68505 + }, + { + "epoch": 0.5923857121858004, + "grad_norm": 8.150896489461578, + "learning_rate": 4.7922576711635165e-06, + "loss": 0.060345458984375, + "step": 68510 + }, + { + "epoch": 0.5924289457073436, + "grad_norm": 3.7660794003095694, + "learning_rate": 4.792094287474022e-06, + "loss": 0.15399761199951173, + "step": 68515 + }, + { + "epoch": 0.592472179228887, + "grad_norm": 0.3136459253025327, + "learning_rate": 4.791930895519544e-06, + "loss": 0.02061004638671875, + "step": 68520 + }, + { + "epoch": 0.5925154127504302, + "grad_norm": 13.583958733730444, + "learning_rate": 4.791767495300842e-06, + "loss": 0.11844711303710938, + "step": 68525 + }, + { + "epoch": 0.5925586462719734, + "grad_norm": 2.8088758215049068, + "learning_rate": 4.7916040868186664e-06, + "loss": 0.340625, + "step": 68530 + }, + { + "epoch": 0.5926018797935168, + "grad_norm": 22.96458576103803, + "learning_rate": 4.79144067007377e-06, + "loss": 0.2667797088623047, + "step": 68535 + }, + { + "epoch": 0.59264511331506, + "grad_norm": 3.7723371547323494, + "learning_rate": 4.7912772450669085e-06, + "loss": 0.1117828369140625, + "step": 68540 + }, + { + "epoch": 0.5926883468366032, + "grad_norm": 2.1050944960378244, + "learning_rate": 4.791113811798833e-06, + "loss": 0.29744491577148435, + "step": 68545 + }, + { + "epoch": 0.5927315803581464, + "grad_norm": 44.51355265523246, + "learning_rate": 4.790950370270301e-06, + "loss": 0.24311904907226561, + "step": 68550 + }, + { + "epoch": 0.5927748138796898, + "grad_norm": 7.303975835000679, + "learning_rate": 4.790786920482062e-06, + "loss": 0.323101806640625, + "step": 68555 + }, + { + "epoch": 0.592818047401233, + "grad_norm": 0.9169781359755307, + "learning_rate": 4.790623462434874e-06, + "loss": 0.0615814208984375, + "step": 68560 + }, + { + "epoch": 0.5928612809227762, + "grad_norm": 42.441499171531, + "learning_rate": 4.790459996129487e-06, + "loss": 0.2851570129394531, + "step": 68565 + }, + { + "epoch": 0.5929045144443196, + "grad_norm": 0.926447960819861, + "learning_rate": 4.790296521566658e-06, + "loss": 0.2257293701171875, + "step": 68570 + }, + { + "epoch": 0.5929477479658628, + "grad_norm": 1.8567444573008671, + "learning_rate": 4.790133038747139e-06, + "loss": 0.2727203369140625, + "step": 68575 + }, + { + "epoch": 0.592990981487406, + "grad_norm": 10.337901456773166, + "learning_rate": 4.789969547671685e-06, + "loss": 0.14525890350341797, + "step": 68580 + }, + { + "epoch": 0.5930342150089494, + "grad_norm": 5.4918547335066545, + "learning_rate": 4.7898060483410495e-06, + "loss": 0.13305892944335937, + "step": 68585 + }, + { + "epoch": 0.5930774485304926, + "grad_norm": 13.187189603723326, + "learning_rate": 4.789642540755986e-06, + "loss": 0.0826141357421875, + "step": 68590 + }, + { + "epoch": 0.5931206820520358, + "grad_norm": 0.9242241621785067, + "learning_rate": 4.78947902491725e-06, + "loss": 0.29131317138671875, + "step": 68595 + }, + { + "epoch": 0.5931639155735792, + "grad_norm": 2.632723233440846, + "learning_rate": 4.7893155008255945e-06, + "loss": 0.23171615600585938, + "step": 68600 + }, + { + "epoch": 0.5932071490951224, + "grad_norm": 2.375492927126442, + "learning_rate": 4.789151968481775e-06, + "loss": 0.16655120849609376, + "step": 68605 + }, + { + "epoch": 0.5932503826166656, + "grad_norm": 1.3758594613054353, + "learning_rate": 4.7889884278865435e-06, + "loss": 0.21912364959716796, + "step": 68610 + }, + { + "epoch": 0.593293616138209, + "grad_norm": 18.609094256987433, + "learning_rate": 4.788824879040656e-06, + "loss": 0.20304107666015625, + "step": 68615 + }, + { + "epoch": 0.5933368496597522, + "grad_norm": 4.873711001778022, + "learning_rate": 4.7886613219448654e-06, + "loss": 0.05353355407714844, + "step": 68620 + }, + { + "epoch": 0.5933800831812954, + "grad_norm": 1.718155101466065, + "learning_rate": 4.788497756599928e-06, + "loss": 0.06112594604492187, + "step": 68625 + }, + { + "epoch": 0.5934233167028388, + "grad_norm": 35.25164734010454, + "learning_rate": 4.788334183006595e-06, + "loss": 0.279742431640625, + "step": 68630 + }, + { + "epoch": 0.593466550224382, + "grad_norm": 5.0833743030252405, + "learning_rate": 4.788170601165625e-06, + "loss": 0.180126953125, + "step": 68635 + }, + { + "epoch": 0.5935097837459252, + "grad_norm": 11.945829432596499, + "learning_rate": 4.78800701107777e-06, + "loss": 0.06835050582885742, + "step": 68640 + }, + { + "epoch": 0.5935530172674685, + "grad_norm": 11.050015887771487, + "learning_rate": 4.787843412743784e-06, + "loss": 0.09440574645996094, + "step": 68645 + }, + { + "epoch": 0.5935962507890118, + "grad_norm": 5.567746597219879, + "learning_rate": 4.787679806164422e-06, + "loss": 0.1453460693359375, + "step": 68650 + }, + { + "epoch": 0.593639484310555, + "grad_norm": 0.5093270520432708, + "learning_rate": 4.787516191340439e-06, + "loss": 0.3315399169921875, + "step": 68655 + }, + { + "epoch": 0.5936827178320983, + "grad_norm": 2.862896550947667, + "learning_rate": 4.78735256827259e-06, + "loss": 0.35201091766357423, + "step": 68660 + }, + { + "epoch": 0.5937259513536416, + "grad_norm": 10.363617715860862, + "learning_rate": 4.787188936961627e-06, + "loss": 0.101104736328125, + "step": 68665 + }, + { + "epoch": 0.5937691848751848, + "grad_norm": 124.23281905301685, + "learning_rate": 4.787025297408309e-06, + "loss": 0.5851242065429687, + "step": 68670 + }, + { + "epoch": 0.593812418396728, + "grad_norm": 14.26643344649694, + "learning_rate": 4.7868616496133865e-06, + "loss": 0.07236175537109375, + "step": 68675 + }, + { + "epoch": 0.5938556519182714, + "grad_norm": 40.2381109408416, + "learning_rate": 4.7866979935776156e-06, + "loss": 0.2222900390625, + "step": 68680 + }, + { + "epoch": 0.5938988854398146, + "grad_norm": 2.81707974711384, + "learning_rate": 4.786534329301752e-06, + "loss": 0.28837890625, + "step": 68685 + }, + { + "epoch": 0.5939421189613578, + "grad_norm": 14.922191154744798, + "learning_rate": 4.7863706567865504e-06, + "loss": 0.23193817138671874, + "step": 68690 + }, + { + "epoch": 0.5939853524829012, + "grad_norm": 9.473340801137608, + "learning_rate": 4.786206976032765e-06, + "loss": 0.42466278076171876, + "step": 68695 + }, + { + "epoch": 0.5940285860044444, + "grad_norm": 5.866115746311902, + "learning_rate": 4.786043287041151e-06, + "loss": 0.1919281005859375, + "step": 68700 + }, + { + "epoch": 0.5940718195259876, + "grad_norm": 19.961766609729544, + "learning_rate": 4.785879589812463e-06, + "loss": 0.1647857666015625, + "step": 68705 + }, + { + "epoch": 0.594115053047531, + "grad_norm": 7.179393103807795, + "learning_rate": 4.785715884347456e-06, + "loss": 0.1871673583984375, + "step": 68710 + }, + { + "epoch": 0.5941582865690742, + "grad_norm": 6.346163036637975, + "learning_rate": 4.785552170646885e-06, + "loss": 0.16325302124023439, + "step": 68715 + }, + { + "epoch": 0.5942015200906174, + "grad_norm": 33.0677642610608, + "learning_rate": 4.785388448711505e-06, + "loss": 0.2445648193359375, + "step": 68720 + }, + { + "epoch": 0.5942447536121607, + "grad_norm": 15.3691334022653, + "learning_rate": 4.785224718542072e-06, + "loss": 0.447552490234375, + "step": 68725 + }, + { + "epoch": 0.594287987133704, + "grad_norm": 8.751881685675341, + "learning_rate": 4.785060980139339e-06, + "loss": 0.15638465881347657, + "step": 68730 + }, + { + "epoch": 0.5943312206552472, + "grad_norm": 18.463363364909206, + "learning_rate": 4.7848972335040635e-06, + "loss": 0.1179840087890625, + "step": 68735 + }, + { + "epoch": 0.5943744541767905, + "grad_norm": 11.46485032320439, + "learning_rate": 4.784733478637e-06, + "loss": 0.53134765625, + "step": 68740 + }, + { + "epoch": 0.5944176876983338, + "grad_norm": 0.8162339052852478, + "learning_rate": 4.784569715538903e-06, + "loss": 0.23276901245117188, + "step": 68745 + }, + { + "epoch": 0.594460921219877, + "grad_norm": 6.436583084483726, + "learning_rate": 4.784405944210528e-06, + "loss": 0.07229461669921874, + "step": 68750 + }, + { + "epoch": 0.5945041547414203, + "grad_norm": 4.174119647700111, + "learning_rate": 4.784242164652631e-06, + "loss": 0.0923370361328125, + "step": 68755 + }, + { + "epoch": 0.5945473882629636, + "grad_norm": 2.558354588397841, + "learning_rate": 4.784078376865966e-06, + "loss": 0.18436698913574218, + "step": 68760 + }, + { + "epoch": 0.5945906217845068, + "grad_norm": 2.8592056283289518, + "learning_rate": 4.783914580851289e-06, + "loss": 0.3493156433105469, + "step": 68765 + }, + { + "epoch": 0.5946338553060501, + "grad_norm": 0.040426208419238656, + "learning_rate": 4.783750776609356e-06, + "loss": 0.11061897277832031, + "step": 68770 + }, + { + "epoch": 0.5946770888275934, + "grad_norm": 17.7764795626146, + "learning_rate": 4.7835869641409225e-06, + "loss": 0.23736000061035156, + "step": 68775 + }, + { + "epoch": 0.5947203223491366, + "grad_norm": 1.0247990349963392, + "learning_rate": 4.783423143446744e-06, + "loss": 0.14386138916015626, + "step": 68780 + }, + { + "epoch": 0.5947635558706799, + "grad_norm": 3.7563437705894254, + "learning_rate": 4.7832593145275745e-06, + "loss": 0.2222076416015625, + "step": 68785 + }, + { + "epoch": 0.5948067893922232, + "grad_norm": 30.464505063450392, + "learning_rate": 4.7830954773841705e-06, + "loss": 0.17671356201171876, + "step": 68790 + }, + { + "epoch": 0.5948500229137664, + "grad_norm": 1.1579014221867843, + "learning_rate": 4.782931632017289e-06, + "loss": 0.05639190673828125, + "step": 68795 + }, + { + "epoch": 0.5948932564353097, + "grad_norm": 3.0583824840615272, + "learning_rate": 4.782767778427683e-06, + "loss": 0.13735504150390626, + "step": 68800 + }, + { + "epoch": 0.594936489956853, + "grad_norm": 31.827622826238112, + "learning_rate": 4.78260391661611e-06, + "loss": 0.32099609375, + "step": 68805 + }, + { + "epoch": 0.5949797234783962, + "grad_norm": 3.981027224307676, + "learning_rate": 4.782440046583325e-06, + "loss": 0.08415908813476562, + "step": 68810 + }, + { + "epoch": 0.5950229569999395, + "grad_norm": 33.796382801083666, + "learning_rate": 4.782276168330084e-06, + "loss": 0.17131900787353516, + "step": 68815 + }, + { + "epoch": 0.5950661905214827, + "grad_norm": 8.944559512063767, + "learning_rate": 4.7821122818571435e-06, + "loss": 0.05884246826171875, + "step": 68820 + }, + { + "epoch": 0.595109424043026, + "grad_norm": 1.4135567674253393, + "learning_rate": 4.781948387165258e-06, + "loss": 0.15898056030273439, + "step": 68825 + }, + { + "epoch": 0.5951526575645693, + "grad_norm": 10.542607846131466, + "learning_rate": 4.7817844842551845e-06, + "loss": 0.23290367126464845, + "step": 68830 + }, + { + "epoch": 0.5951958910861125, + "grad_norm": 20.755535567724426, + "learning_rate": 4.781620573127678e-06, + "loss": 0.5214508056640625, + "step": 68835 + }, + { + "epoch": 0.5952391246076558, + "grad_norm": 6.543727904402873, + "learning_rate": 4.781456653783494e-06, + "loss": 0.0379913330078125, + "step": 68840 + }, + { + "epoch": 0.595282358129199, + "grad_norm": 25.887680501823585, + "learning_rate": 4.78129272622339e-06, + "loss": 0.2384735107421875, + "step": 68845 + }, + { + "epoch": 0.5953255916507423, + "grad_norm": 7.3659014514664465, + "learning_rate": 4.781128790448122e-06, + "loss": 0.3076789855957031, + "step": 68850 + }, + { + "epoch": 0.5953688251722856, + "grad_norm": 4.300211501919816, + "learning_rate": 4.780964846458445e-06, + "loss": 0.05500717163085937, + "step": 68855 + }, + { + "epoch": 0.5954120586938288, + "grad_norm": 8.24537736187098, + "learning_rate": 4.780800894255115e-06, + "loss": 0.29673042297363283, + "step": 68860 + }, + { + "epoch": 0.5954552922153721, + "grad_norm": 9.57297256363328, + "learning_rate": 4.78063693383889e-06, + "loss": 0.19187774658203124, + "step": 68865 + }, + { + "epoch": 0.5954985257369154, + "grad_norm": 5.619294851465296, + "learning_rate": 4.780472965210523e-06, + "loss": 0.1177978515625, + "step": 68870 + }, + { + "epoch": 0.5955417592584586, + "grad_norm": 0.529057311790796, + "learning_rate": 4.780308988370773e-06, + "loss": 0.06785736083984376, + "step": 68875 + }, + { + "epoch": 0.5955849927800019, + "grad_norm": 1.084321782574462, + "learning_rate": 4.7801450033203945e-06, + "loss": 0.040256500244140625, + "step": 68880 + }, + { + "epoch": 0.5956282263015452, + "grad_norm": 4.402739541097358, + "learning_rate": 4.779981010060146e-06, + "loss": 0.089404296875, + "step": 68885 + }, + { + "epoch": 0.5956714598230884, + "grad_norm": 43.72925844278333, + "learning_rate": 4.779817008590781e-06, + "loss": 0.5807373046875, + "step": 68890 + }, + { + "epoch": 0.5957146933446317, + "grad_norm": 0.6489962828622178, + "learning_rate": 4.7796529989130584e-06, + "loss": 0.03794231414794922, + "step": 68895 + }, + { + "epoch": 0.5957579268661749, + "grad_norm": 0.6656043955854127, + "learning_rate": 4.779488981027733e-06, + "loss": 0.33575592041015623, + "step": 68900 + }, + { + "epoch": 0.5958011603877182, + "grad_norm": 0.6004126161612708, + "learning_rate": 4.77932495493556e-06, + "loss": 0.07695541381835938, + "step": 68905 + }, + { + "epoch": 0.5958443939092615, + "grad_norm": 5.32087081882586, + "learning_rate": 4.7791609206372996e-06, + "loss": 0.03792877197265625, + "step": 68910 + }, + { + "epoch": 0.5958876274308047, + "grad_norm": 4.410499731972736, + "learning_rate": 4.778996878133706e-06, + "loss": 0.10087242126464843, + "step": 68915 + }, + { + "epoch": 0.595930860952348, + "grad_norm": 2.2775679852887634, + "learning_rate": 4.7788328274255354e-06, + "loss": 0.2344970703125, + "step": 68920 + }, + { + "epoch": 0.5959740944738913, + "grad_norm": 1.0720113070475508, + "learning_rate": 4.778668768513545e-06, + "loss": 0.0493804931640625, + "step": 68925 + }, + { + "epoch": 0.5960173279954345, + "grad_norm": 3.1764628252488785, + "learning_rate": 4.778504701398492e-06, + "loss": 0.114605712890625, + "step": 68930 + }, + { + "epoch": 0.5960605615169778, + "grad_norm": 20.560096464124676, + "learning_rate": 4.778340626081132e-06, + "loss": 0.18690071105957032, + "step": 68935 + }, + { + "epoch": 0.5961037950385211, + "grad_norm": 5.806924305951762, + "learning_rate": 4.7781765425622224e-06, + "loss": 0.169464111328125, + "step": 68940 + }, + { + "epoch": 0.5961470285600643, + "grad_norm": 0.3635761393272504, + "learning_rate": 4.778012450842519e-06, + "loss": 0.0963226318359375, + "step": 68945 + }, + { + "epoch": 0.5961902620816076, + "grad_norm": 6.3056784945508255, + "learning_rate": 4.777848350922779e-06, + "loss": 0.12572879791259767, + "step": 68950 + }, + { + "epoch": 0.5962334956031509, + "grad_norm": 6.947412084681072, + "learning_rate": 4.777684242803761e-06, + "loss": 0.2384552001953125, + "step": 68955 + }, + { + "epoch": 0.5962767291246941, + "grad_norm": 15.867188533574907, + "learning_rate": 4.77752012648622e-06, + "loss": 0.19581451416015624, + "step": 68960 + }, + { + "epoch": 0.5963199626462374, + "grad_norm": 6.555309713215758, + "learning_rate": 4.7773560019709125e-06, + "loss": 0.23216705322265624, + "step": 68965 + }, + { + "epoch": 0.5963631961677807, + "grad_norm": 1.120835560205975, + "learning_rate": 4.7771918692585964e-06, + "loss": 0.27695465087890625, + "step": 68970 + }, + { + "epoch": 0.5964064296893239, + "grad_norm": 2.7208589808206067, + "learning_rate": 4.7770277283500295e-06, + "loss": 0.0965484619140625, + "step": 68975 + }, + { + "epoch": 0.5964496632108672, + "grad_norm": 30.435430173238576, + "learning_rate": 4.776863579245967e-06, + "loss": 0.34858245849609376, + "step": 68980 + }, + { + "epoch": 0.5964928967324105, + "grad_norm": 1.167451911645942, + "learning_rate": 4.776699421947166e-06, + "loss": 0.224609375, + "step": 68985 + }, + { + "epoch": 0.5965361302539537, + "grad_norm": 5.962047049194491, + "learning_rate": 4.776535256454385e-06, + "loss": 0.1605133056640625, + "step": 68990 + }, + { + "epoch": 0.5965793637754969, + "grad_norm": 0.5643511016261624, + "learning_rate": 4.7763710827683805e-06, + "loss": 0.10735321044921875, + "step": 68995 + }, + { + "epoch": 0.5966225972970403, + "grad_norm": 12.285437529657328, + "learning_rate": 4.776206900889908e-06, + "loss": 0.5858673095703125, + "step": 69000 + }, + { + "epoch": 0.5966658308185835, + "grad_norm": 0.2801687810661139, + "learning_rate": 4.776042710819728e-06, + "loss": 0.1048126220703125, + "step": 69005 + }, + { + "epoch": 0.5967090643401267, + "grad_norm": 41.1851025269893, + "learning_rate": 4.775878512558596e-06, + "loss": 0.578076171875, + "step": 69010 + }, + { + "epoch": 0.5967522978616701, + "grad_norm": 68.94017359756775, + "learning_rate": 4.775714306107268e-06, + "loss": 0.4928955078125, + "step": 69015 + }, + { + "epoch": 0.5967955313832133, + "grad_norm": 11.220791822810511, + "learning_rate": 4.775550091466503e-06, + "loss": 0.36951217651367185, + "step": 69020 + }, + { + "epoch": 0.5968387649047565, + "grad_norm": 17.18746264025822, + "learning_rate": 4.775385868637058e-06, + "loss": 0.124371337890625, + "step": 69025 + }, + { + "epoch": 0.5968819984262999, + "grad_norm": 0.18842181190637666, + "learning_rate": 4.775221637619689e-06, + "loss": 0.0482666015625, + "step": 69030 + }, + { + "epoch": 0.5969252319478431, + "grad_norm": 24.633931443141417, + "learning_rate": 4.775057398415157e-06, + "loss": 0.35845260620117186, + "step": 69035 + }, + { + "epoch": 0.5969684654693863, + "grad_norm": 0.7411122968950445, + "learning_rate": 4.774893151024216e-06, + "loss": 0.0245269775390625, + "step": 69040 + }, + { + "epoch": 0.5970116989909297, + "grad_norm": 24.663580178853685, + "learning_rate": 4.774728895447624e-06, + "loss": 0.170758056640625, + "step": 69045 + }, + { + "epoch": 0.5970549325124729, + "grad_norm": 10.444873405840681, + "learning_rate": 4.77456463168614e-06, + "loss": 0.07274360656738281, + "step": 69050 + }, + { + "epoch": 0.5970981660340161, + "grad_norm": 12.711783298776538, + "learning_rate": 4.774400359740522e-06, + "loss": 0.07999801635742188, + "step": 69055 + }, + { + "epoch": 0.5971413995555594, + "grad_norm": 1.0187034868515796, + "learning_rate": 4.774236079611524e-06, + "loss": 0.08736572265625, + "step": 69060 + }, + { + "epoch": 0.5971846330771027, + "grad_norm": 0.3790510177846633, + "learning_rate": 4.774071791299907e-06, + "loss": 0.1326080322265625, + "step": 69065 + }, + { + "epoch": 0.5972278665986459, + "grad_norm": 4.599583537209834, + "learning_rate": 4.7739074948064275e-06, + "loss": 0.07120361328125, + "step": 69070 + }, + { + "epoch": 0.5972711001201891, + "grad_norm": 30.821932064434332, + "learning_rate": 4.773743190131843e-06, + "loss": 0.226153564453125, + "step": 69075 + }, + { + "epoch": 0.5973143336417325, + "grad_norm": 15.10715382955792, + "learning_rate": 4.773578877276912e-06, + "loss": 0.114874267578125, + "step": 69080 + }, + { + "epoch": 0.5973575671632757, + "grad_norm": 15.45013630774107, + "learning_rate": 4.773414556242392e-06, + "loss": 0.14971771240234374, + "step": 69085 + }, + { + "epoch": 0.5974008006848189, + "grad_norm": 23.12623437113896, + "learning_rate": 4.77325022702904e-06, + "loss": 0.115411376953125, + "step": 69090 + }, + { + "epoch": 0.5974440342063623, + "grad_norm": 7.033674959538262, + "learning_rate": 4.773085889637616e-06, + "loss": 0.17133331298828125, + "step": 69095 + }, + { + "epoch": 0.5974872677279055, + "grad_norm": 1.0413394330912031, + "learning_rate": 4.7729215440688755e-06, + "loss": 0.11024322509765624, + "step": 69100 + }, + { + "epoch": 0.5975305012494487, + "grad_norm": 56.54138765895317, + "learning_rate": 4.772757190323579e-06, + "loss": 0.22095947265625, + "step": 69105 + }, + { + "epoch": 0.5975737347709921, + "grad_norm": 7.3186465729847985, + "learning_rate": 4.772592828402481e-06, + "loss": 0.13044509887695313, + "step": 69110 + }, + { + "epoch": 0.5976169682925353, + "grad_norm": 29.05414652939644, + "learning_rate": 4.772428458306342e-06, + "loss": 0.27653350830078127, + "step": 69115 + }, + { + "epoch": 0.5976602018140785, + "grad_norm": 0.8059280404618017, + "learning_rate": 4.772264080035921e-06, + "loss": 0.01808929443359375, + "step": 69120 + }, + { + "epoch": 0.5977034353356219, + "grad_norm": 14.359219746402498, + "learning_rate": 4.772099693591973e-06, + "loss": 0.07228622436523438, + "step": 69125 + }, + { + "epoch": 0.5977466688571651, + "grad_norm": 18.623067456060795, + "learning_rate": 4.771935298975258e-06, + "loss": 0.1058929443359375, + "step": 69130 + }, + { + "epoch": 0.5977899023787083, + "grad_norm": 13.338430182602394, + "learning_rate": 4.7717708961865345e-06, + "loss": 0.2412139892578125, + "step": 69135 + }, + { + "epoch": 0.5978331359002517, + "grad_norm": 24.127685253637196, + "learning_rate": 4.771606485226561e-06, + "loss": 0.12359161376953125, + "step": 69140 + }, + { + "epoch": 0.5978763694217949, + "grad_norm": 4.073049919979753, + "learning_rate": 4.771442066096093e-06, + "loss": 0.06681098937988281, + "step": 69145 + }, + { + "epoch": 0.5979196029433381, + "grad_norm": 0.2776470946385489, + "learning_rate": 4.771277638795892e-06, + "loss": 0.07800750732421875, + "step": 69150 + }, + { + "epoch": 0.5979628364648815, + "grad_norm": 0.8681802640862695, + "learning_rate": 4.771113203326715e-06, + "loss": 0.14560546875, + "step": 69155 + }, + { + "epoch": 0.5980060699864247, + "grad_norm": 6.672778890839329, + "learning_rate": 4.7709487596893195e-06, + "loss": 0.20407257080078126, + "step": 69160 + }, + { + "epoch": 0.5980493035079679, + "grad_norm": 19.866126477471216, + "learning_rate": 4.770784307884466e-06, + "loss": 0.10901031494140626, + "step": 69165 + }, + { + "epoch": 0.5980925370295112, + "grad_norm": 26.92593664350949, + "learning_rate": 4.770619847912911e-06, + "loss": 0.17428245544433593, + "step": 69170 + }, + { + "epoch": 0.5981357705510545, + "grad_norm": 9.493596833494179, + "learning_rate": 4.770455379775414e-06, + "loss": 0.3099781036376953, + "step": 69175 + }, + { + "epoch": 0.5981790040725977, + "grad_norm": 39.126042209070874, + "learning_rate": 4.770290903472733e-06, + "loss": 0.17503814697265624, + "step": 69180 + }, + { + "epoch": 0.598222237594141, + "grad_norm": 0.8216457412702722, + "learning_rate": 4.7701264190056265e-06, + "loss": 0.029760360717773438, + "step": 69185 + }, + { + "epoch": 0.5982654711156843, + "grad_norm": 27.247526539238862, + "learning_rate": 4.769961926374854e-06, + "loss": 0.43184356689453124, + "step": 69190 + }, + { + "epoch": 0.5983087046372275, + "grad_norm": 3.065865320867286, + "learning_rate": 4.769797425581174e-06, + "loss": 0.1217376708984375, + "step": 69195 + }, + { + "epoch": 0.5983519381587707, + "grad_norm": 0.7259235373511449, + "learning_rate": 4.769632916625344e-06, + "loss": 0.12215652465820312, + "step": 69200 + }, + { + "epoch": 0.5983951716803141, + "grad_norm": 28.86960608191186, + "learning_rate": 4.7694683995081224e-06, + "loss": 0.1547483444213867, + "step": 69205 + }, + { + "epoch": 0.5984384052018573, + "grad_norm": 4.94697311712183, + "learning_rate": 4.76930387423027e-06, + "loss": 0.037908935546875, + "step": 69210 + }, + { + "epoch": 0.5984816387234005, + "grad_norm": 12.459004988201963, + "learning_rate": 4.769139340792544e-06, + "loss": 0.11553916931152344, + "step": 69215 + }, + { + "epoch": 0.5985248722449439, + "grad_norm": 32.20303506142289, + "learning_rate": 4.768974799195704e-06, + "loss": 0.3309822082519531, + "step": 69220 + }, + { + "epoch": 0.5985681057664871, + "grad_norm": 0.3855011791702774, + "learning_rate": 4.768810249440508e-06, + "loss": 0.058461761474609374, + "step": 69225 + }, + { + "epoch": 0.5986113392880303, + "grad_norm": 1.5088382515835663, + "learning_rate": 4.768645691527716e-06, + "loss": 0.31720733642578125, + "step": 69230 + }, + { + "epoch": 0.5986545728095737, + "grad_norm": 1.3754342245706368, + "learning_rate": 4.768481125458087e-06, + "loss": 0.0368438720703125, + "step": 69235 + }, + { + "epoch": 0.5986978063311169, + "grad_norm": 32.09592820386077, + "learning_rate": 4.768316551232378e-06, + "loss": 0.287005615234375, + "step": 69240 + }, + { + "epoch": 0.5987410398526601, + "grad_norm": 8.637529093448228, + "learning_rate": 4.76815196885135e-06, + "loss": 0.0463226318359375, + "step": 69245 + }, + { + "epoch": 0.5987842733742034, + "grad_norm": 2.7783542303574476, + "learning_rate": 4.767987378315761e-06, + "loss": 0.13998336791992189, + "step": 69250 + }, + { + "epoch": 0.5988275068957467, + "grad_norm": 33.24298843104482, + "learning_rate": 4.767822779626371e-06, + "loss": 0.28050537109375, + "step": 69255 + }, + { + "epoch": 0.5988707404172899, + "grad_norm": 41.22897105526982, + "learning_rate": 4.767658172783938e-06, + "loss": 0.6651885986328125, + "step": 69260 + }, + { + "epoch": 0.5989139739388332, + "grad_norm": 30.793079807181137, + "learning_rate": 4.767493557789223e-06, + "loss": 0.23005447387695313, + "step": 69265 + }, + { + "epoch": 0.5989572074603765, + "grad_norm": 24.888210213536453, + "learning_rate": 4.767328934642981e-06, + "loss": 0.14107170104980468, + "step": 69270 + }, + { + "epoch": 0.5990004409819197, + "grad_norm": 34.24913561465409, + "learning_rate": 4.767164303345977e-06, + "loss": 0.183990478515625, + "step": 69275 + }, + { + "epoch": 0.599043674503463, + "grad_norm": 19.706785748047434, + "learning_rate": 4.766999663898967e-06, + "loss": 0.1505218505859375, + "step": 69280 + }, + { + "epoch": 0.5990869080250063, + "grad_norm": 7.190728074282822, + "learning_rate": 4.766835016302709e-06, + "loss": 0.0808074951171875, + "step": 69285 + }, + { + "epoch": 0.5991301415465495, + "grad_norm": 0.788844525925891, + "learning_rate": 4.766670360557966e-06, + "loss": 0.08808860778808594, + "step": 69290 + }, + { + "epoch": 0.5991733750680928, + "grad_norm": 19.899490106821258, + "learning_rate": 4.766505696665494e-06, + "loss": 0.2150360107421875, + "step": 69295 + }, + { + "epoch": 0.5992166085896361, + "grad_norm": 16.444940629732415, + "learning_rate": 4.766341024626053e-06, + "loss": 0.13394126892089844, + "step": 69300 + }, + { + "epoch": 0.5992598421111793, + "grad_norm": 2.1265737313932087, + "learning_rate": 4.7661763444404046e-06, + "loss": 0.25653839111328125, + "step": 69305 + }, + { + "epoch": 0.5993030756327226, + "grad_norm": 0.15625014181778704, + "learning_rate": 4.766011656109308e-06, + "loss": 0.062264442443847656, + "step": 69310 + }, + { + "epoch": 0.5993463091542659, + "grad_norm": 4.187674490456623, + "learning_rate": 4.76584695963352e-06, + "loss": 0.08254280090332031, + "step": 69315 + }, + { + "epoch": 0.5993895426758091, + "grad_norm": 1.1304519562035042, + "learning_rate": 4.765682255013802e-06, + "loss": 0.11402511596679688, + "step": 69320 + }, + { + "epoch": 0.5994327761973524, + "grad_norm": 42.086510584077544, + "learning_rate": 4.765517542250914e-06, + "loss": 0.38524322509765624, + "step": 69325 + }, + { + "epoch": 0.5994760097188957, + "grad_norm": 28.045654759816426, + "learning_rate": 4.765352821345614e-06, + "loss": 0.16392440795898439, + "step": 69330 + }, + { + "epoch": 0.5995192432404389, + "grad_norm": 5.992572118112181, + "learning_rate": 4.765188092298665e-06, + "loss": 0.08387451171875, + "step": 69335 + }, + { + "epoch": 0.5995624767619822, + "grad_norm": 1.3366029965383521, + "learning_rate": 4.765023355110823e-06, + "loss": 0.06122398376464844, + "step": 69340 + }, + { + "epoch": 0.5996057102835254, + "grad_norm": 2.3133851841309836, + "learning_rate": 4.76485860978285e-06, + "loss": 0.0995819091796875, + "step": 69345 + }, + { + "epoch": 0.5996489438050687, + "grad_norm": 2.8173720221437555, + "learning_rate": 4.764693856315504e-06, + "loss": 0.22569580078125, + "step": 69350 + }, + { + "epoch": 0.599692177326612, + "grad_norm": 2.8052282418079844, + "learning_rate": 4.7645290947095465e-06, + "loss": 0.1370635986328125, + "step": 69355 + }, + { + "epoch": 0.5997354108481552, + "grad_norm": 1.5469914527475634, + "learning_rate": 4.764364324965736e-06, + "loss": 0.12836265563964844, + "step": 69360 + }, + { + "epoch": 0.5997786443696985, + "grad_norm": 0.3243148858424463, + "learning_rate": 4.764199547084834e-06, + "loss": 0.19262313842773438, + "step": 69365 + }, + { + "epoch": 0.5998218778912418, + "grad_norm": 14.313230209565408, + "learning_rate": 4.764034761067599e-06, + "loss": 0.446197509765625, + "step": 69370 + }, + { + "epoch": 0.599865111412785, + "grad_norm": 25.099793033266398, + "learning_rate": 4.763869966914793e-06, + "loss": 0.23081817626953124, + "step": 69375 + }, + { + "epoch": 0.5999083449343283, + "grad_norm": 1.839571501753661, + "learning_rate": 4.763705164627172e-06, + "loss": 0.24954376220703126, + "step": 69380 + }, + { + "epoch": 0.5999515784558715, + "grad_norm": 120.79804709640258, + "learning_rate": 4.763540354205501e-06, + "loss": 0.14019699096679689, + "step": 69385 + }, + { + "epoch": 0.5999948119774148, + "grad_norm": 7.294375794037661, + "learning_rate": 4.763375535650537e-06, + "loss": 0.05860061645507812, + "step": 69390 + }, + { + "epoch": 0.6000380454989581, + "grad_norm": 1.2276174534425106, + "learning_rate": 4.763210708963039e-06, + "loss": 0.16015472412109374, + "step": 69395 + }, + { + "epoch": 0.6000812790205013, + "grad_norm": 4.0473914532307385, + "learning_rate": 4.763045874143772e-06, + "loss": 0.18914871215820311, + "step": 69400 + }, + { + "epoch": 0.6001245125420446, + "grad_norm": 0.07440388276229325, + "learning_rate": 4.762881031193491e-06, + "loss": 0.11064376831054687, + "step": 69405 + }, + { + "epoch": 0.6001677460635879, + "grad_norm": 1.5205452230861636, + "learning_rate": 4.76271618011296e-06, + "loss": 0.116070556640625, + "step": 69410 + }, + { + "epoch": 0.6002109795851311, + "grad_norm": 34.537691551020345, + "learning_rate": 4.762551320902937e-06, + "loss": 0.23902587890625, + "step": 69415 + }, + { + "epoch": 0.6002542131066744, + "grad_norm": 1.9922071460553656, + "learning_rate": 4.762386453564183e-06, + "loss": 0.0873291015625, + "step": 69420 + }, + { + "epoch": 0.6002974466282176, + "grad_norm": 2.4565807510781084, + "learning_rate": 4.762221578097459e-06, + "loss": 0.296209716796875, + "step": 69425 + }, + { + "epoch": 0.6003406801497609, + "grad_norm": 6.250205287265317, + "learning_rate": 4.762056694503524e-06, + "loss": 0.3614961624145508, + "step": 69430 + }, + { + "epoch": 0.6003839136713042, + "grad_norm": 0.3989881576593368, + "learning_rate": 4.761891802783139e-06, + "loss": 0.04697914123535156, + "step": 69435 + }, + { + "epoch": 0.6004271471928474, + "grad_norm": 5.371674174518285, + "learning_rate": 4.761726902937066e-06, + "loss": 0.49155731201171876, + "step": 69440 + }, + { + "epoch": 0.6004703807143907, + "grad_norm": 5.819649883221934, + "learning_rate": 4.7615619949660625e-06, + "loss": 0.16289329528808594, + "step": 69445 + }, + { + "epoch": 0.600513614235934, + "grad_norm": 50.44683888168846, + "learning_rate": 4.761397078870892e-06, + "loss": 0.27382545471191405, + "step": 69450 + }, + { + "epoch": 0.6005568477574772, + "grad_norm": 0.1732950577320613, + "learning_rate": 4.761232154652313e-06, + "loss": 0.1805755615234375, + "step": 69455 + }, + { + "epoch": 0.6006000812790205, + "grad_norm": 0.585047011383519, + "learning_rate": 4.761067222311087e-06, + "loss": 0.08373565673828125, + "step": 69460 + }, + { + "epoch": 0.6006433148005638, + "grad_norm": 12.450117994237086, + "learning_rate": 4.760902281847976e-06, + "loss": 0.053668212890625, + "step": 69465 + }, + { + "epoch": 0.600686548322107, + "grad_norm": 3.00596306336898, + "learning_rate": 4.760737333263738e-06, + "loss": 0.212969970703125, + "step": 69470 + }, + { + "epoch": 0.6007297818436503, + "grad_norm": 4.6079912975283515, + "learning_rate": 4.760572376559135e-06, + "loss": 0.3764434814453125, + "step": 69475 + }, + { + "epoch": 0.6007730153651936, + "grad_norm": 17.014148547685878, + "learning_rate": 4.760407411734928e-06, + "loss": 0.3080036163330078, + "step": 69480 + }, + { + "epoch": 0.6008162488867368, + "grad_norm": 0.974812672349107, + "learning_rate": 4.760242438791877e-06, + "loss": 0.07398147583007812, + "step": 69485 + }, + { + "epoch": 0.6008594824082801, + "grad_norm": 2.2064686116121344, + "learning_rate": 4.760077457730744e-06, + "loss": 0.3256254196166992, + "step": 69490 + }, + { + "epoch": 0.6009027159298234, + "grad_norm": 27.22745544019507, + "learning_rate": 4.759912468552289e-06, + "loss": 0.20597686767578124, + "step": 69495 + }, + { + "epoch": 0.6009459494513666, + "grad_norm": 28.730507513864627, + "learning_rate": 4.759747471257275e-06, + "loss": 0.12241706848144532, + "step": 69500 + }, + { + "epoch": 0.6009891829729099, + "grad_norm": 12.638712749252258, + "learning_rate": 4.759582465846458e-06, + "loss": 0.1463653564453125, + "step": 69505 + }, + { + "epoch": 0.6010324164944532, + "grad_norm": 5.252301326041155, + "learning_rate": 4.759417452320604e-06, + "loss": 0.06116142272949219, + "step": 69510 + }, + { + "epoch": 0.6010756500159964, + "grad_norm": 4.71777815016474, + "learning_rate": 4.759252430680473e-06, + "loss": 0.11329450607299804, + "step": 69515 + }, + { + "epoch": 0.6011188835375396, + "grad_norm": 28.220632363759012, + "learning_rate": 4.759087400926823e-06, + "loss": 0.14647645950317384, + "step": 69520 + }, + { + "epoch": 0.601162117059083, + "grad_norm": 21.28370672798814, + "learning_rate": 4.758922363060419e-06, + "loss": 0.22495155334472655, + "step": 69525 + }, + { + "epoch": 0.6012053505806262, + "grad_norm": 17.975369693439053, + "learning_rate": 4.75875731708202e-06, + "loss": 0.8129875183105468, + "step": 69530 + }, + { + "epoch": 0.6012485841021694, + "grad_norm": 80.2489933295682, + "learning_rate": 4.758592262992388e-06, + "loss": 0.34622802734375, + "step": 69535 + }, + { + "epoch": 0.6012918176237128, + "grad_norm": 17.660021628851187, + "learning_rate": 4.758427200792283e-06, + "loss": 0.0798614501953125, + "step": 69540 + }, + { + "epoch": 0.601335051145256, + "grad_norm": 1.9083681289375638, + "learning_rate": 4.758262130482468e-06, + "loss": 0.09207687377929688, + "step": 69545 + }, + { + "epoch": 0.6013782846667992, + "grad_norm": 2.656157872278075, + "learning_rate": 4.758097052063703e-06, + "loss": 0.49071197509765624, + "step": 69550 + }, + { + "epoch": 0.6014215181883426, + "grad_norm": 9.883291418754977, + "learning_rate": 4.75793196553675e-06, + "loss": 0.22453155517578124, + "step": 69555 + }, + { + "epoch": 0.6014647517098858, + "grad_norm": 24.305948663566852, + "learning_rate": 4.7577668709023695e-06, + "loss": 0.23405685424804687, + "step": 69560 + }, + { + "epoch": 0.601507985231429, + "grad_norm": 16.158265882046628, + "learning_rate": 4.757601768161324e-06, + "loss": 0.324365234375, + "step": 69565 + }, + { + "epoch": 0.6015512187529723, + "grad_norm": 4.417197662067349, + "learning_rate": 4.7574366573143744e-06, + "loss": 0.059032440185546875, + "step": 69570 + }, + { + "epoch": 0.6015944522745156, + "grad_norm": 10.807076194796672, + "learning_rate": 4.757271538362282e-06, + "loss": 0.246673583984375, + "step": 69575 + }, + { + "epoch": 0.6016376857960588, + "grad_norm": 6.38540110211967, + "learning_rate": 4.75710641130581e-06, + "loss": 0.184136962890625, + "step": 69580 + }, + { + "epoch": 0.6016809193176021, + "grad_norm": 2.1949059884189177, + "learning_rate": 4.7569412761457154e-06, + "loss": 0.09669723510742187, + "step": 69585 + }, + { + "epoch": 0.6017241528391454, + "grad_norm": 4.004875382588878, + "learning_rate": 4.756776132882765e-06, + "loss": 0.2911674499511719, + "step": 69590 + }, + { + "epoch": 0.6017673863606886, + "grad_norm": 36.27003346029061, + "learning_rate": 4.756610981517719e-06, + "loss": 0.39752197265625, + "step": 69595 + }, + { + "epoch": 0.6018106198822318, + "grad_norm": 13.782358981504759, + "learning_rate": 4.756445822051337e-06, + "loss": 0.09586944580078124, + "step": 69600 + }, + { + "epoch": 0.6018538534037752, + "grad_norm": 35.48763369843057, + "learning_rate": 4.756280654484382e-06, + "loss": 0.3047523498535156, + "step": 69605 + }, + { + "epoch": 0.6018970869253184, + "grad_norm": 19.362434097087366, + "learning_rate": 4.756115478817616e-06, + "loss": 0.23958892822265626, + "step": 69610 + }, + { + "epoch": 0.6019403204468616, + "grad_norm": 2.075693913584992, + "learning_rate": 4.755950295051802e-06, + "loss": 0.19913558959960936, + "step": 69615 + }, + { + "epoch": 0.601983553968405, + "grad_norm": 8.228345474843886, + "learning_rate": 4.755785103187699e-06, + "loss": 0.1799072265625, + "step": 69620 + }, + { + "epoch": 0.6020267874899482, + "grad_norm": 1.4516490699365523, + "learning_rate": 4.75561990322607e-06, + "loss": 0.1031280517578125, + "step": 69625 + }, + { + "epoch": 0.6020700210114914, + "grad_norm": 10.284823224003853, + "learning_rate": 4.755454695167678e-06, + "loss": 0.19257965087890624, + "step": 69630 + }, + { + "epoch": 0.6021132545330348, + "grad_norm": 92.8048222471644, + "learning_rate": 4.755289479013283e-06, + "loss": 0.17977828979492189, + "step": 69635 + }, + { + "epoch": 0.602156488054578, + "grad_norm": 42.834521206256355, + "learning_rate": 4.755124254763649e-06, + "loss": 0.31302337646484374, + "step": 69640 + }, + { + "epoch": 0.6021997215761212, + "grad_norm": 29.711872117084248, + "learning_rate": 4.754959022419536e-06, + "loss": 0.54195556640625, + "step": 69645 + }, + { + "epoch": 0.6022429550976646, + "grad_norm": 1.9036198266366515, + "learning_rate": 4.754793781981709e-06, + "loss": 0.06343841552734375, + "step": 69650 + }, + { + "epoch": 0.6022861886192078, + "grad_norm": 24.76741476000596, + "learning_rate": 4.754628533450927e-06, + "loss": 0.31480865478515624, + "step": 69655 + }, + { + "epoch": 0.602329422140751, + "grad_norm": 4.262215138549061, + "learning_rate": 4.754463276827953e-06, + "loss": 0.3519584655761719, + "step": 69660 + }, + { + "epoch": 0.6023726556622944, + "grad_norm": 9.27464258335442, + "learning_rate": 4.754298012113549e-06, + "loss": 0.1405029296875, + "step": 69665 + }, + { + "epoch": 0.6024158891838376, + "grad_norm": 0.22003920031998578, + "learning_rate": 4.754132739308479e-06, + "loss": 0.06608505249023437, + "step": 69670 + }, + { + "epoch": 0.6024591227053808, + "grad_norm": 6.0088549784806276, + "learning_rate": 4.753967458413503e-06, + "loss": 0.2162506103515625, + "step": 69675 + }, + { + "epoch": 0.6025023562269242, + "grad_norm": 0.9165269490392901, + "learning_rate": 4.753802169429383e-06, + "loss": 0.09778180122375488, + "step": 69680 + }, + { + "epoch": 0.6025455897484674, + "grad_norm": 4.09690227595335, + "learning_rate": 4.753636872356884e-06, + "loss": 0.3505462646484375, + "step": 69685 + }, + { + "epoch": 0.6025888232700106, + "grad_norm": 15.932872401357866, + "learning_rate": 4.753471567196766e-06, + "loss": 0.096319580078125, + "step": 69690 + }, + { + "epoch": 0.6026320567915538, + "grad_norm": 0.2850986872087773, + "learning_rate": 4.753306253949792e-06, + "loss": 0.12468948364257812, + "step": 69695 + }, + { + "epoch": 0.6026752903130972, + "grad_norm": 3.156197444153134, + "learning_rate": 4.753140932616725e-06, + "loss": 0.097613525390625, + "step": 69700 + }, + { + "epoch": 0.6027185238346404, + "grad_norm": 0.9361166219434459, + "learning_rate": 4.752975603198327e-06, + "loss": 0.25787811279296874, + "step": 69705 + }, + { + "epoch": 0.6027617573561836, + "grad_norm": 0.9674275037433312, + "learning_rate": 4.75281026569536e-06, + "loss": 0.06970977783203125, + "step": 69710 + }, + { + "epoch": 0.602804990877727, + "grad_norm": 1.1746556638884365, + "learning_rate": 4.752644920108587e-06, + "loss": 0.0360595703125, + "step": 69715 + }, + { + "epoch": 0.6028482243992702, + "grad_norm": 51.134812848069856, + "learning_rate": 4.75247956643877e-06, + "loss": 0.2696697235107422, + "step": 69720 + }, + { + "epoch": 0.6028914579208134, + "grad_norm": 6.403438531299867, + "learning_rate": 4.7523142046866715e-06, + "loss": 0.1672393798828125, + "step": 69725 + }, + { + "epoch": 0.6029346914423568, + "grad_norm": 2.6138812940916196, + "learning_rate": 4.752148834853057e-06, + "loss": 0.26951904296875, + "step": 69730 + }, + { + "epoch": 0.6029779249639, + "grad_norm": 1.4094410046218326, + "learning_rate": 4.751983456938685e-06, + "loss": 0.2836467742919922, + "step": 69735 + }, + { + "epoch": 0.6030211584854432, + "grad_norm": 9.723203713526466, + "learning_rate": 4.751818070944321e-06, + "loss": 0.06914138793945312, + "step": 69740 + }, + { + "epoch": 0.6030643920069866, + "grad_norm": 0.7334986368877633, + "learning_rate": 4.751652676870726e-06, + "loss": 0.5344345092773437, + "step": 69745 + }, + { + "epoch": 0.6031076255285298, + "grad_norm": 27.588719025982787, + "learning_rate": 4.751487274718665e-06, + "loss": 0.4926719665527344, + "step": 69750 + }, + { + "epoch": 0.603150859050073, + "grad_norm": 3.190775915425132, + "learning_rate": 4.7513218644888975e-06, + "loss": 0.046648406982421876, + "step": 69755 + }, + { + "epoch": 0.6031940925716164, + "grad_norm": 0.5803888657466469, + "learning_rate": 4.75115644618219e-06, + "loss": 0.30135040283203124, + "step": 69760 + }, + { + "epoch": 0.6032373260931596, + "grad_norm": 23.338489761866715, + "learning_rate": 4.750991019799303e-06, + "loss": 0.08809585571289062, + "step": 69765 + }, + { + "epoch": 0.6032805596147028, + "grad_norm": 19.98308818992166, + "learning_rate": 4.750825585341001e-06, + "loss": 0.20376815795898437, + "step": 69770 + }, + { + "epoch": 0.6033237931362461, + "grad_norm": 3.7990290835808724, + "learning_rate": 4.750660142808045e-06, + "loss": 0.10811767578125, + "step": 69775 + }, + { + "epoch": 0.6033670266577894, + "grad_norm": 2.9725851391515437, + "learning_rate": 4.7504946922011994e-06, + "loss": 0.050518035888671875, + "step": 69780 + }, + { + "epoch": 0.6034102601793326, + "grad_norm": 2.3492185963307795, + "learning_rate": 4.750329233521227e-06, + "loss": 0.11089725494384765, + "step": 69785 + }, + { + "epoch": 0.6034534937008759, + "grad_norm": 1.470785812866102, + "learning_rate": 4.750163766768891e-06, + "loss": 0.1091827392578125, + "step": 69790 + }, + { + "epoch": 0.6034967272224192, + "grad_norm": 7.560403584683417, + "learning_rate": 4.7499982919449534e-06, + "loss": 0.2938201904296875, + "step": 69795 + }, + { + "epoch": 0.6035399607439624, + "grad_norm": 38.13874221364942, + "learning_rate": 4.74983280905018e-06, + "loss": 0.14330558776855468, + "step": 69800 + }, + { + "epoch": 0.6035831942655057, + "grad_norm": 2.7355917514782204, + "learning_rate": 4.749667318085332e-06, + "loss": 0.1517181396484375, + "step": 69805 + }, + { + "epoch": 0.603626427787049, + "grad_norm": 2.5977936467461498, + "learning_rate": 4.749501819051173e-06, + "loss": 0.2421600341796875, + "step": 69810 + }, + { + "epoch": 0.6036696613085922, + "grad_norm": 0.7172704544792277, + "learning_rate": 4.749336311948464e-06, + "loss": 0.13291568756103517, + "step": 69815 + }, + { + "epoch": 0.6037128948301355, + "grad_norm": 1.4942028639091254, + "learning_rate": 4.749170796777972e-06, + "loss": 0.0664093017578125, + "step": 69820 + }, + { + "epoch": 0.6037561283516788, + "grad_norm": 39.75082299428119, + "learning_rate": 4.749005273540459e-06, + "loss": 0.5881561279296875, + "step": 69825 + }, + { + "epoch": 0.603799361873222, + "grad_norm": 30.53850594307224, + "learning_rate": 4.748839742236689e-06, + "loss": 0.16652374267578124, + "step": 69830 + }, + { + "epoch": 0.6038425953947653, + "grad_norm": 0.16443085945817154, + "learning_rate": 4.748674202867424e-06, + "loss": 0.09720783233642578, + "step": 69835 + }, + { + "epoch": 0.6038858289163086, + "grad_norm": 35.8329981106038, + "learning_rate": 4.748508655433428e-06, + "loss": 0.23712692260742188, + "step": 69840 + }, + { + "epoch": 0.6039290624378518, + "grad_norm": 3.675015676666325, + "learning_rate": 4.748343099935464e-06, + "loss": 0.12724609375, + "step": 69845 + }, + { + "epoch": 0.6039722959593951, + "grad_norm": 0.32336335856438775, + "learning_rate": 4.7481775363742975e-06, + "loss": 0.04844245910644531, + "step": 69850 + }, + { + "epoch": 0.6040155294809384, + "grad_norm": 12.696872460311681, + "learning_rate": 4.748011964750689e-06, + "loss": 0.14297943115234374, + "step": 69855 + }, + { + "epoch": 0.6040587630024816, + "grad_norm": 0.3633811958946926, + "learning_rate": 4.747846385065404e-06, + "loss": 0.0758758544921875, + "step": 69860 + }, + { + "epoch": 0.6041019965240249, + "grad_norm": 10.843564998930818, + "learning_rate": 4.747680797319207e-06, + "loss": 0.19340057373046876, + "step": 69865 + }, + { + "epoch": 0.6041452300455681, + "grad_norm": 3.4882976215422064, + "learning_rate": 4.747515201512861e-06, + "loss": 0.020703125, + "step": 69870 + }, + { + "epoch": 0.6041884635671114, + "grad_norm": 3.6699907109110734, + "learning_rate": 4.747349597647127e-06, + "loss": 0.1105499267578125, + "step": 69875 + }, + { + "epoch": 0.6042316970886547, + "grad_norm": 14.973854720132586, + "learning_rate": 4.747183985722772e-06, + "loss": 0.2874629974365234, + "step": 69880 + }, + { + "epoch": 0.6042749306101979, + "grad_norm": 16.27002514338507, + "learning_rate": 4.74701836574056e-06, + "loss": 0.29180202484130857, + "step": 69885 + }, + { + "epoch": 0.6043181641317412, + "grad_norm": 9.621153347033452, + "learning_rate": 4.746852737701253e-06, + "loss": 0.06281318664550781, + "step": 69890 + }, + { + "epoch": 0.6043613976532844, + "grad_norm": 1.697336726756117, + "learning_rate": 4.746687101605615e-06, + "loss": 0.36540069580078127, + "step": 69895 + }, + { + "epoch": 0.6044046311748277, + "grad_norm": 3.1292727095106363, + "learning_rate": 4.746521457454412e-06, + "loss": 0.37371368408203126, + "step": 69900 + }, + { + "epoch": 0.604447864696371, + "grad_norm": 11.250937265048202, + "learning_rate": 4.746355805248404e-06, + "loss": 0.1509307861328125, + "step": 69905 + }, + { + "epoch": 0.6044910982179142, + "grad_norm": 4.556928532831455, + "learning_rate": 4.74619014498836e-06, + "loss": 0.06110458374023438, + "step": 69910 + }, + { + "epoch": 0.6045343317394575, + "grad_norm": 3.5048920681014115, + "learning_rate": 4.746024476675039e-06, + "loss": 0.18176116943359374, + "step": 69915 + }, + { + "epoch": 0.6045775652610008, + "grad_norm": 4.130822421164101, + "learning_rate": 4.745858800309208e-06, + "loss": 0.21334762573242189, + "step": 69920 + }, + { + "epoch": 0.604620798782544, + "grad_norm": 17.922510409980397, + "learning_rate": 4.745693115891632e-06, + "loss": 0.1490509033203125, + "step": 69925 + }, + { + "epoch": 0.6046640323040873, + "grad_norm": 9.228740311757534, + "learning_rate": 4.7455274234230715e-06, + "loss": 0.20268173217773439, + "step": 69930 + }, + { + "epoch": 0.6047072658256306, + "grad_norm": 1.3892610920706807, + "learning_rate": 4.745361722904293e-06, + "loss": 0.041998291015625, + "step": 69935 + }, + { + "epoch": 0.6047504993471738, + "grad_norm": 47.563827423564206, + "learning_rate": 4.745196014336062e-06, + "loss": 0.25667877197265626, + "step": 69940 + }, + { + "epoch": 0.6047937328687171, + "grad_norm": 3.3555243384935043, + "learning_rate": 4.74503029771914e-06, + "loss": 0.0305145263671875, + "step": 69945 + }, + { + "epoch": 0.6048369663902603, + "grad_norm": 6.612025675106382, + "learning_rate": 4.744864573054293e-06, + "loss": 0.03878345489501953, + "step": 69950 + }, + { + "epoch": 0.6048801999118036, + "grad_norm": 5.323778776496833, + "learning_rate": 4.744698840342285e-06, + "loss": 0.10336074829101563, + "step": 69955 + }, + { + "epoch": 0.6049234334333469, + "grad_norm": 1.5085654327161737, + "learning_rate": 4.744533099583879e-06, + "loss": 0.02138175964355469, + "step": 69960 + }, + { + "epoch": 0.6049666669548901, + "grad_norm": 2.4380826815178507, + "learning_rate": 4.744367350779841e-06, + "loss": 0.1243804931640625, + "step": 69965 + }, + { + "epoch": 0.6050099004764334, + "grad_norm": 2.090745577649719, + "learning_rate": 4.744201593930935e-06, + "loss": 0.37376556396484373, + "step": 69970 + }, + { + "epoch": 0.6050531339979767, + "grad_norm": 0.16356978998875335, + "learning_rate": 4.744035829037926e-06, + "loss": 0.12403488159179688, + "step": 69975 + }, + { + "epoch": 0.6050963675195199, + "grad_norm": 36.32307356156884, + "learning_rate": 4.743870056101577e-06, + "loss": 0.40654754638671875, + "step": 69980 + }, + { + "epoch": 0.6051396010410632, + "grad_norm": 0.47269145269287394, + "learning_rate": 4.743704275122654e-06, + "loss": 0.1319610595703125, + "step": 69985 + }, + { + "epoch": 0.6051828345626065, + "grad_norm": 0.5416900646482935, + "learning_rate": 4.7435384861019206e-06, + "loss": 0.21752471923828126, + "step": 69990 + }, + { + "epoch": 0.6052260680841497, + "grad_norm": 34.69892070635789, + "learning_rate": 4.743372689040142e-06, + "loss": 0.39802017211914065, + "step": 69995 + }, + { + "epoch": 0.605269301605693, + "grad_norm": 1.0799376268786274, + "learning_rate": 4.7432068839380825e-06, + "loss": 0.06044921875, + "step": 70000 + }, + { + "epoch": 0.6053125351272363, + "grad_norm": 5.6040001655254965, + "learning_rate": 4.743041070796506e-06, + "loss": 0.27399520874023436, + "step": 70005 + }, + { + "epoch": 0.6053557686487795, + "grad_norm": 1.1664515408466085, + "learning_rate": 4.7428752496161795e-06, + "loss": 0.04727859497070312, + "step": 70010 + }, + { + "epoch": 0.6053990021703228, + "grad_norm": 2.373016218161401, + "learning_rate": 4.742709420397865e-06, + "loss": 0.14810333251953126, + "step": 70015 + }, + { + "epoch": 0.6054422356918661, + "grad_norm": 29.135574294036914, + "learning_rate": 4.74254358314233e-06, + "loss": 0.329779052734375, + "step": 70020 + }, + { + "epoch": 0.6054854692134093, + "grad_norm": 4.64380620957447, + "learning_rate": 4.742377737850338e-06, + "loss": 0.07926864624023437, + "step": 70025 + }, + { + "epoch": 0.6055287027349525, + "grad_norm": 5.133727152248615, + "learning_rate": 4.742211884522653e-06, + "loss": 0.0792510986328125, + "step": 70030 + }, + { + "epoch": 0.6055719362564959, + "grad_norm": 8.505977141580168, + "learning_rate": 4.74204602316004e-06, + "loss": 0.08536567687988281, + "step": 70035 + }, + { + "epoch": 0.6056151697780391, + "grad_norm": 6.2563182935281185, + "learning_rate": 4.741880153763265e-06, + "loss": 0.06629562377929688, + "step": 70040 + }, + { + "epoch": 0.6056584032995823, + "grad_norm": 0.08504310364189302, + "learning_rate": 4.7417142763330936e-06, + "loss": 0.038809585571289065, + "step": 70045 + }, + { + "epoch": 0.6057016368211257, + "grad_norm": 0.0977459019673571, + "learning_rate": 4.741548390870291e-06, + "loss": 0.149658203125, + "step": 70050 + }, + { + "epoch": 0.6057448703426689, + "grad_norm": 13.740202665060348, + "learning_rate": 4.741382497375618e-06, + "loss": 0.10639801025390624, + "step": 70055 + }, + { + "epoch": 0.6057881038642121, + "grad_norm": 1.8914114429146072, + "learning_rate": 4.741216595849845e-06, + "loss": 0.05504798889160156, + "step": 70060 + }, + { + "epoch": 0.6058313373857555, + "grad_norm": 4.496078258636161, + "learning_rate": 4.741050686293734e-06, + "loss": 0.11534347534179687, + "step": 70065 + }, + { + "epoch": 0.6058745709072987, + "grad_norm": 9.217568276747558, + "learning_rate": 4.740884768708052e-06, + "loss": 0.14102020263671874, + "step": 70070 + }, + { + "epoch": 0.6059178044288419, + "grad_norm": 15.125889343391659, + "learning_rate": 4.7407188430935625e-06, + "loss": 0.20392017364501952, + "step": 70075 + }, + { + "epoch": 0.6059610379503853, + "grad_norm": 0.20728512285630094, + "learning_rate": 4.740552909451032e-06, + "loss": 0.2612049102783203, + "step": 70080 + }, + { + "epoch": 0.6060042714719285, + "grad_norm": 16.914804644300915, + "learning_rate": 4.740386967781224e-06, + "loss": 0.10969772338867187, + "step": 70085 + }, + { + "epoch": 0.6060475049934717, + "grad_norm": 6.864374143763546, + "learning_rate": 4.740221018084906e-06, + "loss": 0.054315185546875, + "step": 70090 + }, + { + "epoch": 0.606090738515015, + "grad_norm": 4.344918315073808, + "learning_rate": 4.740055060362842e-06, + "loss": 0.20509834289550782, + "step": 70095 + }, + { + "epoch": 0.6061339720365583, + "grad_norm": 20.00755790102536, + "learning_rate": 4.739889094615799e-06, + "loss": 0.5223876953125, + "step": 70100 + }, + { + "epoch": 0.6061772055581015, + "grad_norm": 11.519478221211246, + "learning_rate": 4.7397231208445415e-06, + "loss": 0.06685943603515625, + "step": 70105 + }, + { + "epoch": 0.6062204390796448, + "grad_norm": 12.718286423182565, + "learning_rate": 4.739557139049833e-06, + "loss": 0.11986541748046875, + "step": 70110 + }, + { + "epoch": 0.6062636726011881, + "grad_norm": 32.40058374665796, + "learning_rate": 4.739391149232442e-06, + "loss": 0.10423202514648437, + "step": 70115 + }, + { + "epoch": 0.6063069061227313, + "grad_norm": 3.296142790628998, + "learning_rate": 4.7392251513931314e-06, + "loss": 0.14680328369140624, + "step": 70120 + }, + { + "epoch": 0.6063501396442745, + "grad_norm": 13.001377978834894, + "learning_rate": 4.739059145532669e-06, + "loss": 0.19183578491210937, + "step": 70125 + }, + { + "epoch": 0.6063933731658179, + "grad_norm": 9.69537244087508, + "learning_rate": 4.738893131651819e-06, + "loss": 0.24253425598144532, + "step": 70130 + }, + { + "epoch": 0.6064366066873611, + "grad_norm": 16.409908634824003, + "learning_rate": 4.738727109751348e-06, + "loss": 0.12531356811523436, + "step": 70135 + }, + { + "epoch": 0.6064798402089043, + "grad_norm": 2.1698459242466197, + "learning_rate": 4.738561079832022e-06, + "loss": 0.0192413330078125, + "step": 70140 + }, + { + "epoch": 0.6065230737304477, + "grad_norm": 6.100969631553115, + "learning_rate": 4.7383950418946035e-06, + "loss": 0.19802093505859375, + "step": 70145 + }, + { + "epoch": 0.6065663072519909, + "grad_norm": 27.88725641975471, + "learning_rate": 4.738228995939863e-06, + "loss": 0.48239822387695314, + "step": 70150 + }, + { + "epoch": 0.6066095407735341, + "grad_norm": 3.20150612371435, + "learning_rate": 4.738062941968563e-06, + "loss": 0.7464805603027344, + "step": 70155 + }, + { + "epoch": 0.6066527742950775, + "grad_norm": 2.5751604359669247, + "learning_rate": 4.73789687998147e-06, + "loss": 0.2491527557373047, + "step": 70160 + }, + { + "epoch": 0.6066960078166207, + "grad_norm": 2.8417256288548467, + "learning_rate": 4.73773080997935e-06, + "loss": 0.1638427734375, + "step": 70165 + }, + { + "epoch": 0.6067392413381639, + "grad_norm": 14.54104212231943, + "learning_rate": 4.73756473196297e-06, + "loss": 0.153741455078125, + "step": 70170 + }, + { + "epoch": 0.6067824748597073, + "grad_norm": 1.730865351648477, + "learning_rate": 4.737398645933094e-06, + "loss": 0.10157012939453125, + "step": 70175 + }, + { + "epoch": 0.6068257083812505, + "grad_norm": 51.464463035729416, + "learning_rate": 4.73723255189049e-06, + "loss": 0.1262603759765625, + "step": 70180 + }, + { + "epoch": 0.6068689419027937, + "grad_norm": 0.6059861002673871, + "learning_rate": 4.737066449835922e-06, + "loss": 0.0165252685546875, + "step": 70185 + }, + { + "epoch": 0.6069121754243371, + "grad_norm": 29.306142838430826, + "learning_rate": 4.736900339770157e-06, + "loss": 0.26649932861328124, + "step": 70190 + }, + { + "epoch": 0.6069554089458803, + "grad_norm": 76.97099024946597, + "learning_rate": 4.7367342216939625e-06, + "loss": 0.48626251220703126, + "step": 70195 + }, + { + "epoch": 0.6069986424674235, + "grad_norm": 24.93710002906869, + "learning_rate": 4.736568095608102e-06, + "loss": 0.14247512817382812, + "step": 70200 + }, + { + "epoch": 0.6070418759889668, + "grad_norm": 7.996847248268284, + "learning_rate": 4.736401961513344e-06, + "loss": 0.51510009765625, + "step": 70205 + }, + { + "epoch": 0.6070851095105101, + "grad_norm": 5.697358359882075, + "learning_rate": 4.736235819410453e-06, + "loss": 0.144061279296875, + "step": 70210 + }, + { + "epoch": 0.6071283430320533, + "grad_norm": 11.32393930286989, + "learning_rate": 4.736069669300195e-06, + "loss": 0.10418701171875, + "step": 70215 + }, + { + "epoch": 0.6071715765535965, + "grad_norm": 0.5996126138799355, + "learning_rate": 4.735903511183338e-06, + "loss": 0.35166168212890625, + "step": 70220 + }, + { + "epoch": 0.6072148100751399, + "grad_norm": 2.4659512645562796, + "learning_rate": 4.735737345060646e-06, + "loss": 0.0530548095703125, + "step": 70225 + }, + { + "epoch": 0.6072580435966831, + "grad_norm": 2.537610487448434, + "learning_rate": 4.7355711709328885e-06, + "loss": 0.140167236328125, + "step": 70230 + }, + { + "epoch": 0.6073012771182263, + "grad_norm": 0.13183571909152367, + "learning_rate": 4.73540498880083e-06, + "loss": 0.0795135498046875, + "step": 70235 + }, + { + "epoch": 0.6073445106397697, + "grad_norm": 2.669621824521778, + "learning_rate": 4.735238798665236e-06, + "loss": 0.0326934814453125, + "step": 70240 + }, + { + "epoch": 0.6073877441613129, + "grad_norm": 0.400028826442873, + "learning_rate": 4.735072600526875e-06, + "loss": 0.12311305999755859, + "step": 70245 + }, + { + "epoch": 0.6074309776828561, + "grad_norm": 8.04421548205352, + "learning_rate": 4.734906394386512e-06, + "loss": 0.24359130859375, + "step": 70250 + }, + { + "epoch": 0.6074742112043995, + "grad_norm": 0.5829450426202039, + "learning_rate": 4.734740180244914e-06, + "loss": 0.10278091430664063, + "step": 70255 + }, + { + "epoch": 0.6075174447259427, + "grad_norm": 0.28853203920462417, + "learning_rate": 4.734573958102848e-06, + "loss": 0.15012283325195314, + "step": 70260 + }, + { + "epoch": 0.6075606782474859, + "grad_norm": 39.24437023096653, + "learning_rate": 4.734407727961079e-06, + "loss": 0.25972137451171873, + "step": 70265 + }, + { + "epoch": 0.6076039117690293, + "grad_norm": 3.457610493877371, + "learning_rate": 4.734241489820375e-06, + "loss": 0.16169891357421876, + "step": 70270 + }, + { + "epoch": 0.6076471452905725, + "grad_norm": 21.31178202768205, + "learning_rate": 4.7340752436815034e-06, + "loss": 0.192474365234375, + "step": 70275 + }, + { + "epoch": 0.6076903788121157, + "grad_norm": 24.49008941813398, + "learning_rate": 4.73390898954523e-06, + "loss": 0.3281665802001953, + "step": 70280 + }, + { + "epoch": 0.6077336123336591, + "grad_norm": 10.955950466394428, + "learning_rate": 4.733742727412321e-06, + "loss": 0.3057098388671875, + "step": 70285 + }, + { + "epoch": 0.6077768458552023, + "grad_norm": 11.555647513574593, + "learning_rate": 4.733576457283544e-06, + "loss": 0.13665924072265626, + "step": 70290 + }, + { + "epoch": 0.6078200793767455, + "grad_norm": 10.61008659464933, + "learning_rate": 4.733410179159666e-06, + "loss": 0.39081268310546874, + "step": 70295 + }, + { + "epoch": 0.6078633128982888, + "grad_norm": 3.4790342749068497, + "learning_rate": 4.733243893041453e-06, + "loss": 0.0936676025390625, + "step": 70300 + }, + { + "epoch": 0.6079065464198321, + "grad_norm": 8.546274396952452, + "learning_rate": 4.733077598929671e-06, + "loss": 0.2596271514892578, + "step": 70305 + }, + { + "epoch": 0.6079497799413753, + "grad_norm": 0.2569308692475092, + "learning_rate": 4.73291129682509e-06, + "loss": 0.14746551513671874, + "step": 70310 + }, + { + "epoch": 0.6079930134629186, + "grad_norm": 18.061239625108225, + "learning_rate": 4.7327449867284745e-06, + "loss": 0.22908935546875, + "step": 70315 + }, + { + "epoch": 0.6080362469844619, + "grad_norm": 0.3035714301268416, + "learning_rate": 4.732578668640593e-06, + "loss": 0.1699188232421875, + "step": 70320 + }, + { + "epoch": 0.6080794805060051, + "grad_norm": 0.23361441933244734, + "learning_rate": 4.73241234256221e-06, + "loss": 0.07490463256835937, + "step": 70325 + }, + { + "epoch": 0.6081227140275484, + "grad_norm": 17.739993515714602, + "learning_rate": 4.732246008494097e-06, + "loss": 0.2867542266845703, + "step": 70330 + }, + { + "epoch": 0.6081659475490917, + "grad_norm": 12.447942741299098, + "learning_rate": 4.732079666437016e-06, + "loss": 0.21624221801757812, + "step": 70335 + }, + { + "epoch": 0.6082091810706349, + "grad_norm": 10.094016585456606, + "learning_rate": 4.7319133163917375e-06, + "loss": 0.19729118347167968, + "step": 70340 + }, + { + "epoch": 0.6082524145921782, + "grad_norm": 11.618964849644927, + "learning_rate": 4.731746958359029e-06, + "loss": 0.041253280639648435, + "step": 70345 + }, + { + "epoch": 0.6082956481137215, + "grad_norm": 4.878404014627329, + "learning_rate": 4.731580592339655e-06, + "loss": 0.174151611328125, + "step": 70350 + }, + { + "epoch": 0.6083388816352647, + "grad_norm": 25.61474444761134, + "learning_rate": 4.731414218334385e-06, + "loss": 0.1192108154296875, + "step": 70355 + }, + { + "epoch": 0.608382115156808, + "grad_norm": 2.261974539609841, + "learning_rate": 4.731247836343986e-06, + "loss": 0.07341842651367188, + "step": 70360 + }, + { + "epoch": 0.6084253486783513, + "grad_norm": 0.07963484247397293, + "learning_rate": 4.7310814463692235e-06, + "loss": 0.0688812255859375, + "step": 70365 + }, + { + "epoch": 0.6084685821998945, + "grad_norm": 3.311939779949258, + "learning_rate": 4.730915048410867e-06, + "loss": 0.06308212280273437, + "step": 70370 + }, + { + "epoch": 0.6085118157214378, + "grad_norm": 5.193838680445764, + "learning_rate": 4.730748642469684e-06, + "loss": 0.071502685546875, + "step": 70375 + }, + { + "epoch": 0.608555049242981, + "grad_norm": 0.3093960508807758, + "learning_rate": 4.730582228546441e-06, + "loss": 0.26006622314453126, + "step": 70380 + }, + { + "epoch": 0.6085982827645243, + "grad_norm": 1.2288894585043142, + "learning_rate": 4.730415806641905e-06, + "loss": 0.05896739959716797, + "step": 70385 + }, + { + "epoch": 0.6086415162860676, + "grad_norm": 0.8792161640568271, + "learning_rate": 4.730249376756845e-06, + "loss": 0.0947265625, + "step": 70390 + }, + { + "epoch": 0.6086847498076108, + "grad_norm": 11.499771410801747, + "learning_rate": 4.730082938892027e-06, + "loss": 0.10644760131835937, + "step": 70395 + }, + { + "epoch": 0.6087279833291541, + "grad_norm": 32.966255035444235, + "learning_rate": 4.729916493048219e-06, + "loss": 0.22974662780761718, + "step": 70400 + }, + { + "epoch": 0.6087712168506973, + "grad_norm": 29.461351809199673, + "learning_rate": 4.72975003922619e-06, + "loss": 0.21128768920898439, + "step": 70405 + }, + { + "epoch": 0.6088144503722406, + "grad_norm": 3.5420973671132194, + "learning_rate": 4.729583577426707e-06, + "loss": 0.135888671875, + "step": 70410 + }, + { + "epoch": 0.6088576838937839, + "grad_norm": 0.8240682044267906, + "learning_rate": 4.729417107650536e-06, + "loss": 0.22120208740234376, + "step": 70415 + }, + { + "epoch": 0.6089009174153271, + "grad_norm": 15.743512148124564, + "learning_rate": 4.729250629898447e-06, + "loss": 0.1993255615234375, + "step": 70420 + }, + { + "epoch": 0.6089441509368704, + "grad_norm": 4.715202589466452, + "learning_rate": 4.729084144171207e-06, + "loss": 0.30442466735839846, + "step": 70425 + }, + { + "epoch": 0.6089873844584137, + "grad_norm": 0.8398587145213807, + "learning_rate": 4.728917650469582e-06, + "loss": 0.127587890625, + "step": 70430 + }, + { + "epoch": 0.609030617979957, + "grad_norm": 1.461603594641487, + "learning_rate": 4.728751148794344e-06, + "loss": 0.04534912109375, + "step": 70435 + }, + { + "epoch": 0.6090738515015002, + "grad_norm": 13.305827110983142, + "learning_rate": 4.728584639146258e-06, + "loss": 0.32822265625, + "step": 70440 + }, + { + "epoch": 0.6091170850230435, + "grad_norm": 5.799463945240733, + "learning_rate": 4.728418121526091e-06, + "loss": 0.04724884033203125, + "step": 70445 + }, + { + "epoch": 0.6091603185445867, + "grad_norm": 12.962477265384662, + "learning_rate": 4.728251595934613e-06, + "loss": 0.1548065185546875, + "step": 70450 + }, + { + "epoch": 0.60920355206613, + "grad_norm": 13.463770943784368, + "learning_rate": 4.728085062372592e-06, + "loss": 0.066705322265625, + "step": 70455 + }, + { + "epoch": 0.6092467855876733, + "grad_norm": 4.818371934512552, + "learning_rate": 4.727918520840795e-06, + "loss": 0.10522804260253907, + "step": 70460 + }, + { + "epoch": 0.6092900191092165, + "grad_norm": 2.722463550434113, + "learning_rate": 4.72775197133999e-06, + "loss": 0.18188858032226562, + "step": 70465 + }, + { + "epoch": 0.6093332526307598, + "grad_norm": 0.8018541689090525, + "learning_rate": 4.727585413870946e-06, + "loss": 0.037658309936523436, + "step": 70470 + }, + { + "epoch": 0.609376486152303, + "grad_norm": 17.121892615738275, + "learning_rate": 4.727418848434431e-06, + "loss": 0.29433746337890626, + "step": 70475 + }, + { + "epoch": 0.6094197196738463, + "grad_norm": 5.30349135437969, + "learning_rate": 4.7272522750312126e-06, + "loss": 0.13363494873046874, + "step": 70480 + }, + { + "epoch": 0.6094629531953896, + "grad_norm": 15.361748668237407, + "learning_rate": 4.7270856936620584e-06, + "loss": 0.179388427734375, + "step": 70485 + }, + { + "epoch": 0.6095061867169328, + "grad_norm": 1.3870170763034118, + "learning_rate": 4.726919104327739e-06, + "loss": 0.24404563903808593, + "step": 70490 + }, + { + "epoch": 0.6095494202384761, + "grad_norm": 2.5706275006454447, + "learning_rate": 4.72675250702902e-06, + "loss": 0.1196319580078125, + "step": 70495 + }, + { + "epoch": 0.6095926537600194, + "grad_norm": 0.46186794853796487, + "learning_rate": 4.7265859017666734e-06, + "loss": 0.7162193298339844, + "step": 70500 + }, + { + "epoch": 0.6096358872815626, + "grad_norm": 47.8524915014028, + "learning_rate": 4.726419288541463e-06, + "loss": 0.6417304992675781, + "step": 70505 + }, + { + "epoch": 0.6096791208031059, + "grad_norm": 0.2597728739051843, + "learning_rate": 4.72625266735416e-06, + "loss": 0.19505767822265624, + "step": 70510 + }, + { + "epoch": 0.6097223543246492, + "grad_norm": 1.7374194465029416, + "learning_rate": 4.726086038205532e-06, + "loss": 0.30064697265625, + "step": 70515 + }, + { + "epoch": 0.6097655878461924, + "grad_norm": 1.164767763641114, + "learning_rate": 4.725919401096348e-06, + "loss": 0.2407562255859375, + "step": 70520 + }, + { + "epoch": 0.6098088213677357, + "grad_norm": 3.725530993695918, + "learning_rate": 4.725752756027376e-06, + "loss": 0.5099390029907227, + "step": 70525 + }, + { + "epoch": 0.609852054889279, + "grad_norm": 7.363415597536344, + "learning_rate": 4.725586102999385e-06, + "loss": 0.1267669677734375, + "step": 70530 + }, + { + "epoch": 0.6098952884108222, + "grad_norm": 5.251681326009602, + "learning_rate": 4.725419442013143e-06, + "loss": 0.2947723388671875, + "step": 70535 + }, + { + "epoch": 0.6099385219323655, + "grad_norm": 9.301270078663004, + "learning_rate": 4.725252773069419e-06, + "loss": 0.4525886535644531, + "step": 70540 + }, + { + "epoch": 0.6099817554539088, + "grad_norm": 0.11188454599397062, + "learning_rate": 4.7250860961689825e-06, + "loss": 0.007578277587890625, + "step": 70545 + }, + { + "epoch": 0.610024988975452, + "grad_norm": 6.141571507323811, + "learning_rate": 4.7249194113126005e-06, + "loss": 0.16230850219726561, + "step": 70550 + }, + { + "epoch": 0.6100682224969952, + "grad_norm": 22.338829531147212, + "learning_rate": 4.7247527185010436e-06, + "loss": 0.18392562866210938, + "step": 70555 + }, + { + "epoch": 0.6101114560185386, + "grad_norm": 6.0242905759316, + "learning_rate": 4.724586017735078e-06, + "loss": 0.0808563232421875, + "step": 70560 + }, + { + "epoch": 0.6101546895400818, + "grad_norm": 14.70139368579282, + "learning_rate": 4.724419309015475e-06, + "loss": 0.10138206481933594, + "step": 70565 + }, + { + "epoch": 0.610197923061625, + "grad_norm": 2.0209704861779576, + "learning_rate": 4.724252592343002e-06, + "loss": 0.12340431213378907, + "step": 70570 + }, + { + "epoch": 0.6102411565831684, + "grad_norm": 0.6276058093597935, + "learning_rate": 4.7240858677184295e-06, + "loss": 0.04710235595703125, + "step": 70575 + }, + { + "epoch": 0.6102843901047116, + "grad_norm": 9.070389874436538, + "learning_rate": 4.7239191351425246e-06, + "loss": 0.24963531494140626, + "step": 70580 + }, + { + "epoch": 0.6103276236262548, + "grad_norm": 4.255598414060309, + "learning_rate": 4.7237523946160575e-06, + "loss": 0.2740692138671875, + "step": 70585 + }, + { + "epoch": 0.6103708571477982, + "grad_norm": 2.4894999007588416, + "learning_rate": 4.723585646139796e-06, + "loss": 0.040903663635253905, + "step": 70590 + }, + { + "epoch": 0.6104140906693414, + "grad_norm": 2.1397656266260463, + "learning_rate": 4.723418889714509e-06, + "loss": 0.04379119873046875, + "step": 70595 + }, + { + "epoch": 0.6104573241908846, + "grad_norm": 5.020309067642792, + "learning_rate": 4.723252125340968e-06, + "loss": 0.09340476989746094, + "step": 70600 + }, + { + "epoch": 0.610500557712428, + "grad_norm": 0.29855511011859903, + "learning_rate": 4.7230853530199394e-06, + "loss": 0.09540863037109375, + "step": 70605 + }, + { + "epoch": 0.6105437912339712, + "grad_norm": 4.885904621662985, + "learning_rate": 4.722918572752194e-06, + "loss": 0.15423583984375, + "step": 70610 + }, + { + "epoch": 0.6105870247555144, + "grad_norm": 19.664256864153774, + "learning_rate": 4.722751784538501e-06, + "loss": 0.196002197265625, + "step": 70615 + }, + { + "epoch": 0.6106302582770577, + "grad_norm": 32.716761503642374, + "learning_rate": 4.722584988379627e-06, + "loss": 0.235565185546875, + "step": 70620 + }, + { + "epoch": 0.610673491798601, + "grad_norm": 4.565956586375479, + "learning_rate": 4.722418184276345e-06, + "loss": 0.14559288024902345, + "step": 70625 + }, + { + "epoch": 0.6107167253201442, + "grad_norm": 6.730350698575607, + "learning_rate": 4.722251372229422e-06, + "loss": 0.08435745239257812, + "step": 70630 + }, + { + "epoch": 0.6107599588416875, + "grad_norm": 49.28672616706862, + "learning_rate": 4.722084552239628e-06, + "loss": 0.2799163818359375, + "step": 70635 + }, + { + "epoch": 0.6108031923632308, + "grad_norm": 0.13473278704940306, + "learning_rate": 4.721917724307732e-06, + "loss": 0.3170661926269531, + "step": 70640 + }, + { + "epoch": 0.610846425884774, + "grad_norm": 1.4065912753932468, + "learning_rate": 4.721750888434504e-06, + "loss": 0.1192535400390625, + "step": 70645 + }, + { + "epoch": 0.6108896594063172, + "grad_norm": 16.132685761185854, + "learning_rate": 4.7215840446207125e-06, + "loss": 0.16839141845703126, + "step": 70650 + }, + { + "epoch": 0.6109328929278606, + "grad_norm": 2.296974807417779, + "learning_rate": 4.721417192867127e-06, + "loss": 0.188616943359375, + "step": 70655 + }, + { + "epoch": 0.6109761264494038, + "grad_norm": 2.024040876328039, + "learning_rate": 4.721250333174519e-06, + "loss": 0.08993453979492187, + "step": 70660 + }, + { + "epoch": 0.611019359970947, + "grad_norm": 0.3854713788597949, + "learning_rate": 4.7210834655436554e-06, + "loss": 0.11378288269042969, + "step": 70665 + }, + { + "epoch": 0.6110625934924904, + "grad_norm": 1.783098830531676, + "learning_rate": 4.7209165899753075e-06, + "loss": 0.18212432861328126, + "step": 70670 + }, + { + "epoch": 0.6111058270140336, + "grad_norm": 19.025833933740724, + "learning_rate": 4.720749706470244e-06, + "loss": 0.114178466796875, + "step": 70675 + }, + { + "epoch": 0.6111490605355768, + "grad_norm": 12.55465875527372, + "learning_rate": 4.7205828150292355e-06, + "loss": 0.082855224609375, + "step": 70680 + }, + { + "epoch": 0.6111922940571202, + "grad_norm": 0.21711052239498946, + "learning_rate": 4.72041591565305e-06, + "loss": 0.06962890625, + "step": 70685 + }, + { + "epoch": 0.6112355275786634, + "grad_norm": 3.9685381769332047, + "learning_rate": 4.720249008342459e-06, + "loss": 0.14133148193359374, + "step": 70690 + }, + { + "epoch": 0.6112787611002066, + "grad_norm": 2.522313026618653, + "learning_rate": 4.720082093098232e-06, + "loss": 0.0855621337890625, + "step": 70695 + }, + { + "epoch": 0.61132199462175, + "grad_norm": 3.79312577000592, + "learning_rate": 4.719915169921137e-06, + "loss": 0.0853759765625, + "step": 70700 + }, + { + "epoch": 0.6113652281432932, + "grad_norm": 2.090469431299105, + "learning_rate": 4.719748238811947e-06, + "loss": 0.17313804626464843, + "step": 70705 + }, + { + "epoch": 0.6114084616648364, + "grad_norm": 15.484604022700214, + "learning_rate": 4.719581299771429e-06, + "loss": 0.12125701904296875, + "step": 70710 + }, + { + "epoch": 0.6114516951863798, + "grad_norm": 23.987711542736687, + "learning_rate": 4.719414352800354e-06, + "loss": 0.21744613647460936, + "step": 70715 + }, + { + "epoch": 0.611494928707923, + "grad_norm": 3.0875243778720924, + "learning_rate": 4.719247397899492e-06, + "loss": 0.133807373046875, + "step": 70720 + }, + { + "epoch": 0.6115381622294662, + "grad_norm": 4.52121148964267, + "learning_rate": 4.719080435069613e-06, + "loss": 0.3181789398193359, + "step": 70725 + }, + { + "epoch": 0.6115813957510094, + "grad_norm": 0.05255272998567562, + "learning_rate": 4.7189134643114874e-06, + "loss": 0.226800537109375, + "step": 70730 + }, + { + "epoch": 0.6116246292725528, + "grad_norm": 4.945530069746751, + "learning_rate": 4.718746485625884e-06, + "loss": 0.11553878784179687, + "step": 70735 + }, + { + "epoch": 0.611667862794096, + "grad_norm": 6.155290148307665, + "learning_rate": 4.718579499013574e-06, + "loss": 0.068316650390625, + "step": 70740 + }, + { + "epoch": 0.6117110963156392, + "grad_norm": 6.252337026207378, + "learning_rate": 4.718412504475328e-06, + "loss": 0.11189346313476563, + "step": 70745 + }, + { + "epoch": 0.6117543298371826, + "grad_norm": 2.1661632571755094, + "learning_rate": 4.7182455020119145e-06, + "loss": 0.08667755126953125, + "step": 70750 + }, + { + "epoch": 0.6117975633587258, + "grad_norm": 13.850040194486564, + "learning_rate": 4.718078491624105e-06, + "loss": 0.124066162109375, + "step": 70755 + }, + { + "epoch": 0.611840796880269, + "grad_norm": 22.013029643795985, + "learning_rate": 4.717911473312668e-06, + "loss": 0.1340118408203125, + "step": 70760 + }, + { + "epoch": 0.6118840304018124, + "grad_norm": 0.8820818685649298, + "learning_rate": 4.717744447078376e-06, + "loss": 0.116357421875, + "step": 70765 + }, + { + "epoch": 0.6119272639233556, + "grad_norm": 14.64314801937578, + "learning_rate": 4.717577412921999e-06, + "loss": 0.151434326171875, + "step": 70770 + }, + { + "epoch": 0.6119704974448988, + "grad_norm": 26.84125449072862, + "learning_rate": 4.717410370844306e-06, + "loss": 0.38674201965332033, + "step": 70775 + }, + { + "epoch": 0.6120137309664422, + "grad_norm": 10.504347338971696, + "learning_rate": 4.717243320846068e-06, + "loss": 0.16900367736816407, + "step": 70780 + }, + { + "epoch": 0.6120569644879854, + "grad_norm": 8.907695826662758, + "learning_rate": 4.7170762629280565e-06, + "loss": 0.21310195922851563, + "step": 70785 + }, + { + "epoch": 0.6121001980095286, + "grad_norm": 1.2535558158581555, + "learning_rate": 4.716909197091041e-06, + "loss": 0.10559539794921875, + "step": 70790 + }, + { + "epoch": 0.612143431531072, + "grad_norm": 6.198648435796171, + "learning_rate": 4.716742123335791e-06, + "loss": 0.21022415161132812, + "step": 70795 + }, + { + "epoch": 0.6121866650526152, + "grad_norm": 0.1873531251054399, + "learning_rate": 4.716575041663078e-06, + "loss": 0.0645721435546875, + "step": 70800 + }, + { + "epoch": 0.6122298985741584, + "grad_norm": 3.5092089489212563, + "learning_rate": 4.7164079520736736e-06, + "loss": 0.0778839111328125, + "step": 70805 + }, + { + "epoch": 0.6122731320957018, + "grad_norm": 53.280455386451834, + "learning_rate": 4.7162408545683456e-06, + "loss": 0.28818798065185547, + "step": 70810 + }, + { + "epoch": 0.612316365617245, + "grad_norm": 2.7729181942715573, + "learning_rate": 4.716073749147868e-06, + "loss": 0.067303466796875, + "step": 70815 + }, + { + "epoch": 0.6123595991387882, + "grad_norm": 2.3629096447115447, + "learning_rate": 4.71590663581301e-06, + "loss": 0.1782611846923828, + "step": 70820 + }, + { + "epoch": 0.6124028326603315, + "grad_norm": 23.477842216625685, + "learning_rate": 4.715739514564541e-06, + "loss": 0.2334228515625, + "step": 70825 + }, + { + "epoch": 0.6124460661818748, + "grad_norm": 0.21938970853871104, + "learning_rate": 4.715572385403234e-06, + "loss": 0.025744247436523437, + "step": 70830 + }, + { + "epoch": 0.612489299703418, + "grad_norm": 6.8625179882034795, + "learning_rate": 4.715405248329859e-06, + "loss": 0.09102249145507812, + "step": 70835 + }, + { + "epoch": 0.6125325332249613, + "grad_norm": 0.2576514355846675, + "learning_rate": 4.715238103345186e-06, + "loss": 0.312115478515625, + "step": 70840 + }, + { + "epoch": 0.6125757667465046, + "grad_norm": 3.26945717689111, + "learning_rate": 4.715070950449986e-06, + "loss": 0.06608543395996094, + "step": 70845 + }, + { + "epoch": 0.6126190002680478, + "grad_norm": 5.345882579750098, + "learning_rate": 4.714903789645031e-06, + "loss": 0.10518798828125, + "step": 70850 + }, + { + "epoch": 0.6126622337895911, + "grad_norm": 1.8809617867904918, + "learning_rate": 4.714736620931091e-06, + "loss": 0.06464920043945313, + "step": 70855 + }, + { + "epoch": 0.6127054673111344, + "grad_norm": 24.498597236872133, + "learning_rate": 4.714569444308937e-06, + "loss": 0.166229248046875, + "step": 70860 + }, + { + "epoch": 0.6127487008326776, + "grad_norm": 32.34813197145942, + "learning_rate": 4.71440225977934e-06, + "loss": 0.39234580993652346, + "step": 70865 + }, + { + "epoch": 0.6127919343542209, + "grad_norm": 1.217185059013092, + "learning_rate": 4.714235067343073e-06, + "loss": 0.3764373779296875, + "step": 70870 + }, + { + "epoch": 0.6128351678757642, + "grad_norm": 26.62134183251501, + "learning_rate": 4.714067867000903e-06, + "loss": 0.222149658203125, + "step": 70875 + }, + { + "epoch": 0.6128784013973074, + "grad_norm": 0.6118680091178557, + "learning_rate": 4.713900658753605e-06, + "loss": 0.020769500732421876, + "step": 70880 + }, + { + "epoch": 0.6129216349188507, + "grad_norm": 3.592797920934089, + "learning_rate": 4.713733442601948e-06, + "loss": 0.13752059936523436, + "step": 70885 + }, + { + "epoch": 0.612964868440394, + "grad_norm": 32.83195872372796, + "learning_rate": 4.713566218546703e-06, + "loss": 0.2830291748046875, + "step": 70890 + }, + { + "epoch": 0.6130081019619372, + "grad_norm": 22.759417785853476, + "learning_rate": 4.713398986588644e-06, + "loss": 0.13501815795898436, + "step": 70895 + }, + { + "epoch": 0.6130513354834805, + "grad_norm": 6.473570700635561, + "learning_rate": 4.713231746728539e-06, + "loss": 0.19893875122070312, + "step": 70900 + }, + { + "epoch": 0.6130945690050237, + "grad_norm": 11.70764203755055, + "learning_rate": 4.713064498967161e-06, + "loss": 0.13205032348632811, + "step": 70905 + }, + { + "epoch": 0.613137802526567, + "grad_norm": 0.14833666192187836, + "learning_rate": 4.7128972433052805e-06, + "loss": 0.0662689208984375, + "step": 70910 + }, + { + "epoch": 0.6131810360481103, + "grad_norm": 5.682736649790792, + "learning_rate": 4.712729979743669e-06, + "loss": 0.03428955078125, + "step": 70915 + }, + { + "epoch": 0.6132242695696535, + "grad_norm": 3.8188502986007355, + "learning_rate": 4.712562708283099e-06, + "loss": 0.08623504638671875, + "step": 70920 + }, + { + "epoch": 0.6132675030911968, + "grad_norm": 8.968545278972321, + "learning_rate": 4.71239542892434e-06, + "loss": 0.49186553955078127, + "step": 70925 + }, + { + "epoch": 0.61331073661274, + "grad_norm": 6.411732516988485, + "learning_rate": 4.712228141668166e-06, + "loss": 0.455767822265625, + "step": 70930 + }, + { + "epoch": 0.6133539701342833, + "grad_norm": 0.15416151622613436, + "learning_rate": 4.7120608465153465e-06, + "loss": 0.10218124389648438, + "step": 70935 + }, + { + "epoch": 0.6133972036558266, + "grad_norm": 6.250319851703717, + "learning_rate": 4.711893543466654e-06, + "loss": 0.0301055908203125, + "step": 70940 + }, + { + "epoch": 0.6134404371773698, + "grad_norm": 0.3749241279415973, + "learning_rate": 4.711726232522858e-06, + "loss": 0.03080902099609375, + "step": 70945 + }, + { + "epoch": 0.6134836706989131, + "grad_norm": 1.121666953914615, + "learning_rate": 4.711558913684733e-06, + "loss": 0.0443878173828125, + "step": 70950 + }, + { + "epoch": 0.6135269042204564, + "grad_norm": 23.49791935289134, + "learning_rate": 4.71139158695305e-06, + "loss": 0.44829559326171875, + "step": 70955 + }, + { + "epoch": 0.6135701377419996, + "grad_norm": 22.386108825762037, + "learning_rate": 4.71122425232858e-06, + "loss": 0.4060943603515625, + "step": 70960 + }, + { + "epoch": 0.6136133712635429, + "grad_norm": 0.776491709908757, + "learning_rate": 4.711056909812095e-06, + "loss": 0.120062255859375, + "step": 70965 + }, + { + "epoch": 0.6136566047850862, + "grad_norm": 9.850097261515398, + "learning_rate": 4.710889559404367e-06, + "loss": 0.069024658203125, + "step": 70970 + }, + { + "epoch": 0.6136998383066294, + "grad_norm": 1.01361818084229, + "learning_rate": 4.710722201106167e-06, + "loss": 0.35207366943359375, + "step": 70975 + }, + { + "epoch": 0.6137430718281727, + "grad_norm": 20.062044477256407, + "learning_rate": 4.710554834918267e-06, + "loss": 0.112615966796875, + "step": 70980 + }, + { + "epoch": 0.613786305349716, + "grad_norm": 2.1944310393966884, + "learning_rate": 4.7103874608414405e-06, + "loss": 0.0619293212890625, + "step": 70985 + }, + { + "epoch": 0.6138295388712592, + "grad_norm": 28.57178555692045, + "learning_rate": 4.710220078876457e-06, + "loss": 0.21984939575195311, + "step": 70990 + }, + { + "epoch": 0.6138727723928025, + "grad_norm": 1.5599333529627286, + "learning_rate": 4.71005268902409e-06, + "loss": 0.0822509765625, + "step": 70995 + }, + { + "epoch": 0.6139160059143457, + "grad_norm": 0.2716180121289246, + "learning_rate": 4.709885291285112e-06, + "loss": 0.028983306884765626, + "step": 71000 + }, + { + "epoch": 0.613959239435889, + "grad_norm": 0.3268112268535512, + "learning_rate": 4.709717885660292e-06, + "loss": 0.16496810913085938, + "step": 71005 + }, + { + "epoch": 0.6140024729574323, + "grad_norm": 4.943594874069047, + "learning_rate": 4.7095504721504065e-06, + "loss": 0.07860870361328125, + "step": 71010 + }, + { + "epoch": 0.6140457064789755, + "grad_norm": 40.247455116239934, + "learning_rate": 4.709383050756224e-06, + "loss": 0.29867706298828123, + "step": 71015 + }, + { + "epoch": 0.6140889400005188, + "grad_norm": 2.573719184225712, + "learning_rate": 4.709215621478517e-06, + "loss": 0.26558914184570315, + "step": 71020 + }, + { + "epoch": 0.6141321735220621, + "grad_norm": 0.13631856989702368, + "learning_rate": 4.7090481843180605e-06, + "loss": 0.3486785888671875, + "step": 71025 + }, + { + "epoch": 0.6141754070436053, + "grad_norm": 2.471845381023466, + "learning_rate": 4.708880739275623e-06, + "loss": 0.12556915283203124, + "step": 71030 + }, + { + "epoch": 0.6142186405651486, + "grad_norm": 15.633536898886303, + "learning_rate": 4.708713286351981e-06, + "loss": 0.07292404174804687, + "step": 71035 + }, + { + "epoch": 0.6142618740866919, + "grad_norm": 32.188253296359306, + "learning_rate": 4.708545825547902e-06, + "loss": 0.1631500244140625, + "step": 71040 + }, + { + "epoch": 0.6143051076082351, + "grad_norm": 1.2429976684079145, + "learning_rate": 4.708378356864162e-06, + "loss": 0.027352523803710938, + "step": 71045 + }, + { + "epoch": 0.6143483411297784, + "grad_norm": 18.714281408841092, + "learning_rate": 4.708210880301531e-06, + "loss": 0.29161529541015624, + "step": 71050 + }, + { + "epoch": 0.6143915746513217, + "grad_norm": 18.03612068661483, + "learning_rate": 4.708043395860783e-06, + "loss": 0.5755523681640625, + "step": 71055 + }, + { + "epoch": 0.6144348081728649, + "grad_norm": 0.8860072186216058, + "learning_rate": 4.7078759035426896e-06, + "loss": 0.12698974609375, + "step": 71060 + }, + { + "epoch": 0.6144780416944082, + "grad_norm": 9.394990213034454, + "learning_rate": 4.707708403348024e-06, + "loss": 0.2329559326171875, + "step": 71065 + }, + { + "epoch": 0.6145212752159515, + "grad_norm": 10.122729941153674, + "learning_rate": 4.707540895277557e-06, + "loss": 0.39010162353515626, + "step": 71070 + }, + { + "epoch": 0.6145645087374947, + "grad_norm": 12.459545094969465, + "learning_rate": 4.707373379332063e-06, + "loss": 0.13714599609375, + "step": 71075 + }, + { + "epoch": 0.6146077422590379, + "grad_norm": 3.878384923843297, + "learning_rate": 4.707205855512314e-06, + "loss": 0.07022476196289062, + "step": 71080 + }, + { + "epoch": 0.6146509757805813, + "grad_norm": 0.33666324379831947, + "learning_rate": 4.707038323819081e-06, + "loss": 0.02994537353515625, + "step": 71085 + }, + { + "epoch": 0.6146942093021245, + "grad_norm": 15.702117027456257, + "learning_rate": 4.7068707842531406e-06, + "loss": 0.27308349609375, + "step": 71090 + }, + { + "epoch": 0.6147374428236677, + "grad_norm": 2.085530586565408, + "learning_rate": 4.706703236815261e-06, + "loss": 0.024484634399414062, + "step": 71095 + }, + { + "epoch": 0.614780676345211, + "grad_norm": 13.689185796900528, + "learning_rate": 4.706535681506217e-06, + "loss": 0.1847015380859375, + "step": 71100 + }, + { + "epoch": 0.6148239098667543, + "grad_norm": 2.3355698301852414, + "learning_rate": 4.706368118326782e-06, + "loss": 0.11896209716796875, + "step": 71105 + }, + { + "epoch": 0.6148671433882975, + "grad_norm": 13.559271713521238, + "learning_rate": 4.706200547277727e-06, + "loss": 0.11008033752441407, + "step": 71110 + }, + { + "epoch": 0.6149103769098409, + "grad_norm": 4.944843163460708, + "learning_rate": 4.706032968359827e-06, + "loss": 0.04045791625976562, + "step": 71115 + }, + { + "epoch": 0.6149536104313841, + "grad_norm": 0.4205572464285602, + "learning_rate": 4.705865381573854e-06, + "loss": 0.25116653442382814, + "step": 71120 + }, + { + "epoch": 0.6149968439529273, + "grad_norm": 24.99090614891355, + "learning_rate": 4.705697786920579e-06, + "loss": 0.3527214050292969, + "step": 71125 + }, + { + "epoch": 0.6150400774744706, + "grad_norm": 11.638502818731364, + "learning_rate": 4.705530184400777e-06, + "loss": 0.14764480590820311, + "step": 71130 + }, + { + "epoch": 0.6150833109960139, + "grad_norm": 2.8400801797855677, + "learning_rate": 4.705362574015221e-06, + "loss": 0.08713455200195312, + "step": 71135 + }, + { + "epoch": 0.6151265445175571, + "grad_norm": 1.3292273316715262, + "learning_rate": 4.7051949557646834e-06, + "loss": 0.06392745971679688, + "step": 71140 + }, + { + "epoch": 0.6151697780391004, + "grad_norm": 20.317308567333104, + "learning_rate": 4.705027329649937e-06, + "loss": 0.2685943603515625, + "step": 71145 + }, + { + "epoch": 0.6152130115606437, + "grad_norm": 3.4939705579313545, + "learning_rate": 4.704859695671756e-06, + "loss": 0.2382892608642578, + "step": 71150 + }, + { + "epoch": 0.6152562450821869, + "grad_norm": 15.38615845938821, + "learning_rate": 4.704692053830912e-06, + "loss": 0.212530517578125, + "step": 71155 + }, + { + "epoch": 0.6152994786037302, + "grad_norm": 6.04933332542173, + "learning_rate": 4.704524404128179e-06, + "loss": 0.0728668212890625, + "step": 71160 + }, + { + "epoch": 0.6153427121252735, + "grad_norm": 32.138259068316955, + "learning_rate": 4.70435674656433e-06, + "loss": 0.76878662109375, + "step": 71165 + }, + { + "epoch": 0.6153859456468167, + "grad_norm": 7.713853778834452, + "learning_rate": 4.7041890811401385e-06, + "loss": 0.2891349792480469, + "step": 71170 + }, + { + "epoch": 0.6154291791683599, + "grad_norm": 6.86217856412669, + "learning_rate": 4.704021407856378e-06, + "loss": 0.4066253662109375, + "step": 71175 + }, + { + "epoch": 0.6154724126899033, + "grad_norm": 0.6102147498691655, + "learning_rate": 4.703853726713821e-06, + "loss": 0.017262840270996095, + "step": 71180 + }, + { + "epoch": 0.6155156462114465, + "grad_norm": 1.655821625651656, + "learning_rate": 4.703686037713241e-06, + "loss": 0.08399658203125, + "step": 71185 + }, + { + "epoch": 0.6155588797329897, + "grad_norm": 10.856648776184693, + "learning_rate": 4.703518340855412e-06, + "loss": 0.236273193359375, + "step": 71190 + }, + { + "epoch": 0.6156021132545331, + "grad_norm": 15.187558752879747, + "learning_rate": 4.703350636141105e-06, + "loss": 0.108062744140625, + "step": 71195 + }, + { + "epoch": 0.6156453467760763, + "grad_norm": 20.218267421209767, + "learning_rate": 4.703182923571098e-06, + "loss": 0.0349212646484375, + "step": 71200 + }, + { + "epoch": 0.6156885802976195, + "grad_norm": 1.266972626735901, + "learning_rate": 4.70301520314616e-06, + "loss": 0.12664508819580078, + "step": 71205 + }, + { + "epoch": 0.6157318138191629, + "grad_norm": 22.871421593735374, + "learning_rate": 4.702847474867067e-06, + "loss": 0.4956687927246094, + "step": 71210 + }, + { + "epoch": 0.6157750473407061, + "grad_norm": 0.4012293755569376, + "learning_rate": 4.702679738734592e-06, + "loss": 0.10787506103515625, + "step": 71215 + }, + { + "epoch": 0.6158182808622493, + "grad_norm": 8.226790821206452, + "learning_rate": 4.7025119947495085e-06, + "loss": 0.1180084228515625, + "step": 71220 + }, + { + "epoch": 0.6158615143837927, + "grad_norm": 0.46558716187490623, + "learning_rate": 4.70234424291259e-06, + "loss": 0.0388946533203125, + "step": 71225 + }, + { + "epoch": 0.6159047479053359, + "grad_norm": 11.085279491478722, + "learning_rate": 4.70217648322461e-06, + "loss": 0.152423095703125, + "step": 71230 + }, + { + "epoch": 0.6159479814268791, + "grad_norm": 0.29751913276162556, + "learning_rate": 4.702008715686343e-06, + "loss": 0.0466552734375, + "step": 71235 + }, + { + "epoch": 0.6159912149484225, + "grad_norm": 1.3863970893835666, + "learning_rate": 4.7018409402985625e-06, + "loss": 0.09937667846679688, + "step": 71240 + }, + { + "epoch": 0.6160344484699657, + "grad_norm": 30.6130022091558, + "learning_rate": 4.701673157062041e-06, + "loss": 0.318218994140625, + "step": 71245 + }, + { + "epoch": 0.6160776819915089, + "grad_norm": 0.29032543149841344, + "learning_rate": 4.7015053659775545e-06, + "loss": 0.04087409973144531, + "step": 71250 + }, + { + "epoch": 0.6161209155130521, + "grad_norm": 27.574001639847406, + "learning_rate": 4.701337567045874e-06, + "loss": 0.1053558349609375, + "step": 71255 + }, + { + "epoch": 0.6161641490345955, + "grad_norm": 0.4435748144270253, + "learning_rate": 4.7011697602677755e-06, + "loss": 0.21506729125976562, + "step": 71260 + }, + { + "epoch": 0.6162073825561387, + "grad_norm": 6.568436947282997, + "learning_rate": 4.701001945644033e-06, + "loss": 0.107000732421875, + "step": 71265 + }, + { + "epoch": 0.616250616077682, + "grad_norm": 0.4238173325266665, + "learning_rate": 4.700834123175419e-06, + "loss": 0.034353446960449216, + "step": 71270 + }, + { + "epoch": 0.6162938495992253, + "grad_norm": 52.88809300584266, + "learning_rate": 4.70066629286271e-06, + "loss": 0.131024169921875, + "step": 71275 + }, + { + "epoch": 0.6163370831207685, + "grad_norm": 20.69753592439548, + "learning_rate": 4.700498454706677e-06, + "loss": 0.253045654296875, + "step": 71280 + }, + { + "epoch": 0.6163803166423117, + "grad_norm": 15.26516826749553, + "learning_rate": 4.700330608708095e-06, + "loss": 0.08364524841308593, + "step": 71285 + }, + { + "epoch": 0.6164235501638551, + "grad_norm": 9.70704120087994, + "learning_rate": 4.700162754867739e-06, + "loss": 0.07274169921875, + "step": 71290 + }, + { + "epoch": 0.6164667836853983, + "grad_norm": 12.166757405826244, + "learning_rate": 4.699994893186383e-06, + "loss": 0.6763626098632812, + "step": 71295 + }, + { + "epoch": 0.6165100172069415, + "grad_norm": 0.22032934081663583, + "learning_rate": 4.6998270236648e-06, + "loss": 0.1753662109375, + "step": 71300 + }, + { + "epoch": 0.6165532507284849, + "grad_norm": 15.906431274812478, + "learning_rate": 4.6996591463037646e-06, + "loss": 0.0890279769897461, + "step": 71305 + }, + { + "epoch": 0.6165964842500281, + "grad_norm": 7.795757150169628, + "learning_rate": 4.699491261104053e-06, + "loss": 0.1942230224609375, + "step": 71310 + }, + { + "epoch": 0.6166397177715713, + "grad_norm": 4.582811717918157, + "learning_rate": 4.699323368066436e-06, + "loss": 0.1169921875, + "step": 71315 + }, + { + "epoch": 0.6166829512931147, + "grad_norm": 18.118169510352832, + "learning_rate": 4.699155467191692e-06, + "loss": 0.16077327728271484, + "step": 71320 + }, + { + "epoch": 0.6167261848146579, + "grad_norm": 0.2503520748255049, + "learning_rate": 4.6989875584805915e-06, + "loss": 0.12052459716796875, + "step": 71325 + }, + { + "epoch": 0.6167694183362011, + "grad_norm": 23.623078493240683, + "learning_rate": 4.698819641933911e-06, + "loss": 0.22233352661132813, + "step": 71330 + }, + { + "epoch": 0.6168126518577445, + "grad_norm": 23.539653349894923, + "learning_rate": 4.698651717552424e-06, + "loss": 0.13735885620117189, + "step": 71335 + }, + { + "epoch": 0.6168558853792877, + "grad_norm": 0.1291099286636655, + "learning_rate": 4.6984837853369064e-06, + "loss": 0.2836112976074219, + "step": 71340 + }, + { + "epoch": 0.6168991189008309, + "grad_norm": 0.8230946151885311, + "learning_rate": 4.698315845288131e-06, + "loss": 0.021079254150390626, + "step": 71345 + }, + { + "epoch": 0.6169423524223742, + "grad_norm": 30.349101742382718, + "learning_rate": 4.698147897406873e-06, + "loss": 0.5356956481933594, + "step": 71350 + }, + { + "epoch": 0.6169855859439175, + "grad_norm": 2.419649647405508, + "learning_rate": 4.697979941693906e-06, + "loss": 0.11172256469726563, + "step": 71355 + }, + { + "epoch": 0.6170288194654607, + "grad_norm": 1.2617978979619358, + "learning_rate": 4.697811978150007e-06, + "loss": 0.179827880859375, + "step": 71360 + }, + { + "epoch": 0.617072052987004, + "grad_norm": 2.615683204344323, + "learning_rate": 4.6976440067759486e-06, + "loss": 0.2080169677734375, + "step": 71365 + }, + { + "epoch": 0.6171152865085473, + "grad_norm": 13.251680727149003, + "learning_rate": 4.697476027572506e-06, + "loss": 0.123046875, + "step": 71370 + }, + { + "epoch": 0.6171585200300905, + "grad_norm": 66.46919618046284, + "learning_rate": 4.697308040540455e-06, + "loss": 0.22144203186035155, + "step": 71375 + }, + { + "epoch": 0.6172017535516338, + "grad_norm": 4.866556655829688, + "learning_rate": 4.697140045680568e-06, + "loss": 0.0531951904296875, + "step": 71380 + }, + { + "epoch": 0.6172449870731771, + "grad_norm": 24.378476200999426, + "learning_rate": 4.6969720429936206e-06, + "loss": 0.17416229248046874, + "step": 71385 + }, + { + "epoch": 0.6172882205947203, + "grad_norm": 14.758669718779085, + "learning_rate": 4.6968040324803895e-06, + "loss": 0.1186004638671875, + "step": 71390 + }, + { + "epoch": 0.6173314541162636, + "grad_norm": 8.262933993144612, + "learning_rate": 4.696636014141647e-06, + "loss": 0.170770263671875, + "step": 71395 + }, + { + "epoch": 0.6173746876378069, + "grad_norm": 4.159399614277595, + "learning_rate": 4.69646798797817e-06, + "loss": 0.12092971801757812, + "step": 71400 + }, + { + "epoch": 0.6174179211593501, + "grad_norm": 0.49893701576022703, + "learning_rate": 4.6962999539907325e-06, + "loss": 0.097698974609375, + "step": 71405 + }, + { + "epoch": 0.6174611546808934, + "grad_norm": 39.73131219590867, + "learning_rate": 4.696131912180109e-06, + "loss": 0.18316268920898438, + "step": 71410 + }, + { + "epoch": 0.6175043882024367, + "grad_norm": 2.269809549150403, + "learning_rate": 4.695963862547075e-06, + "loss": 0.0615966796875, + "step": 71415 + }, + { + "epoch": 0.6175476217239799, + "grad_norm": 7.960466824216214, + "learning_rate": 4.695795805092406e-06, + "loss": 0.1056640625, + "step": 71420 + }, + { + "epoch": 0.6175908552455232, + "grad_norm": 3.0644067826657637, + "learning_rate": 4.695627739816876e-06, + "loss": 0.1900726318359375, + "step": 71425 + }, + { + "epoch": 0.6176340887670664, + "grad_norm": 3.20883706351111, + "learning_rate": 4.695459666721261e-06, + "loss": 0.31795463562011717, + "step": 71430 + }, + { + "epoch": 0.6176773222886097, + "grad_norm": 7.5663564181561025, + "learning_rate": 4.6952915858063364e-06, + "loss": 0.13306655883789062, + "step": 71435 + }, + { + "epoch": 0.617720555810153, + "grad_norm": 9.321316254352851, + "learning_rate": 4.695123497072876e-06, + "loss": 0.161236572265625, + "step": 71440 + }, + { + "epoch": 0.6177637893316962, + "grad_norm": 7.997116889369016, + "learning_rate": 4.694955400521656e-06, + "loss": 0.1392498016357422, + "step": 71445 + }, + { + "epoch": 0.6178070228532395, + "grad_norm": 2.755755333724769, + "learning_rate": 4.694787296153451e-06, + "loss": 0.054510498046875, + "step": 71450 + }, + { + "epoch": 0.6178502563747827, + "grad_norm": 16.077153266546453, + "learning_rate": 4.694619183969038e-06, + "loss": 0.1417083740234375, + "step": 71455 + }, + { + "epoch": 0.617893489896326, + "grad_norm": 12.98119619137466, + "learning_rate": 4.69445106396919e-06, + "loss": 0.3507072448730469, + "step": 71460 + }, + { + "epoch": 0.6179367234178693, + "grad_norm": 0.23240647629193403, + "learning_rate": 4.694282936154684e-06, + "loss": 0.3717742919921875, + "step": 71465 + }, + { + "epoch": 0.6179799569394125, + "grad_norm": 9.142643693592179, + "learning_rate": 4.694114800526294e-06, + "loss": 0.4641548156738281, + "step": 71470 + }, + { + "epoch": 0.6180231904609558, + "grad_norm": 4.911750328968176, + "learning_rate": 4.693946657084797e-06, + "loss": 0.139837646484375, + "step": 71475 + }, + { + "epoch": 0.6180664239824991, + "grad_norm": 0.25491378644938306, + "learning_rate": 4.693778505830967e-06, + "loss": 0.0099517822265625, + "step": 71480 + }, + { + "epoch": 0.6181096575040423, + "grad_norm": 11.796534886934001, + "learning_rate": 4.693610346765581e-06, + "loss": 0.15201282501220703, + "step": 71485 + }, + { + "epoch": 0.6181528910255856, + "grad_norm": 11.027347271901574, + "learning_rate": 4.693442179889413e-06, + "loss": 0.3233673095703125, + "step": 71490 + }, + { + "epoch": 0.6181961245471289, + "grad_norm": 2.806302199265286, + "learning_rate": 4.6932740052032405e-06, + "loss": 0.05402450561523438, + "step": 71495 + }, + { + "epoch": 0.6182393580686721, + "grad_norm": 1.9807398974819226, + "learning_rate": 4.693105822707837e-06, + "loss": 0.1360443115234375, + "step": 71500 + }, + { + "epoch": 0.6182825915902154, + "grad_norm": 0.5964209289411827, + "learning_rate": 4.692937632403979e-06, + "loss": 0.09316978454589844, + "step": 71505 + }, + { + "epoch": 0.6183258251117586, + "grad_norm": 15.66916725073616, + "learning_rate": 4.6927694342924424e-06, + "loss": 0.1549652099609375, + "step": 71510 + }, + { + "epoch": 0.6183690586333019, + "grad_norm": 6.028871148315239, + "learning_rate": 4.692601228374003e-06, + "loss": 0.19142532348632812, + "step": 71515 + }, + { + "epoch": 0.6184122921548452, + "grad_norm": 6.278665132026349, + "learning_rate": 4.692433014649436e-06, + "loss": 0.06600341796875, + "step": 71520 + }, + { + "epoch": 0.6184555256763884, + "grad_norm": 3.673486276004387, + "learning_rate": 4.692264793119518e-06, + "loss": 0.18766326904296876, + "step": 71525 + }, + { + "epoch": 0.6184987591979317, + "grad_norm": 1.0187695710867755, + "learning_rate": 4.692096563785023e-06, + "loss": 0.02534761428833008, + "step": 71530 + }, + { + "epoch": 0.618541992719475, + "grad_norm": 59.59463907888886, + "learning_rate": 4.6919283266467295e-06, + "loss": 0.24578170776367186, + "step": 71535 + }, + { + "epoch": 0.6185852262410182, + "grad_norm": 0.29630758953084807, + "learning_rate": 4.691760081705411e-06, + "loss": 0.1163543701171875, + "step": 71540 + }, + { + "epoch": 0.6186284597625615, + "grad_norm": 2.035137874208709, + "learning_rate": 4.6915918289618446e-06, + "loss": 0.0822296142578125, + "step": 71545 + }, + { + "epoch": 0.6186716932841048, + "grad_norm": 33.01135473050505, + "learning_rate": 4.691423568416807e-06, + "loss": 0.42851715087890624, + "step": 71550 + }, + { + "epoch": 0.618714926805648, + "grad_norm": 55.539057714002894, + "learning_rate": 4.691255300071073e-06, + "loss": 0.25916748046875, + "step": 71555 + }, + { + "epoch": 0.6187581603271913, + "grad_norm": 1.2622247323307654, + "learning_rate": 4.691087023925418e-06, + "loss": 0.19884109497070312, + "step": 71560 + }, + { + "epoch": 0.6188013938487346, + "grad_norm": 9.000346807347462, + "learning_rate": 4.690918739980621e-06, + "loss": 0.36397705078125, + "step": 71565 + }, + { + "epoch": 0.6188446273702778, + "grad_norm": 0.34067757937600374, + "learning_rate": 4.690750448237455e-06, + "loss": 0.1238800048828125, + "step": 71570 + }, + { + "epoch": 0.6188878608918211, + "grad_norm": 9.316328760218362, + "learning_rate": 4.690582148696697e-06, + "loss": 0.29282150268554685, + "step": 71575 + }, + { + "epoch": 0.6189310944133644, + "grad_norm": 22.779606163475, + "learning_rate": 4.690413841359125e-06, + "loss": 0.23923492431640625, + "step": 71580 + }, + { + "epoch": 0.6189743279349076, + "grad_norm": 4.754387216208763, + "learning_rate": 4.690245526225513e-06, + "loss": 0.098590087890625, + "step": 71585 + }, + { + "epoch": 0.6190175614564509, + "grad_norm": 20.775282774267836, + "learning_rate": 4.690077203296637e-06, + "loss": 0.3381683349609375, + "step": 71590 + }, + { + "epoch": 0.6190607949779942, + "grad_norm": 6.299881378356482, + "learning_rate": 4.689908872573274e-06, + "loss": 0.08994140625, + "step": 71595 + }, + { + "epoch": 0.6191040284995374, + "grad_norm": 19.162152959825125, + "learning_rate": 4.689740534056202e-06, + "loss": 0.18626670837402343, + "step": 71600 + }, + { + "epoch": 0.6191472620210806, + "grad_norm": 4.838841004918317, + "learning_rate": 4.689572187746196e-06, + "loss": 0.0699127197265625, + "step": 71605 + }, + { + "epoch": 0.619190495542624, + "grad_norm": 5.253786771614683, + "learning_rate": 4.689403833644032e-06, + "loss": 0.06626129150390625, + "step": 71610 + }, + { + "epoch": 0.6192337290641672, + "grad_norm": 12.367331595006888, + "learning_rate": 4.689235471750487e-06, + "loss": 0.1117950439453125, + "step": 71615 + }, + { + "epoch": 0.6192769625857104, + "grad_norm": 3.0659230064420355, + "learning_rate": 4.689067102066337e-06, + "loss": 0.12297859191894531, + "step": 71620 + }, + { + "epoch": 0.6193201961072538, + "grad_norm": 0.5208456533648633, + "learning_rate": 4.688898724592358e-06, + "loss": 0.0978363037109375, + "step": 71625 + }, + { + "epoch": 0.619363429628797, + "grad_norm": 8.961665460965127, + "learning_rate": 4.688730339329329e-06, + "loss": 0.3724170684814453, + "step": 71630 + }, + { + "epoch": 0.6194066631503402, + "grad_norm": 2.8469825316299056, + "learning_rate": 4.6885619462780234e-06, + "loss": 0.15980224609375, + "step": 71635 + }, + { + "epoch": 0.6194498966718835, + "grad_norm": 0.1494257269815934, + "learning_rate": 4.68839354543922e-06, + "loss": 0.03042449951171875, + "step": 71640 + }, + { + "epoch": 0.6194931301934268, + "grad_norm": 0.9951146509476951, + "learning_rate": 4.688225136813695e-06, + "loss": 0.13132247924804688, + "step": 71645 + }, + { + "epoch": 0.61953636371497, + "grad_norm": 1.568416488791026, + "learning_rate": 4.688056720402224e-06, + "loss": 0.32428665161132814, + "step": 71650 + }, + { + "epoch": 0.6195795972365133, + "grad_norm": 31.978764275084842, + "learning_rate": 4.687888296205585e-06, + "loss": 0.108843994140625, + "step": 71655 + }, + { + "epoch": 0.6196228307580566, + "grad_norm": 2.487147285122215, + "learning_rate": 4.6877198642245545e-06, + "loss": 0.4033470153808594, + "step": 71660 + }, + { + "epoch": 0.6196660642795998, + "grad_norm": 133.52710422239156, + "learning_rate": 4.687551424459909e-06, + "loss": 0.24740753173828126, + "step": 71665 + }, + { + "epoch": 0.6197092978011431, + "grad_norm": 2.6936934860839954, + "learning_rate": 4.687382976912425e-06, + "loss": 0.09574508666992188, + "step": 71670 + }, + { + "epoch": 0.6197525313226864, + "grad_norm": 4.2734340268022235, + "learning_rate": 4.687214521582881e-06, + "loss": 0.1281982421875, + "step": 71675 + }, + { + "epoch": 0.6197957648442296, + "grad_norm": 1.002004484423904, + "learning_rate": 4.687046058472052e-06, + "loss": 0.0534088134765625, + "step": 71680 + }, + { + "epoch": 0.6198389983657728, + "grad_norm": 1.4217783939993862, + "learning_rate": 4.686877587580716e-06, + "loss": 0.07192344665527343, + "step": 71685 + }, + { + "epoch": 0.6198822318873162, + "grad_norm": 33.81744152589878, + "learning_rate": 4.68670910890965e-06, + "loss": 0.27695159912109374, + "step": 71690 + }, + { + "epoch": 0.6199254654088594, + "grad_norm": 6.014299585320612, + "learning_rate": 4.686540622459629e-06, + "loss": 0.07437400817871094, + "step": 71695 + }, + { + "epoch": 0.6199686989304026, + "grad_norm": 12.190063941370365, + "learning_rate": 4.686372128231433e-06, + "loss": 0.1701740264892578, + "step": 71700 + }, + { + "epoch": 0.620011932451946, + "grad_norm": 13.086512647122856, + "learning_rate": 4.686203626225836e-06, + "loss": 0.1814117431640625, + "step": 71705 + }, + { + "epoch": 0.6200551659734892, + "grad_norm": 6.265873375679719, + "learning_rate": 4.6860351164436195e-06, + "loss": 0.02929229736328125, + "step": 71710 + }, + { + "epoch": 0.6200983994950324, + "grad_norm": 1.7301295894503876, + "learning_rate": 4.6858665988855565e-06, + "loss": 0.06250381469726562, + "step": 71715 + }, + { + "epoch": 0.6201416330165758, + "grad_norm": 3.946467300518699, + "learning_rate": 4.6856980735524256e-06, + "loss": 0.08127899169921875, + "step": 71720 + }, + { + "epoch": 0.620184866538119, + "grad_norm": 1.2874033383440102, + "learning_rate": 4.685529540445004e-06, + "loss": 0.0947723388671875, + "step": 71725 + }, + { + "epoch": 0.6202281000596622, + "grad_norm": 21.05256424509494, + "learning_rate": 4.68536099956407e-06, + "loss": 0.2451751708984375, + "step": 71730 + }, + { + "epoch": 0.6202713335812056, + "grad_norm": 4.468794135926028, + "learning_rate": 4.6851924509103995e-06, + "loss": 0.2553466796875, + "step": 71735 + }, + { + "epoch": 0.6203145671027488, + "grad_norm": 15.649331312106908, + "learning_rate": 4.68502389448477e-06, + "loss": 0.1416229248046875, + "step": 71740 + }, + { + "epoch": 0.620357800624292, + "grad_norm": 1.311999454537928, + "learning_rate": 4.68485533028796e-06, + "loss": 0.307806396484375, + "step": 71745 + }, + { + "epoch": 0.6204010341458354, + "grad_norm": 3.1192673187307283, + "learning_rate": 4.684686758320745e-06, + "loss": 0.19119720458984374, + "step": 71750 + }, + { + "epoch": 0.6204442676673786, + "grad_norm": 11.092775916036981, + "learning_rate": 4.684518178583905e-06, + "loss": 0.09163818359375, + "step": 71755 + }, + { + "epoch": 0.6204875011889218, + "grad_norm": 1.21831379181257, + "learning_rate": 4.684349591078215e-06, + "loss": 0.3414459228515625, + "step": 71760 + }, + { + "epoch": 0.6205307347104652, + "grad_norm": 1.0623128320629136, + "learning_rate": 4.684180995804453e-06, + "loss": 0.3432373046875, + "step": 71765 + }, + { + "epoch": 0.6205739682320084, + "grad_norm": 5.558850745913493, + "learning_rate": 4.684012392763398e-06, + "loss": 0.0950439453125, + "step": 71770 + }, + { + "epoch": 0.6206172017535516, + "grad_norm": 31.050332297925053, + "learning_rate": 4.683843781955827e-06, + "loss": 0.2901802062988281, + "step": 71775 + }, + { + "epoch": 0.6206604352750948, + "grad_norm": 4.2186023703788065, + "learning_rate": 4.683675163382516e-06, + "loss": 0.079168701171875, + "step": 71780 + }, + { + "epoch": 0.6207036687966382, + "grad_norm": 4.272117477841359, + "learning_rate": 4.683506537044245e-06, + "loss": 0.14034881591796874, + "step": 71785 + }, + { + "epoch": 0.6207469023181814, + "grad_norm": 1.3124766668627896, + "learning_rate": 4.68333790294179e-06, + "loss": 0.05115509033203125, + "step": 71790 + }, + { + "epoch": 0.6207901358397246, + "grad_norm": 0.6734147379806212, + "learning_rate": 4.683169261075929e-06, + "loss": 0.087225341796875, + "step": 71795 + }, + { + "epoch": 0.620833369361268, + "grad_norm": 29.479028586259684, + "learning_rate": 4.683000611447441e-06, + "loss": 0.14886016845703126, + "step": 71800 + }, + { + "epoch": 0.6208766028828112, + "grad_norm": 13.561890579679115, + "learning_rate": 4.682831954057103e-06, + "loss": 0.366900634765625, + "step": 71805 + }, + { + "epoch": 0.6209198364043544, + "grad_norm": 1.185146994033566, + "learning_rate": 4.682663288905692e-06, + "loss": 0.3483558654785156, + "step": 71810 + }, + { + "epoch": 0.6209630699258978, + "grad_norm": 40.55277645047572, + "learning_rate": 4.6824946159939865e-06, + "loss": 0.16467666625976562, + "step": 71815 + }, + { + "epoch": 0.621006303447441, + "grad_norm": 2.1940972582715914, + "learning_rate": 4.682325935322765e-06, + "loss": 0.11970291137695313, + "step": 71820 + }, + { + "epoch": 0.6210495369689842, + "grad_norm": 8.463955185832393, + "learning_rate": 4.682157246892805e-06, + "loss": 0.1141693115234375, + "step": 71825 + }, + { + "epoch": 0.6210927704905276, + "grad_norm": 19.217132439756906, + "learning_rate": 4.681988550704884e-06, + "loss": 0.4970001220703125, + "step": 71830 + }, + { + "epoch": 0.6211360040120708, + "grad_norm": 13.90458474969577, + "learning_rate": 4.681819846759781e-06, + "loss": 0.05439453125, + "step": 71835 + }, + { + "epoch": 0.621179237533614, + "grad_norm": 0.25575256122378925, + "learning_rate": 4.681651135058273e-06, + "loss": 0.16864089965820311, + "step": 71840 + }, + { + "epoch": 0.6212224710551574, + "grad_norm": 150.58124085235912, + "learning_rate": 4.681482415601139e-06, + "loss": 0.17587432861328126, + "step": 71845 + }, + { + "epoch": 0.6212657045767006, + "grad_norm": 1.5828767798635246, + "learning_rate": 4.681313688389156e-06, + "loss": 0.128839111328125, + "step": 71850 + }, + { + "epoch": 0.6213089380982438, + "grad_norm": 4.40177285777097, + "learning_rate": 4.681144953423104e-06, + "loss": 0.126318359375, + "step": 71855 + }, + { + "epoch": 0.6213521716197871, + "grad_norm": 9.181820022067146, + "learning_rate": 4.680976210703758e-06, + "loss": 0.12482452392578125, + "step": 71860 + }, + { + "epoch": 0.6213954051413304, + "grad_norm": 26.31545987034779, + "learning_rate": 4.6808074602318995e-06, + "loss": 0.171087646484375, + "step": 71865 + }, + { + "epoch": 0.6214386386628736, + "grad_norm": 2.1753561623516484, + "learning_rate": 4.680638702008306e-06, + "loss": 0.10682830810546876, + "step": 71870 + }, + { + "epoch": 0.6214818721844169, + "grad_norm": 6.076999651925037, + "learning_rate": 4.680469936033755e-06, + "loss": 0.116552734375, + "step": 71875 + }, + { + "epoch": 0.6215251057059602, + "grad_norm": 22.668984257799394, + "learning_rate": 4.6803011623090235e-06, + "loss": 0.318914794921875, + "step": 71880 + }, + { + "epoch": 0.6215683392275034, + "grad_norm": 0.7846450018710803, + "learning_rate": 4.680132380834894e-06, + "loss": 0.03103485107421875, + "step": 71885 + }, + { + "epoch": 0.6216115727490467, + "grad_norm": 3.7151318219177507, + "learning_rate": 4.679963591612141e-06, + "loss": 0.04377593994140625, + "step": 71890 + }, + { + "epoch": 0.62165480627059, + "grad_norm": 6.5958213471474405, + "learning_rate": 4.679794794641544e-06, + "loss": 0.2139892578125, + "step": 71895 + }, + { + "epoch": 0.6216980397921332, + "grad_norm": 4.982431618053043, + "learning_rate": 4.679625989923883e-06, + "loss": 0.169549560546875, + "step": 71900 + }, + { + "epoch": 0.6217412733136765, + "grad_norm": 5.2454522044295295, + "learning_rate": 4.679457177459935e-06, + "loss": 0.13359375, + "step": 71905 + }, + { + "epoch": 0.6217845068352198, + "grad_norm": 0.30347518724898603, + "learning_rate": 4.679288357250479e-06, + "loss": 0.2269012451171875, + "step": 71910 + }, + { + "epoch": 0.621827740356763, + "grad_norm": 26.03348759882705, + "learning_rate": 4.679119529296293e-06, + "loss": 0.12968597412109376, + "step": 71915 + }, + { + "epoch": 0.6218709738783063, + "grad_norm": 0.6129785649740604, + "learning_rate": 4.678950693598156e-06, + "loss": 0.11532058715820312, + "step": 71920 + }, + { + "epoch": 0.6219142073998496, + "grad_norm": 31.442521903222595, + "learning_rate": 4.678781850156847e-06, + "loss": 0.24797821044921875, + "step": 71925 + }, + { + "epoch": 0.6219574409213928, + "grad_norm": 3.0350267528041566, + "learning_rate": 4.678612998973145e-06, + "loss": 0.0633880615234375, + "step": 71930 + }, + { + "epoch": 0.622000674442936, + "grad_norm": 1.4652061139516321, + "learning_rate": 4.678444140047828e-06, + "loss": 0.09200973510742187, + "step": 71935 + }, + { + "epoch": 0.6220439079644794, + "grad_norm": 5.5378404251238145, + "learning_rate": 4.678275273381673e-06, + "loss": 0.11452817916870117, + "step": 71940 + }, + { + "epoch": 0.6220871414860226, + "grad_norm": 0.2554235696007412, + "learning_rate": 4.678106398975463e-06, + "loss": 0.17147445678710938, + "step": 71945 + }, + { + "epoch": 0.6221303750075659, + "grad_norm": 7.936784142506378, + "learning_rate": 4.677937516829974e-06, + "loss": 0.371966552734375, + "step": 71950 + }, + { + "epoch": 0.6221736085291091, + "grad_norm": 25.650434857094265, + "learning_rate": 4.677768626945986e-06, + "loss": 0.2634368896484375, + "step": 71955 + }, + { + "epoch": 0.6222168420506524, + "grad_norm": 3.872352367710746, + "learning_rate": 4.677599729324276e-06, + "loss": 0.028401947021484374, + "step": 71960 + }, + { + "epoch": 0.6222600755721956, + "grad_norm": 43.233578598051814, + "learning_rate": 4.677430823965626e-06, + "loss": 0.40528030395507814, + "step": 71965 + }, + { + "epoch": 0.6223033090937389, + "grad_norm": 6.0779029634717086, + "learning_rate": 4.677261910870812e-06, + "loss": 0.04152679443359375, + "step": 71970 + }, + { + "epoch": 0.6223465426152822, + "grad_norm": 5.077326739956006, + "learning_rate": 4.677092990040614e-06, + "loss": 0.219866943359375, + "step": 71975 + }, + { + "epoch": 0.6223897761368254, + "grad_norm": 2.8492895194508274, + "learning_rate": 4.676924061475812e-06, + "loss": 0.04097137451171875, + "step": 71980 + }, + { + "epoch": 0.6224330096583687, + "grad_norm": 0.6435361946773085, + "learning_rate": 4.6767551251771845e-06, + "loss": 0.3412956237792969, + "step": 71985 + }, + { + "epoch": 0.622476243179912, + "grad_norm": 1.5589143493363644, + "learning_rate": 4.676586181145511e-06, + "loss": 0.08083839416503906, + "step": 71990 + }, + { + "epoch": 0.6225194767014552, + "grad_norm": 8.904145524282901, + "learning_rate": 4.676417229381568e-06, + "loss": 0.0872161865234375, + "step": 71995 + }, + { + "epoch": 0.6225627102229985, + "grad_norm": 0.736756613740831, + "learning_rate": 4.6762482698861395e-06, + "loss": 0.07022552490234375, + "step": 72000 + }, + { + "epoch": 0.6226059437445418, + "grad_norm": 16.152764280741156, + "learning_rate": 4.676079302660001e-06, + "loss": 0.31047210693359373, + "step": 72005 + }, + { + "epoch": 0.622649177266085, + "grad_norm": 6.268481381676762, + "learning_rate": 4.675910327703932e-06, + "loss": 0.061851119995117186, + "step": 72010 + }, + { + "epoch": 0.6226924107876283, + "grad_norm": 23.157994221711096, + "learning_rate": 4.675741345018714e-06, + "loss": 0.19626846313476562, + "step": 72015 + }, + { + "epoch": 0.6227356443091716, + "grad_norm": 11.712582460935435, + "learning_rate": 4.675572354605124e-06, + "loss": 0.2088226318359375, + "step": 72020 + }, + { + "epoch": 0.6227788778307148, + "grad_norm": 21.028958250213147, + "learning_rate": 4.675403356463944e-06, + "loss": 0.24548492431640626, + "step": 72025 + }, + { + "epoch": 0.6228221113522581, + "grad_norm": 15.700424134534408, + "learning_rate": 4.6752343505959495e-06, + "loss": 0.20072021484375, + "step": 72030 + }, + { + "epoch": 0.6228653448738013, + "grad_norm": 21.432092107885854, + "learning_rate": 4.675065337001924e-06, + "loss": 0.19932861328125, + "step": 72035 + }, + { + "epoch": 0.6229085783953446, + "grad_norm": 6.268792565283696, + "learning_rate": 4.6748963156826435e-06, + "loss": 0.162615966796875, + "step": 72040 + }, + { + "epoch": 0.6229518119168879, + "grad_norm": 1.0611401538080385, + "learning_rate": 4.67472728663889e-06, + "loss": 0.034112548828125, + "step": 72045 + }, + { + "epoch": 0.6229950454384311, + "grad_norm": 11.650987275309424, + "learning_rate": 4.674558249871443e-06, + "loss": 0.15962371826171876, + "step": 72050 + }, + { + "epoch": 0.6230382789599744, + "grad_norm": 3.835898114394756, + "learning_rate": 4.674389205381081e-06, + "loss": 0.14696502685546875, + "step": 72055 + }, + { + "epoch": 0.6230815124815177, + "grad_norm": 33.45565877443333, + "learning_rate": 4.674220153168583e-06, + "loss": 0.42903594970703124, + "step": 72060 + }, + { + "epoch": 0.6231247460030609, + "grad_norm": 5.3562394889048495, + "learning_rate": 4.674051093234731e-06, + "loss": 0.14945831298828124, + "step": 72065 + }, + { + "epoch": 0.6231679795246042, + "grad_norm": 0.6864677082118756, + "learning_rate": 4.673882025580302e-06, + "loss": 0.30884552001953125, + "step": 72070 + }, + { + "epoch": 0.6232112130461475, + "grad_norm": 2.839897996298411, + "learning_rate": 4.673712950206077e-06, + "loss": 0.1288543701171875, + "step": 72075 + }, + { + "epoch": 0.6232544465676907, + "grad_norm": 58.52932965617086, + "learning_rate": 4.673543867112837e-06, + "loss": 0.9577896118164062, + "step": 72080 + }, + { + "epoch": 0.623297680089234, + "grad_norm": 2.0268678084171587, + "learning_rate": 4.673374776301359e-06, + "loss": 0.05337448120117187, + "step": 72085 + }, + { + "epoch": 0.6233409136107773, + "grad_norm": 0.24445488297030765, + "learning_rate": 4.673205677772425e-06, + "loss": 0.1634754180908203, + "step": 72090 + }, + { + "epoch": 0.6233841471323205, + "grad_norm": 42.76814196137411, + "learning_rate": 4.673036571526815e-06, + "loss": 0.5293556213378906, + "step": 72095 + }, + { + "epoch": 0.6234273806538638, + "grad_norm": 6.319515031979165, + "learning_rate": 4.672867457565308e-06, + "loss": 0.07950439453125, + "step": 72100 + }, + { + "epoch": 0.6234706141754071, + "grad_norm": 0.11709695329542871, + "learning_rate": 4.672698335888684e-06, + "loss": 0.17758560180664062, + "step": 72105 + }, + { + "epoch": 0.6235138476969503, + "grad_norm": 4.572435788814508, + "learning_rate": 4.6725292064977225e-06, + "loss": 0.09521141052246093, + "step": 72110 + }, + { + "epoch": 0.6235570812184936, + "grad_norm": 10.441387894215469, + "learning_rate": 4.672360069393204e-06, + "loss": 0.05540313720703125, + "step": 72115 + }, + { + "epoch": 0.6236003147400369, + "grad_norm": 19.830022249338473, + "learning_rate": 4.672190924575909e-06, + "loss": 0.18245773315429686, + "step": 72120 + }, + { + "epoch": 0.6236435482615801, + "grad_norm": 8.188751901738945, + "learning_rate": 4.672021772046617e-06, + "loss": 0.08414649963378906, + "step": 72125 + }, + { + "epoch": 0.6236867817831233, + "grad_norm": 19.638803023112704, + "learning_rate": 4.671852611806109e-06, + "loss": 0.08587265014648438, + "step": 72130 + }, + { + "epoch": 0.6237300153046667, + "grad_norm": 2.2237334089215235, + "learning_rate": 4.671683443855164e-06, + "loss": 0.2045961380004883, + "step": 72135 + }, + { + "epoch": 0.6237732488262099, + "grad_norm": 4.548447359578314, + "learning_rate": 4.671514268194563e-06, + "loss": 0.31260986328125, + "step": 72140 + }, + { + "epoch": 0.6238164823477531, + "grad_norm": 22.1333940225159, + "learning_rate": 4.671345084825086e-06, + "loss": 0.08846817016601563, + "step": 72145 + }, + { + "epoch": 0.6238597158692964, + "grad_norm": 3.9226894146450655, + "learning_rate": 4.6711758937475125e-06, + "loss": 0.1614696502685547, + "step": 72150 + }, + { + "epoch": 0.6239029493908397, + "grad_norm": 2.4578688228309, + "learning_rate": 4.671006694962623e-06, + "loss": 0.2603179931640625, + "step": 72155 + }, + { + "epoch": 0.6239461829123829, + "grad_norm": 13.4253504239464, + "learning_rate": 4.670837488471199e-06, + "loss": 0.1129486083984375, + "step": 72160 + }, + { + "epoch": 0.6239894164339262, + "grad_norm": 12.392806514674778, + "learning_rate": 4.67066827427402e-06, + "loss": 0.08337421417236328, + "step": 72165 + }, + { + "epoch": 0.6240326499554695, + "grad_norm": 36.52470364426462, + "learning_rate": 4.670499052371868e-06, + "loss": 0.298577880859375, + "step": 72170 + }, + { + "epoch": 0.6240758834770127, + "grad_norm": 45.96980515582958, + "learning_rate": 4.67032982276552e-06, + "loss": 0.416534423828125, + "step": 72175 + }, + { + "epoch": 0.624119116998556, + "grad_norm": 6.4861379506837595, + "learning_rate": 4.670160585455759e-06, + "loss": 0.182415771484375, + "step": 72180 + }, + { + "epoch": 0.6241623505200993, + "grad_norm": 8.843249824699697, + "learning_rate": 4.669991340443365e-06, + "loss": 0.26841278076171876, + "step": 72185 + }, + { + "epoch": 0.6242055840416425, + "grad_norm": 2.895878273352566, + "learning_rate": 4.669822087729119e-06, + "loss": 0.03760833740234375, + "step": 72190 + }, + { + "epoch": 0.6242488175631858, + "grad_norm": 0.44962166309088647, + "learning_rate": 4.6696528273138e-06, + "loss": 0.0537628173828125, + "step": 72195 + }, + { + "epoch": 0.6242920510847291, + "grad_norm": 23.371646058125837, + "learning_rate": 4.66948355919819e-06, + "loss": 0.30438232421875, + "step": 72200 + }, + { + "epoch": 0.6243352846062723, + "grad_norm": 1.163604581889322, + "learning_rate": 4.66931428338307e-06, + "loss": 0.0440277099609375, + "step": 72205 + }, + { + "epoch": 0.6243785181278155, + "grad_norm": 9.406116500871436, + "learning_rate": 4.6691449998692195e-06, + "loss": 0.220172119140625, + "step": 72210 + }, + { + "epoch": 0.6244217516493589, + "grad_norm": 0.4354172033855826, + "learning_rate": 4.6689757086574205e-06, + "loss": 0.1984588623046875, + "step": 72215 + }, + { + "epoch": 0.6244649851709021, + "grad_norm": 3.5387467729247484, + "learning_rate": 4.668806409748452e-06, + "loss": 0.13319091796875, + "step": 72220 + }, + { + "epoch": 0.6245082186924453, + "grad_norm": 1.3705260575553266, + "learning_rate": 4.668637103143097e-06, + "loss": 0.04420013427734375, + "step": 72225 + }, + { + "epoch": 0.6245514522139887, + "grad_norm": 1.0072171096565679, + "learning_rate": 4.668467788842134e-06, + "loss": 0.3560882568359375, + "step": 72230 + }, + { + "epoch": 0.6245946857355319, + "grad_norm": 4.432394808587987, + "learning_rate": 4.668298466846345e-06, + "loss": 0.13971099853515626, + "step": 72235 + }, + { + "epoch": 0.6246379192570751, + "grad_norm": 5.743110548366877, + "learning_rate": 4.668129137156512e-06, + "loss": 0.040556716918945315, + "step": 72240 + }, + { + "epoch": 0.6246811527786185, + "grad_norm": 53.79658798379153, + "learning_rate": 4.6679597997734135e-06, + "loss": 0.28690567016601565, + "step": 72245 + }, + { + "epoch": 0.6247243863001617, + "grad_norm": 14.436784298045335, + "learning_rate": 4.667790454697833e-06, + "loss": 0.1361297607421875, + "step": 72250 + }, + { + "epoch": 0.6247676198217049, + "grad_norm": 4.336525305013933, + "learning_rate": 4.667621101930549e-06, + "loss": 0.1388251304626465, + "step": 72255 + }, + { + "epoch": 0.6248108533432483, + "grad_norm": 1.3171865025477476, + "learning_rate": 4.6674517414723445e-06, + "loss": 0.0233978271484375, + "step": 72260 + }, + { + "epoch": 0.6248540868647915, + "grad_norm": 1.1897725354594657, + "learning_rate": 4.667282373324e-06, + "loss": 0.06420822143554687, + "step": 72265 + }, + { + "epoch": 0.6248973203863347, + "grad_norm": 9.948337941794149, + "learning_rate": 4.667112997486296e-06, + "loss": 0.06561393737792968, + "step": 72270 + }, + { + "epoch": 0.6249405539078781, + "grad_norm": 7.0747308837341, + "learning_rate": 4.666943613960014e-06, + "loss": 0.17857131958007813, + "step": 72275 + }, + { + "epoch": 0.6249837874294213, + "grad_norm": 3.8048533682290424, + "learning_rate": 4.666774222745937e-06, + "loss": 0.07599983215332032, + "step": 72280 + }, + { + "epoch": 0.6250270209509645, + "grad_norm": 1.9610500960203376, + "learning_rate": 4.666604823844843e-06, + "loss": 0.17620849609375, + "step": 72285 + }, + { + "epoch": 0.6250702544725079, + "grad_norm": 0.1619276315324704, + "learning_rate": 4.666435417257515e-06, + "loss": 0.13337249755859376, + "step": 72290 + }, + { + "epoch": 0.6251134879940511, + "grad_norm": 18.38490720938015, + "learning_rate": 4.666266002984735e-06, + "loss": 0.3040771484375, + "step": 72295 + }, + { + "epoch": 0.6251567215155943, + "grad_norm": 3.6242434714235783, + "learning_rate": 4.666096581027283e-06, + "loss": 0.24917755126953126, + "step": 72300 + }, + { + "epoch": 0.6251999550371375, + "grad_norm": 0.9242673632098335, + "learning_rate": 4.665927151385941e-06, + "loss": 0.100958251953125, + "step": 72305 + }, + { + "epoch": 0.6252431885586809, + "grad_norm": 1.8407868592030745, + "learning_rate": 4.6657577140614895e-06, + "loss": 0.1772674560546875, + "step": 72310 + }, + { + "epoch": 0.6252864220802241, + "grad_norm": 6.77940323894799, + "learning_rate": 4.665588269054711e-06, + "loss": 0.0542633056640625, + "step": 72315 + }, + { + "epoch": 0.6253296556017673, + "grad_norm": 19.993226137490744, + "learning_rate": 4.6654188163663865e-06, + "loss": 0.11573944091796876, + "step": 72320 + }, + { + "epoch": 0.6253728891233107, + "grad_norm": 0.18795106080392157, + "learning_rate": 4.665249355997297e-06, + "loss": 0.02766265869140625, + "step": 72325 + }, + { + "epoch": 0.6254161226448539, + "grad_norm": 12.355876062007368, + "learning_rate": 4.6650798879482254e-06, + "loss": 0.0851654052734375, + "step": 72330 + }, + { + "epoch": 0.6254593561663971, + "grad_norm": 25.018060926609287, + "learning_rate": 4.6649104122199525e-06, + "loss": 0.17064132690429687, + "step": 72335 + }, + { + "epoch": 0.6255025896879405, + "grad_norm": 34.740069672236935, + "learning_rate": 4.664740928813259e-06, + "loss": 0.15072021484375, + "step": 72340 + }, + { + "epoch": 0.6255458232094837, + "grad_norm": 0.5240438764820312, + "learning_rate": 4.664571437728928e-06, + "loss": 0.06882476806640625, + "step": 72345 + }, + { + "epoch": 0.6255890567310269, + "grad_norm": 2.906550113978483, + "learning_rate": 4.664401938967742e-06, + "loss": 0.03695831298828125, + "step": 72350 + }, + { + "epoch": 0.6256322902525703, + "grad_norm": 2.9408658120622664, + "learning_rate": 4.664232432530478e-06, + "loss": 0.04794921875, + "step": 72355 + }, + { + "epoch": 0.6256755237741135, + "grad_norm": 4.089610146925936, + "learning_rate": 4.664062918417925e-06, + "loss": 0.182965087890625, + "step": 72360 + }, + { + "epoch": 0.6257187572956567, + "grad_norm": 3.742513271580448, + "learning_rate": 4.663893396630858e-06, + "loss": 0.2373321533203125, + "step": 72365 + }, + { + "epoch": 0.6257619908172001, + "grad_norm": 7.356650962424453, + "learning_rate": 4.663723867170062e-06, + "loss": 0.5129117012023926, + "step": 72370 + }, + { + "epoch": 0.6258052243387433, + "grad_norm": 1.8269547242936317, + "learning_rate": 4.663554330036319e-06, + "loss": 0.23744468688964843, + "step": 72375 + }, + { + "epoch": 0.6258484578602865, + "grad_norm": 0.35436723556448707, + "learning_rate": 4.663384785230411e-06, + "loss": 0.04524078369140625, + "step": 72380 + }, + { + "epoch": 0.6258916913818298, + "grad_norm": 7.484330373765928, + "learning_rate": 4.66321523275312e-06, + "loss": 0.278582763671875, + "step": 72385 + }, + { + "epoch": 0.6259349249033731, + "grad_norm": 1.278400323521059, + "learning_rate": 4.663045672605225e-06, + "loss": 0.20712051391601563, + "step": 72390 + }, + { + "epoch": 0.6259781584249163, + "grad_norm": 14.266960759049272, + "learning_rate": 4.662876104787512e-06, + "loss": 0.33465118408203126, + "step": 72395 + }, + { + "epoch": 0.6260213919464596, + "grad_norm": 0.7144452777386546, + "learning_rate": 4.662706529300762e-06, + "loss": 0.043585205078125, + "step": 72400 + }, + { + "epoch": 0.6260646254680029, + "grad_norm": 46.27793065606635, + "learning_rate": 4.662536946145755e-06, + "loss": 0.598358154296875, + "step": 72405 + }, + { + "epoch": 0.6261078589895461, + "grad_norm": 15.591585156280315, + "learning_rate": 4.662367355323273e-06, + "loss": 0.2544281005859375, + "step": 72410 + }, + { + "epoch": 0.6261510925110894, + "grad_norm": 0.18547200995015087, + "learning_rate": 4.662197756834103e-06, + "loss": 0.1194488525390625, + "step": 72415 + }, + { + "epoch": 0.6261943260326327, + "grad_norm": 0.7786641932317924, + "learning_rate": 4.662028150679022e-06, + "loss": 0.0472381591796875, + "step": 72420 + }, + { + "epoch": 0.6262375595541759, + "grad_norm": 11.811544765610723, + "learning_rate": 4.661858536858814e-06, + "loss": 0.30687789916992186, + "step": 72425 + }, + { + "epoch": 0.6262807930757192, + "grad_norm": 8.641467006381367, + "learning_rate": 4.661688915374262e-06, + "loss": 0.061969757080078125, + "step": 72430 + }, + { + "epoch": 0.6263240265972625, + "grad_norm": 6.445059118663331, + "learning_rate": 4.661519286226147e-06, + "loss": 0.09630622863769531, + "step": 72435 + }, + { + "epoch": 0.6263672601188057, + "grad_norm": 18.619990785426733, + "learning_rate": 4.661349649415252e-06, + "loss": 0.1875908851623535, + "step": 72440 + }, + { + "epoch": 0.626410493640349, + "grad_norm": 1.9934430115374366, + "learning_rate": 4.661180004942359e-06, + "loss": 0.17171630859375, + "step": 72445 + }, + { + "epoch": 0.6264537271618923, + "grad_norm": 3.19081062302873, + "learning_rate": 4.66101035280825e-06, + "loss": 0.04076461791992188, + "step": 72450 + }, + { + "epoch": 0.6264969606834355, + "grad_norm": 50.83965473649644, + "learning_rate": 4.660840693013709e-06, + "loss": 0.2598722457885742, + "step": 72455 + }, + { + "epoch": 0.6265401942049788, + "grad_norm": 29.285503751340016, + "learning_rate": 4.660671025559518e-06, + "loss": 0.14857063293457032, + "step": 72460 + }, + { + "epoch": 0.6265834277265221, + "grad_norm": 0.5402766860122004, + "learning_rate": 4.660501350446458e-06, + "loss": 0.08519563674926758, + "step": 72465 + }, + { + "epoch": 0.6266266612480653, + "grad_norm": 28.92720640115952, + "learning_rate": 4.660331667675313e-06, + "loss": 0.068890380859375, + "step": 72470 + }, + { + "epoch": 0.6266698947696085, + "grad_norm": 0.8789608728868894, + "learning_rate": 4.660161977246865e-06, + "loss": 0.0555511474609375, + "step": 72475 + }, + { + "epoch": 0.6267131282911518, + "grad_norm": 20.97541000674025, + "learning_rate": 4.659992279161897e-06, + "loss": 0.20212860107421876, + "step": 72480 + }, + { + "epoch": 0.6267563618126951, + "grad_norm": 16.211331823872246, + "learning_rate": 4.65982257342119e-06, + "loss": 0.17656707763671875, + "step": 72485 + }, + { + "epoch": 0.6267995953342383, + "grad_norm": 1.6212832383942526, + "learning_rate": 4.659652860025529e-06, + "loss": 0.07218170166015625, + "step": 72490 + }, + { + "epoch": 0.6268428288557816, + "grad_norm": 6.941770876676933, + "learning_rate": 4.659483138975696e-06, + "loss": 0.197998046875, + "step": 72495 + }, + { + "epoch": 0.6268860623773249, + "grad_norm": 19.542567499654485, + "learning_rate": 4.659313410272472e-06, + "loss": 0.07116355895996093, + "step": 72500 + }, + { + "epoch": 0.6269292958988681, + "grad_norm": 5.5701884677718105, + "learning_rate": 4.6591436739166425e-06, + "loss": 0.07928466796875, + "step": 72505 + }, + { + "epoch": 0.6269725294204114, + "grad_norm": 0.8415297603333297, + "learning_rate": 4.658973929908988e-06, + "loss": 0.061944198608398435, + "step": 72510 + }, + { + "epoch": 0.6270157629419547, + "grad_norm": 21.733864156209645, + "learning_rate": 4.658804178250292e-06, + "loss": 0.18574676513671876, + "step": 72515 + }, + { + "epoch": 0.6270589964634979, + "grad_norm": 35.40582378696389, + "learning_rate": 4.65863441894134e-06, + "loss": 0.447601318359375, + "step": 72520 + }, + { + "epoch": 0.6271022299850412, + "grad_norm": 1.325477995477988, + "learning_rate": 4.65846465198291e-06, + "loss": 0.23363494873046875, + "step": 72525 + }, + { + "epoch": 0.6271454635065845, + "grad_norm": 0.26192485631569995, + "learning_rate": 4.658294877375789e-06, + "loss": 0.17976150512695313, + "step": 72530 + }, + { + "epoch": 0.6271886970281277, + "grad_norm": 12.833266818707177, + "learning_rate": 4.6581250951207585e-06, + "loss": 0.17283782958984376, + "step": 72535 + }, + { + "epoch": 0.627231930549671, + "grad_norm": 67.53348652131561, + "learning_rate": 4.657955305218601e-06, + "loss": 0.1933929443359375, + "step": 72540 + }, + { + "epoch": 0.6272751640712143, + "grad_norm": 24.02383615363367, + "learning_rate": 4.6577855076701e-06, + "loss": 0.094384765625, + "step": 72545 + }, + { + "epoch": 0.6273183975927575, + "grad_norm": 1.846956055349791, + "learning_rate": 4.65761570247604e-06, + "loss": 0.269525146484375, + "step": 72550 + }, + { + "epoch": 0.6273616311143008, + "grad_norm": 28.78744301994896, + "learning_rate": 4.657445889637201e-06, + "loss": 0.34694061279296873, + "step": 72555 + }, + { + "epoch": 0.627404864635844, + "grad_norm": 1.0666710290589458, + "learning_rate": 4.65727606915437e-06, + "loss": 0.02494964599609375, + "step": 72560 + }, + { + "epoch": 0.6274480981573873, + "grad_norm": 6.431103850681521, + "learning_rate": 4.657106241028327e-06, + "loss": 0.22310104370117187, + "step": 72565 + }, + { + "epoch": 0.6274913316789306, + "grad_norm": 3.667948849518206, + "learning_rate": 4.656936405259856e-06, + "loss": 0.08912811279296876, + "step": 72570 + }, + { + "epoch": 0.6275345652004738, + "grad_norm": 10.963778817457445, + "learning_rate": 4.656766561849742e-06, + "loss": 0.16524658203125, + "step": 72575 + }, + { + "epoch": 0.6275777987220171, + "grad_norm": 3.2329167540788672, + "learning_rate": 4.656596710798766e-06, + "loss": 0.054412841796875, + "step": 72580 + }, + { + "epoch": 0.6276210322435604, + "grad_norm": 0.6942101245401483, + "learning_rate": 4.6564268521077125e-06, + "loss": 0.11970291137695313, + "step": 72585 + }, + { + "epoch": 0.6276642657651036, + "grad_norm": 1.829336127892375, + "learning_rate": 4.656256985777365e-06, + "loss": 0.09317245483398437, + "step": 72590 + }, + { + "epoch": 0.6277074992866469, + "grad_norm": 1.2513194312231855, + "learning_rate": 4.656087111808507e-06, + "loss": 0.15912322998046874, + "step": 72595 + }, + { + "epoch": 0.6277507328081902, + "grad_norm": 5.941724630372414, + "learning_rate": 4.655917230201921e-06, + "loss": 0.3048736572265625, + "step": 72600 + }, + { + "epoch": 0.6277939663297334, + "grad_norm": 5.251354620653585, + "learning_rate": 4.655747340958391e-06, + "loss": 0.0947509765625, + "step": 72605 + }, + { + "epoch": 0.6278371998512767, + "grad_norm": 22.04062553116349, + "learning_rate": 4.6555774440787e-06, + "loss": 0.15498600006103516, + "step": 72610 + }, + { + "epoch": 0.62788043337282, + "grad_norm": 0.5750180704401057, + "learning_rate": 4.655407539563633e-06, + "loss": 0.17638168334960938, + "step": 72615 + }, + { + "epoch": 0.6279236668943632, + "grad_norm": 29.99205600730385, + "learning_rate": 4.655237627413973e-06, + "loss": 0.5485252380371094, + "step": 72620 + }, + { + "epoch": 0.6279669004159065, + "grad_norm": 52.557313912640645, + "learning_rate": 4.655067707630503e-06, + "loss": 0.397393798828125, + "step": 72625 + }, + { + "epoch": 0.6280101339374498, + "grad_norm": 2.251010548440045, + "learning_rate": 4.654897780214007e-06, + "loss": 0.0593048095703125, + "step": 72630 + }, + { + "epoch": 0.628053367458993, + "grad_norm": 5.368904404485302, + "learning_rate": 4.654727845165269e-06, + "loss": 0.2007007598876953, + "step": 72635 + }, + { + "epoch": 0.6280966009805363, + "grad_norm": 13.142812837497585, + "learning_rate": 4.6545579024850715e-06, + "loss": 0.263763427734375, + "step": 72640 + }, + { + "epoch": 0.6281398345020796, + "grad_norm": 20.25370564423151, + "learning_rate": 4.654387952174199e-06, + "loss": 0.1571197509765625, + "step": 72645 + }, + { + "epoch": 0.6281830680236228, + "grad_norm": 2.9961012749085367, + "learning_rate": 4.654217994233437e-06, + "loss": 0.172021484375, + "step": 72650 + }, + { + "epoch": 0.628226301545166, + "grad_norm": 6.840893171358137, + "learning_rate": 4.6540480286635664e-06, + "loss": 0.22275276184082032, + "step": 72655 + }, + { + "epoch": 0.6282695350667094, + "grad_norm": 39.79315113076917, + "learning_rate": 4.653878055465373e-06, + "loss": 0.2672920227050781, + "step": 72660 + }, + { + "epoch": 0.6283127685882526, + "grad_norm": 5.06016097983416, + "learning_rate": 4.65370807463964e-06, + "loss": 0.31494598388671874, + "step": 72665 + }, + { + "epoch": 0.6283560021097958, + "grad_norm": 14.112153746265538, + "learning_rate": 4.653538086187152e-06, + "loss": 0.4159526824951172, + "step": 72670 + }, + { + "epoch": 0.6283992356313391, + "grad_norm": 0.5360220653091728, + "learning_rate": 4.6533680901086916e-06, + "loss": 0.14949951171875, + "step": 72675 + }, + { + "epoch": 0.6284424691528824, + "grad_norm": 4.154914976744944, + "learning_rate": 4.653198086405043e-06, + "loss": 0.1273101806640625, + "step": 72680 + }, + { + "epoch": 0.6284857026744256, + "grad_norm": 15.378245913066698, + "learning_rate": 4.653028075076993e-06, + "loss": 0.5249298095703125, + "step": 72685 + }, + { + "epoch": 0.628528936195969, + "grad_norm": 2.7694008067823406, + "learning_rate": 4.652858056125322e-06, + "loss": 0.07305145263671875, + "step": 72690 + }, + { + "epoch": 0.6285721697175122, + "grad_norm": 16.117695037362072, + "learning_rate": 4.652688029550816e-06, + "loss": 0.2233043670654297, + "step": 72695 + }, + { + "epoch": 0.6286154032390554, + "grad_norm": 27.196823060924867, + "learning_rate": 4.65251799535426e-06, + "loss": 0.34730224609375, + "step": 72700 + }, + { + "epoch": 0.6286586367605987, + "grad_norm": 16.073995999548938, + "learning_rate": 4.652347953536435e-06, + "loss": 0.19242486953735352, + "step": 72705 + }, + { + "epoch": 0.628701870282142, + "grad_norm": 38.608131379654324, + "learning_rate": 4.652177904098128e-06, + "loss": 0.19718399047851562, + "step": 72710 + }, + { + "epoch": 0.6287451038036852, + "grad_norm": 3.1395717458472836, + "learning_rate": 4.652007847040124e-06, + "loss": 0.13707962036132812, + "step": 72715 + }, + { + "epoch": 0.6287883373252285, + "grad_norm": 1.1146533385779631, + "learning_rate": 4.6518377823632045e-06, + "loss": 0.04068603515625, + "step": 72720 + }, + { + "epoch": 0.6288315708467718, + "grad_norm": 30.417602173235892, + "learning_rate": 4.651667710068155e-06, + "loss": 0.209735107421875, + "step": 72725 + }, + { + "epoch": 0.628874804368315, + "grad_norm": 3.671714331695347, + "learning_rate": 4.65149763015576e-06, + "loss": 0.167303466796875, + "step": 72730 + }, + { + "epoch": 0.6289180378898582, + "grad_norm": 2.319880519219805, + "learning_rate": 4.651327542626804e-06, + "loss": 0.05481414794921875, + "step": 72735 + }, + { + "epoch": 0.6289612714114016, + "grad_norm": 37.121825775988945, + "learning_rate": 4.651157447482072e-06, + "loss": 0.24883079528808594, + "step": 72740 + }, + { + "epoch": 0.6290045049329448, + "grad_norm": 9.800281663554992, + "learning_rate": 4.650987344722347e-06, + "loss": 0.08042221069335938, + "step": 72745 + }, + { + "epoch": 0.629047738454488, + "grad_norm": 18.211975086473956, + "learning_rate": 4.6508172343484135e-06, + "loss": 0.1291259765625, + "step": 72750 + }, + { + "epoch": 0.6290909719760314, + "grad_norm": 1.326628334970323, + "learning_rate": 4.650647116361057e-06, + "loss": 0.0504241943359375, + "step": 72755 + }, + { + "epoch": 0.6291342054975746, + "grad_norm": 3.6426905435231753, + "learning_rate": 4.650476990761064e-06, + "loss": 0.08852882385253906, + "step": 72760 + }, + { + "epoch": 0.6291774390191178, + "grad_norm": 1.6687409347197992, + "learning_rate": 4.6503068575492144e-06, + "loss": 0.24366378784179688, + "step": 72765 + }, + { + "epoch": 0.6292206725406612, + "grad_norm": 2.804280135697225, + "learning_rate": 4.650136716726296e-06, + "loss": 0.07606048583984375, + "step": 72770 + }, + { + "epoch": 0.6292639060622044, + "grad_norm": 1.5499392035079544, + "learning_rate": 4.649966568293094e-06, + "loss": 0.2405414581298828, + "step": 72775 + }, + { + "epoch": 0.6293071395837476, + "grad_norm": 1.6255585454694608, + "learning_rate": 4.6497964122503915e-06, + "loss": 0.12297439575195312, + "step": 72780 + }, + { + "epoch": 0.629350373105291, + "grad_norm": 26.005657952944823, + "learning_rate": 4.649626248598972e-06, + "loss": 0.20774993896484376, + "step": 72785 + }, + { + "epoch": 0.6293936066268342, + "grad_norm": 12.609281083562406, + "learning_rate": 4.6494560773396246e-06, + "loss": 0.08129539489746093, + "step": 72790 + }, + { + "epoch": 0.6294368401483774, + "grad_norm": 6.52964284732758, + "learning_rate": 4.64928589847313e-06, + "loss": 0.14706268310546874, + "step": 72795 + }, + { + "epoch": 0.6294800736699208, + "grad_norm": 40.08630891820384, + "learning_rate": 4.649115712000275e-06, + "loss": 0.24681243896484376, + "step": 72800 + }, + { + "epoch": 0.629523307191464, + "grad_norm": 10.20328448244542, + "learning_rate": 4.648945517921844e-06, + "loss": 0.165496826171875, + "step": 72805 + }, + { + "epoch": 0.6295665407130072, + "grad_norm": 3.1929784892694215, + "learning_rate": 4.648775316238622e-06, + "loss": 0.0628854751586914, + "step": 72810 + }, + { + "epoch": 0.6296097742345506, + "grad_norm": 11.527524757314817, + "learning_rate": 4.648605106951394e-06, + "loss": 0.24449625015258789, + "step": 72815 + }, + { + "epoch": 0.6296530077560938, + "grad_norm": 0.9983435233196686, + "learning_rate": 4.648434890060944e-06, + "loss": 0.03585357666015625, + "step": 72820 + }, + { + "epoch": 0.629696241277637, + "grad_norm": 11.271404885373203, + "learning_rate": 4.64826466556806e-06, + "loss": 0.13656005859375, + "step": 72825 + }, + { + "epoch": 0.6297394747991802, + "grad_norm": 0.270002724036408, + "learning_rate": 4.648094433473524e-06, + "loss": 0.059835052490234374, + "step": 72830 + }, + { + "epoch": 0.6297827083207236, + "grad_norm": 5.1376200678765, + "learning_rate": 4.647924193778122e-06, + "loss": 0.09339942932128906, + "step": 72835 + }, + { + "epoch": 0.6298259418422668, + "grad_norm": 11.8464358999919, + "learning_rate": 4.647753946482639e-06, + "loss": 0.111553955078125, + "step": 72840 + }, + { + "epoch": 0.62986917536381, + "grad_norm": 45.46390169816913, + "learning_rate": 4.647583691587861e-06, + "loss": 0.3226276397705078, + "step": 72845 + }, + { + "epoch": 0.6299124088853534, + "grad_norm": 12.360037411371485, + "learning_rate": 4.647413429094571e-06, + "loss": 0.2329334259033203, + "step": 72850 + }, + { + "epoch": 0.6299556424068966, + "grad_norm": 3.4911033713216866, + "learning_rate": 4.647243159003557e-06, + "loss": 0.07192230224609375, + "step": 72855 + }, + { + "epoch": 0.6299988759284398, + "grad_norm": 16.310543649424087, + "learning_rate": 4.647072881315603e-06, + "loss": 0.1119537353515625, + "step": 72860 + }, + { + "epoch": 0.6300421094499832, + "grad_norm": 3.1159750316218244, + "learning_rate": 4.646902596031494e-06, + "loss": 0.12158966064453125, + "step": 72865 + }, + { + "epoch": 0.6300853429715264, + "grad_norm": 34.00658862218381, + "learning_rate": 4.646732303152017e-06, + "loss": 0.13615264892578124, + "step": 72870 + }, + { + "epoch": 0.6301285764930696, + "grad_norm": 4.300855556981042, + "learning_rate": 4.6465620026779556e-06, + "loss": 0.04999046325683594, + "step": 72875 + }, + { + "epoch": 0.630171810014613, + "grad_norm": 5.42012882582661, + "learning_rate": 4.646391694610095e-06, + "loss": 0.0725738525390625, + "step": 72880 + }, + { + "epoch": 0.6302150435361562, + "grad_norm": 3.4271268472563783, + "learning_rate": 4.6462213789492215e-06, + "loss": 0.0385009765625, + "step": 72885 + }, + { + "epoch": 0.6302582770576994, + "grad_norm": 9.842431330481986, + "learning_rate": 4.646051055696122e-06, + "loss": 0.055467987060546876, + "step": 72890 + }, + { + "epoch": 0.6303015105792428, + "grad_norm": 2.4847352042221695, + "learning_rate": 4.645880724851578e-06, + "loss": 0.10238494873046874, + "step": 72895 + }, + { + "epoch": 0.630344744100786, + "grad_norm": 0.6079070117176257, + "learning_rate": 4.64571038641638e-06, + "loss": 0.0942413330078125, + "step": 72900 + }, + { + "epoch": 0.6303879776223292, + "grad_norm": 1.5512226237722289, + "learning_rate": 4.645540040391309e-06, + "loss": 0.196142578125, + "step": 72905 + }, + { + "epoch": 0.6304312111438725, + "grad_norm": 0.39659554185299417, + "learning_rate": 4.645369686777154e-06, + "loss": 0.12905502319335938, + "step": 72910 + }, + { + "epoch": 0.6304744446654158, + "grad_norm": 4.942569527702036, + "learning_rate": 4.645199325574699e-06, + "loss": 0.05400390625, + "step": 72915 + }, + { + "epoch": 0.630517678186959, + "grad_norm": 17.703837238827614, + "learning_rate": 4.64502895678473e-06, + "loss": 0.26089935302734374, + "step": 72920 + }, + { + "epoch": 0.6305609117085023, + "grad_norm": 28.73627345807239, + "learning_rate": 4.644858580408033e-06, + "loss": 0.27858505249023435, + "step": 72925 + }, + { + "epoch": 0.6306041452300456, + "grad_norm": 9.020418177284498, + "learning_rate": 4.644688196445393e-06, + "loss": 0.10848541259765625, + "step": 72930 + }, + { + "epoch": 0.6306473787515888, + "grad_norm": 0.3891086299308258, + "learning_rate": 4.644517804897597e-06, + "loss": 0.2316619873046875, + "step": 72935 + }, + { + "epoch": 0.6306906122731321, + "grad_norm": 0.275293911359979, + "learning_rate": 4.644347405765431e-06, + "loss": 0.13135299682617188, + "step": 72940 + }, + { + "epoch": 0.6307338457946754, + "grad_norm": 10.49775755935382, + "learning_rate": 4.644176999049678e-06, + "loss": 0.2096691131591797, + "step": 72945 + }, + { + "epoch": 0.6307770793162186, + "grad_norm": 6.045568138544505, + "learning_rate": 4.644006584751127e-06, + "loss": 0.052996063232421876, + "step": 72950 + }, + { + "epoch": 0.6308203128377619, + "grad_norm": 9.191799469871818, + "learning_rate": 4.6438361628705635e-06, + "loss": 0.21483345031738282, + "step": 72955 + }, + { + "epoch": 0.6308635463593052, + "grad_norm": 3.988871862504563, + "learning_rate": 4.643665733408772e-06, + "loss": 0.252288818359375, + "step": 72960 + }, + { + "epoch": 0.6309067798808484, + "grad_norm": 4.365997857476758, + "learning_rate": 4.64349529636654e-06, + "loss": 0.0595947265625, + "step": 72965 + }, + { + "epoch": 0.6309500134023917, + "grad_norm": 5.063537003025891, + "learning_rate": 4.6433248517446535e-06, + "loss": 0.1469879150390625, + "step": 72970 + }, + { + "epoch": 0.630993246923935, + "grad_norm": 8.865995140123466, + "learning_rate": 4.643154399543897e-06, + "loss": 0.25567970275878904, + "step": 72975 + }, + { + "epoch": 0.6310364804454782, + "grad_norm": 2.068463312679183, + "learning_rate": 4.642983939765057e-06, + "loss": 0.0570709228515625, + "step": 72980 + }, + { + "epoch": 0.6310797139670214, + "grad_norm": 4.727331523777294, + "learning_rate": 4.6428134724089225e-06, + "loss": 0.1199493408203125, + "step": 72985 + }, + { + "epoch": 0.6311229474885647, + "grad_norm": 30.98845319359409, + "learning_rate": 4.642642997476275e-06, + "loss": 0.16135025024414062, + "step": 72990 + }, + { + "epoch": 0.631166181010108, + "grad_norm": 13.12008112484613, + "learning_rate": 4.642472514967904e-06, + "loss": 0.047259521484375, + "step": 72995 + }, + { + "epoch": 0.6312094145316512, + "grad_norm": 0.885042784579454, + "learning_rate": 4.642302024884595e-06, + "loss": 0.41246490478515624, + "step": 73000 + }, + { + "epoch": 0.6312526480531945, + "grad_norm": 39.022588716122556, + "learning_rate": 4.642131527227134e-06, + "loss": 0.3107147216796875, + "step": 73005 + }, + { + "epoch": 0.6312958815747378, + "grad_norm": 13.087922847397774, + "learning_rate": 4.641961021996308e-06, + "loss": 0.127655029296875, + "step": 73010 + }, + { + "epoch": 0.631339115096281, + "grad_norm": 15.41564386390275, + "learning_rate": 4.641790509192902e-06, + "loss": 0.2200481414794922, + "step": 73015 + }, + { + "epoch": 0.6313823486178243, + "grad_norm": 0.34651778143386985, + "learning_rate": 4.641619988817704e-06, + "loss": 0.2918067932128906, + "step": 73020 + }, + { + "epoch": 0.6314255821393676, + "grad_norm": 0.9664670981158099, + "learning_rate": 4.641449460871499e-06, + "loss": 0.020013046264648438, + "step": 73025 + }, + { + "epoch": 0.6314688156609108, + "grad_norm": 2.0782689628242035, + "learning_rate": 4.641278925355075e-06, + "loss": 0.05391845703125, + "step": 73030 + }, + { + "epoch": 0.6315120491824541, + "grad_norm": 2.4540693590010427, + "learning_rate": 4.641108382269217e-06, + "loss": 0.035219573974609376, + "step": 73035 + }, + { + "epoch": 0.6315552827039974, + "grad_norm": 11.943313673302756, + "learning_rate": 4.640937831614713e-06, + "loss": 0.1980712890625, + "step": 73040 + }, + { + "epoch": 0.6315985162255406, + "grad_norm": 17.405114525374607, + "learning_rate": 4.640767273392347e-06, + "loss": 0.5714988708496094, + "step": 73045 + }, + { + "epoch": 0.6316417497470839, + "grad_norm": 1.2512184452728299, + "learning_rate": 4.640596707602908e-06, + "loss": 0.028014373779296876, + "step": 73050 + }, + { + "epoch": 0.6316849832686272, + "grad_norm": 15.620627815173796, + "learning_rate": 4.640426134247183e-06, + "loss": 0.4059154510498047, + "step": 73055 + }, + { + "epoch": 0.6317282167901704, + "grad_norm": 2.2507967763780976, + "learning_rate": 4.640255553325956e-06, + "loss": 0.40847091674804686, + "step": 73060 + }, + { + "epoch": 0.6317714503117137, + "grad_norm": 39.96860404301258, + "learning_rate": 4.640084964840018e-06, + "loss": 0.32172470092773436, + "step": 73065 + }, + { + "epoch": 0.631814683833257, + "grad_norm": 7.083468578762124, + "learning_rate": 4.6399143687901495e-06, + "loss": 0.05097198486328125, + "step": 73070 + }, + { + "epoch": 0.6318579173548002, + "grad_norm": 32.634180897855686, + "learning_rate": 4.6397437651771435e-06, + "loss": 0.194879150390625, + "step": 73075 + }, + { + "epoch": 0.6319011508763435, + "grad_norm": 4.475262542106408, + "learning_rate": 4.639573154001783e-06, + "loss": 0.12809677124023439, + "step": 73080 + }, + { + "epoch": 0.6319443843978867, + "grad_norm": 0.467974755666529, + "learning_rate": 4.639402535264856e-06, + "loss": 0.17307510375976562, + "step": 73085 + }, + { + "epoch": 0.63198761791943, + "grad_norm": 27.91396334529479, + "learning_rate": 4.63923190896715e-06, + "loss": 0.18722114562988282, + "step": 73090 + }, + { + "epoch": 0.6320308514409733, + "grad_norm": 12.452257430184314, + "learning_rate": 4.6390612751094515e-06, + "loss": 0.05996856689453125, + "step": 73095 + }, + { + "epoch": 0.6320740849625165, + "grad_norm": 10.649647838405652, + "learning_rate": 4.638890633692547e-06, + "loss": 0.12464447021484375, + "step": 73100 + }, + { + "epoch": 0.6321173184840598, + "grad_norm": 2.740655150636677, + "learning_rate": 4.638719984717222e-06, + "loss": 0.017354202270507813, + "step": 73105 + }, + { + "epoch": 0.6321605520056031, + "grad_norm": 1.2454372186784533, + "learning_rate": 4.638549328184267e-06, + "loss": 0.06769981384277343, + "step": 73110 + }, + { + "epoch": 0.6322037855271463, + "grad_norm": 9.053874479035304, + "learning_rate": 4.638378664094466e-06, + "loss": 0.4334503173828125, + "step": 73115 + }, + { + "epoch": 0.6322470190486896, + "grad_norm": 34.890504156841445, + "learning_rate": 4.638207992448609e-06, + "loss": 0.21190338134765624, + "step": 73120 + }, + { + "epoch": 0.6322902525702329, + "grad_norm": 0.9891849713995415, + "learning_rate": 4.638037313247481e-06, + "loss": 0.056049346923828125, + "step": 73125 + }, + { + "epoch": 0.6323334860917761, + "grad_norm": 2.513597851047801, + "learning_rate": 4.6378666264918694e-06, + "loss": 0.05494537353515625, + "step": 73130 + }, + { + "epoch": 0.6323767196133194, + "grad_norm": 29.41177407179236, + "learning_rate": 4.637695932182562e-06, + "loss": 0.19943161010742189, + "step": 73135 + }, + { + "epoch": 0.6324199531348627, + "grad_norm": 17.686243443349007, + "learning_rate": 4.637525230320345e-06, + "loss": 0.1247039794921875, + "step": 73140 + }, + { + "epoch": 0.6324631866564059, + "grad_norm": 6.220339211496853, + "learning_rate": 4.637354520906006e-06, + "loss": 0.053947830200195314, + "step": 73145 + }, + { + "epoch": 0.6325064201779492, + "grad_norm": 30.86396956509311, + "learning_rate": 4.637183803940333e-06, + "loss": 0.10575637817382813, + "step": 73150 + }, + { + "epoch": 0.6325496536994925, + "grad_norm": 12.70477515468389, + "learning_rate": 4.637013079424114e-06, + "loss": 0.10717697143554687, + "step": 73155 + }, + { + "epoch": 0.6325928872210357, + "grad_norm": 16.424358986316367, + "learning_rate": 4.636842347358135e-06, + "loss": 0.24825286865234375, + "step": 73160 + }, + { + "epoch": 0.6326361207425789, + "grad_norm": 2.9716637985642387, + "learning_rate": 4.6366716077431825e-06, + "loss": 0.2079010009765625, + "step": 73165 + }, + { + "epoch": 0.6326793542641223, + "grad_norm": 18.857679917795483, + "learning_rate": 4.636500860580046e-06, + "loss": 0.18545608520507811, + "step": 73170 + }, + { + "epoch": 0.6327225877856655, + "grad_norm": 22.53277245115451, + "learning_rate": 4.636330105869513e-06, + "loss": 0.3283935546875, + "step": 73175 + }, + { + "epoch": 0.6327658213072087, + "grad_norm": 1.5889241054667467, + "learning_rate": 4.636159343612369e-06, + "loss": 0.19303359985351562, + "step": 73180 + }, + { + "epoch": 0.632809054828752, + "grad_norm": 25.231669898969972, + "learning_rate": 4.635988573809402e-06, + "loss": 0.2497802734375, + "step": 73185 + }, + { + "epoch": 0.6328522883502953, + "grad_norm": 2.435570959661678, + "learning_rate": 4.635817796461402e-06, + "loss": 0.273992919921875, + "step": 73190 + }, + { + "epoch": 0.6328955218718385, + "grad_norm": 19.086554850032414, + "learning_rate": 4.635647011569153e-06, + "loss": 0.1429443359375, + "step": 73195 + }, + { + "epoch": 0.6329387553933818, + "grad_norm": 3.964368241284076, + "learning_rate": 4.635476219133446e-06, + "loss": 0.18242359161376953, + "step": 73200 + }, + { + "epoch": 0.6329819889149251, + "grad_norm": 9.28450592996281, + "learning_rate": 4.635305419155067e-06, + "loss": 0.386737060546875, + "step": 73205 + }, + { + "epoch": 0.6330252224364683, + "grad_norm": 1.463526343493883, + "learning_rate": 4.635134611634803e-06, + "loss": 0.081396484375, + "step": 73210 + }, + { + "epoch": 0.6330684559580116, + "grad_norm": 11.85385492669003, + "learning_rate": 4.634963796573443e-06, + "loss": 0.05062713623046875, + "step": 73215 + }, + { + "epoch": 0.6331116894795549, + "grad_norm": 1.8325624858986078, + "learning_rate": 4.634792973971775e-06, + "loss": 0.11632843017578125, + "step": 73220 + }, + { + "epoch": 0.6331549230010981, + "grad_norm": 0.4153647300466885, + "learning_rate": 4.634622143830586e-06, + "loss": 0.05176239013671875, + "step": 73225 + }, + { + "epoch": 0.6331981565226414, + "grad_norm": 3.282387475310761, + "learning_rate": 4.634451306150664e-06, + "loss": 0.2034942626953125, + "step": 73230 + }, + { + "epoch": 0.6332413900441847, + "grad_norm": 3.8243765962584892, + "learning_rate": 4.634280460932797e-06, + "loss": 0.023665618896484376, + "step": 73235 + }, + { + "epoch": 0.6332846235657279, + "grad_norm": 5.217546977005335, + "learning_rate": 4.634109608177773e-06, + "loss": 0.11430511474609376, + "step": 73240 + }, + { + "epoch": 0.6333278570872712, + "grad_norm": 14.987195201193302, + "learning_rate": 4.633938747886379e-06, + "loss": 0.2447174072265625, + "step": 73245 + }, + { + "epoch": 0.6333710906088145, + "grad_norm": 44.018397892589654, + "learning_rate": 4.633767880059406e-06, + "loss": 0.3199066162109375, + "step": 73250 + }, + { + "epoch": 0.6334143241303577, + "grad_norm": 17.78171451625636, + "learning_rate": 4.633597004697638e-06, + "loss": 0.25635986328125, + "step": 73255 + }, + { + "epoch": 0.6334575576519009, + "grad_norm": 8.03577367829822, + "learning_rate": 4.6334261218018655e-06, + "loss": 0.13585205078125, + "step": 73260 + }, + { + "epoch": 0.6335007911734443, + "grad_norm": 18.121459096304743, + "learning_rate": 4.633255231372877e-06, + "loss": 0.3723907470703125, + "step": 73265 + }, + { + "epoch": 0.6335440246949875, + "grad_norm": 0.9986477749322462, + "learning_rate": 4.6330843334114585e-06, + "loss": 0.10592803955078126, + "step": 73270 + }, + { + "epoch": 0.6335872582165307, + "grad_norm": 2.350220740094799, + "learning_rate": 4.6329134279183995e-06, + "loss": 0.064202880859375, + "step": 73275 + }, + { + "epoch": 0.6336304917380741, + "grad_norm": 0.19134022885376895, + "learning_rate": 4.632742514894488e-06, + "loss": 0.18356170654296874, + "step": 73280 + }, + { + "epoch": 0.6336737252596173, + "grad_norm": 16.438843379071855, + "learning_rate": 4.632571594340513e-06, + "loss": 0.14118385314941406, + "step": 73285 + }, + { + "epoch": 0.6337169587811605, + "grad_norm": 5.82644297026451, + "learning_rate": 4.632400666257261e-06, + "loss": 0.5397171020507813, + "step": 73290 + }, + { + "epoch": 0.6337601923027039, + "grad_norm": 1.8370083262858101, + "learning_rate": 4.6322297306455225e-06, + "loss": 0.027594757080078126, + "step": 73295 + }, + { + "epoch": 0.6338034258242471, + "grad_norm": 4.063969140356677, + "learning_rate": 4.632058787506084e-06, + "loss": 0.231951904296875, + "step": 73300 + }, + { + "epoch": 0.6338466593457903, + "grad_norm": 2.8673504599889483, + "learning_rate": 4.631887836839735e-06, + "loss": 0.041607666015625, + "step": 73305 + }, + { + "epoch": 0.6338898928673337, + "grad_norm": 3.4695266238953386, + "learning_rate": 4.631716878647263e-06, + "loss": 0.06204833984375, + "step": 73310 + }, + { + "epoch": 0.6339331263888769, + "grad_norm": 20.37618326329848, + "learning_rate": 4.631545912929457e-06, + "loss": 0.38868408203125, + "step": 73315 + }, + { + "epoch": 0.6339763599104201, + "grad_norm": 1.389847818771827, + "learning_rate": 4.6313749396871064e-06, + "loss": 0.12081298828125, + "step": 73320 + }, + { + "epoch": 0.6340195934319635, + "grad_norm": 12.866489551888694, + "learning_rate": 4.6312039589209975e-06, + "loss": 0.13554534912109376, + "step": 73325 + }, + { + "epoch": 0.6340628269535067, + "grad_norm": 0.9760315703878264, + "learning_rate": 4.631032970631921e-06, + "loss": 0.04305877685546875, + "step": 73330 + }, + { + "epoch": 0.6341060604750499, + "grad_norm": 1.1884784246391167, + "learning_rate": 4.6308619748206635e-06, + "loss": 0.0586090087890625, + "step": 73335 + }, + { + "epoch": 0.6341492939965931, + "grad_norm": 3.0267125126888663, + "learning_rate": 4.6306909714880154e-06, + "loss": 0.11582260131835938, + "step": 73340 + }, + { + "epoch": 0.6341925275181365, + "grad_norm": 57.97012069895924, + "learning_rate": 4.630519960634764e-06, + "loss": 0.22597503662109375, + "step": 73345 + }, + { + "epoch": 0.6342357610396797, + "grad_norm": 26.612640415357035, + "learning_rate": 4.6303489422616995e-06, + "loss": 0.1190617561340332, + "step": 73350 + }, + { + "epoch": 0.6342789945612229, + "grad_norm": 9.07510823970158, + "learning_rate": 4.63017791636961e-06, + "loss": 0.1108856201171875, + "step": 73355 + }, + { + "epoch": 0.6343222280827663, + "grad_norm": 3.2757108333570337, + "learning_rate": 4.630006882959283e-06, + "loss": 0.116253662109375, + "step": 73360 + }, + { + "epoch": 0.6343654616043095, + "grad_norm": 0.6059635795456554, + "learning_rate": 4.629835842031508e-06, + "loss": 0.07918243408203125, + "step": 73365 + }, + { + "epoch": 0.6344086951258527, + "grad_norm": 0.17779846046540868, + "learning_rate": 4.629664793587075e-06, + "loss": 0.08547821044921874, + "step": 73370 + }, + { + "epoch": 0.6344519286473961, + "grad_norm": 6.926373112628298, + "learning_rate": 4.629493737626772e-06, + "loss": 0.06578216552734376, + "step": 73375 + }, + { + "epoch": 0.6344951621689393, + "grad_norm": 27.667204029374464, + "learning_rate": 4.629322674151387e-06, + "loss": 0.2132537841796875, + "step": 73380 + }, + { + "epoch": 0.6345383956904825, + "grad_norm": 5.7712028143278316, + "learning_rate": 4.629151603161711e-06, + "loss": 0.077783203125, + "step": 73385 + }, + { + "epoch": 0.6345816292120259, + "grad_norm": 1.0845115133095395, + "learning_rate": 4.62898052465853e-06, + "loss": 0.27181053161621094, + "step": 73390 + }, + { + "epoch": 0.6346248627335691, + "grad_norm": 0.9419844181198621, + "learning_rate": 4.6288094386426365e-06, + "loss": 0.25663299560546876, + "step": 73395 + }, + { + "epoch": 0.6346680962551123, + "grad_norm": 0.28866226313349325, + "learning_rate": 4.6286383451148175e-06, + "loss": 0.1354095458984375, + "step": 73400 + }, + { + "epoch": 0.6347113297766557, + "grad_norm": 2.4071310153263648, + "learning_rate": 4.628467244075861e-06, + "loss": 0.18293533325195313, + "step": 73405 + }, + { + "epoch": 0.6347545632981989, + "grad_norm": 33.330849460750564, + "learning_rate": 4.628296135526559e-06, + "loss": 0.12055320739746093, + "step": 73410 + }, + { + "epoch": 0.6347977968197421, + "grad_norm": 2.564531556112521, + "learning_rate": 4.6281250194676986e-06, + "loss": 0.15427398681640625, + "step": 73415 + }, + { + "epoch": 0.6348410303412855, + "grad_norm": 10.103229002223749, + "learning_rate": 4.62795389590007e-06, + "loss": 0.11287193298339844, + "step": 73420 + }, + { + "epoch": 0.6348842638628287, + "grad_norm": 1.5061758264008835, + "learning_rate": 4.627782764824461e-06, + "loss": 0.38090267181396487, + "step": 73425 + }, + { + "epoch": 0.6349274973843719, + "grad_norm": 4.702012460607804, + "learning_rate": 4.627611626241662e-06, + "loss": 0.0402587890625, + "step": 73430 + }, + { + "epoch": 0.6349707309059152, + "grad_norm": 2.7948433453594963, + "learning_rate": 4.627440480152463e-06, + "loss": 0.0486053466796875, + "step": 73435 + }, + { + "epoch": 0.6350139644274585, + "grad_norm": 13.261363173912937, + "learning_rate": 4.6272693265576505e-06, + "loss": 0.06224822998046875, + "step": 73440 + }, + { + "epoch": 0.6350571979490017, + "grad_norm": 14.947584674445622, + "learning_rate": 4.627098165458016e-06, + "loss": 0.06996917724609375, + "step": 73445 + }, + { + "epoch": 0.635100431470545, + "grad_norm": 1.439982879096766, + "learning_rate": 4.626926996854349e-06, + "loss": 0.2915641784667969, + "step": 73450 + }, + { + "epoch": 0.6351436649920883, + "grad_norm": 6.243833111830772, + "learning_rate": 4.626755820747439e-06, + "loss": 0.10009918212890626, + "step": 73455 + }, + { + "epoch": 0.6351868985136315, + "grad_norm": 0.5286546106761199, + "learning_rate": 4.626584637138075e-06, + "loss": 0.0173828125, + "step": 73460 + }, + { + "epoch": 0.6352301320351748, + "grad_norm": 5.892540232259591, + "learning_rate": 4.626413446027046e-06, + "loss": 0.09618682861328125, + "step": 73465 + }, + { + "epoch": 0.6352733655567181, + "grad_norm": 0.9991340892572853, + "learning_rate": 4.626242247415141e-06, + "loss": 0.0515045166015625, + "step": 73470 + }, + { + "epoch": 0.6353165990782613, + "grad_norm": 14.626090104013498, + "learning_rate": 4.626071041303151e-06, + "loss": 0.3661346435546875, + "step": 73475 + }, + { + "epoch": 0.6353598325998046, + "grad_norm": 18.58839594632303, + "learning_rate": 4.625899827691866e-06, + "loss": 0.13644256591796874, + "step": 73480 + }, + { + "epoch": 0.6354030661213479, + "grad_norm": 3.280821358450475, + "learning_rate": 4.625728606582073e-06, + "loss": 0.09921722412109375, + "step": 73485 + }, + { + "epoch": 0.6354462996428911, + "grad_norm": 6.249670098465486, + "learning_rate": 4.625557377974565e-06, + "loss": 0.22619667053222656, + "step": 73490 + }, + { + "epoch": 0.6354895331644344, + "grad_norm": 29.341468771255215, + "learning_rate": 4.625386141870129e-06, + "loss": 0.2490142822265625, + "step": 73495 + }, + { + "epoch": 0.6355327666859777, + "grad_norm": 3.3936274003133855, + "learning_rate": 4.625214898269555e-06, + "loss": 0.4287078857421875, + "step": 73500 + }, + { + "epoch": 0.6355760002075209, + "grad_norm": 13.057685704487948, + "learning_rate": 4.625043647173635e-06, + "loss": 0.16704940795898438, + "step": 73505 + }, + { + "epoch": 0.6356192337290641, + "grad_norm": 12.668064621170718, + "learning_rate": 4.6248723885831565e-06, + "loss": 0.06367416381835937, + "step": 73510 + }, + { + "epoch": 0.6356624672506074, + "grad_norm": 1.3544389562749488, + "learning_rate": 4.6247011224989105e-06, + "loss": 0.22695846557617189, + "step": 73515 + }, + { + "epoch": 0.6357057007721507, + "grad_norm": 2.2129874916100185, + "learning_rate": 4.624529848921686e-06, + "loss": 0.0701263427734375, + "step": 73520 + }, + { + "epoch": 0.635748934293694, + "grad_norm": 3.515470102477701, + "learning_rate": 4.624358567852274e-06, + "loss": 0.036936187744140626, + "step": 73525 + }, + { + "epoch": 0.6357921678152372, + "grad_norm": 2.077129620744728, + "learning_rate": 4.624187279291464e-06, + "loss": 0.09942245483398438, + "step": 73530 + }, + { + "epoch": 0.6358354013367805, + "grad_norm": 1.4102577784292594, + "learning_rate": 4.624015983240044e-06, + "loss": 0.3118896484375, + "step": 73535 + }, + { + "epoch": 0.6358786348583237, + "grad_norm": 4.448308223284196, + "learning_rate": 4.623844679698808e-06, + "loss": 0.14227676391601562, + "step": 73540 + }, + { + "epoch": 0.635921868379867, + "grad_norm": 0.291860403363193, + "learning_rate": 4.623673368668543e-06, + "loss": 0.053258514404296874, + "step": 73545 + }, + { + "epoch": 0.6359651019014103, + "grad_norm": 17.446972491625157, + "learning_rate": 4.6235020501500395e-06, + "loss": 0.3575897216796875, + "step": 73550 + }, + { + "epoch": 0.6360083354229535, + "grad_norm": 9.102298486989957, + "learning_rate": 4.623330724144088e-06, + "loss": 0.3917694091796875, + "step": 73555 + }, + { + "epoch": 0.6360515689444968, + "grad_norm": 0.8680617845075123, + "learning_rate": 4.62315939065148e-06, + "loss": 0.1655498504638672, + "step": 73560 + }, + { + "epoch": 0.6360948024660401, + "grad_norm": 0.4792586826141573, + "learning_rate": 4.622988049673002e-06, + "loss": 0.25936546325683596, + "step": 73565 + }, + { + "epoch": 0.6361380359875833, + "grad_norm": 13.96379153173553, + "learning_rate": 4.622816701209449e-06, + "loss": 0.26243896484375, + "step": 73570 + }, + { + "epoch": 0.6361812695091266, + "grad_norm": 0.31883890680561683, + "learning_rate": 4.622645345261607e-06, + "loss": 0.25216522216796877, + "step": 73575 + }, + { + "epoch": 0.6362245030306699, + "grad_norm": 7.689249541323969, + "learning_rate": 4.622473981830268e-06, + "loss": 0.24454803466796876, + "step": 73580 + }, + { + "epoch": 0.6362677365522131, + "grad_norm": 1.2223477780499172, + "learning_rate": 4.622302610916224e-06, + "loss": 0.21002197265625, + "step": 73585 + }, + { + "epoch": 0.6363109700737564, + "grad_norm": 24.203382128129224, + "learning_rate": 4.622131232520263e-06, + "loss": 0.15887298583984374, + "step": 73590 + }, + { + "epoch": 0.6363542035952997, + "grad_norm": 3.8046682456849434, + "learning_rate": 4.621959846643175e-06, + "loss": 0.11286964416503906, + "step": 73595 + }, + { + "epoch": 0.6363974371168429, + "grad_norm": 49.91309096482506, + "learning_rate": 4.6217884532857515e-06, + "loss": 0.3956329345703125, + "step": 73600 + }, + { + "epoch": 0.6364406706383862, + "grad_norm": 62.45958547746522, + "learning_rate": 4.621617052448785e-06, + "loss": 0.2956264495849609, + "step": 73605 + }, + { + "epoch": 0.6364839041599294, + "grad_norm": 8.07492921297128, + "learning_rate": 4.621445644133062e-06, + "loss": 0.105743408203125, + "step": 73610 + }, + { + "epoch": 0.6365271376814727, + "grad_norm": 4.6069443180516405, + "learning_rate": 4.621274228339375e-06, + "loss": 0.17231597900390624, + "step": 73615 + }, + { + "epoch": 0.636570371203016, + "grad_norm": 26.33878775293894, + "learning_rate": 4.621102805068515e-06, + "loss": 0.17017974853515624, + "step": 73620 + }, + { + "epoch": 0.6366136047245592, + "grad_norm": 28.052209010910953, + "learning_rate": 4.6209313743212734e-06, + "loss": 0.12615318298339845, + "step": 73625 + }, + { + "epoch": 0.6366568382461025, + "grad_norm": 0.863511910820279, + "learning_rate": 4.620759936098437e-06, + "loss": 0.18549041748046874, + "step": 73630 + }, + { + "epoch": 0.6367000717676458, + "grad_norm": 6.767912741866478, + "learning_rate": 4.620588490400801e-06, + "loss": 0.16337127685546876, + "step": 73635 + }, + { + "epoch": 0.636743305289189, + "grad_norm": 1.0099760301580694, + "learning_rate": 4.6204170372291535e-06, + "loss": 0.13145751953125, + "step": 73640 + }, + { + "epoch": 0.6367865388107323, + "grad_norm": 19.375881188780856, + "learning_rate": 4.6202455765842854e-06, + "loss": 0.10125961303710937, + "step": 73645 + }, + { + "epoch": 0.6368297723322756, + "grad_norm": 2.5768914577735402, + "learning_rate": 4.6200741084669885e-06, + "loss": 0.0887298583984375, + "step": 73650 + }, + { + "epoch": 0.6368730058538188, + "grad_norm": 9.938053341809166, + "learning_rate": 4.619902632878053e-06, + "loss": 0.05966110229492187, + "step": 73655 + }, + { + "epoch": 0.6369162393753621, + "grad_norm": 39.33786067039568, + "learning_rate": 4.619731149818269e-06, + "loss": 0.4162841796875, + "step": 73660 + }, + { + "epoch": 0.6369594728969054, + "grad_norm": 103.10205655707715, + "learning_rate": 4.619559659288428e-06, + "loss": 0.7772087097167969, + "step": 73665 + }, + { + "epoch": 0.6370027064184486, + "grad_norm": 5.147079199995623, + "learning_rate": 4.619388161289322e-06, + "loss": 0.19932861328125, + "step": 73670 + }, + { + "epoch": 0.6370459399399919, + "grad_norm": 26.265727923197606, + "learning_rate": 4.61921665582174e-06, + "loss": 0.1382354736328125, + "step": 73675 + }, + { + "epoch": 0.6370891734615352, + "grad_norm": 30.759715723149718, + "learning_rate": 4.619045142886475e-06, + "loss": 0.37249755859375, + "step": 73680 + }, + { + "epoch": 0.6371324069830784, + "grad_norm": 0.5798786331422464, + "learning_rate": 4.618873622484315e-06, + "loss": 0.12337493896484375, + "step": 73685 + }, + { + "epoch": 0.6371756405046216, + "grad_norm": 10.432549271691837, + "learning_rate": 4.618702094616054e-06, + "loss": 0.29149169921875, + "step": 73690 + }, + { + "epoch": 0.637218874026165, + "grad_norm": 28.896987600468883, + "learning_rate": 4.618530559282482e-06, + "loss": 0.18388290405273439, + "step": 73695 + }, + { + "epoch": 0.6372621075477082, + "grad_norm": 0.6121304399487201, + "learning_rate": 4.6183590164843894e-06, + "loss": 0.10250740051269532, + "step": 73700 + }, + { + "epoch": 0.6373053410692514, + "grad_norm": 1.5508096439083539, + "learning_rate": 4.61818746622257e-06, + "loss": 0.283428955078125, + "step": 73705 + }, + { + "epoch": 0.6373485745907947, + "grad_norm": 1.199649254199831, + "learning_rate": 4.618015908497811e-06, + "loss": 0.05492706298828125, + "step": 73710 + }, + { + "epoch": 0.637391808112338, + "grad_norm": 3.1252780258837043, + "learning_rate": 4.617844343310906e-06, + "loss": 0.25127449035644533, + "step": 73715 + }, + { + "epoch": 0.6374350416338812, + "grad_norm": 29.317880062939263, + "learning_rate": 4.617672770662646e-06, + "loss": 0.29482421875, + "step": 73720 + }, + { + "epoch": 0.6374782751554245, + "grad_norm": 0.5607618318054434, + "learning_rate": 4.617501190553822e-06, + "loss": 0.506109619140625, + "step": 73725 + }, + { + "epoch": 0.6375215086769678, + "grad_norm": 58.84917029660644, + "learning_rate": 4.617329602985226e-06, + "loss": 0.6729568481445313, + "step": 73730 + }, + { + "epoch": 0.637564742198511, + "grad_norm": 8.953717815243342, + "learning_rate": 4.617158007957648e-06, + "loss": 0.1438507080078125, + "step": 73735 + }, + { + "epoch": 0.6376079757200543, + "grad_norm": 2.067251751061946, + "learning_rate": 4.61698640547188e-06, + "loss": 0.0503387451171875, + "step": 73740 + }, + { + "epoch": 0.6376512092415976, + "grad_norm": 1.9903791521285066, + "learning_rate": 4.616814795528714e-06, + "loss": 0.10449371337890626, + "step": 73745 + }, + { + "epoch": 0.6376944427631408, + "grad_norm": 2.088442529678833, + "learning_rate": 4.616643178128942e-06, + "loss": 0.166143798828125, + "step": 73750 + }, + { + "epoch": 0.6377376762846841, + "grad_norm": 7.099687586937969, + "learning_rate": 4.616471553273352e-06, + "loss": 0.17095947265625, + "step": 73755 + }, + { + "epoch": 0.6377809098062274, + "grad_norm": 1.6617055905226934, + "learning_rate": 4.616299920962741e-06, + "loss": 0.03905410766601562, + "step": 73760 + }, + { + "epoch": 0.6378241433277706, + "grad_norm": 0.17411917039981267, + "learning_rate": 4.616128281197896e-06, + "loss": 0.18738937377929688, + "step": 73765 + }, + { + "epoch": 0.6378673768493139, + "grad_norm": 0.5643278003185719, + "learning_rate": 4.61595663397961e-06, + "loss": 0.18669891357421875, + "step": 73770 + }, + { + "epoch": 0.6379106103708572, + "grad_norm": 2.3081572331770532, + "learning_rate": 4.6157849793086755e-06, + "loss": 0.3494964599609375, + "step": 73775 + }, + { + "epoch": 0.6379538438924004, + "grad_norm": 22.491247328361325, + "learning_rate": 4.6156133171858825e-06, + "loss": 0.24088134765625, + "step": 73780 + }, + { + "epoch": 0.6379970774139436, + "grad_norm": 2.422794388602679, + "learning_rate": 4.615441647612025e-06, + "loss": 0.1484375, + "step": 73785 + }, + { + "epoch": 0.638040310935487, + "grad_norm": 15.848836220779408, + "learning_rate": 4.615269970587892e-06, + "loss": 0.19511566162109376, + "step": 73790 + }, + { + "epoch": 0.6380835444570302, + "grad_norm": 18.530804516044757, + "learning_rate": 4.6150982861142775e-06, + "loss": 0.2775531768798828, + "step": 73795 + }, + { + "epoch": 0.6381267779785734, + "grad_norm": 9.25465103778052, + "learning_rate": 4.614926594191973e-06, + "loss": 0.06426258087158203, + "step": 73800 + }, + { + "epoch": 0.6381700115001168, + "grad_norm": 0.918705392985054, + "learning_rate": 4.614754894821768e-06, + "loss": 0.08963165283203126, + "step": 73805 + }, + { + "epoch": 0.63821324502166, + "grad_norm": 4.6836198565859855, + "learning_rate": 4.614583188004457e-06, + "loss": 0.22214202880859374, + "step": 73810 + }, + { + "epoch": 0.6382564785432032, + "grad_norm": 4.402001675708031, + "learning_rate": 4.614411473740832e-06, + "loss": 0.100531005859375, + "step": 73815 + }, + { + "epoch": 0.6382997120647466, + "grad_norm": 4.968776055367012, + "learning_rate": 4.614239752031682e-06, + "loss": 0.20607528686523438, + "step": 73820 + }, + { + "epoch": 0.6383429455862898, + "grad_norm": 3.397065026145923, + "learning_rate": 4.614068022877802e-06, + "loss": 0.065338134765625, + "step": 73825 + }, + { + "epoch": 0.638386179107833, + "grad_norm": 10.8294863407646, + "learning_rate": 4.613896286279983e-06, + "loss": 0.17059478759765626, + "step": 73830 + }, + { + "epoch": 0.6384294126293764, + "grad_norm": 15.250047270436665, + "learning_rate": 4.613724542239016e-06, + "loss": 0.103631591796875, + "step": 73835 + }, + { + "epoch": 0.6384726461509196, + "grad_norm": 1.7241265086314583, + "learning_rate": 4.613552790755695e-06, + "loss": 0.18320770263671876, + "step": 73840 + }, + { + "epoch": 0.6385158796724628, + "grad_norm": 28.656104500260078, + "learning_rate": 4.613381031830811e-06, + "loss": 0.3641510009765625, + "step": 73845 + }, + { + "epoch": 0.6385591131940062, + "grad_norm": 189.1435942092042, + "learning_rate": 4.613209265465156e-06, + "loss": 0.15585479736328126, + "step": 73850 + }, + { + "epoch": 0.6386023467155494, + "grad_norm": 2.8527320627940385, + "learning_rate": 4.613037491659522e-06, + "loss": 0.2482269287109375, + "step": 73855 + }, + { + "epoch": 0.6386455802370926, + "grad_norm": 0.5943741454866135, + "learning_rate": 4.612865710414703e-06, + "loss": 0.26182861328125, + "step": 73860 + }, + { + "epoch": 0.6386888137586358, + "grad_norm": 19.199807449830335, + "learning_rate": 4.6126939217314885e-06, + "loss": 0.081298828125, + "step": 73865 + }, + { + "epoch": 0.6387320472801792, + "grad_norm": 6.309282535103507, + "learning_rate": 4.612522125610672e-06, + "loss": 0.04735107421875, + "step": 73870 + }, + { + "epoch": 0.6387752808017224, + "grad_norm": 25.235523509838675, + "learning_rate": 4.612350322053047e-06, + "loss": 0.40810546875, + "step": 73875 + }, + { + "epoch": 0.6388185143232656, + "grad_norm": 1.573155001972207, + "learning_rate": 4.612178511059404e-06, + "loss": 0.06175155639648437, + "step": 73880 + }, + { + "epoch": 0.638861747844809, + "grad_norm": 1.9671721618439941, + "learning_rate": 4.612006692630537e-06, + "loss": 0.04042205810546875, + "step": 73885 + }, + { + "epoch": 0.6389049813663522, + "grad_norm": 18.987832719581974, + "learning_rate": 4.611834866767236e-06, + "loss": 0.257061767578125, + "step": 73890 + }, + { + "epoch": 0.6389482148878954, + "grad_norm": 0.3258991243791618, + "learning_rate": 4.611663033470297e-06, + "loss": 0.07346229553222657, + "step": 73895 + }, + { + "epoch": 0.6389914484094388, + "grad_norm": 16.445239837843825, + "learning_rate": 4.611491192740509e-06, + "loss": 0.07132949829101562, + "step": 73900 + }, + { + "epoch": 0.639034681930982, + "grad_norm": 5.7957900513005525, + "learning_rate": 4.611319344578668e-06, + "loss": 0.16121482849121094, + "step": 73905 + }, + { + "epoch": 0.6390779154525252, + "grad_norm": 12.791837243138566, + "learning_rate": 4.611147488985562e-06, + "loss": 0.11405506134033203, + "step": 73910 + }, + { + "epoch": 0.6391211489740686, + "grad_norm": 5.584480061911928, + "learning_rate": 4.610975625961987e-06, + "loss": 0.0244110107421875, + "step": 73915 + }, + { + "epoch": 0.6391643824956118, + "grad_norm": 8.93373205397205, + "learning_rate": 4.610803755508735e-06, + "loss": 0.08374862670898438, + "step": 73920 + }, + { + "epoch": 0.639207616017155, + "grad_norm": 2.388766484974075, + "learning_rate": 4.610631877626598e-06, + "loss": 0.1804290771484375, + "step": 73925 + }, + { + "epoch": 0.6392508495386984, + "grad_norm": 16.446933403303007, + "learning_rate": 4.61045999231637e-06, + "loss": 0.14843597412109374, + "step": 73930 + }, + { + "epoch": 0.6392940830602416, + "grad_norm": 5.295984646359416, + "learning_rate": 4.610288099578843e-06, + "loss": 0.23493690490722657, + "step": 73935 + }, + { + "epoch": 0.6393373165817848, + "grad_norm": 11.285302838799184, + "learning_rate": 4.610116199414808e-06, + "loss": 0.137969970703125, + "step": 73940 + }, + { + "epoch": 0.6393805501033282, + "grad_norm": 5.737057574458202, + "learning_rate": 4.6099442918250604e-06, + "loss": 0.12243499755859374, + "step": 73945 + }, + { + "epoch": 0.6394237836248714, + "grad_norm": 1.681938147744449, + "learning_rate": 4.609772376810392e-06, + "loss": 0.1863574981689453, + "step": 73950 + }, + { + "epoch": 0.6394670171464146, + "grad_norm": 6.283549524944497, + "learning_rate": 4.609600454371596e-06, + "loss": 0.0981719970703125, + "step": 73955 + }, + { + "epoch": 0.6395102506679579, + "grad_norm": 1.799247960275507, + "learning_rate": 4.609428524509465e-06, + "loss": 0.093377685546875, + "step": 73960 + }, + { + "epoch": 0.6395534841895012, + "grad_norm": 8.214447820423487, + "learning_rate": 4.60925658722479e-06, + "loss": 0.10438919067382812, + "step": 73965 + }, + { + "epoch": 0.6395967177110444, + "grad_norm": 0.13672002881628117, + "learning_rate": 4.609084642518368e-06, + "loss": 0.13007965087890624, + "step": 73970 + }, + { + "epoch": 0.6396399512325877, + "grad_norm": 5.977618937554442, + "learning_rate": 4.608912690390988e-06, + "loss": 0.1655487060546875, + "step": 73975 + }, + { + "epoch": 0.639683184754131, + "grad_norm": 1.397979583072532, + "learning_rate": 4.6087407308434455e-06, + "loss": 0.12087974548339844, + "step": 73980 + }, + { + "epoch": 0.6397264182756742, + "grad_norm": 0.2465218580372998, + "learning_rate": 4.608568763876534e-06, + "loss": 0.05202932357788086, + "step": 73985 + }, + { + "epoch": 0.6397696517972175, + "grad_norm": 0.8234742550984498, + "learning_rate": 4.608396789491045e-06, + "loss": 0.03896331787109375, + "step": 73990 + }, + { + "epoch": 0.6398128853187608, + "grad_norm": 2.017957915277597, + "learning_rate": 4.6082248076877705e-06, + "loss": 0.13271141052246094, + "step": 73995 + }, + { + "epoch": 0.639856118840304, + "grad_norm": 0.8882997457330047, + "learning_rate": 4.608052818467507e-06, + "loss": 0.14520645141601562, + "step": 74000 + }, + { + "epoch": 0.6398993523618473, + "grad_norm": 29.87836001365475, + "learning_rate": 4.607880821831046e-06, + "loss": 0.36897735595703124, + "step": 74005 + }, + { + "epoch": 0.6399425858833906, + "grad_norm": 4.355386143550085, + "learning_rate": 4.60770881777918e-06, + "loss": 0.0562286376953125, + "step": 74010 + }, + { + "epoch": 0.6399858194049338, + "grad_norm": 26.54860112559389, + "learning_rate": 4.607536806312704e-06, + "loss": 0.10748443603515626, + "step": 74015 + }, + { + "epoch": 0.640029052926477, + "grad_norm": 0.9938643287257753, + "learning_rate": 4.607364787432409e-06, + "loss": 0.08958511352539063, + "step": 74020 + }, + { + "epoch": 0.6400722864480204, + "grad_norm": 30.98810545863923, + "learning_rate": 4.6071927611390904e-06, + "loss": 0.4216339111328125, + "step": 74025 + }, + { + "epoch": 0.6401155199695636, + "grad_norm": 0.9759882723444416, + "learning_rate": 4.607020727433541e-06, + "loss": 0.1329345703125, + "step": 74030 + }, + { + "epoch": 0.6401587534911068, + "grad_norm": 11.822282781717997, + "learning_rate": 4.6068486863165535e-06, + "loss": 0.1832916259765625, + "step": 74035 + }, + { + "epoch": 0.6402019870126501, + "grad_norm": 7.008883881245935, + "learning_rate": 4.606676637788922e-06, + "loss": 0.08012847900390625, + "step": 74040 + }, + { + "epoch": 0.6402452205341934, + "grad_norm": 2.2266687736149766, + "learning_rate": 4.6065045818514395e-06, + "loss": 0.047715377807617185, + "step": 74045 + }, + { + "epoch": 0.6402884540557366, + "grad_norm": 3.632680138844389, + "learning_rate": 4.6063325185049e-06, + "loss": 0.037149810791015626, + "step": 74050 + }, + { + "epoch": 0.6403316875772799, + "grad_norm": 2.512424162100486, + "learning_rate": 4.606160447750096e-06, + "loss": 0.11000900268554688, + "step": 74055 + }, + { + "epoch": 0.6403749210988232, + "grad_norm": 4.234452574116374, + "learning_rate": 4.605988369587823e-06, + "loss": 0.03324127197265625, + "step": 74060 + }, + { + "epoch": 0.6404181546203664, + "grad_norm": 8.66857606551382, + "learning_rate": 4.605816284018874e-06, + "loss": 0.176312255859375, + "step": 74065 + }, + { + "epoch": 0.6404613881419097, + "grad_norm": 0.2654776500265815, + "learning_rate": 4.605644191044042e-06, + "loss": 0.45424880981445315, + "step": 74070 + }, + { + "epoch": 0.640504621663453, + "grad_norm": 4.117843240539004, + "learning_rate": 4.60547209066412e-06, + "loss": 0.03013153076171875, + "step": 74075 + }, + { + "epoch": 0.6405478551849962, + "grad_norm": 0.15968708080361754, + "learning_rate": 4.605299982879903e-06, + "loss": 0.19935264587402343, + "step": 74080 + }, + { + "epoch": 0.6405910887065395, + "grad_norm": 10.418952327771706, + "learning_rate": 4.605127867692184e-06, + "loss": 0.07493782043457031, + "step": 74085 + }, + { + "epoch": 0.6406343222280828, + "grad_norm": 19.02765860487868, + "learning_rate": 4.604955745101757e-06, + "loss": 0.0921173095703125, + "step": 74090 + }, + { + "epoch": 0.640677555749626, + "grad_norm": 2.507621997139112, + "learning_rate": 4.604783615109418e-06, + "loss": 0.13766708374023437, + "step": 74095 + }, + { + "epoch": 0.6407207892711693, + "grad_norm": 0.12169706257396254, + "learning_rate": 4.604611477715957e-06, + "loss": 0.34311904907226565, + "step": 74100 + }, + { + "epoch": 0.6407640227927126, + "grad_norm": 10.11415683225373, + "learning_rate": 4.604439332922169e-06, + "loss": 0.138641357421875, + "step": 74105 + }, + { + "epoch": 0.6408072563142558, + "grad_norm": 1.6006941351740085, + "learning_rate": 4.60426718072885e-06, + "loss": 0.2416595458984375, + "step": 74110 + }, + { + "epoch": 0.6408504898357991, + "grad_norm": 1.2251342922627901, + "learning_rate": 4.604095021136792e-06, + "loss": 0.0476715087890625, + "step": 74115 + }, + { + "epoch": 0.6408937233573424, + "grad_norm": 11.716549560181056, + "learning_rate": 4.6039228541467885e-06, + "loss": 0.4826904296875, + "step": 74120 + }, + { + "epoch": 0.6409369568788856, + "grad_norm": 3.543546519976372, + "learning_rate": 4.6037506797596354e-06, + "loss": 0.1261016845703125, + "step": 74125 + }, + { + "epoch": 0.6409801904004289, + "grad_norm": 2.6178490592539454, + "learning_rate": 4.603578497976126e-06, + "loss": 0.31849365234375, + "step": 74130 + }, + { + "epoch": 0.6410234239219721, + "grad_norm": 22.449286085783907, + "learning_rate": 4.603406308797055e-06, + "loss": 0.16772689819335937, + "step": 74135 + }, + { + "epoch": 0.6410666574435154, + "grad_norm": 1.2839644690871708, + "learning_rate": 4.603234112223214e-06, + "loss": 0.1423248291015625, + "step": 74140 + }, + { + "epoch": 0.6411098909650587, + "grad_norm": 1.799600393838962, + "learning_rate": 4.6030619082554e-06, + "loss": 0.038959503173828125, + "step": 74145 + }, + { + "epoch": 0.6411531244866019, + "grad_norm": 5.363835593582387, + "learning_rate": 4.602889696894407e-06, + "loss": 0.09333000183105469, + "step": 74150 + }, + { + "epoch": 0.6411963580081452, + "grad_norm": 0.3111166615608293, + "learning_rate": 4.602717478141027e-06, + "loss": 0.1097412109375, + "step": 74155 + }, + { + "epoch": 0.6412395915296885, + "grad_norm": 27.25373878201681, + "learning_rate": 4.602545251996056e-06, + "loss": 0.12701950073242188, + "step": 74160 + }, + { + "epoch": 0.6412828250512317, + "grad_norm": 3.6513790825955543, + "learning_rate": 4.602373018460287e-06, + "loss": 0.416070556640625, + "step": 74165 + }, + { + "epoch": 0.641326058572775, + "grad_norm": 45.031001938078916, + "learning_rate": 4.602200777534516e-06, + "loss": 0.5077552795410156, + "step": 74170 + }, + { + "epoch": 0.6413692920943183, + "grad_norm": 32.25576161974131, + "learning_rate": 4.602028529219537e-06, + "loss": 0.19942703247070312, + "step": 74175 + }, + { + "epoch": 0.6414125256158615, + "grad_norm": 6.717105928187041, + "learning_rate": 4.601856273516145e-06, + "loss": 0.10433197021484375, + "step": 74180 + }, + { + "epoch": 0.6414557591374048, + "grad_norm": 4.713331871223265, + "learning_rate": 4.601684010425131e-06, + "loss": 0.09390411376953126, + "step": 74185 + }, + { + "epoch": 0.641498992658948, + "grad_norm": 4.519374725585295, + "learning_rate": 4.601511739947293e-06, + "loss": 0.19206771850585938, + "step": 74190 + }, + { + "epoch": 0.6415422261804913, + "grad_norm": 11.790380505008683, + "learning_rate": 4.601339462083425e-06, + "loss": 0.193719482421875, + "step": 74195 + }, + { + "epoch": 0.6415854597020346, + "grad_norm": 5.229423307156536, + "learning_rate": 4.6011671768343194e-06, + "loss": 0.24835052490234374, + "step": 74200 + }, + { + "epoch": 0.6416286932235779, + "grad_norm": 3.6740915368321154, + "learning_rate": 4.600994884200773e-06, + "loss": 0.18375701904296876, + "step": 74205 + }, + { + "epoch": 0.6416719267451211, + "grad_norm": 1.2115423669578462, + "learning_rate": 4.600822584183581e-06, + "loss": 0.40708160400390625, + "step": 74210 + }, + { + "epoch": 0.6417151602666643, + "grad_norm": 3.5172197007295583, + "learning_rate": 4.600650276783535e-06, + "loss": 0.08717327117919922, + "step": 74215 + }, + { + "epoch": 0.6417583937882076, + "grad_norm": 0.09590306111383766, + "learning_rate": 4.600477962001432e-06, + "loss": 0.06719493865966797, + "step": 74220 + }, + { + "epoch": 0.6418016273097509, + "grad_norm": 1.696834859392481, + "learning_rate": 4.600305639838065e-06, + "loss": 0.12809600830078124, + "step": 74225 + }, + { + "epoch": 0.6418448608312941, + "grad_norm": 3.5304957745734953, + "learning_rate": 4.6001333102942314e-06, + "loss": 0.05395660400390625, + "step": 74230 + }, + { + "epoch": 0.6418880943528374, + "grad_norm": 0.8965776513418446, + "learning_rate": 4.599960973370723e-06, + "loss": 0.16446418762207032, + "step": 74235 + }, + { + "epoch": 0.6419313278743807, + "grad_norm": 0.254927710478059, + "learning_rate": 4.599788629068337e-06, + "loss": 0.0937164306640625, + "step": 74240 + }, + { + "epoch": 0.6419745613959239, + "grad_norm": 27.066178153353615, + "learning_rate": 4.599616277387867e-06, + "loss": 0.16227455139160157, + "step": 74245 + }, + { + "epoch": 0.6420177949174672, + "grad_norm": 10.067114868087046, + "learning_rate": 4.599443918330107e-06, + "loss": 0.2726264953613281, + "step": 74250 + }, + { + "epoch": 0.6420610284390105, + "grad_norm": 1.9451696223608568, + "learning_rate": 4.599271551895854e-06, + "loss": 0.11338567733764648, + "step": 74255 + }, + { + "epoch": 0.6421042619605537, + "grad_norm": 0.8316348211716558, + "learning_rate": 4.599099178085903e-06, + "loss": 0.10381660461425782, + "step": 74260 + }, + { + "epoch": 0.642147495482097, + "grad_norm": 5.8179948277417, + "learning_rate": 4.598926796901045e-06, + "loss": 0.13802947998046874, + "step": 74265 + }, + { + "epoch": 0.6421907290036403, + "grad_norm": 2.0215088998644895, + "learning_rate": 4.59875440834208e-06, + "loss": 0.9203479766845704, + "step": 74270 + }, + { + "epoch": 0.6422339625251835, + "grad_norm": 15.282754236888785, + "learning_rate": 4.598582012409801e-06, + "loss": 0.168084716796875, + "step": 74275 + }, + { + "epoch": 0.6422771960467268, + "grad_norm": 2.0112006472977106, + "learning_rate": 4.598409609105002e-06, + "loss": 0.12069129943847656, + "step": 74280 + }, + { + "epoch": 0.6423204295682701, + "grad_norm": 17.984275021613147, + "learning_rate": 4.598237198428479e-06, + "loss": 0.10660247802734375, + "step": 74285 + }, + { + "epoch": 0.6423636630898133, + "grad_norm": 5.012769901091029, + "learning_rate": 4.598064780381029e-06, + "loss": 0.2563484191894531, + "step": 74290 + }, + { + "epoch": 0.6424068966113565, + "grad_norm": 7.733704346849398, + "learning_rate": 4.5978923549634446e-06, + "loss": 0.11615943908691406, + "step": 74295 + }, + { + "epoch": 0.6424501301328999, + "grad_norm": 28.965749705289735, + "learning_rate": 4.597719922176521e-06, + "loss": 0.09712944030761719, + "step": 74300 + }, + { + "epoch": 0.6424933636544431, + "grad_norm": 3.802541918628115, + "learning_rate": 4.597547482021054e-06, + "loss": 0.0669769287109375, + "step": 74305 + }, + { + "epoch": 0.6425365971759863, + "grad_norm": 30.537375678387605, + "learning_rate": 4.597375034497841e-06, + "loss": 0.38060569763183594, + "step": 74310 + }, + { + "epoch": 0.6425798306975297, + "grad_norm": 0.6382260774481742, + "learning_rate": 4.597202579607674e-06, + "loss": 0.05013427734375, + "step": 74315 + }, + { + "epoch": 0.6426230642190729, + "grad_norm": 2.315809521079082, + "learning_rate": 4.59703011735135e-06, + "loss": 0.05226821899414062, + "step": 74320 + }, + { + "epoch": 0.6426662977406161, + "grad_norm": 13.65193734709583, + "learning_rate": 4.596857647729666e-06, + "loss": 0.16170425415039064, + "step": 74325 + }, + { + "epoch": 0.6427095312621595, + "grad_norm": 25.252659767926655, + "learning_rate": 4.5966851707434135e-06, + "loss": 0.4862091064453125, + "step": 74330 + }, + { + "epoch": 0.6427527647837027, + "grad_norm": 1.4956916264847229, + "learning_rate": 4.59651268639339e-06, + "loss": 0.039776611328125, + "step": 74335 + }, + { + "epoch": 0.6427959983052459, + "grad_norm": 4.680757437097842, + "learning_rate": 4.596340194680393e-06, + "loss": 0.2208160400390625, + "step": 74340 + }, + { + "epoch": 0.6428392318267893, + "grad_norm": 9.366277142068778, + "learning_rate": 4.596167695605215e-06, + "loss": 0.17984619140625, + "step": 74345 + }, + { + "epoch": 0.6428824653483325, + "grad_norm": 6.072155616194733, + "learning_rate": 4.595995189168653e-06, + "loss": 0.0977020263671875, + "step": 74350 + }, + { + "epoch": 0.6429256988698757, + "grad_norm": 7.997170817085685, + "learning_rate": 4.5958226753715005e-06, + "loss": 0.1418010711669922, + "step": 74355 + }, + { + "epoch": 0.6429689323914191, + "grad_norm": 19.191490968899817, + "learning_rate": 4.595650154214556e-06, + "loss": 0.15846023559570313, + "step": 74360 + }, + { + "epoch": 0.6430121659129623, + "grad_norm": 2.7412522111929634, + "learning_rate": 4.595477625698614e-06, + "loss": 0.05455360412597656, + "step": 74365 + }, + { + "epoch": 0.6430553994345055, + "grad_norm": 16.676815158405994, + "learning_rate": 4.5953050898244705e-06, + "loss": 0.0827423095703125, + "step": 74370 + }, + { + "epoch": 0.6430986329560489, + "grad_norm": 0.6320785874748805, + "learning_rate": 4.59513254659292e-06, + "loss": 0.056103515625, + "step": 74375 + }, + { + "epoch": 0.6431418664775921, + "grad_norm": 18.858661949667933, + "learning_rate": 4.59495999600476e-06, + "loss": 0.049957275390625, + "step": 74380 + }, + { + "epoch": 0.6431850999991353, + "grad_norm": 21.98112029792371, + "learning_rate": 4.5947874380607865e-06, + "loss": 0.31192855834960936, + "step": 74385 + }, + { + "epoch": 0.6432283335206785, + "grad_norm": 0.696298735480143, + "learning_rate": 4.5946148727617915e-06, + "loss": 0.2406707763671875, + "step": 74390 + }, + { + "epoch": 0.6432715670422219, + "grad_norm": 6.996521278397048, + "learning_rate": 4.5944423001085745e-06, + "loss": 0.041588211059570314, + "step": 74395 + }, + { + "epoch": 0.6433148005637651, + "grad_norm": 6.116934102313834, + "learning_rate": 4.5942697201019324e-06, + "loss": 0.0589599609375, + "step": 74400 + }, + { + "epoch": 0.6433580340853083, + "grad_norm": 51.40308797206611, + "learning_rate": 4.594097132742657e-06, + "loss": 0.29633102416992185, + "step": 74405 + }, + { + "epoch": 0.6434012676068517, + "grad_norm": 5.3727471693810855, + "learning_rate": 4.593924538031548e-06, + "loss": 0.025933837890625, + "step": 74410 + }, + { + "epoch": 0.6434445011283949, + "grad_norm": 13.996108585966207, + "learning_rate": 4.593751935969399e-06, + "loss": 0.09443511962890624, + "step": 74415 + }, + { + "epoch": 0.6434877346499381, + "grad_norm": 7.634629391365439, + "learning_rate": 4.593579326557007e-06, + "loss": 0.17903003692626954, + "step": 74420 + }, + { + "epoch": 0.6435309681714815, + "grad_norm": 4.218707560703077, + "learning_rate": 4.593406709795168e-06, + "loss": 0.078204345703125, + "step": 74425 + }, + { + "epoch": 0.6435742016930247, + "grad_norm": 4.402823914142428, + "learning_rate": 4.593234085684679e-06, + "loss": 0.2558074951171875, + "step": 74430 + }, + { + "epoch": 0.6436174352145679, + "grad_norm": 13.450865614943403, + "learning_rate": 4.593061454226333e-06, + "loss": 0.1549163818359375, + "step": 74435 + }, + { + "epoch": 0.6436606687361113, + "grad_norm": 6.860739197479317, + "learning_rate": 4.59288881542093e-06, + "loss": 0.100762939453125, + "step": 74440 + }, + { + "epoch": 0.6437039022576545, + "grad_norm": 6.737647644979468, + "learning_rate": 4.592716169269265e-06, + "loss": 0.09935150146484376, + "step": 74445 + }, + { + "epoch": 0.6437471357791977, + "grad_norm": 6.410204880947861, + "learning_rate": 4.5925435157721324e-06, + "loss": 0.139483642578125, + "step": 74450 + }, + { + "epoch": 0.6437903693007411, + "grad_norm": 0.8480184711500961, + "learning_rate": 4.59237085493033e-06, + "loss": 0.1345458984375, + "step": 74455 + }, + { + "epoch": 0.6438336028222843, + "grad_norm": 22.929472341834405, + "learning_rate": 4.5921981867446544e-06, + "loss": 0.1183868408203125, + "step": 74460 + }, + { + "epoch": 0.6438768363438275, + "grad_norm": 48.44218156128871, + "learning_rate": 4.592025511215902e-06, + "loss": 0.22169189453125, + "step": 74465 + }, + { + "epoch": 0.6439200698653708, + "grad_norm": 2.849396468494553, + "learning_rate": 4.591852828344869e-06, + "loss": 0.13031005859375, + "step": 74470 + }, + { + "epoch": 0.6439633033869141, + "grad_norm": 0.9746994884009736, + "learning_rate": 4.591680138132349e-06, + "loss": 0.139141845703125, + "step": 74475 + }, + { + "epoch": 0.6440065369084573, + "grad_norm": 7.605286279525541, + "learning_rate": 4.5915074405791426e-06, + "loss": 0.26363525390625, + "step": 74480 + }, + { + "epoch": 0.6440497704300006, + "grad_norm": 12.64544235183988, + "learning_rate": 4.591334735686043e-06, + "loss": 0.10403633117675781, + "step": 74485 + }, + { + "epoch": 0.6440930039515439, + "grad_norm": 25.198360552322505, + "learning_rate": 4.591162023453851e-06, + "loss": 0.269488525390625, + "step": 74490 + }, + { + "epoch": 0.6441362374730871, + "grad_norm": 2.8504996200147863, + "learning_rate": 4.5909893038833575e-06, + "loss": 0.04682235717773438, + "step": 74495 + }, + { + "epoch": 0.6441794709946304, + "grad_norm": 4.466167403707793, + "learning_rate": 4.590816576975364e-06, + "loss": 0.2956024169921875, + "step": 74500 + }, + { + "epoch": 0.6442227045161737, + "grad_norm": 3.9241882025480455, + "learning_rate": 4.590643842730663e-06, + "loss": 0.360498046875, + "step": 74505 + }, + { + "epoch": 0.6442659380377169, + "grad_norm": 12.747871411662235, + "learning_rate": 4.590471101150055e-06, + "loss": 0.22486572265625, + "step": 74510 + }, + { + "epoch": 0.6443091715592602, + "grad_norm": 0.5052523841896406, + "learning_rate": 4.590298352234334e-06, + "loss": 0.02541351318359375, + "step": 74515 + }, + { + "epoch": 0.6443524050808035, + "grad_norm": 0.595895678515646, + "learning_rate": 4.590125595984297e-06, + "loss": 0.1314830780029297, + "step": 74520 + }, + { + "epoch": 0.6443956386023467, + "grad_norm": 16.071010476658625, + "learning_rate": 4.589952832400742e-06, + "loss": 0.17975540161132814, + "step": 74525 + }, + { + "epoch": 0.64443887212389, + "grad_norm": 19.28851879048348, + "learning_rate": 4.589780061484465e-06, + "loss": 0.2184429168701172, + "step": 74530 + }, + { + "epoch": 0.6444821056454333, + "grad_norm": 2.413452000812654, + "learning_rate": 4.589607283236263e-06, + "loss": 0.09564628601074218, + "step": 74535 + }, + { + "epoch": 0.6445253391669765, + "grad_norm": 21.27143260025927, + "learning_rate": 4.589434497656932e-06, + "loss": 0.5987815856933594, + "step": 74540 + }, + { + "epoch": 0.6445685726885197, + "grad_norm": 27.083209400488176, + "learning_rate": 4.58926170474727e-06, + "loss": 0.14036407470703124, + "step": 74545 + }, + { + "epoch": 0.6446118062100631, + "grad_norm": 11.53720274487668, + "learning_rate": 4.5890889045080735e-06, + "loss": 0.088128662109375, + "step": 74550 + }, + { + "epoch": 0.6446550397316063, + "grad_norm": 38.37568730679178, + "learning_rate": 4.58891609694014e-06, + "loss": 0.2641314506530762, + "step": 74555 + }, + { + "epoch": 0.6446982732531495, + "grad_norm": 11.671497904684163, + "learning_rate": 4.588743282044264e-06, + "loss": 0.20264129638671874, + "step": 74560 + }, + { + "epoch": 0.6447415067746928, + "grad_norm": 11.690458822658757, + "learning_rate": 4.588570459821246e-06, + "loss": 0.09694900512695312, + "step": 74565 + }, + { + "epoch": 0.6447847402962361, + "grad_norm": 5.003542565572646, + "learning_rate": 4.588397630271881e-06, + "loss": 0.1161764144897461, + "step": 74570 + }, + { + "epoch": 0.6448279738177793, + "grad_norm": 0.40092314162733506, + "learning_rate": 4.588224793396966e-06, + "loss": 0.1782459259033203, + "step": 74575 + }, + { + "epoch": 0.6448712073393226, + "grad_norm": 3.421280992549283, + "learning_rate": 4.5880519491973e-06, + "loss": 0.10012435913085938, + "step": 74580 + }, + { + "epoch": 0.6449144408608659, + "grad_norm": 35.904387403069755, + "learning_rate": 4.587879097673677e-06, + "loss": 0.4006011962890625, + "step": 74585 + }, + { + "epoch": 0.6449576743824091, + "grad_norm": 1.714192434513268, + "learning_rate": 4.5877062388268965e-06, + "loss": 0.4263916015625, + "step": 74590 + }, + { + "epoch": 0.6450009079039524, + "grad_norm": 13.493087917072309, + "learning_rate": 4.587533372657755e-06, + "loss": 0.100396728515625, + "step": 74595 + }, + { + "epoch": 0.6450441414254957, + "grad_norm": 18.504389736467555, + "learning_rate": 4.58736049916705e-06, + "loss": 0.152203369140625, + "step": 74600 + }, + { + "epoch": 0.6450873749470389, + "grad_norm": 3.8830554273603997, + "learning_rate": 4.587187618355579e-06, + "loss": 0.05614166259765625, + "step": 74605 + }, + { + "epoch": 0.6451306084685822, + "grad_norm": 6.356703315127631, + "learning_rate": 4.587014730224139e-06, + "loss": 0.150634765625, + "step": 74610 + }, + { + "epoch": 0.6451738419901255, + "grad_norm": 8.595039616694594, + "learning_rate": 4.586841834773526e-06, + "loss": 0.0656951904296875, + "step": 74615 + }, + { + "epoch": 0.6452170755116687, + "grad_norm": 13.37423887253168, + "learning_rate": 4.58666893200454e-06, + "loss": 0.20432281494140625, + "step": 74620 + }, + { + "epoch": 0.645260309033212, + "grad_norm": 72.5923779529411, + "learning_rate": 4.586496021917977e-06, + "loss": 0.20216064453125, + "step": 74625 + }, + { + "epoch": 0.6453035425547553, + "grad_norm": 11.998369811532177, + "learning_rate": 4.586323104514634e-06, + "loss": 0.3512767791748047, + "step": 74630 + }, + { + "epoch": 0.6453467760762985, + "grad_norm": 3.536121768185365, + "learning_rate": 4.58615017979531e-06, + "loss": 0.1511322021484375, + "step": 74635 + }, + { + "epoch": 0.6453900095978418, + "grad_norm": 5.324719121155961, + "learning_rate": 4.585977247760801e-06, + "loss": 0.30940399169921873, + "step": 74640 + }, + { + "epoch": 0.645433243119385, + "grad_norm": 12.66175344059472, + "learning_rate": 4.585804308411905e-06, + "loss": 0.420343017578125, + "step": 74645 + }, + { + "epoch": 0.6454764766409283, + "grad_norm": 5.991174354583864, + "learning_rate": 4.585631361749419e-06, + "loss": 0.16414909362792968, + "step": 74650 + }, + { + "epoch": 0.6455197101624716, + "grad_norm": 0.8564578133733907, + "learning_rate": 4.5854584077741415e-06, + "loss": 0.13811798095703126, + "step": 74655 + }, + { + "epoch": 0.6455629436840148, + "grad_norm": 1.21140828779813, + "learning_rate": 4.585285446486871e-06, + "loss": 0.2278651237487793, + "step": 74660 + }, + { + "epoch": 0.6456061772055581, + "grad_norm": 1.9195378768166285, + "learning_rate": 4.585112477888403e-06, + "loss": 0.06654129028320313, + "step": 74665 + }, + { + "epoch": 0.6456494107271014, + "grad_norm": 9.572999633770658, + "learning_rate": 4.584939501979536e-06, + "loss": 0.09894561767578125, + "step": 74670 + }, + { + "epoch": 0.6456926442486446, + "grad_norm": 65.98380648754085, + "learning_rate": 4.58476651876107e-06, + "loss": 0.06137847900390625, + "step": 74675 + }, + { + "epoch": 0.6457358777701879, + "grad_norm": 55.44651415877992, + "learning_rate": 4.584593528233798e-06, + "loss": 0.3770343780517578, + "step": 74680 + }, + { + "epoch": 0.6457791112917312, + "grad_norm": 55.53290982236381, + "learning_rate": 4.5844205303985225e-06, + "loss": 0.20182952880859376, + "step": 74685 + }, + { + "epoch": 0.6458223448132744, + "grad_norm": 14.474191123133272, + "learning_rate": 4.58424752525604e-06, + "loss": 0.2345611572265625, + "step": 74690 + }, + { + "epoch": 0.6458655783348177, + "grad_norm": 10.956675560541722, + "learning_rate": 4.584074512807147e-06, + "loss": 0.19451370239257812, + "step": 74695 + }, + { + "epoch": 0.645908811856361, + "grad_norm": 1.4174918675107464, + "learning_rate": 4.583901493052642e-06, + "loss": 0.06574172973632812, + "step": 74700 + }, + { + "epoch": 0.6459520453779042, + "grad_norm": 28.05259234468206, + "learning_rate": 4.583728465993323e-06, + "loss": 0.13361282348632814, + "step": 74705 + }, + { + "epoch": 0.6459952788994475, + "grad_norm": 3.626803276458315, + "learning_rate": 4.5835554316299895e-06, + "loss": 0.12046051025390625, + "step": 74710 + }, + { + "epoch": 0.6460385124209908, + "grad_norm": 11.634477223352423, + "learning_rate": 4.583382389963438e-06, + "loss": 0.1469738006591797, + "step": 74715 + }, + { + "epoch": 0.646081745942534, + "grad_norm": 5.356729884503018, + "learning_rate": 4.583209340994466e-06, + "loss": 0.11362457275390625, + "step": 74720 + }, + { + "epoch": 0.6461249794640773, + "grad_norm": 19.997829727276546, + "learning_rate": 4.583036284723873e-06, + "loss": 0.3137825012207031, + "step": 74725 + }, + { + "epoch": 0.6461682129856205, + "grad_norm": 13.252082362856846, + "learning_rate": 4.582863221152456e-06, + "loss": 0.12164535522460937, + "step": 74730 + }, + { + "epoch": 0.6462114465071638, + "grad_norm": 1.0097353979228014, + "learning_rate": 4.582690150281015e-06, + "loss": 0.2025623321533203, + "step": 74735 + }, + { + "epoch": 0.646254680028707, + "grad_norm": 21.51967923009965, + "learning_rate": 4.582517072110346e-06, + "loss": 0.13208847045898436, + "step": 74740 + }, + { + "epoch": 0.6462979135502503, + "grad_norm": 0.184861954281375, + "learning_rate": 4.582343986641248e-06, + "loss": 0.02713470458984375, + "step": 74745 + }, + { + "epoch": 0.6463411470717936, + "grad_norm": 1.1261141735064155, + "learning_rate": 4.58217089387452e-06, + "loss": 0.2669258117675781, + "step": 74750 + }, + { + "epoch": 0.6463843805933368, + "grad_norm": 34.073126010682984, + "learning_rate": 4.581997793810959e-06, + "loss": 0.433685302734375, + "step": 74755 + }, + { + "epoch": 0.6464276141148801, + "grad_norm": 1.1922085612292457, + "learning_rate": 4.581824686451363e-06, + "loss": 0.03560028076171875, + "step": 74760 + }, + { + "epoch": 0.6464708476364234, + "grad_norm": 5.892631971693643, + "learning_rate": 4.581651571796534e-06, + "loss": 0.16256103515625, + "step": 74765 + }, + { + "epoch": 0.6465140811579666, + "grad_norm": 0.4234476991838321, + "learning_rate": 4.581478449847265e-06, + "loss": 0.128851318359375, + "step": 74770 + }, + { + "epoch": 0.6465573146795099, + "grad_norm": 12.341321059622535, + "learning_rate": 4.581305320604359e-06, + "loss": 0.248297119140625, + "step": 74775 + }, + { + "epoch": 0.6466005482010532, + "grad_norm": 0.19119726764611064, + "learning_rate": 4.581132184068612e-06, + "loss": 0.45320968627929686, + "step": 74780 + }, + { + "epoch": 0.6466437817225964, + "grad_norm": 6.78406812456177, + "learning_rate": 4.580959040240823e-06, + "loss": 0.11596298217773438, + "step": 74785 + }, + { + "epoch": 0.6466870152441397, + "grad_norm": 28.630281686222858, + "learning_rate": 4.58078588912179e-06, + "loss": 0.24013671875, + "step": 74790 + }, + { + "epoch": 0.646730248765683, + "grad_norm": 1.4859158866752817, + "learning_rate": 4.580612730712313e-06, + "loss": 0.3124603271484375, + "step": 74795 + }, + { + "epoch": 0.6467734822872262, + "grad_norm": 4.9569581014162605, + "learning_rate": 4.58043956501319e-06, + "loss": 0.08203620910644531, + "step": 74800 + }, + { + "epoch": 0.6468167158087695, + "grad_norm": 2.2899120354293996, + "learning_rate": 4.580266392025218e-06, + "loss": 0.08788986206054687, + "step": 74805 + }, + { + "epoch": 0.6468599493303128, + "grad_norm": 19.00269899959483, + "learning_rate": 4.5800932117491986e-06, + "loss": 0.07432022094726562, + "step": 74810 + }, + { + "epoch": 0.646903182851856, + "grad_norm": 15.680792018201052, + "learning_rate": 4.579920024185928e-06, + "loss": 0.3112518310546875, + "step": 74815 + }, + { + "epoch": 0.6469464163733992, + "grad_norm": 18.17753583178992, + "learning_rate": 4.579746829336206e-06, + "loss": 0.13575267791748047, + "step": 74820 + }, + { + "epoch": 0.6469896498949426, + "grad_norm": 13.321468211263557, + "learning_rate": 4.579573627200831e-06, + "loss": 0.33111419677734377, + "step": 74825 + }, + { + "epoch": 0.6470328834164858, + "grad_norm": 1.6007176271529535, + "learning_rate": 4.5794004177806026e-06, + "loss": 0.1434844970703125, + "step": 74830 + }, + { + "epoch": 0.647076116938029, + "grad_norm": 11.566504087933454, + "learning_rate": 4.5792272010763185e-06, + "loss": 0.25000152587890623, + "step": 74835 + }, + { + "epoch": 0.6471193504595724, + "grad_norm": 9.677385716887036, + "learning_rate": 4.579053977088778e-06, + "loss": 0.16523895263671876, + "step": 74840 + }, + { + "epoch": 0.6471625839811156, + "grad_norm": 1.2446285389453948, + "learning_rate": 4.57888074581878e-06, + "loss": 0.0876556396484375, + "step": 74845 + }, + { + "epoch": 0.6472058175026588, + "grad_norm": 3.6894402316717025, + "learning_rate": 4.578707507267125e-06, + "loss": 0.1737579345703125, + "step": 74850 + }, + { + "epoch": 0.6472490510242022, + "grad_norm": 8.472884388214542, + "learning_rate": 4.578534261434609e-06, + "loss": 0.03208160400390625, + "step": 74855 + }, + { + "epoch": 0.6472922845457454, + "grad_norm": 7.885304698391263, + "learning_rate": 4.578361008322033e-06, + "loss": 0.15125579833984376, + "step": 74860 + }, + { + "epoch": 0.6473355180672886, + "grad_norm": 21.712960373229276, + "learning_rate": 4.578187747930196e-06, + "loss": 0.4804420471191406, + "step": 74865 + }, + { + "epoch": 0.647378751588832, + "grad_norm": 12.402119900041043, + "learning_rate": 4.578014480259894e-06, + "loss": 0.44145889282226564, + "step": 74870 + }, + { + "epoch": 0.6474219851103752, + "grad_norm": 3.0751031570991, + "learning_rate": 4.577841205311932e-06, + "loss": 0.07380218505859375, + "step": 74875 + }, + { + "epoch": 0.6474652186319184, + "grad_norm": 2.656275178013226, + "learning_rate": 4.577667923087104e-06, + "loss": 0.07314682006835938, + "step": 74880 + }, + { + "epoch": 0.6475084521534618, + "grad_norm": 0.13507602388175444, + "learning_rate": 4.577494633586211e-06, + "loss": 0.19590072631835936, + "step": 74885 + }, + { + "epoch": 0.647551685675005, + "grad_norm": 20.81865816179239, + "learning_rate": 4.577321336810052e-06, + "loss": 0.18535995483398438, + "step": 74890 + }, + { + "epoch": 0.6475949191965482, + "grad_norm": 2.5504395407167593, + "learning_rate": 4.577148032759427e-06, + "loss": 0.034346389770507815, + "step": 74895 + }, + { + "epoch": 0.6476381527180916, + "grad_norm": 13.918557344656557, + "learning_rate": 4.576974721435134e-06, + "loss": 0.05576400756835938, + "step": 74900 + }, + { + "epoch": 0.6476813862396348, + "grad_norm": 15.153539559353943, + "learning_rate": 4.576801402837974e-06, + "loss": 0.22513427734375, + "step": 74905 + }, + { + "epoch": 0.647724619761178, + "grad_norm": 1.036132165960198, + "learning_rate": 4.576628076968744e-06, + "loss": 0.060558319091796875, + "step": 74910 + }, + { + "epoch": 0.6477678532827212, + "grad_norm": 7.38476179035454, + "learning_rate": 4.576454743828247e-06, + "loss": 0.04070587158203125, + "step": 74915 + }, + { + "epoch": 0.6478110868042646, + "grad_norm": 2.156518860906239, + "learning_rate": 4.576281403417277e-06, + "loss": 0.1803863525390625, + "step": 74920 + }, + { + "epoch": 0.6478543203258078, + "grad_norm": 4.977111185405864, + "learning_rate": 4.576108055736638e-06, + "loss": 0.09898796081542968, + "step": 74925 + }, + { + "epoch": 0.647897553847351, + "grad_norm": 10.14787313009605, + "learning_rate": 4.575934700787127e-06, + "loss": 0.20294647216796874, + "step": 74930 + }, + { + "epoch": 0.6479407873688944, + "grad_norm": 7.86047571520432, + "learning_rate": 4.575761338569545e-06, + "loss": 0.07872238159179687, + "step": 74935 + }, + { + "epoch": 0.6479840208904376, + "grad_norm": 1.30693863101227, + "learning_rate": 4.575587969084691e-06, + "loss": 0.1471527099609375, + "step": 74940 + }, + { + "epoch": 0.6480272544119808, + "grad_norm": 15.36019365591339, + "learning_rate": 4.575414592333366e-06, + "loss": 0.1079376220703125, + "step": 74945 + }, + { + "epoch": 0.6480704879335242, + "grad_norm": 9.759088971388998, + "learning_rate": 4.575241208316365e-06, + "loss": 0.2800590515136719, + "step": 74950 + }, + { + "epoch": 0.6481137214550674, + "grad_norm": 19.9626835663502, + "learning_rate": 4.575067817034492e-06, + "loss": 0.126043701171875, + "step": 74955 + }, + { + "epoch": 0.6481569549766106, + "grad_norm": 0.6207569790120292, + "learning_rate": 4.5748944184885465e-06, + "loss": 0.17135887145996093, + "step": 74960 + }, + { + "epoch": 0.648200188498154, + "grad_norm": 1.3138453810261208, + "learning_rate": 4.5747210126793255e-06, + "loss": 0.06912574768066407, + "step": 74965 + }, + { + "epoch": 0.6482434220196972, + "grad_norm": 35.40521183192663, + "learning_rate": 4.574547599607632e-06, + "loss": 0.2335479736328125, + "step": 74970 + }, + { + "epoch": 0.6482866555412404, + "grad_norm": 6.9593126169001325, + "learning_rate": 4.574374179274263e-06, + "loss": 0.08271484375, + "step": 74975 + }, + { + "epoch": 0.6483298890627838, + "grad_norm": 8.561268452219698, + "learning_rate": 4.574200751680019e-06, + "loss": 0.04227180480957031, + "step": 74980 + }, + { + "epoch": 0.648373122584327, + "grad_norm": 17.04089513639992, + "learning_rate": 4.574027316825701e-06, + "loss": 0.07725982666015625, + "step": 74985 + }, + { + "epoch": 0.6484163561058702, + "grad_norm": 1.162718720195107, + "learning_rate": 4.573853874712109e-06, + "loss": 0.09673309326171875, + "step": 74990 + }, + { + "epoch": 0.6484595896274135, + "grad_norm": 7.255615664636072, + "learning_rate": 4.573680425340041e-06, + "loss": 0.28111801147460935, + "step": 74995 + }, + { + "epoch": 0.6485028231489568, + "grad_norm": 0.38868281963818807, + "learning_rate": 4.5735069687102975e-06, + "loss": 0.022150802612304687, + "step": 75000 + }, + { + "epoch": 0.6485460566705, + "grad_norm": 0.6262941667301577, + "learning_rate": 4.573333504823679e-06, + "loss": 0.04300117492675781, + "step": 75005 + }, + { + "epoch": 0.6485892901920433, + "grad_norm": 39.274881269961924, + "learning_rate": 4.5731600336809856e-06, + "loss": 0.8635971069335937, + "step": 75010 + }, + { + "epoch": 0.6486325237135866, + "grad_norm": 3.5872633494434294, + "learning_rate": 4.572986555283017e-06, + "loss": 0.08537063598632813, + "step": 75015 + }, + { + "epoch": 0.6486757572351298, + "grad_norm": 13.769916112644967, + "learning_rate": 4.572813069630574e-06, + "loss": 0.06451263427734374, + "step": 75020 + }, + { + "epoch": 0.648718990756673, + "grad_norm": 1.4094983508997294, + "learning_rate": 4.572639576724456e-06, + "loss": 0.025945281982421874, + "step": 75025 + }, + { + "epoch": 0.6487622242782164, + "grad_norm": 5.098666111271238, + "learning_rate": 4.572466076565462e-06, + "loss": 0.073944091796875, + "step": 75030 + }, + { + "epoch": 0.6488054577997596, + "grad_norm": 17.531544209956692, + "learning_rate": 4.572292569154395e-06, + "loss": 0.25752410888671873, + "step": 75035 + }, + { + "epoch": 0.6488486913213029, + "grad_norm": 0.8367877691047936, + "learning_rate": 4.572119054492053e-06, + "loss": 0.19330291748046874, + "step": 75040 + }, + { + "epoch": 0.6488919248428462, + "grad_norm": 0.3406825451204203, + "learning_rate": 4.571945532579236e-06, + "loss": 0.239508056640625, + "step": 75045 + }, + { + "epoch": 0.6489351583643894, + "grad_norm": 3.1152273554325958, + "learning_rate": 4.571772003416746e-06, + "loss": 0.097149658203125, + "step": 75050 + }, + { + "epoch": 0.6489783918859326, + "grad_norm": 1.0248749083541833, + "learning_rate": 4.571598467005383e-06, + "loss": 0.04582481384277344, + "step": 75055 + }, + { + "epoch": 0.649021625407476, + "grad_norm": 18.06846625046443, + "learning_rate": 4.571424923345946e-06, + "loss": 0.06775474548339844, + "step": 75060 + }, + { + "epoch": 0.6490648589290192, + "grad_norm": 23.76961368038139, + "learning_rate": 4.5712513724392365e-06, + "loss": 0.2817953109741211, + "step": 75065 + }, + { + "epoch": 0.6491080924505624, + "grad_norm": 2.101260738176022, + "learning_rate": 4.5710778142860536e-06, + "loss": 0.12809486389160157, + "step": 75070 + }, + { + "epoch": 0.6491513259721058, + "grad_norm": 36.53473254234812, + "learning_rate": 4.570904248887199e-06, + "loss": 0.18823928833007814, + "step": 75075 + }, + { + "epoch": 0.649194559493649, + "grad_norm": 1.4461995671424308, + "learning_rate": 4.570730676243473e-06, + "loss": 0.36910400390625, + "step": 75080 + }, + { + "epoch": 0.6492377930151922, + "grad_norm": 17.241012369662847, + "learning_rate": 4.570557096355676e-06, + "loss": 0.134918212890625, + "step": 75085 + }, + { + "epoch": 0.6492810265367355, + "grad_norm": 0.6169352951495357, + "learning_rate": 4.570383509224609e-06, + "loss": 0.17074928283691407, + "step": 75090 + }, + { + "epoch": 0.6493242600582788, + "grad_norm": 1.0659172896335576, + "learning_rate": 4.570209914851071e-06, + "loss": 0.16025848388671876, + "step": 75095 + }, + { + "epoch": 0.649367493579822, + "grad_norm": 0.625523632158434, + "learning_rate": 4.570036313235864e-06, + "loss": 0.17475242614746095, + "step": 75100 + }, + { + "epoch": 0.6494107271013653, + "grad_norm": 3.4914964135367925, + "learning_rate": 4.569862704379788e-06, + "loss": 0.3175640106201172, + "step": 75105 + }, + { + "epoch": 0.6494539606229086, + "grad_norm": 2.644487353422937, + "learning_rate": 4.569689088283644e-06, + "loss": 0.0515411376953125, + "step": 75110 + }, + { + "epoch": 0.6494971941444518, + "grad_norm": 27.573565961581764, + "learning_rate": 4.569515464948233e-06, + "loss": 0.10293426513671874, + "step": 75115 + }, + { + "epoch": 0.6495404276659951, + "grad_norm": 1.0902409842427925, + "learning_rate": 4.569341834374356e-06, + "loss": 0.055892562866210936, + "step": 75120 + }, + { + "epoch": 0.6495836611875384, + "grad_norm": 1.5406290568413346, + "learning_rate": 4.5691681965628115e-06, + "loss": 0.138055419921875, + "step": 75125 + }, + { + "epoch": 0.6496268947090816, + "grad_norm": 0.032788288898967614, + "learning_rate": 4.568994551514402e-06, + "loss": 0.0893585205078125, + "step": 75130 + }, + { + "epoch": 0.6496701282306249, + "grad_norm": 3.4348537188277044, + "learning_rate": 4.5688208992299295e-06, + "loss": 0.21505126953125, + "step": 75135 + }, + { + "epoch": 0.6497133617521682, + "grad_norm": 6.150180335488286, + "learning_rate": 4.568647239710194e-06, + "loss": 0.1860872268676758, + "step": 75140 + }, + { + "epoch": 0.6497565952737114, + "grad_norm": 13.721432590888822, + "learning_rate": 4.568473572955995e-06, + "loss": 0.2760894775390625, + "step": 75145 + }, + { + "epoch": 0.6497998287952547, + "grad_norm": 5.766079800456218, + "learning_rate": 4.568299898968134e-06, + "loss": 0.0689208984375, + "step": 75150 + }, + { + "epoch": 0.649843062316798, + "grad_norm": 7.631035968214457, + "learning_rate": 4.568126217747413e-06, + "loss": 0.33055877685546875, + "step": 75155 + }, + { + "epoch": 0.6498862958383412, + "grad_norm": 1.1857794109033575, + "learning_rate": 4.567952529294633e-06, + "loss": 0.2079315185546875, + "step": 75160 + }, + { + "epoch": 0.6499295293598845, + "grad_norm": 8.064031072853844, + "learning_rate": 4.567778833610594e-06, + "loss": 0.09006271362304688, + "step": 75165 + }, + { + "epoch": 0.6499727628814277, + "grad_norm": 1.1937345140375053, + "learning_rate": 4.567605130696098e-06, + "loss": 0.09777412414550782, + "step": 75170 + }, + { + "epoch": 0.650015996402971, + "grad_norm": 6.19224808563283, + "learning_rate": 4.567431420551945e-06, + "loss": 0.3125244140625, + "step": 75175 + }, + { + "epoch": 0.6500592299245143, + "grad_norm": 7.919126456779183, + "learning_rate": 4.567257703178936e-06, + "loss": 0.18207244873046874, + "step": 75180 + }, + { + "epoch": 0.6501024634460575, + "grad_norm": 12.520588742989334, + "learning_rate": 4.5670839785778755e-06, + "loss": 0.36324920654296877, + "step": 75185 + }, + { + "epoch": 0.6501456969676008, + "grad_norm": 6.847591508878233, + "learning_rate": 4.566910246749559e-06, + "loss": 0.23040771484375, + "step": 75190 + }, + { + "epoch": 0.6501889304891441, + "grad_norm": 23.70384146968068, + "learning_rate": 4.566736507694793e-06, + "loss": 0.2184051513671875, + "step": 75195 + }, + { + "epoch": 0.6502321640106873, + "grad_norm": 17.693726166631272, + "learning_rate": 4.5665627614143765e-06, + "loss": 0.14890213012695314, + "step": 75200 + }, + { + "epoch": 0.6502753975322306, + "grad_norm": 30.030665340096938, + "learning_rate": 4.566389007909111e-06, + "loss": 0.14877777099609374, + "step": 75205 + }, + { + "epoch": 0.6503186310537739, + "grad_norm": 13.51745339714628, + "learning_rate": 4.566215247179797e-06, + "loss": 0.22025642395019532, + "step": 75210 + }, + { + "epoch": 0.6503618645753171, + "grad_norm": 15.01292728600095, + "learning_rate": 4.566041479227237e-06, + "loss": 0.07257919311523438, + "step": 75215 + }, + { + "epoch": 0.6504050980968604, + "grad_norm": 3.9452223828371147, + "learning_rate": 4.565867704052232e-06, + "loss": 0.21024932861328124, + "step": 75220 + }, + { + "epoch": 0.6504483316184037, + "grad_norm": 7.68174888235615, + "learning_rate": 4.5656939216555845e-06, + "loss": 0.08846511840820312, + "step": 75225 + }, + { + "epoch": 0.6504915651399469, + "grad_norm": 0.9698893464407072, + "learning_rate": 4.565520132038094e-06, + "loss": 0.07454185485839844, + "step": 75230 + }, + { + "epoch": 0.6505347986614902, + "grad_norm": 6.776140513484127, + "learning_rate": 4.565346335200563e-06, + "loss": 0.10995330810546874, + "step": 75235 + }, + { + "epoch": 0.6505780321830335, + "grad_norm": 11.844249273137427, + "learning_rate": 4.565172531143793e-06, + "loss": 0.22312164306640625, + "step": 75240 + }, + { + "epoch": 0.6506212657045767, + "grad_norm": 2.3090294969388463, + "learning_rate": 4.564998719868585e-06, + "loss": 0.270782470703125, + "step": 75245 + }, + { + "epoch": 0.65066449922612, + "grad_norm": 20.938582430409507, + "learning_rate": 4.5648249013757425e-06, + "loss": 0.0767852783203125, + "step": 75250 + }, + { + "epoch": 0.6507077327476632, + "grad_norm": 12.531277081918065, + "learning_rate": 4.564651075666065e-06, + "loss": 0.2094696044921875, + "step": 75255 + }, + { + "epoch": 0.6507509662692065, + "grad_norm": 13.290389923914617, + "learning_rate": 4.564477242740355e-06, + "loss": 0.09093093872070312, + "step": 75260 + }, + { + "epoch": 0.6507941997907497, + "grad_norm": 5.035278441809691, + "learning_rate": 4.5643034025994155e-06, + "loss": 0.10399856567382812, + "step": 75265 + }, + { + "epoch": 0.650837433312293, + "grad_norm": 2.2139564430568726, + "learning_rate": 4.564129555244046e-06, + "loss": 0.20756149291992188, + "step": 75270 + }, + { + "epoch": 0.6508806668338363, + "grad_norm": 4.828569410420537, + "learning_rate": 4.5639557006750485e-06, + "loss": 0.45070343017578124, + "step": 75275 + }, + { + "epoch": 0.6509239003553795, + "grad_norm": 26.234021850148988, + "learning_rate": 4.5637818388932265e-06, + "loss": 0.2432159423828125, + "step": 75280 + }, + { + "epoch": 0.6509671338769228, + "grad_norm": 40.24315910637952, + "learning_rate": 4.56360796989938e-06, + "loss": 0.27427520751953127, + "step": 75285 + }, + { + "epoch": 0.6510103673984661, + "grad_norm": 3.9430169447995826, + "learning_rate": 4.563434093694313e-06, + "loss": 0.09317398071289062, + "step": 75290 + }, + { + "epoch": 0.6510536009200093, + "grad_norm": 4.117518459986426, + "learning_rate": 4.563260210278825e-06, + "loss": 0.15433578491210936, + "step": 75295 + }, + { + "epoch": 0.6510968344415526, + "grad_norm": 23.1447719708329, + "learning_rate": 4.5630863196537184e-06, + "loss": 0.177392578125, + "step": 75300 + }, + { + "epoch": 0.6511400679630959, + "grad_norm": 1.4526027407706887, + "learning_rate": 4.5629124218197975e-06, + "loss": 0.05306396484375, + "step": 75305 + }, + { + "epoch": 0.6511833014846391, + "grad_norm": 7.733897414255729, + "learning_rate": 4.5627385167778625e-06, + "loss": 0.11132278442382812, + "step": 75310 + }, + { + "epoch": 0.6512265350061824, + "grad_norm": 0.216855258057323, + "learning_rate": 4.562564604528714e-06, + "loss": 0.07626113891601563, + "step": 75315 + }, + { + "epoch": 0.6512697685277257, + "grad_norm": 5.636299787353812, + "learning_rate": 4.562390685073157e-06, + "loss": 0.1080657958984375, + "step": 75320 + }, + { + "epoch": 0.6513130020492689, + "grad_norm": 33.114434687163296, + "learning_rate": 4.562216758411991e-06, + "loss": 0.384771728515625, + "step": 75325 + }, + { + "epoch": 0.6513562355708122, + "grad_norm": 0.7879955668205855, + "learning_rate": 4.56204282454602e-06, + "loss": 0.17141036987304686, + "step": 75330 + }, + { + "epoch": 0.6513994690923555, + "grad_norm": 5.702600435796597, + "learning_rate": 4.5618688834760454e-06, + "loss": 0.0827972412109375, + "step": 75335 + }, + { + "epoch": 0.6514427026138987, + "grad_norm": 20.72477746560003, + "learning_rate": 4.56169493520287e-06, + "loss": 0.0899566650390625, + "step": 75340 + }, + { + "epoch": 0.6514859361354419, + "grad_norm": 8.418164373312907, + "learning_rate": 4.561520979727296e-06, + "loss": 0.2607889175415039, + "step": 75345 + }, + { + "epoch": 0.6515291696569853, + "grad_norm": 22.04646814713398, + "learning_rate": 4.561347017050124e-06, + "loss": 0.0970123291015625, + "step": 75350 + }, + { + "epoch": 0.6515724031785285, + "grad_norm": 0.4768371585545882, + "learning_rate": 4.5611730471721575e-06, + "loss": 0.05951995849609375, + "step": 75355 + }, + { + "epoch": 0.6516156367000717, + "grad_norm": 0.8886120572195966, + "learning_rate": 4.5609990700942e-06, + "loss": 0.026635360717773438, + "step": 75360 + }, + { + "epoch": 0.6516588702216151, + "grad_norm": 20.199029681135723, + "learning_rate": 4.560825085817052e-06, + "loss": 0.23103675842285157, + "step": 75365 + }, + { + "epoch": 0.6517021037431583, + "grad_norm": 3.3542025102224406, + "learning_rate": 4.560651094341516e-06, + "loss": 0.1380340576171875, + "step": 75370 + }, + { + "epoch": 0.6517453372647015, + "grad_norm": 2.2049819647963567, + "learning_rate": 4.5604770956683965e-06, + "loss": 0.07642440795898438, + "step": 75375 + }, + { + "epoch": 0.6517885707862449, + "grad_norm": 0.9868805300236088, + "learning_rate": 4.560303089798494e-06, + "loss": 0.15608139038085939, + "step": 75380 + }, + { + "epoch": 0.6518318043077881, + "grad_norm": 12.428058325255043, + "learning_rate": 4.5601290767326104e-06, + "loss": 0.04861373901367187, + "step": 75385 + }, + { + "epoch": 0.6518750378293313, + "grad_norm": 1.1986540480255408, + "learning_rate": 4.559955056471551e-06, + "loss": 0.13717060089111327, + "step": 75390 + }, + { + "epoch": 0.6519182713508747, + "grad_norm": 13.397164096868092, + "learning_rate": 4.559781029016115e-06, + "loss": 0.5453178405761718, + "step": 75395 + }, + { + "epoch": 0.6519615048724179, + "grad_norm": 0.5365334973976157, + "learning_rate": 4.559606994367108e-06, + "loss": 0.12396430969238281, + "step": 75400 + }, + { + "epoch": 0.6520047383939611, + "grad_norm": 1.5682725967216906, + "learning_rate": 4.5594329525253315e-06, + "loss": 0.0372161865234375, + "step": 75405 + }, + { + "epoch": 0.6520479719155045, + "grad_norm": 0.29144291596846594, + "learning_rate": 4.559258903491588e-06, + "loss": 0.16330337524414062, + "step": 75410 + }, + { + "epoch": 0.6520912054370477, + "grad_norm": 13.48364879275411, + "learning_rate": 4.559084847266679e-06, + "loss": 0.16486053466796874, + "step": 75415 + }, + { + "epoch": 0.6521344389585909, + "grad_norm": 15.656174437181644, + "learning_rate": 4.558910783851409e-06, + "loss": 0.08248672485351563, + "step": 75420 + }, + { + "epoch": 0.6521776724801343, + "grad_norm": 9.119349126698163, + "learning_rate": 4.55873671324658e-06, + "loss": 0.10410423278808593, + "step": 75425 + }, + { + "epoch": 0.6522209060016775, + "grad_norm": 45.803420065090755, + "learning_rate": 4.558562635452996e-06, + "loss": 0.2818733215332031, + "step": 75430 + }, + { + "epoch": 0.6522641395232207, + "grad_norm": 5.137632572609606, + "learning_rate": 4.558388550471458e-06, + "loss": 0.09641876220703124, + "step": 75435 + }, + { + "epoch": 0.6523073730447639, + "grad_norm": 34.454919601731866, + "learning_rate": 4.55821445830277e-06, + "loss": 0.22740573883056642, + "step": 75440 + }, + { + "epoch": 0.6523506065663073, + "grad_norm": 12.075498677609112, + "learning_rate": 4.5580403589477345e-06, + "loss": 0.10547103881835937, + "step": 75445 + }, + { + "epoch": 0.6523938400878505, + "grad_norm": 0.16175841587411832, + "learning_rate": 4.557866252407155e-06, + "loss": 0.015883636474609376, + "step": 75450 + }, + { + "epoch": 0.6524370736093937, + "grad_norm": 8.254015482714697, + "learning_rate": 4.557692138681834e-06, + "loss": 0.2392974853515625, + "step": 75455 + }, + { + "epoch": 0.6524803071309371, + "grad_norm": 2.0479224684177004, + "learning_rate": 4.557518017772574e-06, + "loss": 0.08755874633789062, + "step": 75460 + }, + { + "epoch": 0.6525235406524803, + "grad_norm": 18.8605161761929, + "learning_rate": 4.557343889680178e-06, + "loss": 0.36229190826416013, + "step": 75465 + }, + { + "epoch": 0.6525667741740235, + "grad_norm": 25.903911221084925, + "learning_rate": 4.5571697544054505e-06, + "loss": 0.29275970458984374, + "step": 75470 + }, + { + "epoch": 0.6526100076955669, + "grad_norm": 3.060151526553671, + "learning_rate": 4.556995611949193e-06, + "loss": 0.17662200927734376, + "step": 75475 + }, + { + "epoch": 0.6526532412171101, + "grad_norm": 0.9573005883090135, + "learning_rate": 4.55682146231221e-06, + "loss": 0.08346748352050781, + "step": 75480 + }, + { + "epoch": 0.6526964747386533, + "grad_norm": 21.904089743567727, + "learning_rate": 4.556647305495304e-06, + "loss": 0.2722440719604492, + "step": 75485 + }, + { + "epoch": 0.6527397082601967, + "grad_norm": 0.40477455909499704, + "learning_rate": 4.556473141499278e-06, + "loss": 0.16421241760253907, + "step": 75490 + }, + { + "epoch": 0.6527829417817399, + "grad_norm": 2.398283979274457, + "learning_rate": 4.556298970324935e-06, + "loss": 0.15057373046875, + "step": 75495 + }, + { + "epoch": 0.6528261753032831, + "grad_norm": 6.463584375026799, + "learning_rate": 4.556124791973078e-06, + "loss": 0.189788818359375, + "step": 75500 + }, + { + "epoch": 0.6528694088248265, + "grad_norm": 0.11894743130165053, + "learning_rate": 4.5559506064445126e-06, + "loss": 0.03857498168945313, + "step": 75505 + }, + { + "epoch": 0.6529126423463697, + "grad_norm": 0.4879238043650314, + "learning_rate": 4.5557764137400395e-06, + "loss": 0.13661041259765624, + "step": 75510 + }, + { + "epoch": 0.6529558758679129, + "grad_norm": 14.721271876209443, + "learning_rate": 4.555602213860464e-06, + "loss": 0.0807037353515625, + "step": 75515 + }, + { + "epoch": 0.6529991093894562, + "grad_norm": 0.9774836494631295, + "learning_rate": 4.555428006806588e-06, + "loss": 0.9114776611328125, + "step": 75520 + }, + { + "epoch": 0.6530423429109995, + "grad_norm": 20.371803388547125, + "learning_rate": 4.555253792579214e-06, + "loss": 0.41552886962890623, + "step": 75525 + }, + { + "epoch": 0.6530855764325427, + "grad_norm": 20.886110462245664, + "learning_rate": 4.555079571179149e-06, + "loss": 0.16577835083007814, + "step": 75530 + }, + { + "epoch": 0.653128809954086, + "grad_norm": 39.986860774136325, + "learning_rate": 4.554905342607194e-06, + "loss": 0.17197742462158203, + "step": 75535 + }, + { + "epoch": 0.6531720434756293, + "grad_norm": 0.6779812235389552, + "learning_rate": 4.554731106864153e-06, + "loss": 0.019037628173828126, + "step": 75540 + }, + { + "epoch": 0.6532152769971725, + "grad_norm": 26.557704839758447, + "learning_rate": 4.5545568639508295e-06, + "loss": 0.2661834716796875, + "step": 75545 + }, + { + "epoch": 0.6532585105187158, + "grad_norm": 0.9393608544432395, + "learning_rate": 4.554382613868026e-06, + "loss": 0.04736824035644531, + "step": 75550 + }, + { + "epoch": 0.6533017440402591, + "grad_norm": 4.764039682522629, + "learning_rate": 4.5542083566165474e-06, + "loss": 0.08035125732421874, + "step": 75555 + }, + { + "epoch": 0.6533449775618023, + "grad_norm": 20.575515084194056, + "learning_rate": 4.554034092197198e-06, + "loss": 0.179046630859375, + "step": 75560 + }, + { + "epoch": 0.6533882110833455, + "grad_norm": 2.6928941176553227, + "learning_rate": 4.553859820610782e-06, + "loss": 0.12999725341796875, + "step": 75565 + }, + { + "epoch": 0.6534314446048889, + "grad_norm": 10.059713230949159, + "learning_rate": 4.5536855418580995e-06, + "loss": 0.103173828125, + "step": 75570 + }, + { + "epoch": 0.6534746781264321, + "grad_norm": 2.1983862214563294, + "learning_rate": 4.553511255939957e-06, + "loss": 0.09832382202148438, + "step": 75575 + }, + { + "epoch": 0.6535179116479753, + "grad_norm": 1.9003872394246961, + "learning_rate": 4.5533369628571585e-06, + "loss": 0.062242889404296876, + "step": 75580 + }, + { + "epoch": 0.6535611451695187, + "grad_norm": 2.9822117502434766, + "learning_rate": 4.553162662610507e-06, + "loss": 0.24293899536132812, + "step": 75585 + }, + { + "epoch": 0.6536043786910619, + "grad_norm": 2.3696980003682664, + "learning_rate": 4.552988355200807e-06, + "loss": 0.09941558837890625, + "step": 75590 + }, + { + "epoch": 0.6536476122126051, + "grad_norm": 45.13601641502207, + "learning_rate": 4.552814040628861e-06, + "loss": 0.1501974105834961, + "step": 75595 + }, + { + "epoch": 0.6536908457341485, + "grad_norm": 32.25895516009417, + "learning_rate": 4.552639718895474e-06, + "loss": 0.20605316162109374, + "step": 75600 + }, + { + "epoch": 0.6537340792556917, + "grad_norm": 1.1916297192590042, + "learning_rate": 4.552465390001449e-06, + "loss": 0.16968536376953125, + "step": 75605 + }, + { + "epoch": 0.6537773127772349, + "grad_norm": 10.705415276852294, + "learning_rate": 4.552291053947593e-06, + "loss": 0.27530059814453123, + "step": 75610 + }, + { + "epoch": 0.6538205462987782, + "grad_norm": 24.384896872717718, + "learning_rate": 4.552116710734707e-06, + "loss": 0.09554672241210938, + "step": 75615 + }, + { + "epoch": 0.6538637798203215, + "grad_norm": 30.86175922159469, + "learning_rate": 4.551942360363595e-06, + "loss": 0.30312576293945315, + "step": 75620 + }, + { + "epoch": 0.6539070133418647, + "grad_norm": 0.40993874116387163, + "learning_rate": 4.551768002835063e-06, + "loss": 0.07748031616210938, + "step": 75625 + }, + { + "epoch": 0.653950246863408, + "grad_norm": 34.20887058667518, + "learning_rate": 4.551593638149913e-06, + "loss": 0.287945556640625, + "step": 75630 + }, + { + "epoch": 0.6539934803849513, + "grad_norm": 0.5135404827617508, + "learning_rate": 4.5514192663089506e-06, + "loss": 0.08913116455078125, + "step": 75635 + }, + { + "epoch": 0.6540367139064945, + "grad_norm": 0.7653674122669792, + "learning_rate": 4.55124488731298e-06, + "loss": 0.12115020751953125, + "step": 75640 + }, + { + "epoch": 0.6540799474280378, + "grad_norm": 7.072185893382836, + "learning_rate": 4.551070501162806e-06, + "loss": 0.09646453857421874, + "step": 75645 + }, + { + "epoch": 0.6541231809495811, + "grad_norm": 6.128807446002314, + "learning_rate": 4.55089610785923e-06, + "loss": 0.1078369140625, + "step": 75650 + }, + { + "epoch": 0.6541664144711243, + "grad_norm": 14.0503163052047, + "learning_rate": 4.550721707403059e-06, + "loss": 0.110076904296875, + "step": 75655 + }, + { + "epoch": 0.6542096479926676, + "grad_norm": 0.3476067276933997, + "learning_rate": 4.550547299795097e-06, + "loss": 0.12443695068359376, + "step": 75660 + }, + { + "epoch": 0.6542528815142109, + "grad_norm": 0.23947925044177776, + "learning_rate": 4.5503728850361474e-06, + "loss": 0.30490570068359374, + "step": 75665 + }, + { + "epoch": 0.6542961150357541, + "grad_norm": 4.202286695664399, + "learning_rate": 4.550198463127014e-06, + "loss": 0.15144815444946289, + "step": 75670 + }, + { + "epoch": 0.6543393485572974, + "grad_norm": 14.097840076703395, + "learning_rate": 4.550024034068504e-06, + "loss": 0.31886749267578124, + "step": 75675 + }, + { + "epoch": 0.6543825820788407, + "grad_norm": 1.2596821939827076, + "learning_rate": 4.549849597861419e-06, + "loss": 0.1257488250732422, + "step": 75680 + }, + { + "epoch": 0.6544258156003839, + "grad_norm": 1.1729473659568248, + "learning_rate": 4.549675154506566e-06, + "loss": 0.20535888671875, + "step": 75685 + }, + { + "epoch": 0.6544690491219272, + "grad_norm": 19.928141701241824, + "learning_rate": 4.549500704004746e-06, + "loss": 0.15814361572265626, + "step": 75690 + }, + { + "epoch": 0.6545122826434704, + "grad_norm": 7.396253146486615, + "learning_rate": 4.549326246356767e-06, + "loss": 0.06957130432128907, + "step": 75695 + }, + { + "epoch": 0.6545555161650137, + "grad_norm": 0.5289139874107315, + "learning_rate": 4.549151781563433e-06, + "loss": 0.1639801025390625, + "step": 75700 + }, + { + "epoch": 0.654598749686557, + "grad_norm": 27.812946557642363, + "learning_rate": 4.548977309625546e-06, + "loss": 0.08728179931640626, + "step": 75705 + }, + { + "epoch": 0.6546419832081002, + "grad_norm": 9.39386208495906, + "learning_rate": 4.548802830543914e-06, + "loss": 0.1148956298828125, + "step": 75710 + }, + { + "epoch": 0.6546852167296435, + "grad_norm": 1.9050311074036423, + "learning_rate": 4.548628344319339e-06, + "loss": 0.0870330810546875, + "step": 75715 + }, + { + "epoch": 0.6547284502511868, + "grad_norm": 18.081941584454807, + "learning_rate": 4.548453850952628e-06, + "loss": 0.17063064575195314, + "step": 75720 + }, + { + "epoch": 0.65477168377273, + "grad_norm": 2.2359531563544968, + "learning_rate": 4.548279350444585e-06, + "loss": 0.17593002319335938, + "step": 75725 + }, + { + "epoch": 0.6548149172942733, + "grad_norm": 5.234784151854341, + "learning_rate": 4.548104842796013e-06, + "loss": 0.14066848754882813, + "step": 75730 + }, + { + "epoch": 0.6548581508158166, + "grad_norm": 3.6000278718461534, + "learning_rate": 4.547930328007719e-06, + "loss": 0.0303924560546875, + "step": 75735 + }, + { + "epoch": 0.6549013843373598, + "grad_norm": 49.110300711005046, + "learning_rate": 4.547755806080507e-06, + "loss": 0.5972076416015625, + "step": 75740 + }, + { + "epoch": 0.6549446178589031, + "grad_norm": 0.17175190264493032, + "learning_rate": 4.547581277015181e-06, + "loss": 0.11053390502929687, + "step": 75745 + }, + { + "epoch": 0.6549878513804464, + "grad_norm": 3.3825785668496464, + "learning_rate": 4.547406740812549e-06, + "loss": 0.26373291015625, + "step": 75750 + }, + { + "epoch": 0.6550310849019896, + "grad_norm": 2.1205221754655206, + "learning_rate": 4.547232197473413e-06, + "loss": 0.03260993957519531, + "step": 75755 + }, + { + "epoch": 0.6550743184235329, + "grad_norm": 8.547960174792896, + "learning_rate": 4.547057646998577e-06, + "loss": 0.1347076416015625, + "step": 75760 + }, + { + "epoch": 0.6551175519450761, + "grad_norm": 7.529594666652603, + "learning_rate": 4.546883089388851e-06, + "loss": 0.0726715087890625, + "step": 75765 + }, + { + "epoch": 0.6551607854666194, + "grad_norm": 2.0693428654711976, + "learning_rate": 4.546708524645035e-06, + "loss": 0.157720947265625, + "step": 75770 + }, + { + "epoch": 0.6552040189881626, + "grad_norm": 6.498716662747156, + "learning_rate": 4.546533952767935e-06, + "loss": 0.03326873779296875, + "step": 75775 + }, + { + "epoch": 0.655247252509706, + "grad_norm": 12.907636738891616, + "learning_rate": 4.546359373758359e-06, + "loss": 0.059014892578125, + "step": 75780 + }, + { + "epoch": 0.6552904860312492, + "grad_norm": 4.914055259748412, + "learning_rate": 4.54618478761711e-06, + "loss": 0.1002685546875, + "step": 75785 + }, + { + "epoch": 0.6553337195527924, + "grad_norm": 1.1746162872002146, + "learning_rate": 4.546010194344993e-06, + "loss": 0.0882843017578125, + "step": 75790 + }, + { + "epoch": 0.6553769530743357, + "grad_norm": 22.28978803207979, + "learning_rate": 4.545835593942813e-06, + "loss": 0.16729888916015626, + "step": 75795 + }, + { + "epoch": 0.655420186595879, + "grad_norm": 11.575581878449057, + "learning_rate": 4.545660986411376e-06, + "loss": 0.344805908203125, + "step": 75800 + }, + { + "epoch": 0.6554634201174222, + "grad_norm": 3.98309541918297, + "learning_rate": 4.545486371751487e-06, + "loss": 0.37526779174804686, + "step": 75805 + }, + { + "epoch": 0.6555066536389655, + "grad_norm": 6.613967082894372, + "learning_rate": 4.545311749963952e-06, + "loss": 0.05475540161132812, + "step": 75810 + }, + { + "epoch": 0.6555498871605088, + "grad_norm": 23.16319159686809, + "learning_rate": 4.545137121049576e-06, + "loss": 0.09606475830078125, + "step": 75815 + }, + { + "epoch": 0.655593120682052, + "grad_norm": 5.960923303340144, + "learning_rate": 4.544962485009164e-06, + "loss": 0.620098876953125, + "step": 75820 + }, + { + "epoch": 0.6556363542035953, + "grad_norm": 36.115565781307254, + "learning_rate": 4.5447878418435215e-06, + "loss": 0.18161277770996093, + "step": 75825 + }, + { + "epoch": 0.6556795877251386, + "grad_norm": 0.08445100189890416, + "learning_rate": 4.544613191553453e-06, + "loss": 0.10424728393554687, + "step": 75830 + }, + { + "epoch": 0.6557228212466818, + "grad_norm": 2.125438208314465, + "learning_rate": 4.544438534139767e-06, + "loss": 0.25353469848632815, + "step": 75835 + }, + { + "epoch": 0.6557660547682251, + "grad_norm": 14.59163946557489, + "learning_rate": 4.5442638696032644e-06, + "loss": 0.114215087890625, + "step": 75840 + }, + { + "epoch": 0.6558092882897684, + "grad_norm": 4.8709909614722555, + "learning_rate": 4.544089197944755e-06, + "loss": 0.13773727416992188, + "step": 75845 + }, + { + "epoch": 0.6558525218113116, + "grad_norm": 11.906888733651241, + "learning_rate": 4.543914519165042e-06, + "loss": 0.28834381103515627, + "step": 75850 + }, + { + "epoch": 0.6558957553328549, + "grad_norm": 18.015801143879305, + "learning_rate": 4.543739833264932e-06, + "loss": 0.36954193115234374, + "step": 75855 + }, + { + "epoch": 0.6559389888543982, + "grad_norm": 1.3255236181175287, + "learning_rate": 4.54356514024523e-06, + "loss": 0.22991485595703126, + "step": 75860 + }, + { + "epoch": 0.6559822223759414, + "grad_norm": 0.9673805856930955, + "learning_rate": 4.543390440106743e-06, + "loss": 0.36873626708984375, + "step": 75865 + }, + { + "epoch": 0.6560254558974846, + "grad_norm": 0.1707768197089351, + "learning_rate": 4.5432157328502736e-06, + "loss": 0.11653900146484375, + "step": 75870 + }, + { + "epoch": 0.656068689419028, + "grad_norm": 3.545016536907956, + "learning_rate": 4.54304101847663e-06, + "loss": 0.08140182495117188, + "step": 75875 + }, + { + "epoch": 0.6561119229405712, + "grad_norm": 1.525965403168843, + "learning_rate": 4.54286629698662e-06, + "loss": 0.08939590454101562, + "step": 75880 + }, + { + "epoch": 0.6561551564621144, + "grad_norm": 11.350072654591274, + "learning_rate": 4.542691568381044e-06, + "loss": 0.04369354248046875, + "step": 75885 + }, + { + "epoch": 0.6561983899836578, + "grad_norm": 9.803645269580766, + "learning_rate": 4.542516832660712e-06, + "loss": 0.20133209228515625, + "step": 75890 + }, + { + "epoch": 0.656241623505201, + "grad_norm": 19.200341286281247, + "learning_rate": 4.54234208982643e-06, + "loss": 0.28311767578125, + "step": 75895 + }, + { + "epoch": 0.6562848570267442, + "grad_norm": 4.717191473316156, + "learning_rate": 4.542167339879001e-06, + "loss": 0.12740478515625, + "step": 75900 + }, + { + "epoch": 0.6563280905482876, + "grad_norm": 0.7074292136193612, + "learning_rate": 4.541992582819233e-06, + "loss": 0.2798919677734375, + "step": 75905 + }, + { + "epoch": 0.6563713240698308, + "grad_norm": 13.74613714775428, + "learning_rate": 4.541817818647931e-06, + "loss": 0.11328125, + "step": 75910 + }, + { + "epoch": 0.656414557591374, + "grad_norm": 0.4603670430643765, + "learning_rate": 4.541643047365901e-06, + "loss": 0.19914093017578124, + "step": 75915 + }, + { + "epoch": 0.6564577911129174, + "grad_norm": 5.75740727655194, + "learning_rate": 4.54146826897395e-06, + "loss": 0.05657501220703125, + "step": 75920 + }, + { + "epoch": 0.6565010246344606, + "grad_norm": 3.5891448320027752, + "learning_rate": 4.541293483472884e-06, + "loss": 0.0596435546875, + "step": 75925 + }, + { + "epoch": 0.6565442581560038, + "grad_norm": 11.235558627027663, + "learning_rate": 4.541118690863509e-06, + "loss": 0.2376739501953125, + "step": 75930 + }, + { + "epoch": 0.6565874916775472, + "grad_norm": 25.545602184855593, + "learning_rate": 4.540943891146629e-06, + "loss": 0.52843017578125, + "step": 75935 + }, + { + "epoch": 0.6566307251990904, + "grad_norm": 16.314162192547286, + "learning_rate": 4.540769084323055e-06, + "loss": 0.49070205688476565, + "step": 75940 + }, + { + "epoch": 0.6566739587206336, + "grad_norm": 1.983567103978766, + "learning_rate": 4.540594270393587e-06, + "loss": 0.05732879638671875, + "step": 75945 + }, + { + "epoch": 0.6567171922421768, + "grad_norm": 11.18993803198225, + "learning_rate": 4.540419449359036e-06, + "loss": 0.40599594116210935, + "step": 75950 + }, + { + "epoch": 0.6567604257637202, + "grad_norm": 0.5180984070410767, + "learning_rate": 4.540244621220207e-06, + "loss": 0.0210357666015625, + "step": 75955 + }, + { + "epoch": 0.6568036592852634, + "grad_norm": 5.834942949146578, + "learning_rate": 4.540069785977904e-06, + "loss": 0.09295387268066406, + "step": 75960 + }, + { + "epoch": 0.6568468928068066, + "grad_norm": 9.745374682455639, + "learning_rate": 4.539894943632936e-06, + "loss": 0.084808349609375, + "step": 75965 + }, + { + "epoch": 0.65689012632835, + "grad_norm": 10.2252277894298, + "learning_rate": 4.5397200941861086e-06, + "loss": 0.23465423583984374, + "step": 75970 + }, + { + "epoch": 0.6569333598498932, + "grad_norm": 5.011470361759208, + "learning_rate": 4.539545237638229e-06, + "loss": 0.16402587890625, + "step": 75975 + }, + { + "epoch": 0.6569765933714364, + "grad_norm": 2.1169111458512413, + "learning_rate": 4.539370373990103e-06, + "loss": 0.01979217529296875, + "step": 75980 + }, + { + "epoch": 0.6570198268929798, + "grad_norm": 14.663440893027035, + "learning_rate": 4.539195503242536e-06, + "loss": 0.13490066528320313, + "step": 75985 + }, + { + "epoch": 0.657063060414523, + "grad_norm": 6.520563208314991, + "learning_rate": 4.539020625396336e-06, + "loss": 0.04998092651367188, + "step": 75990 + }, + { + "epoch": 0.6571062939360662, + "grad_norm": 4.393678141800739, + "learning_rate": 4.538845740452308e-06, + "loss": 0.14142990112304688, + "step": 75995 + }, + { + "epoch": 0.6571495274576096, + "grad_norm": 3.8258360095045787, + "learning_rate": 4.538670848411259e-06, + "loss": 0.13386764526367187, + "step": 76000 + }, + { + "epoch": 0.6571927609791528, + "grad_norm": 13.45712932475012, + "learning_rate": 4.538495949273998e-06, + "loss": 0.166888427734375, + "step": 76005 + }, + { + "epoch": 0.657235994500696, + "grad_norm": 5.068250584323297, + "learning_rate": 4.538321043041328e-06, + "loss": 0.026751708984375, + "step": 76010 + }, + { + "epoch": 0.6572792280222394, + "grad_norm": 0.5501706593876656, + "learning_rate": 4.538146129714057e-06, + "loss": 0.139678955078125, + "step": 76015 + }, + { + "epoch": 0.6573224615437826, + "grad_norm": 0.4203035666459783, + "learning_rate": 4.537971209292993e-06, + "loss": 0.2732656478881836, + "step": 76020 + }, + { + "epoch": 0.6573656950653258, + "grad_norm": 6.291321494602763, + "learning_rate": 4.537796281778941e-06, + "loss": 0.22567710876464844, + "step": 76025 + }, + { + "epoch": 0.6574089285868692, + "grad_norm": 0.42769145783915496, + "learning_rate": 4.537621347172709e-06, + "loss": 0.12746734619140626, + "step": 76030 + }, + { + "epoch": 0.6574521621084124, + "grad_norm": 16.28979911307027, + "learning_rate": 4.537446405475103e-06, + "loss": 0.1494293212890625, + "step": 76035 + }, + { + "epoch": 0.6574953956299556, + "grad_norm": 4.365833548611852, + "learning_rate": 4.5372714566869305e-06, + "loss": 0.10404319763183593, + "step": 76040 + }, + { + "epoch": 0.6575386291514989, + "grad_norm": 9.449524964116888, + "learning_rate": 4.5370965008089975e-06, + "loss": 0.32475624084472654, + "step": 76045 + }, + { + "epoch": 0.6575818626730422, + "grad_norm": 7.54693469097031, + "learning_rate": 4.536921537842111e-06, + "loss": 0.06175537109375, + "step": 76050 + }, + { + "epoch": 0.6576250961945854, + "grad_norm": 2.3605221175978994, + "learning_rate": 4.536746567787078e-06, + "loss": 0.03158721923828125, + "step": 76055 + }, + { + "epoch": 0.6576683297161287, + "grad_norm": 0.6787141348291984, + "learning_rate": 4.536571590644707e-06, + "loss": 0.28432464599609375, + "step": 76060 + }, + { + "epoch": 0.657711563237672, + "grad_norm": 1.5622492647878694, + "learning_rate": 4.536396606415801e-06, + "loss": 0.0993743896484375, + "step": 76065 + }, + { + "epoch": 0.6577547967592152, + "grad_norm": 3.5684206708220927, + "learning_rate": 4.536221615101171e-06, + "loss": 0.0708587646484375, + "step": 76070 + }, + { + "epoch": 0.6577980302807585, + "grad_norm": 10.611020066489328, + "learning_rate": 4.5360466167016245e-06, + "loss": 0.09609909057617187, + "step": 76075 + }, + { + "epoch": 0.6578412638023018, + "grad_norm": 2.5466405337051357, + "learning_rate": 4.535871611217964e-06, + "loss": 0.30932464599609377, + "step": 76080 + }, + { + "epoch": 0.657884497323845, + "grad_norm": 63.210276539704765, + "learning_rate": 4.535696598651e-06, + "loss": 0.367041015625, + "step": 76085 + }, + { + "epoch": 0.6579277308453882, + "grad_norm": 0.5315869124774398, + "learning_rate": 4.5355215790015395e-06, + "loss": 0.11577568054199219, + "step": 76090 + }, + { + "epoch": 0.6579709643669316, + "grad_norm": 1.4933734242021222, + "learning_rate": 4.53534655227039e-06, + "loss": 0.046649169921875, + "step": 76095 + }, + { + "epoch": 0.6580141978884748, + "grad_norm": 9.717620122723167, + "learning_rate": 4.535171518458356e-06, + "loss": 0.05386199951171875, + "step": 76100 + }, + { + "epoch": 0.658057431410018, + "grad_norm": 1.23515394585501, + "learning_rate": 4.534996477566248e-06, + "loss": 0.1103759765625, + "step": 76105 + }, + { + "epoch": 0.6581006649315614, + "grad_norm": 3.6639899127587507, + "learning_rate": 4.534821429594871e-06, + "loss": 0.15411605834960937, + "step": 76110 + }, + { + "epoch": 0.6581438984531046, + "grad_norm": 1.3458276199694088, + "learning_rate": 4.534646374545034e-06, + "loss": 0.0639556884765625, + "step": 76115 + }, + { + "epoch": 0.6581871319746478, + "grad_norm": 18.399597382224105, + "learning_rate": 4.534471312417544e-06, + "loss": 0.1584625244140625, + "step": 76120 + }, + { + "epoch": 0.6582303654961911, + "grad_norm": 29.34478793883277, + "learning_rate": 4.534296243213206e-06, + "loss": 0.10559463500976562, + "step": 76125 + }, + { + "epoch": 0.6582735990177344, + "grad_norm": 27.99421880729404, + "learning_rate": 4.53412116693283e-06, + "loss": 0.23617172241210938, + "step": 76130 + }, + { + "epoch": 0.6583168325392776, + "grad_norm": 3.83525430214928, + "learning_rate": 4.533946083577224e-06, + "loss": 0.1303955078125, + "step": 76135 + }, + { + "epoch": 0.6583600660608209, + "grad_norm": 23.306293240670794, + "learning_rate": 4.533770993147193e-06, + "loss": 0.21598663330078124, + "step": 76140 + }, + { + "epoch": 0.6584032995823642, + "grad_norm": 0.32102420022803857, + "learning_rate": 4.533595895643545e-06, + "loss": 0.063763427734375, + "step": 76145 + }, + { + "epoch": 0.6584465331039074, + "grad_norm": 1.4893729205321566, + "learning_rate": 4.5334207910670895e-06, + "loss": 0.2761322021484375, + "step": 76150 + }, + { + "epoch": 0.6584897666254507, + "grad_norm": 40.470337139094035, + "learning_rate": 4.533245679418633e-06, + "loss": 0.2986095428466797, + "step": 76155 + }, + { + "epoch": 0.658533000146994, + "grad_norm": 10.559822975022204, + "learning_rate": 4.5330705606989815e-06, + "loss": 0.05604095458984375, + "step": 76160 + }, + { + "epoch": 0.6585762336685372, + "grad_norm": 61.063169646614014, + "learning_rate": 4.5328954349089455e-06, + "loss": 0.4264556884765625, + "step": 76165 + }, + { + "epoch": 0.6586194671900805, + "grad_norm": 2.264630374650702, + "learning_rate": 4.532720302049331e-06, + "loss": 0.03583831787109375, + "step": 76170 + }, + { + "epoch": 0.6586627007116238, + "grad_norm": 2.430528614137823, + "learning_rate": 4.532545162120945e-06, + "loss": 0.14871292114257811, + "step": 76175 + }, + { + "epoch": 0.658705934233167, + "grad_norm": 19.78757040809363, + "learning_rate": 4.532370015124596e-06, + "loss": 0.23031558990478515, + "step": 76180 + }, + { + "epoch": 0.6587491677547103, + "grad_norm": 7.558210574423873, + "learning_rate": 4.532194861061093e-06, + "loss": 0.1252777099609375, + "step": 76185 + }, + { + "epoch": 0.6587924012762536, + "grad_norm": 12.185537253874221, + "learning_rate": 4.532019699931242e-06, + "loss": 0.13678970336914062, + "step": 76190 + }, + { + "epoch": 0.6588356347977968, + "grad_norm": 10.225352458891498, + "learning_rate": 4.531844531735852e-06, + "loss": 0.07686538696289062, + "step": 76195 + }, + { + "epoch": 0.6588788683193401, + "grad_norm": 29.890176101806684, + "learning_rate": 4.53166935647573e-06, + "loss": 0.24203643798828126, + "step": 76200 + }, + { + "epoch": 0.6589221018408834, + "grad_norm": 8.868185995045604, + "learning_rate": 4.531494174151684e-06, + "loss": 0.08882293701171876, + "step": 76205 + }, + { + "epoch": 0.6589653353624266, + "grad_norm": 26.15594606009714, + "learning_rate": 4.531318984764523e-06, + "loss": 0.3848602294921875, + "step": 76210 + }, + { + "epoch": 0.6590085688839699, + "grad_norm": 13.657621178681515, + "learning_rate": 4.531143788315054e-06, + "loss": 0.116461181640625, + "step": 76215 + }, + { + "epoch": 0.6590518024055131, + "grad_norm": 8.325120068706276, + "learning_rate": 4.530968584804085e-06, + "loss": 0.10323028564453125, + "step": 76220 + }, + { + "epoch": 0.6590950359270564, + "grad_norm": 0.4830930180614213, + "learning_rate": 4.530793374232423e-06, + "loss": 0.06581859588623047, + "step": 76225 + }, + { + "epoch": 0.6591382694485997, + "grad_norm": 5.685759817703638, + "learning_rate": 4.53061815660088e-06, + "loss": 0.10037841796875, + "step": 76230 + }, + { + "epoch": 0.6591815029701429, + "grad_norm": 30.699613830950145, + "learning_rate": 4.530442931910259e-06, + "loss": 0.22041015625, + "step": 76235 + }, + { + "epoch": 0.6592247364916862, + "grad_norm": 1.3056467138474066, + "learning_rate": 4.530267700161371e-06, + "loss": 0.1151397705078125, + "step": 76240 + }, + { + "epoch": 0.6592679700132295, + "grad_norm": 2.896748168794002, + "learning_rate": 4.530092461355024e-06, + "loss": 0.08333358764648438, + "step": 76245 + }, + { + "epoch": 0.6593112035347727, + "grad_norm": 4.641641456830486, + "learning_rate": 4.529917215492024e-06, + "loss": 0.17071380615234374, + "step": 76250 + }, + { + "epoch": 0.659354437056316, + "grad_norm": 2.792303361318766, + "learning_rate": 4.529741962573183e-06, + "loss": 0.07151565551757813, + "step": 76255 + }, + { + "epoch": 0.6593976705778593, + "grad_norm": 0.24478149039502728, + "learning_rate": 4.529566702599307e-06, + "loss": 0.28666534423828127, + "step": 76260 + }, + { + "epoch": 0.6594409040994025, + "grad_norm": 18.9318866454667, + "learning_rate": 4.529391435571204e-06, + "loss": 0.3408927917480469, + "step": 76265 + }, + { + "epoch": 0.6594841376209458, + "grad_norm": 22.23202797005512, + "learning_rate": 4.529216161489682e-06, + "loss": 0.12100830078125, + "step": 76270 + }, + { + "epoch": 0.659527371142489, + "grad_norm": 10.772493799614695, + "learning_rate": 4.5290408803555525e-06, + "loss": 0.141851806640625, + "step": 76275 + }, + { + "epoch": 0.6595706046640323, + "grad_norm": 0.6408685660950586, + "learning_rate": 4.52886559216962e-06, + "loss": 0.017229413986206053, + "step": 76280 + }, + { + "epoch": 0.6596138381855756, + "grad_norm": 8.211203486692021, + "learning_rate": 4.528690296932693e-06, + "loss": 0.13335113525390624, + "step": 76285 + }, + { + "epoch": 0.6596570717071188, + "grad_norm": 15.165990421056563, + "learning_rate": 4.528514994645584e-06, + "loss": 0.10712013244628907, + "step": 76290 + }, + { + "epoch": 0.6597003052286621, + "grad_norm": 39.01402630165187, + "learning_rate": 4.528339685309099e-06, + "loss": 0.18365478515625, + "step": 76295 + }, + { + "epoch": 0.6597435387502053, + "grad_norm": 10.205004174992741, + "learning_rate": 4.528164368924046e-06, + "loss": 0.06409873962402343, + "step": 76300 + }, + { + "epoch": 0.6597867722717486, + "grad_norm": 24.78899510674462, + "learning_rate": 4.527989045491232e-06, + "loss": 0.18012237548828125, + "step": 76305 + }, + { + "epoch": 0.6598300057932919, + "grad_norm": 38.250565892706284, + "learning_rate": 4.527813715011469e-06, + "loss": 0.1884521484375, + "step": 76310 + }, + { + "epoch": 0.6598732393148351, + "grad_norm": 5.924307321590875, + "learning_rate": 4.527638377485564e-06, + "loss": 0.09127197265625, + "step": 76315 + }, + { + "epoch": 0.6599164728363784, + "grad_norm": 0.9442751274984539, + "learning_rate": 4.5274630329143255e-06, + "loss": 0.06366958618164062, + "step": 76320 + }, + { + "epoch": 0.6599597063579217, + "grad_norm": 9.927588767781334, + "learning_rate": 4.527287681298563e-06, + "loss": 0.1762237548828125, + "step": 76325 + }, + { + "epoch": 0.6600029398794649, + "grad_norm": 0.8783028263088025, + "learning_rate": 4.527112322639085e-06, + "loss": 0.15050735473632812, + "step": 76330 + }, + { + "epoch": 0.6600461734010082, + "grad_norm": 4.112705854623762, + "learning_rate": 4.5269369569366995e-06, + "loss": 0.06397171020507812, + "step": 76335 + }, + { + "epoch": 0.6600894069225515, + "grad_norm": 0.058726188285134436, + "learning_rate": 4.526761584192215e-06, + "loss": 0.0350250244140625, + "step": 76340 + }, + { + "epoch": 0.6601326404440947, + "grad_norm": 16.27182668235603, + "learning_rate": 4.5265862044064415e-06, + "loss": 0.49736175537109373, + "step": 76345 + }, + { + "epoch": 0.660175873965638, + "grad_norm": 39.626278325568684, + "learning_rate": 4.526410817580187e-06, + "loss": 0.8968437194824219, + "step": 76350 + }, + { + "epoch": 0.6602191074871813, + "grad_norm": 0.32773908879631436, + "learning_rate": 4.526235423714262e-06, + "loss": 0.091400146484375, + "step": 76355 + }, + { + "epoch": 0.6602623410087245, + "grad_norm": 8.075091908590068, + "learning_rate": 4.526060022809474e-06, + "loss": 0.282720947265625, + "step": 76360 + }, + { + "epoch": 0.6603055745302678, + "grad_norm": 1.3240603661163595, + "learning_rate": 4.52588461486663e-06, + "loss": 0.040362548828125, + "step": 76365 + }, + { + "epoch": 0.6603488080518111, + "grad_norm": 1.8377402470977597, + "learning_rate": 4.525709199886542e-06, + "loss": 0.14944076538085938, + "step": 76370 + }, + { + "epoch": 0.6603920415733543, + "grad_norm": 1.0496860945303197, + "learning_rate": 4.525533777870018e-06, + "loss": 0.19607391357421874, + "step": 76375 + }, + { + "epoch": 0.6604352750948976, + "grad_norm": 2.6280403991182983, + "learning_rate": 4.525358348817867e-06, + "loss": 0.1216552734375, + "step": 76380 + }, + { + "epoch": 0.6604785086164409, + "grad_norm": 0.21192926790698155, + "learning_rate": 4.525182912730899e-06, + "loss": 0.09141349792480469, + "step": 76385 + }, + { + "epoch": 0.6605217421379841, + "grad_norm": 3.7212663768273666, + "learning_rate": 4.525007469609921e-06, + "loss": 0.17332305908203124, + "step": 76390 + }, + { + "epoch": 0.6605649756595273, + "grad_norm": 14.630120631379045, + "learning_rate": 4.524832019455744e-06, + "loss": 0.4137054443359375, + "step": 76395 + }, + { + "epoch": 0.6606082091810707, + "grad_norm": 5.631237573989136, + "learning_rate": 4.524656562269176e-06, + "loss": 0.03771209716796875, + "step": 76400 + }, + { + "epoch": 0.6606514427026139, + "grad_norm": 11.312177499472288, + "learning_rate": 4.524481098051026e-06, + "loss": 0.06839599609375, + "step": 76405 + }, + { + "epoch": 0.6606946762241571, + "grad_norm": 4.8497744154773645, + "learning_rate": 4.524305626802105e-06, + "loss": 0.147637939453125, + "step": 76410 + }, + { + "epoch": 0.6607379097457005, + "grad_norm": 4.610717081966094, + "learning_rate": 4.524130148523222e-06, + "loss": 0.040102386474609376, + "step": 76415 + }, + { + "epoch": 0.6607811432672437, + "grad_norm": 4.580812519981046, + "learning_rate": 4.523954663215183e-06, + "loss": 0.128662109375, + "step": 76420 + }, + { + "epoch": 0.6608243767887869, + "grad_norm": 5.5428450585844535, + "learning_rate": 4.523779170878802e-06, + "loss": 0.18350830078125, + "step": 76425 + }, + { + "epoch": 0.6608676103103303, + "grad_norm": 0.8324212926183464, + "learning_rate": 4.523603671514885e-06, + "loss": 0.06468276977539063, + "step": 76430 + }, + { + "epoch": 0.6609108438318735, + "grad_norm": 22.855835942625998, + "learning_rate": 4.523428165124242e-06, + "loss": 0.14440040588378905, + "step": 76435 + }, + { + "epoch": 0.6609540773534167, + "grad_norm": 24.034599500261695, + "learning_rate": 4.523252651707685e-06, + "loss": 0.261761474609375, + "step": 76440 + }, + { + "epoch": 0.66099731087496, + "grad_norm": 0.3046251287033761, + "learning_rate": 4.523077131266018e-06, + "loss": 0.29711456298828126, + "step": 76445 + }, + { + "epoch": 0.6610405443965033, + "grad_norm": 0.31905561205730265, + "learning_rate": 4.522901603800056e-06, + "loss": 0.05454864501953125, + "step": 76450 + }, + { + "epoch": 0.6610837779180465, + "grad_norm": 4.168461009908697, + "learning_rate": 4.522726069310607e-06, + "loss": 0.185430908203125, + "step": 76455 + }, + { + "epoch": 0.6611270114395899, + "grad_norm": 1.0540041028225602, + "learning_rate": 4.522550527798478e-06, + "loss": 0.119866943359375, + "step": 76460 + }, + { + "epoch": 0.6611702449611331, + "grad_norm": 4.290385562174598, + "learning_rate": 4.522374979264482e-06, + "loss": 0.1181640625, + "step": 76465 + }, + { + "epoch": 0.6612134784826763, + "grad_norm": 4.484187935409744, + "learning_rate": 4.522199423709427e-06, + "loss": 0.0414520263671875, + "step": 76470 + }, + { + "epoch": 0.6612567120042195, + "grad_norm": 1.675393149346225, + "learning_rate": 4.522023861134122e-06, + "loss": 0.08172416687011719, + "step": 76475 + }, + { + "epoch": 0.6612999455257629, + "grad_norm": 5.90597158155271, + "learning_rate": 4.521848291539378e-06, + "loss": 0.0654022216796875, + "step": 76480 + }, + { + "epoch": 0.6613431790473061, + "grad_norm": 0.27587183607602395, + "learning_rate": 4.5216727149260045e-06, + "loss": 0.30675277709960935, + "step": 76485 + }, + { + "epoch": 0.6613864125688493, + "grad_norm": 1.7297798809632718, + "learning_rate": 4.52149713129481e-06, + "loss": 0.09037742614746094, + "step": 76490 + }, + { + "epoch": 0.6614296460903927, + "grad_norm": 1.9061821021231584, + "learning_rate": 4.5213215406466055e-06, + "loss": 0.04896125793457031, + "step": 76495 + }, + { + "epoch": 0.6614728796119359, + "grad_norm": 12.874182108459115, + "learning_rate": 4.521145942982201e-06, + "loss": 0.051699161529541016, + "step": 76500 + }, + { + "epoch": 0.6615161131334791, + "grad_norm": 38.297451870761606, + "learning_rate": 4.520970338302405e-06, + "loss": 0.409210205078125, + "step": 76505 + }, + { + "epoch": 0.6615593466550225, + "grad_norm": 12.293498870853872, + "learning_rate": 4.520794726608028e-06, + "loss": 0.08375473022460937, + "step": 76510 + }, + { + "epoch": 0.6616025801765657, + "grad_norm": 18.486095546581048, + "learning_rate": 4.520619107899881e-06, + "loss": 0.14914398193359374, + "step": 76515 + }, + { + "epoch": 0.6616458136981089, + "grad_norm": 0.14353525876368997, + "learning_rate": 4.520443482178774e-06, + "loss": 0.20721702575683593, + "step": 76520 + }, + { + "epoch": 0.6616890472196523, + "grad_norm": 2.402110716598855, + "learning_rate": 4.520267849445514e-06, + "loss": 0.2309906005859375, + "step": 76525 + }, + { + "epoch": 0.6617322807411955, + "grad_norm": 6.597174961159192, + "learning_rate": 4.520092209700914e-06, + "loss": 0.12200775146484374, + "step": 76530 + }, + { + "epoch": 0.6617755142627387, + "grad_norm": 53.13006251666629, + "learning_rate": 4.519916562945782e-06, + "loss": 0.3418552398681641, + "step": 76535 + }, + { + "epoch": 0.6618187477842821, + "grad_norm": 2.008789357792722, + "learning_rate": 4.51974090918093e-06, + "loss": 0.12931175231933595, + "step": 76540 + }, + { + "epoch": 0.6618619813058253, + "grad_norm": 3.9077678683780728, + "learning_rate": 4.519565248407167e-06, + "loss": 0.044084930419921876, + "step": 76545 + }, + { + "epoch": 0.6619052148273685, + "grad_norm": 1.2752794967852685, + "learning_rate": 4.5193895806253045e-06, + "loss": 0.09061126708984375, + "step": 76550 + }, + { + "epoch": 0.6619484483489119, + "grad_norm": 7.851569493200638, + "learning_rate": 4.51921390583615e-06, + "loss": 0.11149520874023437, + "step": 76555 + }, + { + "epoch": 0.6619916818704551, + "grad_norm": 0.19168756102486165, + "learning_rate": 4.519038224040516e-06, + "loss": 0.051981353759765626, + "step": 76560 + }, + { + "epoch": 0.6620349153919983, + "grad_norm": 16.319716717094643, + "learning_rate": 4.518862535239211e-06, + "loss": 0.4750732421875, + "step": 76565 + }, + { + "epoch": 0.6620781489135416, + "grad_norm": 9.574309186067934, + "learning_rate": 4.518686839433047e-06, + "loss": 0.08802108764648438, + "step": 76570 + }, + { + "epoch": 0.6621213824350849, + "grad_norm": 3.605399901964548, + "learning_rate": 4.518511136622833e-06, + "loss": 0.0907928466796875, + "step": 76575 + }, + { + "epoch": 0.6621646159566281, + "grad_norm": 16.295544035018192, + "learning_rate": 4.518335426809381e-06, + "loss": 0.21413803100585938, + "step": 76580 + }, + { + "epoch": 0.6622078494781714, + "grad_norm": 10.181615624277352, + "learning_rate": 4.518159709993499e-06, + "loss": 0.08733577728271484, + "step": 76585 + }, + { + "epoch": 0.6622510829997147, + "grad_norm": 1.4723161543836711, + "learning_rate": 4.517983986175999e-06, + "loss": 0.41033477783203126, + "step": 76590 + }, + { + "epoch": 0.6622943165212579, + "grad_norm": 1.8519798157212464, + "learning_rate": 4.51780825535769e-06, + "loss": 0.19197502136230468, + "step": 76595 + }, + { + "epoch": 0.6623375500428011, + "grad_norm": 37.29036314721975, + "learning_rate": 4.5176325175393845e-06, + "loss": 0.20469589233398439, + "step": 76600 + }, + { + "epoch": 0.6623807835643445, + "grad_norm": 1.4417782560005632, + "learning_rate": 4.517456772721891e-06, + "loss": 0.150048828125, + "step": 76605 + }, + { + "epoch": 0.6624240170858877, + "grad_norm": 7.261769796779425, + "learning_rate": 4.517281020906022e-06, + "loss": 0.1369647979736328, + "step": 76610 + }, + { + "epoch": 0.662467250607431, + "grad_norm": 1.9265535366837212, + "learning_rate": 4.517105262092588e-06, + "loss": 0.0909820556640625, + "step": 76615 + }, + { + "epoch": 0.6625104841289743, + "grad_norm": 10.543824821116905, + "learning_rate": 4.516929496282396e-06, + "loss": 0.117938232421875, + "step": 76620 + }, + { + "epoch": 0.6625537176505175, + "grad_norm": 0.9131042917178768, + "learning_rate": 4.516753723476261e-06, + "loss": 0.2097026824951172, + "step": 76625 + }, + { + "epoch": 0.6625969511720607, + "grad_norm": 35.816066553668, + "learning_rate": 4.5165779436749915e-06, + "loss": 0.16213302612304686, + "step": 76630 + }, + { + "epoch": 0.6626401846936041, + "grad_norm": 0.8340151551066118, + "learning_rate": 4.5164021568793975e-06, + "loss": 0.027256011962890625, + "step": 76635 + }, + { + "epoch": 0.6626834182151473, + "grad_norm": 8.014492371333802, + "learning_rate": 4.516226363090292e-06, + "loss": 0.2222137451171875, + "step": 76640 + }, + { + "epoch": 0.6627266517366905, + "grad_norm": 3.5111869925137627, + "learning_rate": 4.516050562308484e-06, + "loss": 0.16026611328125, + "step": 76645 + }, + { + "epoch": 0.6627698852582338, + "grad_norm": 6.196228926225239, + "learning_rate": 4.515874754534784e-06, + "loss": 0.19028491973876954, + "step": 76650 + }, + { + "epoch": 0.6628131187797771, + "grad_norm": 1.6340481115085352, + "learning_rate": 4.515698939770005e-06, + "loss": 0.23248291015625, + "step": 76655 + }, + { + "epoch": 0.6628563523013203, + "grad_norm": 0.9044815116975514, + "learning_rate": 4.5155231180149565e-06, + "loss": 0.047107696533203125, + "step": 76660 + }, + { + "epoch": 0.6628995858228636, + "grad_norm": 0.31973436495638746, + "learning_rate": 4.515347289270448e-06, + "loss": 0.1796112060546875, + "step": 76665 + }, + { + "epoch": 0.6629428193444069, + "grad_norm": 8.237800719678683, + "learning_rate": 4.5151714535372926e-06, + "loss": 0.616485595703125, + "step": 76670 + }, + { + "epoch": 0.6629860528659501, + "grad_norm": 5.823106317116713, + "learning_rate": 4.514995610816299e-06, + "loss": 0.0495513916015625, + "step": 76675 + }, + { + "epoch": 0.6630292863874934, + "grad_norm": 2.317900133843622, + "learning_rate": 4.514819761108282e-06, + "loss": 0.31602783203125, + "step": 76680 + }, + { + "epoch": 0.6630725199090367, + "grad_norm": 0.099245762532497, + "learning_rate": 4.514643904414048e-06, + "loss": 0.2378154754638672, + "step": 76685 + }, + { + "epoch": 0.6631157534305799, + "grad_norm": 14.487735630332969, + "learning_rate": 4.5144680407344104e-06, + "loss": 0.1799713134765625, + "step": 76690 + }, + { + "epoch": 0.6631589869521232, + "grad_norm": 2.485826440263468, + "learning_rate": 4.514292170070181e-06, + "loss": 0.2680198669433594, + "step": 76695 + }, + { + "epoch": 0.6632022204736665, + "grad_norm": 1.238120307303795, + "learning_rate": 4.51411629242217e-06, + "loss": 0.0620361328125, + "step": 76700 + }, + { + "epoch": 0.6632454539952097, + "grad_norm": 0.18196635090338056, + "learning_rate": 4.513940407791187e-06, + "loss": 0.09514389038085938, + "step": 76705 + }, + { + "epoch": 0.663288687516753, + "grad_norm": 2.7977992107295697, + "learning_rate": 4.513764516178046e-06, + "loss": 0.17421112060546876, + "step": 76710 + }, + { + "epoch": 0.6633319210382963, + "grad_norm": 17.939702216390923, + "learning_rate": 4.513588617583556e-06, + "loss": 0.09887580871582032, + "step": 76715 + }, + { + "epoch": 0.6633751545598395, + "grad_norm": 0.8201636507596217, + "learning_rate": 4.513412712008529e-06, + "loss": 0.12387351989746094, + "step": 76720 + }, + { + "epoch": 0.6634183880813828, + "grad_norm": 11.317492421730233, + "learning_rate": 4.513236799453777e-06, + "loss": 0.5955078125, + "step": 76725 + }, + { + "epoch": 0.6634616216029261, + "grad_norm": 4.125159045095781, + "learning_rate": 4.513060879920109e-06, + "loss": 0.21605224609375, + "step": 76730 + }, + { + "epoch": 0.6635048551244693, + "grad_norm": 0.15186844422035964, + "learning_rate": 4.51288495340834e-06, + "loss": 0.11755867004394531, + "step": 76735 + }, + { + "epoch": 0.6635480886460126, + "grad_norm": 0.7755504995649432, + "learning_rate": 4.512709019919278e-06, + "loss": 0.14241943359375, + "step": 76740 + }, + { + "epoch": 0.6635913221675558, + "grad_norm": 12.247852971551806, + "learning_rate": 4.512533079453736e-06, + "loss": 0.091900634765625, + "step": 76745 + }, + { + "epoch": 0.6636345556890991, + "grad_norm": 16.71825490773526, + "learning_rate": 4.512357132012526e-06, + "loss": 0.09826869964599609, + "step": 76750 + }, + { + "epoch": 0.6636777892106424, + "grad_norm": 1.4224899596384473, + "learning_rate": 4.512181177596457e-06, + "loss": 0.09847412109375, + "step": 76755 + }, + { + "epoch": 0.6637210227321856, + "grad_norm": 11.655862360988078, + "learning_rate": 4.512005216206342e-06, + "loss": 0.2764427185058594, + "step": 76760 + }, + { + "epoch": 0.6637642562537289, + "grad_norm": 7.165175434913332, + "learning_rate": 4.511829247842993e-06, + "loss": 0.06830902099609375, + "step": 76765 + }, + { + "epoch": 0.6638074897752722, + "grad_norm": 2.461924758750683, + "learning_rate": 4.511653272507221e-06, + "loss": 0.3119384765625, + "step": 76770 + }, + { + "epoch": 0.6638507232968154, + "grad_norm": 0.6846567289115699, + "learning_rate": 4.511477290199839e-06, + "loss": 0.344195556640625, + "step": 76775 + }, + { + "epoch": 0.6638939568183587, + "grad_norm": 38.48469911121888, + "learning_rate": 4.511301300921657e-06, + "loss": 0.10904045104980468, + "step": 76780 + }, + { + "epoch": 0.663937190339902, + "grad_norm": 5.037987859807689, + "learning_rate": 4.511125304673485e-06, + "loss": 0.26533203125, + "step": 76785 + }, + { + "epoch": 0.6639804238614452, + "grad_norm": 3.535655058364793, + "learning_rate": 4.5109493014561386e-06, + "loss": 0.21944961547851563, + "step": 76790 + }, + { + "epoch": 0.6640236573829885, + "grad_norm": 25.44612477067453, + "learning_rate": 4.510773291270426e-06, + "loss": 0.16377716064453124, + "step": 76795 + }, + { + "epoch": 0.6640668909045317, + "grad_norm": 0.17304954339273965, + "learning_rate": 4.510597274117162e-06, + "loss": 0.17208786010742189, + "step": 76800 + }, + { + "epoch": 0.664110124426075, + "grad_norm": 22.509193784956175, + "learning_rate": 4.510421249997156e-06, + "loss": 0.16317138671875, + "step": 76805 + }, + { + "epoch": 0.6641533579476183, + "grad_norm": 1.377155569383674, + "learning_rate": 4.51024521891122e-06, + "loss": 0.3743927001953125, + "step": 76810 + }, + { + "epoch": 0.6641965914691615, + "grad_norm": 2.0109378227326107, + "learning_rate": 4.510069180860168e-06, + "loss": 0.047137451171875, + "step": 76815 + }, + { + "epoch": 0.6642398249907048, + "grad_norm": 5.489664337499126, + "learning_rate": 4.5098931358448095e-06, + "loss": 0.09999465942382812, + "step": 76820 + }, + { + "epoch": 0.664283058512248, + "grad_norm": 4.633093779609062, + "learning_rate": 4.509717083865958e-06, + "loss": 0.15672149658203124, + "step": 76825 + }, + { + "epoch": 0.6643262920337913, + "grad_norm": 8.871303533571222, + "learning_rate": 4.509541024924423e-06, + "loss": 0.23943252563476564, + "step": 76830 + }, + { + "epoch": 0.6643695255553346, + "grad_norm": 21.51068424583155, + "learning_rate": 4.50936495902102e-06, + "loss": 0.19177093505859374, + "step": 76835 + }, + { + "epoch": 0.6644127590768778, + "grad_norm": 8.11458247164116, + "learning_rate": 4.5091888861565585e-06, + "loss": 0.2577239990234375, + "step": 76840 + }, + { + "epoch": 0.6644559925984211, + "grad_norm": 0.8812761360076973, + "learning_rate": 4.50901280633185e-06, + "loss": 0.223553466796875, + "step": 76845 + }, + { + "epoch": 0.6644992261199644, + "grad_norm": 4.432167397708576, + "learning_rate": 4.50883671954771e-06, + "loss": 0.3305030822753906, + "step": 76850 + }, + { + "epoch": 0.6645424596415076, + "grad_norm": 40.15584005271467, + "learning_rate": 4.508660625804948e-06, + "loss": 0.45413970947265625, + "step": 76855 + }, + { + "epoch": 0.6645856931630509, + "grad_norm": 14.416695165500066, + "learning_rate": 4.5084845251043746e-06, + "loss": 0.14036865234375, + "step": 76860 + }, + { + "epoch": 0.6646289266845942, + "grad_norm": 0.5952306141623545, + "learning_rate": 4.5083084174468065e-06, + "loss": 0.30375518798828127, + "step": 76865 + }, + { + "epoch": 0.6646721602061374, + "grad_norm": 1.0138353795997064, + "learning_rate": 4.508132302833051e-06, + "loss": 0.25291290283203127, + "step": 76870 + }, + { + "epoch": 0.6647153937276807, + "grad_norm": 3.624793250642585, + "learning_rate": 4.507956181263923e-06, + "loss": 0.174444580078125, + "step": 76875 + }, + { + "epoch": 0.664758627249224, + "grad_norm": 0.8049561061502732, + "learning_rate": 4.507780052740235e-06, + "loss": 0.13471221923828125, + "step": 76880 + }, + { + "epoch": 0.6648018607707672, + "grad_norm": 0.29307465155310847, + "learning_rate": 4.507603917262799e-06, + "loss": 0.09513320922851562, + "step": 76885 + }, + { + "epoch": 0.6648450942923105, + "grad_norm": 6.319636613853477, + "learning_rate": 4.5074277748324254e-06, + "loss": 0.2123565673828125, + "step": 76890 + }, + { + "epoch": 0.6648883278138538, + "grad_norm": 2.830882117805078, + "learning_rate": 4.50725162544993e-06, + "loss": 0.05711822509765625, + "step": 76895 + }, + { + "epoch": 0.664931561335397, + "grad_norm": 5.002789333616627, + "learning_rate": 4.507075469116121e-06, + "loss": 0.3036918640136719, + "step": 76900 + }, + { + "epoch": 0.6649747948569403, + "grad_norm": 7.941627484053565, + "learning_rate": 4.506899305831815e-06, + "loss": 0.05268173217773438, + "step": 76905 + }, + { + "epoch": 0.6650180283784836, + "grad_norm": 2.034853010796832, + "learning_rate": 4.506723135597822e-06, + "loss": 0.029021072387695312, + "step": 76910 + }, + { + "epoch": 0.6650612619000268, + "grad_norm": 18.856293649271038, + "learning_rate": 4.506546958414955e-06, + "loss": 0.23370895385742188, + "step": 76915 + }, + { + "epoch": 0.66510449542157, + "grad_norm": 20.66831749215768, + "learning_rate": 4.506370774284027e-06, + "loss": 0.22921371459960938, + "step": 76920 + }, + { + "epoch": 0.6651477289431134, + "grad_norm": 5.266695989780764, + "learning_rate": 4.50619458320585e-06, + "loss": 0.09750175476074219, + "step": 76925 + }, + { + "epoch": 0.6651909624646566, + "grad_norm": 5.223126999034638, + "learning_rate": 4.506018385181235e-06, + "loss": 0.10504302978515626, + "step": 76930 + }, + { + "epoch": 0.6652341959861998, + "grad_norm": 7.610472695376158, + "learning_rate": 4.505842180210998e-06, + "loss": 0.2698402404785156, + "step": 76935 + }, + { + "epoch": 0.6652774295077432, + "grad_norm": 23.983955433074097, + "learning_rate": 4.505665968295949e-06, + "loss": 0.09026107788085938, + "step": 76940 + }, + { + "epoch": 0.6653206630292864, + "grad_norm": 5.055239064904683, + "learning_rate": 4.505489749436903e-06, + "loss": 0.09484519958496093, + "step": 76945 + }, + { + "epoch": 0.6653638965508296, + "grad_norm": 9.564134829747609, + "learning_rate": 4.505313523634671e-06, + "loss": 0.2192962646484375, + "step": 76950 + }, + { + "epoch": 0.665407130072373, + "grad_norm": 33.289831251211446, + "learning_rate": 4.5051372908900654e-06, + "loss": 0.25631866455078123, + "step": 76955 + }, + { + "epoch": 0.6654503635939162, + "grad_norm": 1.30888458090648, + "learning_rate": 4.504961051203899e-06, + "loss": 0.06894989013671875, + "step": 76960 + }, + { + "epoch": 0.6654935971154594, + "grad_norm": 19.559237349009134, + "learning_rate": 4.504784804576986e-06, + "loss": 0.07462196350097657, + "step": 76965 + }, + { + "epoch": 0.6655368306370028, + "grad_norm": 0.1912457961003238, + "learning_rate": 4.504608551010138e-06, + "loss": 0.03213424682617187, + "step": 76970 + }, + { + "epoch": 0.665580064158546, + "grad_norm": 4.093324840964101, + "learning_rate": 4.504432290504169e-06, + "loss": 0.14709701538085937, + "step": 76975 + }, + { + "epoch": 0.6656232976800892, + "grad_norm": 8.65280467521397, + "learning_rate": 4.504256023059891e-06, + "loss": 0.1692779541015625, + "step": 76980 + }, + { + "epoch": 0.6656665312016325, + "grad_norm": 12.870375231938324, + "learning_rate": 4.504079748678117e-06, + "loss": 0.22512741088867189, + "step": 76985 + }, + { + "epoch": 0.6657097647231758, + "grad_norm": 5.3356568955417, + "learning_rate": 4.50390346735966e-06, + "loss": 0.3206672668457031, + "step": 76990 + }, + { + "epoch": 0.665752998244719, + "grad_norm": 3.3548987351576516, + "learning_rate": 4.503727179105333e-06, + "loss": 0.15429000854492186, + "step": 76995 + }, + { + "epoch": 0.6657962317662622, + "grad_norm": 0.5414072690482822, + "learning_rate": 4.5035508839159494e-06, + "loss": 0.05811553001403809, + "step": 77000 + }, + { + "epoch": 0.6658394652878056, + "grad_norm": 3.504472083471062, + "learning_rate": 4.503374581792322e-06, + "loss": 0.22240085601806642, + "step": 77005 + }, + { + "epoch": 0.6658826988093488, + "grad_norm": 1.4079068627443394, + "learning_rate": 4.5031982727352644e-06, + "loss": 0.20937347412109375, + "step": 77010 + }, + { + "epoch": 0.665925932330892, + "grad_norm": 30.300644421342025, + "learning_rate": 4.503021956745588e-06, + "loss": 0.213751220703125, + "step": 77015 + }, + { + "epoch": 0.6659691658524354, + "grad_norm": 1.1699902230378167, + "learning_rate": 4.502845633824107e-06, + "loss": 0.4417724609375, + "step": 77020 + }, + { + "epoch": 0.6660123993739786, + "grad_norm": 4.102236103967093, + "learning_rate": 4.502669303971636e-06, + "loss": 0.02492694854736328, + "step": 77025 + }, + { + "epoch": 0.6660556328955218, + "grad_norm": 16.824668909630486, + "learning_rate": 4.502492967188987e-06, + "loss": 0.283740234375, + "step": 77030 + }, + { + "epoch": 0.6660988664170652, + "grad_norm": 24.708926345593913, + "learning_rate": 4.502316623476972e-06, + "loss": 0.07356338500976563, + "step": 77035 + }, + { + "epoch": 0.6661420999386084, + "grad_norm": 1.6895962601175707, + "learning_rate": 4.502140272836406e-06, + "loss": 0.11608047485351562, + "step": 77040 + }, + { + "epoch": 0.6661853334601516, + "grad_norm": 1.7212428745877935, + "learning_rate": 4.501963915268102e-06, + "loss": 0.2569232940673828, + "step": 77045 + }, + { + "epoch": 0.666228566981695, + "grad_norm": 5.885818639610094, + "learning_rate": 4.5017875507728724e-06, + "loss": 0.23987579345703125, + "step": 77050 + }, + { + "epoch": 0.6662718005032382, + "grad_norm": 10.331596090984014, + "learning_rate": 4.5016111793515325e-06, + "loss": 0.12103805541992188, + "step": 77055 + }, + { + "epoch": 0.6663150340247814, + "grad_norm": 1.1071825592923454, + "learning_rate": 4.5014348010048935e-06, + "loss": 0.0632537841796875, + "step": 77060 + }, + { + "epoch": 0.6663582675463248, + "grad_norm": 1.0684395455983038, + "learning_rate": 4.501258415733771e-06, + "loss": 0.10921478271484375, + "step": 77065 + }, + { + "epoch": 0.666401501067868, + "grad_norm": 10.652071122986635, + "learning_rate": 4.501082023538975e-06, + "loss": 0.163482666015625, + "step": 77070 + }, + { + "epoch": 0.6664447345894112, + "grad_norm": 4.533526202474426, + "learning_rate": 4.500905624421324e-06, + "loss": 0.03512229919433594, + "step": 77075 + }, + { + "epoch": 0.6664879681109546, + "grad_norm": 3.1181291229633112, + "learning_rate": 4.500729218381628e-06, + "loss": 0.10481300354003906, + "step": 77080 + }, + { + "epoch": 0.6665312016324978, + "grad_norm": 8.045703810035334, + "learning_rate": 4.500552805420701e-06, + "loss": 0.14932098388671874, + "step": 77085 + }, + { + "epoch": 0.666574435154041, + "grad_norm": 0.833488907976956, + "learning_rate": 4.500376385539359e-06, + "loss": 0.269830322265625, + "step": 77090 + }, + { + "epoch": 0.6666176686755843, + "grad_norm": 27.59612479397434, + "learning_rate": 4.500199958738411e-06, + "loss": 0.1466583251953125, + "step": 77095 + }, + { + "epoch": 0.6666609021971276, + "grad_norm": 30.075928280005062, + "learning_rate": 4.5000235250186735e-06, + "loss": 0.17821044921875, + "step": 77100 + }, + { + "epoch": 0.6667041357186708, + "grad_norm": 1.401913581497293, + "learning_rate": 4.499847084380961e-06, + "loss": 0.12170372009277344, + "step": 77105 + }, + { + "epoch": 0.666747369240214, + "grad_norm": 27.756771744914065, + "learning_rate": 4.499670636826087e-06, + "loss": 0.08237972259521484, + "step": 77110 + }, + { + "epoch": 0.6667906027617574, + "grad_norm": 28.81345361441092, + "learning_rate": 4.499494182354864e-06, + "loss": 0.33491973876953124, + "step": 77115 + }, + { + "epoch": 0.6668338362833006, + "grad_norm": 7.549003575392963, + "learning_rate": 4.4993177209681055e-06, + "loss": 0.04538669586181641, + "step": 77120 + }, + { + "epoch": 0.6668770698048438, + "grad_norm": 1.33841612927409, + "learning_rate": 4.499141252666627e-06, + "loss": 0.22858810424804688, + "step": 77125 + }, + { + "epoch": 0.6669203033263872, + "grad_norm": 26.9879807574719, + "learning_rate": 4.498964777451242e-06, + "loss": 0.3179058074951172, + "step": 77130 + }, + { + "epoch": 0.6669635368479304, + "grad_norm": 23.83493556980475, + "learning_rate": 4.498788295322762e-06, + "loss": 0.16911392211914061, + "step": 77135 + }, + { + "epoch": 0.6670067703694736, + "grad_norm": 19.47800775531193, + "learning_rate": 4.498611806282005e-06, + "loss": 0.310479736328125, + "step": 77140 + }, + { + "epoch": 0.667050003891017, + "grad_norm": 2.146713626653315, + "learning_rate": 4.498435310329781e-06, + "loss": 0.11460628509521484, + "step": 77145 + }, + { + "epoch": 0.6670932374125602, + "grad_norm": 21.184223366396115, + "learning_rate": 4.498258807466907e-06, + "loss": 0.13990745544433594, + "step": 77150 + }, + { + "epoch": 0.6671364709341034, + "grad_norm": 0.37435052044577316, + "learning_rate": 4.498082297694195e-06, + "loss": 0.1338714599609375, + "step": 77155 + }, + { + "epoch": 0.6671797044556468, + "grad_norm": 21.10040058174205, + "learning_rate": 4.49790578101246e-06, + "loss": 0.13390121459960938, + "step": 77160 + }, + { + "epoch": 0.66722293797719, + "grad_norm": 32.91431757939785, + "learning_rate": 4.497729257422516e-06, + "loss": 0.6465118408203125, + "step": 77165 + }, + { + "epoch": 0.6672661714987332, + "grad_norm": 13.897942664437902, + "learning_rate": 4.4975527269251774e-06, + "loss": 0.3006317138671875, + "step": 77170 + }, + { + "epoch": 0.6673094050202765, + "grad_norm": 21.65829894051794, + "learning_rate": 4.4973761895212576e-06, + "loss": 0.3554981231689453, + "step": 77175 + }, + { + "epoch": 0.6673526385418198, + "grad_norm": 4.811571933966041, + "learning_rate": 4.497199645211571e-06, + "loss": 0.0911285400390625, + "step": 77180 + }, + { + "epoch": 0.667395872063363, + "grad_norm": 16.010921726439793, + "learning_rate": 4.497023093996932e-06, + "loss": 0.0888916015625, + "step": 77185 + }, + { + "epoch": 0.6674391055849063, + "grad_norm": 30.89705843602666, + "learning_rate": 4.4968465358781544e-06, + "loss": 0.21442108154296874, + "step": 77190 + }, + { + "epoch": 0.6674823391064496, + "grad_norm": 4.377226838521585, + "learning_rate": 4.496669970856053e-06, + "loss": 0.07942047119140624, + "step": 77195 + }, + { + "epoch": 0.6675255726279928, + "grad_norm": 35.05772211757537, + "learning_rate": 4.496493398931442e-06, + "loss": 0.2450439453125, + "step": 77200 + }, + { + "epoch": 0.6675688061495361, + "grad_norm": 25.399568405999993, + "learning_rate": 4.496316820105136e-06, + "loss": 0.3827861785888672, + "step": 77205 + }, + { + "epoch": 0.6676120396710794, + "grad_norm": 6.439948316805434, + "learning_rate": 4.4961402343779496e-06, + "loss": 0.19005126953125, + "step": 77210 + }, + { + "epoch": 0.6676552731926226, + "grad_norm": 1.981073155358982, + "learning_rate": 4.495963641750694e-06, + "loss": 0.058242416381835936, + "step": 77215 + }, + { + "epoch": 0.6676985067141659, + "grad_norm": 0.7346894544353587, + "learning_rate": 4.49578704222419e-06, + "loss": 0.16640129089355468, + "step": 77220 + }, + { + "epoch": 0.6677417402357092, + "grad_norm": 6.317903150855223, + "learning_rate": 4.4956104357992455e-06, + "loss": 0.42835845947265627, + "step": 77225 + }, + { + "epoch": 0.6677849737572524, + "grad_norm": 12.877581656016694, + "learning_rate": 4.495433822476679e-06, + "loss": 0.36433563232421873, + "step": 77230 + }, + { + "epoch": 0.6678282072787957, + "grad_norm": 2.378848077508897, + "learning_rate": 4.495257202257304e-06, + "loss": 0.06695098876953125, + "step": 77235 + }, + { + "epoch": 0.667871440800339, + "grad_norm": 18.333943399700438, + "learning_rate": 4.495080575141934e-06, + "loss": 0.1877277374267578, + "step": 77240 + }, + { + "epoch": 0.6679146743218822, + "grad_norm": 10.689168835716995, + "learning_rate": 4.4949039411313856e-06, + "loss": 0.45319442749023436, + "step": 77245 + }, + { + "epoch": 0.6679579078434255, + "grad_norm": 23.891310450710584, + "learning_rate": 4.494727300226472e-06, + "loss": 0.2705837249755859, + "step": 77250 + }, + { + "epoch": 0.6680011413649687, + "grad_norm": 5.521252947409342, + "learning_rate": 4.494550652428008e-06, + "loss": 0.15378189086914062, + "step": 77255 + }, + { + "epoch": 0.668044374886512, + "grad_norm": 9.366761056165855, + "learning_rate": 4.494373997736808e-06, + "loss": 0.378076171875, + "step": 77260 + }, + { + "epoch": 0.6680876084080553, + "grad_norm": 16.492026446829353, + "learning_rate": 4.494197336153687e-06, + "loss": 0.14039306640625, + "step": 77265 + }, + { + "epoch": 0.6681308419295985, + "grad_norm": 19.967704515377648, + "learning_rate": 4.494020667679461e-06, + "loss": 0.068048095703125, + "step": 77270 + }, + { + "epoch": 0.6681740754511418, + "grad_norm": 2.598681336669276, + "learning_rate": 4.493843992314943e-06, + "loss": 0.285400390625, + "step": 77275 + }, + { + "epoch": 0.668217308972685, + "grad_norm": 0.7043904464151368, + "learning_rate": 4.493667310060949e-06, + "loss": 0.137799072265625, + "step": 77280 + }, + { + "epoch": 0.6682605424942283, + "grad_norm": 0.8790574847646329, + "learning_rate": 4.493490620918293e-06, + "loss": 0.21229820251464843, + "step": 77285 + }, + { + "epoch": 0.6683037760157716, + "grad_norm": 2.5324234847945477, + "learning_rate": 4.49331392488779e-06, + "loss": 0.156817626953125, + "step": 77290 + }, + { + "epoch": 0.6683470095373149, + "grad_norm": 14.156030793239811, + "learning_rate": 4.4931372219702555e-06, + "loss": 0.13975372314453124, + "step": 77295 + }, + { + "epoch": 0.6683902430588581, + "grad_norm": 0.43876487186514396, + "learning_rate": 4.492960512166504e-06, + "loss": 0.12219581604003907, + "step": 77300 + }, + { + "epoch": 0.6684334765804014, + "grad_norm": 3.001718999823138, + "learning_rate": 4.492783795477351e-06, + "loss": 0.0370758056640625, + "step": 77305 + }, + { + "epoch": 0.6684767101019446, + "grad_norm": 1.0750353157158976, + "learning_rate": 4.49260707190361e-06, + "loss": 0.08882522583007812, + "step": 77310 + }, + { + "epoch": 0.6685199436234879, + "grad_norm": 4.0546738805897435, + "learning_rate": 4.492430341446098e-06, + "loss": 0.199346923828125, + "step": 77315 + }, + { + "epoch": 0.6685631771450312, + "grad_norm": 8.206654343667822, + "learning_rate": 4.492253604105628e-06, + "loss": 0.2442291259765625, + "step": 77320 + }, + { + "epoch": 0.6686064106665744, + "grad_norm": 0.23412649379216127, + "learning_rate": 4.492076859883017e-06, + "loss": 0.15955657958984376, + "step": 77325 + }, + { + "epoch": 0.6686496441881177, + "grad_norm": 11.276057689233141, + "learning_rate": 4.49190010877908e-06, + "loss": 0.07388992309570312, + "step": 77330 + }, + { + "epoch": 0.668692877709661, + "grad_norm": 6.098249098416783, + "learning_rate": 4.49172335079463e-06, + "loss": 0.08606147766113281, + "step": 77335 + }, + { + "epoch": 0.6687361112312042, + "grad_norm": 58.2358762437482, + "learning_rate": 4.491546585930485e-06, + "loss": 0.40774993896484374, + "step": 77340 + }, + { + "epoch": 0.6687793447527475, + "grad_norm": 1.3096655149570098, + "learning_rate": 4.4913698141874584e-06, + "loss": 0.2573150634765625, + "step": 77345 + }, + { + "epoch": 0.6688225782742907, + "grad_norm": 2.7304753409312434, + "learning_rate": 4.491193035566366e-06, + "loss": 0.116046142578125, + "step": 77350 + }, + { + "epoch": 0.668865811795834, + "grad_norm": 5.587545087552799, + "learning_rate": 4.491016250068023e-06, + "loss": 0.07375106811523438, + "step": 77355 + }, + { + "epoch": 0.6689090453173773, + "grad_norm": 2.120015388148049, + "learning_rate": 4.490839457693246e-06, + "loss": 0.295916748046875, + "step": 77360 + }, + { + "epoch": 0.6689522788389205, + "grad_norm": 4.597707851713197, + "learning_rate": 4.490662658442848e-06, + "loss": 0.08565902709960938, + "step": 77365 + }, + { + "epoch": 0.6689955123604638, + "grad_norm": 30.696346245116484, + "learning_rate": 4.490485852317645e-06, + "loss": 0.251947021484375, + "step": 77370 + }, + { + "epoch": 0.6690387458820071, + "grad_norm": 28.291303242849576, + "learning_rate": 4.490309039318454e-06, + "loss": 0.3995628356933594, + "step": 77375 + }, + { + "epoch": 0.6690819794035503, + "grad_norm": 14.004212733017871, + "learning_rate": 4.490132219446089e-06, + "loss": 0.09134292602539062, + "step": 77380 + }, + { + "epoch": 0.6691252129250936, + "grad_norm": 2.462954037230116, + "learning_rate": 4.4899553927013655e-06, + "loss": 0.1059539794921875, + "step": 77385 + }, + { + "epoch": 0.6691684464466369, + "grad_norm": 6.725476595242596, + "learning_rate": 4.4897785590851e-06, + "loss": 0.1247589111328125, + "step": 77390 + }, + { + "epoch": 0.6692116799681801, + "grad_norm": 14.325203225985172, + "learning_rate": 4.489601718598108e-06, + "loss": 0.457867431640625, + "step": 77395 + }, + { + "epoch": 0.6692549134897234, + "grad_norm": 16.274916567228377, + "learning_rate": 4.4894248712412035e-06, + "loss": 0.41234130859375, + "step": 77400 + }, + { + "epoch": 0.6692981470112667, + "grad_norm": 4.999978455154525, + "learning_rate": 4.4892480170152036e-06, + "loss": 0.1481781005859375, + "step": 77405 + }, + { + "epoch": 0.6693413805328099, + "grad_norm": 12.034825820500451, + "learning_rate": 4.489071155920923e-06, + "loss": 0.1605224609375, + "step": 77410 + }, + { + "epoch": 0.6693846140543532, + "grad_norm": 18.38050118053968, + "learning_rate": 4.488894287959177e-06, + "loss": 0.1929107666015625, + "step": 77415 + }, + { + "epoch": 0.6694278475758965, + "grad_norm": 5.927244323208206, + "learning_rate": 4.488717413130783e-06, + "loss": 0.069122314453125, + "step": 77420 + }, + { + "epoch": 0.6694710810974397, + "grad_norm": 6.704952885928954, + "learning_rate": 4.4885405314365574e-06, + "loss": 0.14330368041992186, + "step": 77425 + }, + { + "epoch": 0.6695143146189829, + "grad_norm": 5.3846339723928205, + "learning_rate": 4.488363642877313e-06, + "loss": 0.047821044921875, + "step": 77430 + }, + { + "epoch": 0.6695575481405263, + "grad_norm": 5.421942515452395, + "learning_rate": 4.4881867474538665e-06, + "loss": 0.1874755859375, + "step": 77435 + }, + { + "epoch": 0.6696007816620695, + "grad_norm": 1.1963300057733484, + "learning_rate": 4.488009845167034e-06, + "loss": 0.1370075225830078, + "step": 77440 + }, + { + "epoch": 0.6696440151836127, + "grad_norm": 2.3461117277249617, + "learning_rate": 4.487832936017632e-06, + "loss": 0.25789794921875, + "step": 77445 + }, + { + "epoch": 0.6696872487051561, + "grad_norm": 12.839855478442688, + "learning_rate": 4.487656020006477e-06, + "loss": 0.51365966796875, + "step": 77450 + }, + { + "epoch": 0.6697304822266993, + "grad_norm": 22.981175827341488, + "learning_rate": 4.487479097134382e-06, + "loss": 0.0985809326171875, + "step": 77455 + }, + { + "epoch": 0.6697737157482425, + "grad_norm": 2.831095811968119, + "learning_rate": 4.487302167402166e-06, + "loss": 0.09263916015625, + "step": 77460 + }, + { + "epoch": 0.6698169492697859, + "grad_norm": 37.99328722379374, + "learning_rate": 4.487125230810644e-06, + "loss": 0.634640121459961, + "step": 77465 + }, + { + "epoch": 0.6698601827913291, + "grad_norm": 32.48996425466985, + "learning_rate": 4.486948287360632e-06, + "loss": 0.40725021362304686, + "step": 77470 + }, + { + "epoch": 0.6699034163128723, + "grad_norm": 7.116981966980816, + "learning_rate": 4.486771337052945e-06, + "loss": 0.08123092651367188, + "step": 77475 + }, + { + "epoch": 0.6699466498344157, + "grad_norm": 6.358187516351054, + "learning_rate": 4.4865943798884e-06, + "loss": 0.1672119140625, + "step": 77480 + }, + { + "epoch": 0.6699898833559589, + "grad_norm": 14.520185993662947, + "learning_rate": 4.486417415867814e-06, + "loss": 0.11297760009765626, + "step": 77485 + }, + { + "epoch": 0.6700331168775021, + "grad_norm": 8.001447993966824, + "learning_rate": 4.486240444992001e-06, + "loss": 0.11006011962890624, + "step": 77490 + }, + { + "epoch": 0.6700763503990455, + "grad_norm": 0.24531745250233303, + "learning_rate": 4.486063467261779e-06, + "loss": 0.07339820861816407, + "step": 77495 + }, + { + "epoch": 0.6701195839205887, + "grad_norm": 0.8277436545887555, + "learning_rate": 4.485886482677965e-06, + "loss": 0.0260162353515625, + "step": 77500 + }, + { + "epoch": 0.6701628174421319, + "grad_norm": 0.7215505855190691, + "learning_rate": 4.485709491241372e-06, + "loss": 0.07859344482421875, + "step": 77505 + }, + { + "epoch": 0.6702060509636752, + "grad_norm": 11.9320209966947, + "learning_rate": 4.485532492952819e-06, + "loss": 0.0682769775390625, + "step": 77510 + }, + { + "epoch": 0.6702492844852185, + "grad_norm": 0.22305053629251065, + "learning_rate": 4.4853554878131206e-06, + "loss": 0.11408157348632812, + "step": 77515 + }, + { + "epoch": 0.6702925180067617, + "grad_norm": 4.983106586839446, + "learning_rate": 4.485178475823094e-06, + "loss": 0.46454925537109376, + "step": 77520 + }, + { + "epoch": 0.6703357515283049, + "grad_norm": 11.198829641061122, + "learning_rate": 4.4850014569835565e-06, + "loss": 0.10024871826171874, + "step": 77525 + }, + { + "epoch": 0.6703789850498483, + "grad_norm": 0.11943048558343751, + "learning_rate": 4.484824431295322e-06, + "loss": 0.09291152954101563, + "step": 77530 + }, + { + "epoch": 0.6704222185713915, + "grad_norm": 1.5146772706957865, + "learning_rate": 4.4846473987592105e-06, + "loss": 0.05146865844726563, + "step": 77535 + }, + { + "epoch": 0.6704654520929347, + "grad_norm": 31.321192620872342, + "learning_rate": 4.484470359376035e-06, + "loss": 0.540740966796875, + "step": 77540 + }, + { + "epoch": 0.6705086856144781, + "grad_norm": 5.373094367009103, + "learning_rate": 4.484293313146613e-06, + "loss": 0.1506561279296875, + "step": 77545 + }, + { + "epoch": 0.6705519191360213, + "grad_norm": 1.4456176285592612, + "learning_rate": 4.484116260071762e-06, + "loss": 0.05494203567504883, + "step": 77550 + }, + { + "epoch": 0.6705951526575645, + "grad_norm": 3.8293119248500025, + "learning_rate": 4.4839392001522985e-06, + "loss": 0.3261451721191406, + "step": 77555 + }, + { + "epoch": 0.6706383861791079, + "grad_norm": 14.151333251343173, + "learning_rate": 4.483762133389038e-06, + "loss": 0.07609481811523437, + "step": 77560 + }, + { + "epoch": 0.6706816197006511, + "grad_norm": 2.7870151021424565, + "learning_rate": 4.483585059782797e-06, + "loss": 0.02509613037109375, + "step": 77565 + }, + { + "epoch": 0.6707248532221943, + "grad_norm": 2.728060662352371, + "learning_rate": 4.483407979334394e-06, + "loss": 0.057990646362304686, + "step": 77570 + }, + { + "epoch": 0.6707680867437377, + "grad_norm": 7.809116533619171, + "learning_rate": 4.483230892044643e-06, + "loss": 0.13907394409179688, + "step": 77575 + }, + { + "epoch": 0.6708113202652809, + "grad_norm": 5.79217304788701, + "learning_rate": 4.483053797914363e-06, + "loss": 0.05609817504882812, + "step": 77580 + }, + { + "epoch": 0.6708545537868241, + "grad_norm": 0.20760380331353007, + "learning_rate": 4.48287669694437e-06, + "loss": 0.1297637939453125, + "step": 77585 + }, + { + "epoch": 0.6708977873083675, + "grad_norm": 12.80102917535969, + "learning_rate": 4.482699589135481e-06, + "loss": 0.14437255859375, + "step": 77590 + }, + { + "epoch": 0.6709410208299107, + "grad_norm": 0.6673471000199792, + "learning_rate": 4.4825224744885126e-06, + "loss": 0.08557662963867188, + "step": 77595 + }, + { + "epoch": 0.6709842543514539, + "grad_norm": 3.6149772286068673, + "learning_rate": 4.482345353004281e-06, + "loss": 0.063336181640625, + "step": 77600 + }, + { + "epoch": 0.6710274878729972, + "grad_norm": 3.457951115104985, + "learning_rate": 4.482168224683603e-06, + "loss": 0.08084983825683593, + "step": 77605 + }, + { + "epoch": 0.6710707213945405, + "grad_norm": 55.809031836732345, + "learning_rate": 4.481991089527297e-06, + "loss": 0.19163589477539061, + "step": 77610 + }, + { + "epoch": 0.6711139549160837, + "grad_norm": 1.0738811508619606, + "learning_rate": 4.481813947536179e-06, + "loss": 0.1491485595703125, + "step": 77615 + }, + { + "epoch": 0.671157188437627, + "grad_norm": 2.720975247084631, + "learning_rate": 4.481636798711067e-06, + "loss": 0.06437568664550782, + "step": 77620 + }, + { + "epoch": 0.6712004219591703, + "grad_norm": 6.987635859678522, + "learning_rate": 4.481459643052775e-06, + "loss": 0.1558990478515625, + "step": 77625 + }, + { + "epoch": 0.6712436554807135, + "grad_norm": 4.014476781159799, + "learning_rate": 4.481282480562123e-06, + "loss": 0.4080482482910156, + "step": 77630 + }, + { + "epoch": 0.6712868890022567, + "grad_norm": 11.215334414342118, + "learning_rate": 4.481105311239927e-06, + "loss": 0.036244964599609374, + "step": 77635 + }, + { + "epoch": 0.6713301225238001, + "grad_norm": 30.65624596801941, + "learning_rate": 4.480928135087004e-06, + "loss": 0.436187744140625, + "step": 77640 + }, + { + "epoch": 0.6713733560453433, + "grad_norm": 5.528856715891408, + "learning_rate": 4.480750952104172e-06, + "loss": 0.12528076171875, + "step": 77645 + }, + { + "epoch": 0.6714165895668865, + "grad_norm": 0.5582335639099544, + "learning_rate": 4.480573762292247e-06, + "loss": 0.0840057373046875, + "step": 77650 + }, + { + "epoch": 0.6714598230884299, + "grad_norm": 2.1737637741055438, + "learning_rate": 4.4803965656520464e-06, + "loss": 0.08704910278320313, + "step": 77655 + }, + { + "epoch": 0.6715030566099731, + "grad_norm": 17.07082151056475, + "learning_rate": 4.480219362184388e-06, + "loss": 0.2681671142578125, + "step": 77660 + }, + { + "epoch": 0.6715462901315163, + "grad_norm": 10.62801118956696, + "learning_rate": 4.480042151890088e-06, + "loss": 0.231573486328125, + "step": 77665 + }, + { + "epoch": 0.6715895236530597, + "grad_norm": 0.6837407511706993, + "learning_rate": 4.479864934769965e-06, + "loss": 0.0670440673828125, + "step": 77670 + }, + { + "epoch": 0.6716327571746029, + "grad_norm": 0.4122871507647522, + "learning_rate": 4.479687710824836e-06, + "loss": 0.08516387939453125, + "step": 77675 + }, + { + "epoch": 0.6716759906961461, + "grad_norm": 3.084725730482454, + "learning_rate": 4.479510480055518e-06, + "loss": 0.03136425018310547, + "step": 77680 + }, + { + "epoch": 0.6717192242176895, + "grad_norm": 10.000758572032282, + "learning_rate": 4.479333242462828e-06, + "loss": 0.1346649169921875, + "step": 77685 + }, + { + "epoch": 0.6717624577392327, + "grad_norm": 3.9504365174312017, + "learning_rate": 4.479155998047583e-06, + "loss": 0.096917724609375, + "step": 77690 + }, + { + "epoch": 0.6718056912607759, + "grad_norm": 0.5368113242656509, + "learning_rate": 4.478978746810602e-06, + "loss": 0.05660247802734375, + "step": 77695 + }, + { + "epoch": 0.6718489247823192, + "grad_norm": 0.860108146720087, + "learning_rate": 4.478801488752702e-06, + "loss": 0.59873046875, + "step": 77700 + }, + { + "epoch": 0.6718921583038625, + "grad_norm": 7.04434250464685, + "learning_rate": 4.4786242238747e-06, + "loss": 0.054257774353027345, + "step": 77705 + }, + { + "epoch": 0.6719353918254057, + "grad_norm": 0.29007540405251, + "learning_rate": 4.478446952177414e-06, + "loss": 0.6593391418457031, + "step": 77710 + }, + { + "epoch": 0.671978625346949, + "grad_norm": 2.6221581457751775, + "learning_rate": 4.478269673661661e-06, + "loss": 0.07044620513916015, + "step": 77715 + }, + { + "epoch": 0.6720218588684923, + "grad_norm": 4.484477425834912, + "learning_rate": 4.478092388328259e-06, + "loss": 0.06994171142578125, + "step": 77720 + }, + { + "epoch": 0.6720650923900355, + "grad_norm": 15.841930250098963, + "learning_rate": 4.477915096178025e-06, + "loss": 0.158331298828125, + "step": 77725 + }, + { + "epoch": 0.6721083259115788, + "grad_norm": 10.530334456376352, + "learning_rate": 4.477737797211778e-06, + "loss": 0.24693450927734376, + "step": 77730 + }, + { + "epoch": 0.6721515594331221, + "grad_norm": 2.407795056977839, + "learning_rate": 4.4775604914303335e-06, + "loss": 0.09427032470703126, + "step": 77735 + }, + { + "epoch": 0.6721947929546653, + "grad_norm": 22.8896642522067, + "learning_rate": 4.477383178834512e-06, + "loss": 0.194146728515625, + "step": 77740 + }, + { + "epoch": 0.6722380264762086, + "grad_norm": 7.483901693249754, + "learning_rate": 4.477205859425129e-06, + "loss": 0.108563232421875, + "step": 77745 + }, + { + "epoch": 0.6722812599977519, + "grad_norm": 38.41908576597944, + "learning_rate": 4.477028533203003e-06, + "loss": 0.3806243896484375, + "step": 77750 + }, + { + "epoch": 0.6723244935192951, + "grad_norm": 17.543630380316614, + "learning_rate": 4.476851200168953e-06, + "loss": 0.087567138671875, + "step": 77755 + }, + { + "epoch": 0.6723677270408384, + "grad_norm": 17.470469108351622, + "learning_rate": 4.476673860323795e-06, + "loss": 0.10986328125, + "step": 77760 + }, + { + "epoch": 0.6724109605623817, + "grad_norm": 8.317482946161446, + "learning_rate": 4.476496513668348e-06, + "loss": 0.1718597412109375, + "step": 77765 + }, + { + "epoch": 0.6724541940839249, + "grad_norm": 33.76644938727535, + "learning_rate": 4.476319160203429e-06, + "loss": 0.6440338134765625, + "step": 77770 + }, + { + "epoch": 0.6724974276054682, + "grad_norm": 15.756064511675829, + "learning_rate": 4.476141799929857e-06, + "loss": 0.2144378662109375, + "step": 77775 + }, + { + "epoch": 0.6725406611270114, + "grad_norm": 115.03202980482446, + "learning_rate": 4.475964432848449e-06, + "loss": 0.09249114990234375, + "step": 77780 + }, + { + "epoch": 0.6725838946485547, + "grad_norm": 10.224808429040998, + "learning_rate": 4.475787058960023e-06, + "loss": 0.1647216796875, + "step": 77785 + }, + { + "epoch": 0.672627128170098, + "grad_norm": 2.4775521408248324, + "learning_rate": 4.475609678265399e-06, + "loss": 0.03083648681640625, + "step": 77790 + }, + { + "epoch": 0.6726703616916412, + "grad_norm": 34.85988766737483, + "learning_rate": 4.475432290765392e-06, + "loss": 0.22249908447265626, + "step": 77795 + }, + { + "epoch": 0.6727135952131845, + "grad_norm": 22.242276634763936, + "learning_rate": 4.475254896460822e-06, + "loss": 0.1601715087890625, + "step": 77800 + }, + { + "epoch": 0.6727568287347278, + "grad_norm": 3.8297608711312834, + "learning_rate": 4.475077495352507e-06, + "loss": 0.17686080932617188, + "step": 77805 + }, + { + "epoch": 0.672800062256271, + "grad_norm": 37.127724592186816, + "learning_rate": 4.474900087441266e-06, + "loss": 0.0386199951171875, + "step": 77810 + }, + { + "epoch": 0.6728432957778143, + "grad_norm": 14.35521225933131, + "learning_rate": 4.4747226727279145e-06, + "loss": 0.11035003662109374, + "step": 77815 + }, + { + "epoch": 0.6728865292993575, + "grad_norm": 34.410971797436105, + "learning_rate": 4.474545251213274e-06, + "loss": 0.2287109375, + "step": 77820 + }, + { + "epoch": 0.6729297628209008, + "grad_norm": 13.264246944612582, + "learning_rate": 4.47436782289816e-06, + "loss": 0.03760814666748047, + "step": 77825 + }, + { + "epoch": 0.6729729963424441, + "grad_norm": 0.8862118337511752, + "learning_rate": 4.474190387783391e-06, + "loss": 0.42549915313720704, + "step": 77830 + }, + { + "epoch": 0.6730162298639873, + "grad_norm": 1.3923701294962614, + "learning_rate": 4.474012945869787e-06, + "loss": 0.3749542236328125, + "step": 77835 + }, + { + "epoch": 0.6730594633855306, + "grad_norm": 21.85175332235309, + "learning_rate": 4.473835497158166e-06, + "loss": 0.3942527770996094, + "step": 77840 + }, + { + "epoch": 0.6731026969070739, + "grad_norm": 1.4525825494600821, + "learning_rate": 4.473658041649345e-06, + "loss": 0.03462982177734375, + "step": 77845 + }, + { + "epoch": 0.6731459304286171, + "grad_norm": 4.275760028457671, + "learning_rate": 4.473480579344145e-06, + "loss": 0.15362091064453126, + "step": 77850 + }, + { + "epoch": 0.6731891639501604, + "grad_norm": 0.7323365021330526, + "learning_rate": 4.473303110243382e-06, + "loss": 0.03622932434082031, + "step": 77855 + }, + { + "epoch": 0.6732323974717037, + "grad_norm": 9.466485319487813, + "learning_rate": 4.473125634347875e-06, + "loss": 0.3869384765625, + "step": 77860 + }, + { + "epoch": 0.6732756309932469, + "grad_norm": 1.509559728393512, + "learning_rate": 4.472948151658443e-06, + "loss": 0.0596893310546875, + "step": 77865 + }, + { + "epoch": 0.6733188645147902, + "grad_norm": 6.2452426944209485, + "learning_rate": 4.472770662175904e-06, + "loss": 0.11727752685546874, + "step": 77870 + }, + { + "epoch": 0.6733620980363334, + "grad_norm": 0.6455235276275674, + "learning_rate": 4.472593165901076e-06, + "loss": 0.08521270751953125, + "step": 77875 + }, + { + "epoch": 0.6734053315578767, + "grad_norm": 11.221851909351502, + "learning_rate": 4.47241566283478e-06, + "loss": 0.07368011474609375, + "step": 77880 + }, + { + "epoch": 0.67344856507942, + "grad_norm": 30.10300884894198, + "learning_rate": 4.472238152977831e-06, + "loss": 0.2178680419921875, + "step": 77885 + }, + { + "epoch": 0.6734917986009632, + "grad_norm": 4.6908698286020325, + "learning_rate": 4.4720606363310525e-06, + "loss": 0.0721099853515625, + "step": 77890 + }, + { + "epoch": 0.6735350321225065, + "grad_norm": 13.058969285040607, + "learning_rate": 4.471883112895258e-06, + "loss": 0.5581817626953125, + "step": 77895 + }, + { + "epoch": 0.6735782656440498, + "grad_norm": 52.70398444019203, + "learning_rate": 4.47170558267127e-06, + "loss": 0.2188124656677246, + "step": 77900 + }, + { + "epoch": 0.673621499165593, + "grad_norm": 1.9940960595109143, + "learning_rate": 4.471528045659906e-06, + "loss": 0.13357086181640626, + "step": 77905 + }, + { + "epoch": 0.6736647326871363, + "grad_norm": 2.820115017309687, + "learning_rate": 4.471350501861984e-06, + "loss": 0.0584716796875, + "step": 77910 + }, + { + "epoch": 0.6737079662086796, + "grad_norm": 0.7190827826169718, + "learning_rate": 4.471172951278323e-06, + "loss": 0.1585845947265625, + "step": 77915 + }, + { + "epoch": 0.6737511997302228, + "grad_norm": 0.9073078094604288, + "learning_rate": 4.470995393909744e-06, + "loss": 0.121356201171875, + "step": 77920 + }, + { + "epoch": 0.6737944332517661, + "grad_norm": 4.611247265628842, + "learning_rate": 4.470817829757062e-06, + "loss": 0.058469390869140624, + "step": 77925 + }, + { + "epoch": 0.6738376667733094, + "grad_norm": 3.8586825090240806, + "learning_rate": 4.4706402588211e-06, + "loss": 0.07601318359375, + "step": 77930 + }, + { + "epoch": 0.6738809002948526, + "grad_norm": 12.733767389279068, + "learning_rate": 4.470462681102674e-06, + "loss": 0.085845947265625, + "step": 77935 + }, + { + "epoch": 0.6739241338163959, + "grad_norm": 3.318154604717987, + "learning_rate": 4.470285096602605e-06, + "loss": 0.02951812744140625, + "step": 77940 + }, + { + "epoch": 0.6739673673379392, + "grad_norm": 6.623546110779521, + "learning_rate": 4.47010750532171e-06, + "loss": 0.09341278076171874, + "step": 77945 + }, + { + "epoch": 0.6740106008594824, + "grad_norm": 3.207536467944273, + "learning_rate": 4.46992990726081e-06, + "loss": 0.06495208740234375, + "step": 77950 + }, + { + "epoch": 0.6740538343810256, + "grad_norm": 2.1868672851076063, + "learning_rate": 4.469752302420723e-06, + "loss": 0.059075927734375, + "step": 77955 + }, + { + "epoch": 0.674097067902569, + "grad_norm": 8.28505904987618, + "learning_rate": 4.469574690802268e-06, + "loss": 0.121112060546875, + "step": 77960 + }, + { + "epoch": 0.6741403014241122, + "grad_norm": 9.588144609537355, + "learning_rate": 4.4693970724062644e-06, + "loss": 0.2593963623046875, + "step": 77965 + }, + { + "epoch": 0.6741835349456554, + "grad_norm": 3.331144980123652, + "learning_rate": 4.469219447233531e-06, + "loss": 0.17438507080078125, + "step": 77970 + }, + { + "epoch": 0.6742267684671988, + "grad_norm": 29.552618624456315, + "learning_rate": 4.469041815284887e-06, + "loss": 0.338818359375, + "step": 77975 + }, + { + "epoch": 0.674270001988742, + "grad_norm": 13.451804544225022, + "learning_rate": 4.4688641765611536e-06, + "loss": 0.25917205810546873, + "step": 77980 + }, + { + "epoch": 0.6743132355102852, + "grad_norm": 48.03478215731525, + "learning_rate": 4.468686531063148e-06, + "loss": 0.5712615966796875, + "step": 77985 + }, + { + "epoch": 0.6743564690318286, + "grad_norm": 2.575452370871625, + "learning_rate": 4.4685088787916885e-06, + "loss": 0.03543167114257813, + "step": 77990 + }, + { + "epoch": 0.6743997025533718, + "grad_norm": 0.9041396718031547, + "learning_rate": 4.468331219747597e-06, + "loss": 0.08023834228515625, + "step": 77995 + }, + { + "epoch": 0.674442936074915, + "grad_norm": 2.694716877044519, + "learning_rate": 4.468153553931691e-06, + "loss": 0.0368896484375, + "step": 78000 + }, + { + "epoch": 0.6744861695964584, + "grad_norm": 2.9797653730838856, + "learning_rate": 4.467975881344791e-06, + "loss": 0.0725250244140625, + "step": 78005 + }, + { + "epoch": 0.6745294031180016, + "grad_norm": 0.2690786562145419, + "learning_rate": 4.467798201987716e-06, + "loss": 0.5172409057617188, + "step": 78010 + }, + { + "epoch": 0.6745726366395448, + "grad_norm": 3.2964030139199245, + "learning_rate": 4.467620515861285e-06, + "loss": 0.059064483642578124, + "step": 78015 + }, + { + "epoch": 0.6746158701610881, + "grad_norm": 9.215431042144639, + "learning_rate": 4.467442822966318e-06, + "loss": 0.12458648681640624, + "step": 78020 + }, + { + "epoch": 0.6746591036826314, + "grad_norm": 21.87475300791389, + "learning_rate": 4.467265123303634e-06, + "loss": 0.2563041687011719, + "step": 78025 + }, + { + "epoch": 0.6747023372041746, + "grad_norm": 9.775003010073045, + "learning_rate": 4.467087416874054e-06, + "loss": 0.4883880615234375, + "step": 78030 + }, + { + "epoch": 0.674745570725718, + "grad_norm": 21.424120222319438, + "learning_rate": 4.466909703678395e-06, + "loss": 0.23464202880859375, + "step": 78035 + }, + { + "epoch": 0.6747888042472612, + "grad_norm": 10.107725965941606, + "learning_rate": 4.46673198371748e-06, + "loss": 0.175054931640625, + "step": 78040 + }, + { + "epoch": 0.6748320377688044, + "grad_norm": 0.18703367270151477, + "learning_rate": 4.466554256992125e-06, + "loss": 0.11189651489257812, + "step": 78045 + }, + { + "epoch": 0.6748752712903476, + "grad_norm": 4.755786082843167, + "learning_rate": 4.466376523503153e-06, + "loss": 0.242144775390625, + "step": 78050 + }, + { + "epoch": 0.674918504811891, + "grad_norm": 4.809460744469562, + "learning_rate": 4.466198783251381e-06, + "loss": 0.27805023193359374, + "step": 78055 + }, + { + "epoch": 0.6749617383334342, + "grad_norm": 5.513696968451718, + "learning_rate": 4.466021036237629e-06, + "loss": 0.0606292724609375, + "step": 78060 + }, + { + "epoch": 0.6750049718549774, + "grad_norm": 13.497297353964163, + "learning_rate": 4.465843282462718e-06, + "loss": 0.07564811706542969, + "step": 78065 + }, + { + "epoch": 0.6750482053765208, + "grad_norm": 2.248292236717895, + "learning_rate": 4.465665521927467e-06, + "loss": 0.3344482421875, + "step": 78070 + }, + { + "epoch": 0.675091438898064, + "grad_norm": 14.38877839932039, + "learning_rate": 4.465487754632698e-06, + "loss": 0.045009803771972653, + "step": 78075 + }, + { + "epoch": 0.6751346724196072, + "grad_norm": 1.2408267283531376, + "learning_rate": 4.465309980579228e-06, + "loss": 0.05119171142578125, + "step": 78080 + }, + { + "epoch": 0.6751779059411506, + "grad_norm": 2.6764058614472233, + "learning_rate": 4.465132199767876e-06, + "loss": 0.1199798583984375, + "step": 78085 + }, + { + "epoch": 0.6752211394626938, + "grad_norm": 15.689988342210386, + "learning_rate": 4.464954412199465e-06, + "loss": 0.0920135498046875, + "step": 78090 + }, + { + "epoch": 0.675264372984237, + "grad_norm": 34.879008264953576, + "learning_rate": 4.464776617874815e-06, + "loss": 0.288287353515625, + "step": 78095 + }, + { + "epoch": 0.6753076065057804, + "grad_norm": 4.3387906502547215, + "learning_rate": 4.4645988167947426e-06, + "loss": 0.09820098876953125, + "step": 78100 + }, + { + "epoch": 0.6753508400273236, + "grad_norm": 21.065044834899215, + "learning_rate": 4.464421008960072e-06, + "loss": 0.1241851806640625, + "step": 78105 + }, + { + "epoch": 0.6753940735488668, + "grad_norm": 7.089053640344369, + "learning_rate": 4.46424319437162e-06, + "loss": 0.229449462890625, + "step": 78110 + }, + { + "epoch": 0.6754373070704102, + "grad_norm": 9.336079759061432, + "learning_rate": 4.464065373030207e-06, + "loss": 0.04299850463867187, + "step": 78115 + }, + { + "epoch": 0.6754805405919534, + "grad_norm": 3.682992979039183, + "learning_rate": 4.463887544936654e-06, + "loss": 0.0840972900390625, + "step": 78120 + }, + { + "epoch": 0.6755237741134966, + "grad_norm": 6.806283119716982, + "learning_rate": 4.463709710091782e-06, + "loss": 0.04892730712890625, + "step": 78125 + }, + { + "epoch": 0.6755670076350399, + "grad_norm": 2.435195641098452, + "learning_rate": 4.463531868496411e-06, + "loss": 0.23171539306640626, + "step": 78130 + }, + { + "epoch": 0.6756102411565832, + "grad_norm": 17.346091706981483, + "learning_rate": 4.463354020151358e-06, + "loss": 0.465362548828125, + "step": 78135 + }, + { + "epoch": 0.6756534746781264, + "grad_norm": 0.2072724706660666, + "learning_rate": 4.463176165057446e-06, + "loss": 0.012729644775390625, + "step": 78140 + }, + { + "epoch": 0.6756967081996696, + "grad_norm": 8.290978052623084, + "learning_rate": 4.462998303215497e-06, + "loss": 0.16686553955078126, + "step": 78145 + }, + { + "epoch": 0.675739941721213, + "grad_norm": 2.119659493888509, + "learning_rate": 4.462820434626327e-06, + "loss": 0.07040557861328126, + "step": 78150 + }, + { + "epoch": 0.6757831752427562, + "grad_norm": 0.5233062325404587, + "learning_rate": 4.462642559290759e-06, + "loss": 0.3948486328125, + "step": 78155 + }, + { + "epoch": 0.6758264087642994, + "grad_norm": 0.6245889701533283, + "learning_rate": 4.462464677209613e-06, + "loss": 0.07313232421875, + "step": 78160 + }, + { + "epoch": 0.6758696422858428, + "grad_norm": 7.176203294714931, + "learning_rate": 4.462286788383709e-06, + "loss": 0.11832733154296875, + "step": 78165 + }, + { + "epoch": 0.675912875807386, + "grad_norm": 19.458416254439435, + "learning_rate": 4.462108892813866e-06, + "loss": 0.0785400390625, + "step": 78170 + }, + { + "epoch": 0.6759561093289292, + "grad_norm": 1.6551107256860058, + "learning_rate": 4.461930990500909e-06, + "loss": 0.14361572265625, + "step": 78175 + }, + { + "epoch": 0.6759993428504726, + "grad_norm": 15.915273534360304, + "learning_rate": 4.4617530814456526e-06, + "loss": 0.25569915771484375, + "step": 78180 + }, + { + "epoch": 0.6760425763720158, + "grad_norm": 15.361025501299848, + "learning_rate": 4.461575165648922e-06, + "loss": 0.07881336212158203, + "step": 78185 + }, + { + "epoch": 0.676085809893559, + "grad_norm": 13.636615833806907, + "learning_rate": 4.461397243111535e-06, + "loss": 0.090130615234375, + "step": 78190 + }, + { + "epoch": 0.6761290434151024, + "grad_norm": 8.327450769871648, + "learning_rate": 4.461219313834314e-06, + "loss": 0.08662109375, + "step": 78195 + }, + { + "epoch": 0.6761722769366456, + "grad_norm": 1.9782447356445536, + "learning_rate": 4.461041377818078e-06, + "loss": 0.0687225341796875, + "step": 78200 + }, + { + "epoch": 0.6762155104581888, + "grad_norm": 2.1359149163495332, + "learning_rate": 4.460863435063647e-06, + "loss": 0.13403701782226562, + "step": 78205 + }, + { + "epoch": 0.6762587439797322, + "grad_norm": 0.5683326866740604, + "learning_rate": 4.460685485571845e-06, + "loss": 0.01384735107421875, + "step": 78210 + }, + { + "epoch": 0.6763019775012754, + "grad_norm": 9.372932438617358, + "learning_rate": 4.46050752934349e-06, + "loss": 0.03936767578125, + "step": 78215 + }, + { + "epoch": 0.6763452110228186, + "grad_norm": 1.4418705835754928, + "learning_rate": 4.460329566379403e-06, + "loss": 0.09546432495117188, + "step": 78220 + }, + { + "epoch": 0.6763884445443619, + "grad_norm": 3.6655858919991293, + "learning_rate": 4.460151596680405e-06, + "loss": 0.17840499877929689, + "step": 78225 + }, + { + "epoch": 0.6764316780659052, + "grad_norm": 13.67447212138401, + "learning_rate": 4.459973620247317e-06, + "loss": 0.05204563140869141, + "step": 78230 + }, + { + "epoch": 0.6764749115874484, + "grad_norm": 0.944090000255752, + "learning_rate": 4.45979563708096e-06, + "loss": 0.6088714599609375, + "step": 78235 + }, + { + "epoch": 0.6765181451089917, + "grad_norm": 1.1244782756858882, + "learning_rate": 4.459617647182154e-06, + "loss": 0.1100982666015625, + "step": 78240 + }, + { + "epoch": 0.676561378630535, + "grad_norm": 8.29392527806701, + "learning_rate": 4.4594396505517205e-06, + "loss": 0.15171356201171876, + "step": 78245 + }, + { + "epoch": 0.6766046121520782, + "grad_norm": 0.8771015164872167, + "learning_rate": 4.45926164719048e-06, + "loss": 0.09925079345703125, + "step": 78250 + }, + { + "epoch": 0.6766478456736215, + "grad_norm": 4.106293446391332, + "learning_rate": 4.4590836370992545e-06, + "loss": 0.09483795166015625, + "step": 78255 + }, + { + "epoch": 0.6766910791951648, + "grad_norm": 26.377454402020586, + "learning_rate": 4.458905620278864e-06, + "loss": 0.3924163818359375, + "step": 78260 + }, + { + "epoch": 0.676734312716708, + "grad_norm": 8.639235469739827, + "learning_rate": 4.458727596730129e-06, + "loss": 0.10136566162109376, + "step": 78265 + }, + { + "epoch": 0.6767775462382513, + "grad_norm": 9.35748745912855, + "learning_rate": 4.458549566453872e-06, + "loss": 0.33876953125, + "step": 78270 + }, + { + "epoch": 0.6768207797597946, + "grad_norm": 2.8499437129025167, + "learning_rate": 4.458371529450913e-06, + "loss": 0.08017959594726562, + "step": 78275 + }, + { + "epoch": 0.6768640132813378, + "grad_norm": 6.926414256372419, + "learning_rate": 4.4581934857220735e-06, + "loss": 0.17286529541015624, + "step": 78280 + }, + { + "epoch": 0.6769072468028811, + "grad_norm": 12.414389034518726, + "learning_rate": 4.458015435268174e-06, + "loss": 0.19811477661132812, + "step": 78285 + }, + { + "epoch": 0.6769504803244244, + "grad_norm": 2.4717976320646744, + "learning_rate": 4.457837378090036e-06, + "loss": 0.35706787109375, + "step": 78290 + }, + { + "epoch": 0.6769937138459676, + "grad_norm": 0.5710766412537572, + "learning_rate": 4.457659314188482e-06, + "loss": 0.14129486083984374, + "step": 78295 + }, + { + "epoch": 0.6770369473675109, + "grad_norm": 1.2973954320558925, + "learning_rate": 4.457481243564331e-06, + "loss": 0.1584686279296875, + "step": 78300 + }, + { + "epoch": 0.6770801808890541, + "grad_norm": 0.704348989449239, + "learning_rate": 4.457303166218406e-06, + "loss": 0.21964797973632813, + "step": 78305 + }, + { + "epoch": 0.6771234144105974, + "grad_norm": 23.422760408609246, + "learning_rate": 4.457125082151527e-06, + "loss": 0.30290374755859373, + "step": 78310 + }, + { + "epoch": 0.6771666479321407, + "grad_norm": 3.230174749873681, + "learning_rate": 4.456946991364516e-06, + "loss": 0.0680938720703125, + "step": 78315 + }, + { + "epoch": 0.6772098814536839, + "grad_norm": 9.647480372879764, + "learning_rate": 4.456768893858194e-06, + "loss": 0.09000244140625, + "step": 78320 + }, + { + "epoch": 0.6772531149752272, + "grad_norm": 28.448934414742368, + "learning_rate": 4.4565907896333835e-06, + "loss": 0.159661865234375, + "step": 78325 + }, + { + "epoch": 0.6772963484967705, + "grad_norm": 12.380170830955908, + "learning_rate": 4.456412678690905e-06, + "loss": 0.135162353515625, + "step": 78330 + }, + { + "epoch": 0.6773395820183137, + "grad_norm": 53.637050891683074, + "learning_rate": 4.456234561031579e-06, + "loss": 0.43961944580078127, + "step": 78335 + }, + { + "epoch": 0.677382815539857, + "grad_norm": 18.577367473690135, + "learning_rate": 4.456056436656227e-06, + "loss": 0.12413711547851562, + "step": 78340 + }, + { + "epoch": 0.6774260490614002, + "grad_norm": 29.463965200019327, + "learning_rate": 4.4558783055656735e-06, + "loss": 0.14653472900390624, + "step": 78345 + }, + { + "epoch": 0.6774692825829435, + "grad_norm": 0.12432074974695607, + "learning_rate": 4.455700167760737e-06, + "loss": 0.15274887084960936, + "step": 78350 + }, + { + "epoch": 0.6775125161044868, + "grad_norm": 2.8508994828697873, + "learning_rate": 4.4555220232422395e-06, + "loss": 0.06238250732421875, + "step": 78355 + }, + { + "epoch": 0.67755574962603, + "grad_norm": 3.5290525334244136, + "learning_rate": 4.455343872011004e-06, + "loss": 0.12114791870117188, + "step": 78360 + }, + { + "epoch": 0.6775989831475733, + "grad_norm": 24.44016192973375, + "learning_rate": 4.45516571406785e-06, + "loss": 0.14523611068725586, + "step": 78365 + }, + { + "epoch": 0.6776422166691166, + "grad_norm": 0.641996789908666, + "learning_rate": 4.4549875494136004e-06, + "loss": 0.12684669494628906, + "step": 78370 + }, + { + "epoch": 0.6776854501906598, + "grad_norm": 11.80768595327274, + "learning_rate": 4.454809378049078e-06, + "loss": 0.09430084228515626, + "step": 78375 + }, + { + "epoch": 0.6777286837122031, + "grad_norm": 10.443881976668868, + "learning_rate": 4.4546311999751025e-06, + "loss": 0.12591705322265626, + "step": 78380 + }, + { + "epoch": 0.6777719172337464, + "grad_norm": 6.07777445377843, + "learning_rate": 4.4544530151924965e-06, + "loss": 0.0953643798828125, + "step": 78385 + }, + { + "epoch": 0.6778151507552896, + "grad_norm": 4.113221773812824, + "learning_rate": 4.4542748237020824e-06, + "loss": 0.11781196594238282, + "step": 78390 + }, + { + "epoch": 0.6778583842768329, + "grad_norm": 6.841756998506343, + "learning_rate": 4.45409662550468e-06, + "loss": 0.0819915771484375, + "step": 78395 + }, + { + "epoch": 0.6779016177983761, + "grad_norm": 25.422379450427744, + "learning_rate": 4.453918420601113e-06, + "loss": 0.40739288330078127, + "step": 78400 + }, + { + "epoch": 0.6779448513199194, + "grad_norm": 2.415694664588431, + "learning_rate": 4.453740208992203e-06, + "loss": 0.4667877197265625, + "step": 78405 + }, + { + "epoch": 0.6779880848414627, + "grad_norm": 9.415825727280714, + "learning_rate": 4.453561990678771e-06, + "loss": 0.2527435302734375, + "step": 78410 + }, + { + "epoch": 0.6780313183630059, + "grad_norm": 8.575457458545177, + "learning_rate": 4.453383765661641e-06, + "loss": 0.1279815673828125, + "step": 78415 + }, + { + "epoch": 0.6780745518845492, + "grad_norm": 3.2387496024945883, + "learning_rate": 4.453205533941633e-06, + "loss": 0.3271644592285156, + "step": 78420 + }, + { + "epoch": 0.6781177854060925, + "grad_norm": 27.707611062494102, + "learning_rate": 4.453027295519568e-06, + "loss": 0.100244140625, + "step": 78425 + }, + { + "epoch": 0.6781610189276357, + "grad_norm": 15.862848136642134, + "learning_rate": 4.45284905039627e-06, + "loss": 0.17846183776855468, + "step": 78430 + }, + { + "epoch": 0.678204252449179, + "grad_norm": 0.29663715476016406, + "learning_rate": 4.452670798572561e-06, + "loss": 0.13393478393554686, + "step": 78435 + }, + { + "epoch": 0.6782474859707223, + "grad_norm": 0.4399525543310315, + "learning_rate": 4.452492540049263e-06, + "loss": 0.041815185546875, + "step": 78440 + }, + { + "epoch": 0.6782907194922655, + "grad_norm": 3.8783250825836744, + "learning_rate": 4.452314274827198e-06, + "loss": 0.1748016357421875, + "step": 78445 + }, + { + "epoch": 0.6783339530138088, + "grad_norm": 2.678537891472739, + "learning_rate": 4.452136002907187e-06, + "loss": 0.3592182159423828, + "step": 78450 + }, + { + "epoch": 0.6783771865353521, + "grad_norm": 12.166099310057602, + "learning_rate": 4.451957724290052e-06, + "loss": 0.13737564086914061, + "step": 78455 + }, + { + "epoch": 0.6784204200568953, + "grad_norm": 85.7566562664436, + "learning_rate": 4.451779438976619e-06, + "loss": 0.466265869140625, + "step": 78460 + }, + { + "epoch": 0.6784636535784386, + "grad_norm": 5.829466004035663, + "learning_rate": 4.451601146967706e-06, + "loss": 0.325927734375, + "step": 78465 + }, + { + "epoch": 0.6785068870999819, + "grad_norm": 44.73082006531527, + "learning_rate": 4.451422848264137e-06, + "loss": 0.3505523681640625, + "step": 78470 + }, + { + "epoch": 0.6785501206215251, + "grad_norm": 25.34068360475861, + "learning_rate": 4.451244542866734e-06, + "loss": 0.21663360595703124, + "step": 78475 + }, + { + "epoch": 0.6785933541430683, + "grad_norm": 25.357366023884392, + "learning_rate": 4.451066230776319e-06, + "loss": 0.36607666015625, + "step": 78480 + }, + { + "epoch": 0.6786365876646117, + "grad_norm": 1.1979641592755494, + "learning_rate": 4.450887911993715e-06, + "loss": 0.27502899169921874, + "step": 78485 + }, + { + "epoch": 0.6786798211861549, + "grad_norm": 5.969063015050704, + "learning_rate": 4.450709586519744e-06, + "loss": 0.03638916015625, + "step": 78490 + }, + { + "epoch": 0.6787230547076981, + "grad_norm": 4.43857731238891, + "learning_rate": 4.45053125435523e-06, + "loss": 0.20280418395996094, + "step": 78495 + }, + { + "epoch": 0.6787662882292415, + "grad_norm": 27.8547919604175, + "learning_rate": 4.450352915500992e-06, + "loss": 0.21817169189453126, + "step": 78500 + }, + { + "epoch": 0.6788095217507847, + "grad_norm": 1.2377763775546273, + "learning_rate": 4.450174569957856e-06, + "loss": 0.1527984619140625, + "step": 78505 + }, + { + "epoch": 0.6788527552723279, + "grad_norm": 1.6764545659858279, + "learning_rate": 4.4499962177266415e-06, + "loss": 0.13065872192382813, + "step": 78510 + }, + { + "epoch": 0.6788959887938713, + "grad_norm": 5.177884215233893, + "learning_rate": 4.449817858808174e-06, + "loss": 0.077325439453125, + "step": 78515 + }, + { + "epoch": 0.6789392223154145, + "grad_norm": 29.218736457190726, + "learning_rate": 4.449639493203274e-06, + "loss": 0.25194549560546875, + "step": 78520 + }, + { + "epoch": 0.6789824558369577, + "grad_norm": 5.092289537744149, + "learning_rate": 4.449461120912765e-06, + "loss": 0.0535064697265625, + "step": 78525 + }, + { + "epoch": 0.679025689358501, + "grad_norm": 5.867240319215254, + "learning_rate": 4.44928274193747e-06, + "loss": 0.1818115234375, + "step": 78530 + }, + { + "epoch": 0.6790689228800443, + "grad_norm": 22.33188581048321, + "learning_rate": 4.449104356278209e-06, + "loss": 0.19774017333984376, + "step": 78535 + }, + { + "epoch": 0.6791121564015875, + "grad_norm": 1.0141975859530277, + "learning_rate": 4.448925963935809e-06, + "loss": 0.023789215087890624, + "step": 78540 + }, + { + "epoch": 0.6791553899231308, + "grad_norm": 13.347026765670051, + "learning_rate": 4.4487475649110886e-06, + "loss": 0.2691337585449219, + "step": 78545 + }, + { + "epoch": 0.6791986234446741, + "grad_norm": 14.457526423722436, + "learning_rate": 4.448569159204874e-06, + "loss": 0.19566192626953124, + "step": 78550 + }, + { + "epoch": 0.6792418569662173, + "grad_norm": 0.28810556909104984, + "learning_rate": 4.448390746817986e-06, + "loss": 0.04397659301757813, + "step": 78555 + }, + { + "epoch": 0.6792850904877606, + "grad_norm": 1.7102969632811786, + "learning_rate": 4.4482123277512475e-06, + "loss": 0.06896705627441406, + "step": 78560 + }, + { + "epoch": 0.6793283240093039, + "grad_norm": 110.03061856062372, + "learning_rate": 4.448033902005482e-06, + "loss": 0.115936279296875, + "step": 78565 + }, + { + "epoch": 0.6793715575308471, + "grad_norm": 6.4273576228045455, + "learning_rate": 4.447855469581512e-06, + "loss": 0.06407623291015625, + "step": 78570 + }, + { + "epoch": 0.6794147910523903, + "grad_norm": 1.7987979161884817, + "learning_rate": 4.447677030480161e-06, + "loss": 0.18688507080078126, + "step": 78575 + }, + { + "epoch": 0.6794580245739337, + "grad_norm": 0.5054887617732691, + "learning_rate": 4.447498584702251e-06, + "loss": 0.13720970153808593, + "step": 78580 + }, + { + "epoch": 0.6795012580954769, + "grad_norm": 2.183543664165433, + "learning_rate": 4.447320132248605e-06, + "loss": 0.2565673828125, + "step": 78585 + }, + { + "epoch": 0.6795444916170201, + "grad_norm": 15.890997499421651, + "learning_rate": 4.447141673120047e-06, + "loss": 0.21960983276367188, + "step": 78590 + }, + { + "epoch": 0.6795877251385635, + "grad_norm": 1.0830378602567825, + "learning_rate": 4.4469632073173985e-06, + "loss": 0.2587747573852539, + "step": 78595 + }, + { + "epoch": 0.6796309586601067, + "grad_norm": 15.040813028822896, + "learning_rate": 4.446784734841485e-06, + "loss": 0.13619728088378907, + "step": 78600 + }, + { + "epoch": 0.6796741921816499, + "grad_norm": 22.881811982399736, + "learning_rate": 4.446606255693128e-06, + "loss": 0.38353691101074217, + "step": 78605 + }, + { + "epoch": 0.6797174257031933, + "grad_norm": 5.7603239917441025, + "learning_rate": 4.44642776987315e-06, + "loss": 0.126068115234375, + "step": 78610 + }, + { + "epoch": 0.6797606592247365, + "grad_norm": 4.796285245904553, + "learning_rate": 4.446249277382375e-06, + "loss": 0.3597869873046875, + "step": 78615 + }, + { + "epoch": 0.6798038927462797, + "grad_norm": 4.578184993494697, + "learning_rate": 4.446070778221626e-06, + "loss": 0.3143653869628906, + "step": 78620 + }, + { + "epoch": 0.6798471262678231, + "grad_norm": 2.0932185505860526, + "learning_rate": 4.4458922723917265e-06, + "loss": 0.10408935546875, + "step": 78625 + }, + { + "epoch": 0.6798903597893663, + "grad_norm": 1.4670811805594632, + "learning_rate": 4.4457137598935004e-06, + "loss": 0.6646274566650391, + "step": 78630 + }, + { + "epoch": 0.6799335933109095, + "grad_norm": 8.983281446498633, + "learning_rate": 4.445535240727769e-06, + "loss": 0.19523162841796876, + "step": 78635 + }, + { + "epoch": 0.6799768268324529, + "grad_norm": 0.09706955953379909, + "learning_rate": 4.445356714895357e-06, + "loss": 0.1046783447265625, + "step": 78640 + }, + { + "epoch": 0.6800200603539961, + "grad_norm": 43.05770049269207, + "learning_rate": 4.445178182397089e-06, + "loss": 0.1761199951171875, + "step": 78645 + }, + { + "epoch": 0.6800632938755393, + "grad_norm": 32.30856812927354, + "learning_rate": 4.444999643233786e-06, + "loss": 0.2051666259765625, + "step": 78650 + }, + { + "epoch": 0.6801065273970825, + "grad_norm": 12.284112781425724, + "learning_rate": 4.4448210974062725e-06, + "loss": 0.12267532348632812, + "step": 78655 + }, + { + "epoch": 0.6801497609186259, + "grad_norm": 8.491664257885107, + "learning_rate": 4.444642544915371e-06, + "loss": 0.16271209716796875, + "step": 78660 + }, + { + "epoch": 0.6801929944401691, + "grad_norm": 20.649291215229553, + "learning_rate": 4.444463985761906e-06, + "loss": 0.5964958190917968, + "step": 78665 + }, + { + "epoch": 0.6802362279617123, + "grad_norm": 4.791432899069559, + "learning_rate": 4.444285419946701e-06, + "loss": 0.22628097534179686, + "step": 78670 + }, + { + "epoch": 0.6802794614832557, + "grad_norm": 2.201442367449401, + "learning_rate": 4.444106847470579e-06, + "loss": 0.3388458251953125, + "step": 78675 + }, + { + "epoch": 0.6803226950047989, + "grad_norm": 7.7795056457489125, + "learning_rate": 4.443928268334363e-06, + "loss": 0.111602783203125, + "step": 78680 + }, + { + "epoch": 0.6803659285263421, + "grad_norm": 1.827047652074952, + "learning_rate": 4.443749682538879e-06, + "loss": 0.09647712707519532, + "step": 78685 + }, + { + "epoch": 0.6804091620478855, + "grad_norm": 14.722600857185066, + "learning_rate": 4.443571090084948e-06, + "loss": 0.32739105224609377, + "step": 78690 + }, + { + "epoch": 0.6804523955694287, + "grad_norm": 13.276419204522275, + "learning_rate": 4.443392490973396e-06, + "loss": 0.06354827880859375, + "step": 78695 + }, + { + "epoch": 0.6804956290909719, + "grad_norm": 4.958259539196937, + "learning_rate": 4.443213885205045e-06, + "loss": 0.27458038330078127, + "step": 78700 + }, + { + "epoch": 0.6805388626125153, + "grad_norm": 3.77467318908604, + "learning_rate": 4.443035272780718e-06, + "loss": 0.19210662841796874, + "step": 78705 + }, + { + "epoch": 0.6805820961340585, + "grad_norm": 13.611115242904098, + "learning_rate": 4.44285665370124e-06, + "loss": 0.241455078125, + "step": 78710 + }, + { + "epoch": 0.6806253296556017, + "grad_norm": 51.29104275906365, + "learning_rate": 4.442678027967436e-06, + "loss": 0.21119070053100586, + "step": 78715 + }, + { + "epoch": 0.6806685631771451, + "grad_norm": 0.12684065724197477, + "learning_rate": 4.442499395580126e-06, + "loss": 0.07528915405273437, + "step": 78720 + }, + { + "epoch": 0.6807117966986883, + "grad_norm": 18.15271057971316, + "learning_rate": 4.442320756540139e-06, + "loss": 0.22972869873046875, + "step": 78725 + }, + { + "epoch": 0.6807550302202315, + "grad_norm": 0.48860510040918803, + "learning_rate": 4.442142110848295e-06, + "loss": 0.163116455078125, + "step": 78730 + }, + { + "epoch": 0.6807982637417748, + "grad_norm": 9.320212490826462, + "learning_rate": 4.441963458505418e-06, + "loss": 0.14264144897460937, + "step": 78735 + }, + { + "epoch": 0.6808414972633181, + "grad_norm": 0.32811788091910055, + "learning_rate": 4.441784799512334e-06, + "loss": 0.08936386108398438, + "step": 78740 + }, + { + "epoch": 0.6808847307848613, + "grad_norm": 0.33886438209518904, + "learning_rate": 4.441606133869866e-06, + "loss": 0.052433013916015625, + "step": 78745 + }, + { + "epoch": 0.6809279643064046, + "grad_norm": 0.24927019644830328, + "learning_rate": 4.441427461578838e-06, + "loss": 0.0274749755859375, + "step": 78750 + }, + { + "epoch": 0.6809711978279479, + "grad_norm": 18.754738624863734, + "learning_rate": 4.441248782640072e-06, + "loss": 0.23900985717773438, + "step": 78755 + }, + { + "epoch": 0.6810144313494911, + "grad_norm": 8.77086283332812, + "learning_rate": 4.441070097054396e-06, + "loss": 0.04049072265625, + "step": 78760 + }, + { + "epoch": 0.6810576648710344, + "grad_norm": 19.95600191125792, + "learning_rate": 4.4408914048226315e-06, + "loss": 0.2192138671875, + "step": 78765 + }, + { + "epoch": 0.6811008983925777, + "grad_norm": 0.7516771015934167, + "learning_rate": 4.440712705945603e-06, + "loss": 0.15726318359375, + "step": 78770 + }, + { + "epoch": 0.6811441319141209, + "grad_norm": 5.632091812231879, + "learning_rate": 4.4405340004241346e-06, + "loss": 0.26759185791015627, + "step": 78775 + }, + { + "epoch": 0.6811873654356642, + "grad_norm": 11.644449149213221, + "learning_rate": 4.440355288259052e-06, + "loss": 0.12726058959960937, + "step": 78780 + }, + { + "epoch": 0.6812305989572075, + "grad_norm": 7.0432453006458555, + "learning_rate": 4.440176569451177e-06, + "loss": 0.3050323486328125, + "step": 78785 + }, + { + "epoch": 0.6812738324787507, + "grad_norm": 5.48086583483673, + "learning_rate": 4.439997844001334e-06, + "loss": 0.3036712646484375, + "step": 78790 + }, + { + "epoch": 0.681317066000294, + "grad_norm": 15.88218540468881, + "learning_rate": 4.439819111910349e-06, + "loss": 0.19593353271484376, + "step": 78795 + }, + { + "epoch": 0.6813602995218373, + "grad_norm": 4.4362158108452885, + "learning_rate": 4.439640373179046e-06, + "loss": 0.07021026611328125, + "step": 78800 + }, + { + "epoch": 0.6814035330433805, + "grad_norm": 6.444317327095255, + "learning_rate": 4.439461627808248e-06, + "loss": 0.051275634765625, + "step": 78805 + }, + { + "epoch": 0.6814467665649238, + "grad_norm": 27.606974681551968, + "learning_rate": 4.4392828757987815e-06, + "loss": 0.14270248413085937, + "step": 78810 + }, + { + "epoch": 0.6814900000864671, + "grad_norm": 28.543909178724462, + "learning_rate": 4.439104117151469e-06, + "loss": 0.15251731872558594, + "step": 78815 + }, + { + "epoch": 0.6815332336080103, + "grad_norm": 32.924511737929244, + "learning_rate": 4.438925351867134e-06, + "loss": 0.2517242431640625, + "step": 78820 + }, + { + "epoch": 0.6815764671295536, + "grad_norm": 0.3443682906214535, + "learning_rate": 4.438746579946605e-06, + "loss": 0.2039875030517578, + "step": 78825 + }, + { + "epoch": 0.6816197006510968, + "grad_norm": 1.9118676501630556, + "learning_rate": 4.438567801390702e-06, + "loss": 0.1908447265625, + "step": 78830 + }, + { + "epoch": 0.6816629341726401, + "grad_norm": 21.453306778631656, + "learning_rate": 4.438389016200252e-06, + "loss": 0.1964508056640625, + "step": 78835 + }, + { + "epoch": 0.6817061676941834, + "grad_norm": 4.930825027743918, + "learning_rate": 4.438210224376078e-06, + "loss": 0.096771240234375, + "step": 78840 + }, + { + "epoch": 0.6817494012157266, + "grad_norm": 1.59687241034881, + "learning_rate": 4.4380314259190075e-06, + "loss": 0.2601280212402344, + "step": 78845 + }, + { + "epoch": 0.6817926347372699, + "grad_norm": 4.678079370887026, + "learning_rate": 4.437852620829861e-06, + "loss": 0.07843475341796875, + "step": 78850 + }, + { + "epoch": 0.6818358682588131, + "grad_norm": 1.4975422686387807, + "learning_rate": 4.437673809109466e-06, + "loss": 0.10477218627929688, + "step": 78855 + }, + { + "epoch": 0.6818791017803564, + "grad_norm": 13.673870986949135, + "learning_rate": 4.437494990758648e-06, + "loss": 0.09892196655273437, + "step": 78860 + }, + { + "epoch": 0.6819223353018997, + "grad_norm": 1.5229750309054269, + "learning_rate": 4.437316165778228e-06, + "loss": 0.0204315185546875, + "step": 78865 + }, + { + "epoch": 0.681965568823443, + "grad_norm": 0.045854624512387074, + "learning_rate": 4.437137334169034e-06, + "loss": 0.10958080291748047, + "step": 78870 + }, + { + "epoch": 0.6820088023449862, + "grad_norm": 2.548276693228458, + "learning_rate": 4.4369584959318895e-06, + "loss": 0.1794292449951172, + "step": 78875 + }, + { + "epoch": 0.6820520358665295, + "grad_norm": 24.736384841100403, + "learning_rate": 4.436779651067618e-06, + "loss": 0.15267181396484375, + "step": 78880 + }, + { + "epoch": 0.6820952693880727, + "grad_norm": 10.84419507780117, + "learning_rate": 4.436600799577048e-06, + "loss": 0.40291671752929686, + "step": 78885 + }, + { + "epoch": 0.682138502909616, + "grad_norm": 13.27270071476014, + "learning_rate": 4.436421941461001e-06, + "loss": 0.23011283874511718, + "step": 78890 + }, + { + "epoch": 0.6821817364311593, + "grad_norm": 11.513093569598206, + "learning_rate": 4.436243076720302e-06, + "loss": 0.08929595947265626, + "step": 78895 + }, + { + "epoch": 0.6822249699527025, + "grad_norm": 5.910461575055883, + "learning_rate": 4.436064205355779e-06, + "loss": 0.13745040893554689, + "step": 78900 + }, + { + "epoch": 0.6822682034742458, + "grad_norm": 2.2405645925128557, + "learning_rate": 4.435885327368253e-06, + "loss": 0.37235107421875, + "step": 78905 + }, + { + "epoch": 0.682311436995789, + "grad_norm": 8.160203510057048, + "learning_rate": 4.435706442758551e-06, + "loss": 0.3814178466796875, + "step": 78910 + }, + { + "epoch": 0.6823546705173323, + "grad_norm": 6.964864525535168, + "learning_rate": 4.435527551527498e-06, + "loss": 0.108026123046875, + "step": 78915 + }, + { + "epoch": 0.6823979040388756, + "grad_norm": 30.407316507404712, + "learning_rate": 4.435348653675919e-06, + "loss": 0.477630615234375, + "step": 78920 + }, + { + "epoch": 0.6824411375604188, + "grad_norm": 2.1426386035217275, + "learning_rate": 4.435169749204639e-06, + "loss": 0.03076629638671875, + "step": 78925 + }, + { + "epoch": 0.6824843710819621, + "grad_norm": 18.377047779212994, + "learning_rate": 4.434990838114482e-06, + "loss": 0.099639892578125, + "step": 78930 + }, + { + "epoch": 0.6825276046035054, + "grad_norm": 34.49382248787865, + "learning_rate": 4.434811920406275e-06, + "loss": 0.13539810180664064, + "step": 78935 + }, + { + "epoch": 0.6825708381250486, + "grad_norm": 4.332527341557594, + "learning_rate": 4.434632996080841e-06, + "loss": 0.0597412109375, + "step": 78940 + }, + { + "epoch": 0.6826140716465919, + "grad_norm": 13.829821647152594, + "learning_rate": 4.434454065139008e-06, + "loss": 0.27607574462890627, + "step": 78945 + }, + { + "epoch": 0.6826573051681352, + "grad_norm": 8.114251744809193, + "learning_rate": 4.434275127581598e-06, + "loss": 0.098699951171875, + "step": 78950 + }, + { + "epoch": 0.6827005386896784, + "grad_norm": 0.8282924285762633, + "learning_rate": 4.4340961834094395e-06, + "loss": 0.2760955810546875, + "step": 78955 + }, + { + "epoch": 0.6827437722112217, + "grad_norm": 31.952579688977043, + "learning_rate": 4.433917232623354e-06, + "loss": 0.20951385498046876, + "step": 78960 + }, + { + "epoch": 0.682787005732765, + "grad_norm": 4.195639860381694, + "learning_rate": 4.433738275224171e-06, + "loss": 0.10490570068359376, + "step": 78965 + }, + { + "epoch": 0.6828302392543082, + "grad_norm": 10.513870917565631, + "learning_rate": 4.4335593112127125e-06, + "loss": 0.176971435546875, + "step": 78970 + }, + { + "epoch": 0.6828734727758515, + "grad_norm": 9.980603852076538, + "learning_rate": 4.433380340589806e-06, + "loss": 0.2963233947753906, + "step": 78975 + }, + { + "epoch": 0.6829167062973948, + "grad_norm": 20.498835047889926, + "learning_rate": 4.433201363356276e-06, + "loss": 0.275921630859375, + "step": 78980 + }, + { + "epoch": 0.682959939818938, + "grad_norm": 20.685582345479116, + "learning_rate": 4.433022379512946e-06, + "loss": 0.23342742919921874, + "step": 78985 + }, + { + "epoch": 0.6830031733404813, + "grad_norm": 24.926900368942846, + "learning_rate": 4.432843389060645e-06, + "loss": 0.1854888916015625, + "step": 78990 + }, + { + "epoch": 0.6830464068620246, + "grad_norm": 10.902378849769802, + "learning_rate": 4.432664392000198e-06, + "loss": 0.18425369262695312, + "step": 78995 + }, + { + "epoch": 0.6830896403835678, + "grad_norm": 11.462894706731305, + "learning_rate": 4.4324853883324285e-06, + "loss": 0.15496559143066407, + "step": 79000 + }, + { + "epoch": 0.683132873905111, + "grad_norm": 36.39508868034355, + "learning_rate": 4.432306378058162e-06, + "loss": 0.0822845458984375, + "step": 79005 + }, + { + "epoch": 0.6831761074266544, + "grad_norm": 2.1050862411598232, + "learning_rate": 4.432127361178226e-06, + "loss": 0.32235107421875, + "step": 79010 + }, + { + "epoch": 0.6832193409481976, + "grad_norm": 9.796462515214634, + "learning_rate": 4.431948337693444e-06, + "loss": 0.20914955139160157, + "step": 79015 + }, + { + "epoch": 0.6832625744697408, + "grad_norm": 17.934546656161775, + "learning_rate": 4.4317693076046445e-06, + "loss": 0.14595746994018555, + "step": 79020 + }, + { + "epoch": 0.6833058079912842, + "grad_norm": 13.280992852936574, + "learning_rate": 4.431590270912651e-06, + "loss": 0.15506973266601562, + "step": 79025 + }, + { + "epoch": 0.6833490415128274, + "grad_norm": 4.236591057012159, + "learning_rate": 4.43141122761829e-06, + "loss": 0.06438751220703125, + "step": 79030 + }, + { + "epoch": 0.6833922750343706, + "grad_norm": 1.6915307569712261, + "learning_rate": 4.4312321777223854e-06, + "loss": 0.342059326171875, + "step": 79035 + }, + { + "epoch": 0.683435508555914, + "grad_norm": 15.489855190855506, + "learning_rate": 4.431053121225765e-06, + "loss": 0.11027755737304687, + "step": 79040 + }, + { + "epoch": 0.6834787420774572, + "grad_norm": 21.834223240717574, + "learning_rate": 4.430874058129254e-06, + "loss": 0.19644775390625, + "step": 79045 + }, + { + "epoch": 0.6835219755990004, + "grad_norm": 18.2613284871636, + "learning_rate": 4.430694988433679e-06, + "loss": 0.17708740234375, + "step": 79050 + }, + { + "epoch": 0.6835652091205437, + "grad_norm": 33.78169094930276, + "learning_rate": 4.430515912139865e-06, + "loss": 0.10205612182617188, + "step": 79055 + }, + { + "epoch": 0.683608442642087, + "grad_norm": 3.3491390077993883, + "learning_rate": 4.430336829248638e-06, + "loss": 0.1400379180908203, + "step": 79060 + }, + { + "epoch": 0.6836516761636302, + "grad_norm": 0.9509092697039457, + "learning_rate": 4.430157739760824e-06, + "loss": 0.09246139526367188, + "step": 79065 + }, + { + "epoch": 0.6836949096851735, + "grad_norm": 0.45349561068043415, + "learning_rate": 4.429978643677248e-06, + "loss": 0.111456298828125, + "step": 79070 + }, + { + "epoch": 0.6837381432067168, + "grad_norm": 11.803397695751212, + "learning_rate": 4.429799540998737e-06, + "loss": 0.07029037475585938, + "step": 79075 + }, + { + "epoch": 0.68378137672826, + "grad_norm": 4.270958782097284, + "learning_rate": 4.429620431726117e-06, + "loss": 0.06103515625, + "step": 79080 + }, + { + "epoch": 0.6838246102498032, + "grad_norm": 2.327347910069881, + "learning_rate": 4.429441315860216e-06, + "loss": 0.087933349609375, + "step": 79085 + }, + { + "epoch": 0.6838678437713466, + "grad_norm": 6.715175493099556, + "learning_rate": 4.429262193401855e-06, + "loss": 0.3208984375, + "step": 79090 + }, + { + "epoch": 0.6839110772928898, + "grad_norm": 22.270036283027018, + "learning_rate": 4.429083064351864e-06, + "loss": 0.098760986328125, + "step": 79095 + }, + { + "epoch": 0.683954310814433, + "grad_norm": 2.1174911735632453, + "learning_rate": 4.428903928711069e-06, + "loss": 0.04910888671875, + "step": 79100 + }, + { + "epoch": 0.6839975443359764, + "grad_norm": 0.2784757164432963, + "learning_rate": 4.428724786480294e-06, + "loss": 0.062259674072265625, + "step": 79105 + }, + { + "epoch": 0.6840407778575196, + "grad_norm": 37.15876409885193, + "learning_rate": 4.4285456376603675e-06, + "loss": 0.3153778076171875, + "step": 79110 + }, + { + "epoch": 0.6840840113790628, + "grad_norm": 1.5729382098380922, + "learning_rate": 4.428366482252115e-06, + "loss": 0.05020751953125, + "step": 79115 + }, + { + "epoch": 0.6841272449006062, + "grad_norm": 1.000591484950851, + "learning_rate": 4.428187320256361e-06, + "loss": 0.03612823486328125, + "step": 79120 + }, + { + "epoch": 0.6841704784221494, + "grad_norm": 34.544874621254365, + "learning_rate": 4.428008151673935e-06, + "loss": 0.25997314453125, + "step": 79125 + }, + { + "epoch": 0.6842137119436926, + "grad_norm": 9.663548585386962, + "learning_rate": 4.42782897650566e-06, + "loss": 0.32767333984375, + "step": 79130 + }, + { + "epoch": 0.684256945465236, + "grad_norm": 1.315694822426019, + "learning_rate": 4.427649794752366e-06, + "loss": 0.20683135986328124, + "step": 79135 + }, + { + "epoch": 0.6843001789867792, + "grad_norm": 13.439353514998645, + "learning_rate": 4.427470606414876e-06, + "loss": 0.11748046875, + "step": 79140 + }, + { + "epoch": 0.6843434125083224, + "grad_norm": 6.980136742354738, + "learning_rate": 4.427291411494018e-06, + "loss": 0.2186920166015625, + "step": 79145 + }, + { + "epoch": 0.6843866460298658, + "grad_norm": 1.4201290023688158, + "learning_rate": 4.427112209990618e-06, + "loss": 0.047582149505615234, + "step": 79150 + }, + { + "epoch": 0.684429879551409, + "grad_norm": 28.274931356258588, + "learning_rate": 4.426933001905502e-06, + "loss": 0.3024150848388672, + "step": 79155 + }, + { + "epoch": 0.6844731130729522, + "grad_norm": 6.17007431737914, + "learning_rate": 4.426753787239498e-06, + "loss": 0.05065841674804687, + "step": 79160 + }, + { + "epoch": 0.6845163465944956, + "grad_norm": 0.10825957239679894, + "learning_rate": 4.42657456599343e-06, + "loss": 0.19214324951171874, + "step": 79165 + }, + { + "epoch": 0.6845595801160388, + "grad_norm": 0.9503145981147251, + "learning_rate": 4.426395338168128e-06, + "loss": 0.0855560302734375, + "step": 79170 + }, + { + "epoch": 0.684602813637582, + "grad_norm": 2.6979614349458894, + "learning_rate": 4.4262161037644165e-06, + "loss": 0.052386474609375, + "step": 79175 + }, + { + "epoch": 0.6846460471591252, + "grad_norm": 4.427939968839042, + "learning_rate": 4.426036862783121e-06, + "loss": 0.2167144775390625, + "step": 79180 + }, + { + "epoch": 0.6846892806806686, + "grad_norm": 44.386410525369534, + "learning_rate": 4.4258576152250706e-06, + "loss": 0.313043212890625, + "step": 79185 + }, + { + "epoch": 0.6847325142022118, + "grad_norm": 8.552365691238, + "learning_rate": 4.425678361091091e-06, + "loss": 0.0508331298828125, + "step": 79190 + }, + { + "epoch": 0.684775747723755, + "grad_norm": 4.334297972010298, + "learning_rate": 4.425499100382009e-06, + "loss": 0.10777702331542968, + "step": 79195 + }, + { + "epoch": 0.6848189812452984, + "grad_norm": 0.43286121974009556, + "learning_rate": 4.42531983309865e-06, + "loss": 0.24683303833007814, + "step": 79200 + }, + { + "epoch": 0.6848622147668416, + "grad_norm": 23.04997956956976, + "learning_rate": 4.425140559241843e-06, + "loss": 0.178350830078125, + "step": 79205 + }, + { + "epoch": 0.6849054482883848, + "grad_norm": 0.5611020341573977, + "learning_rate": 4.424961278812412e-06, + "loss": 0.0174346923828125, + "step": 79210 + }, + { + "epoch": 0.6849486818099282, + "grad_norm": 2.10774302035751, + "learning_rate": 4.424781991811187e-06, + "loss": 0.0512115478515625, + "step": 79215 + }, + { + "epoch": 0.6849919153314714, + "grad_norm": 3.0216324634210676, + "learning_rate": 4.424602698238992e-06, + "loss": 0.04781036376953125, + "step": 79220 + }, + { + "epoch": 0.6850351488530146, + "grad_norm": 13.81994709382783, + "learning_rate": 4.424423398096657e-06, + "loss": 0.21814918518066406, + "step": 79225 + }, + { + "epoch": 0.685078382374558, + "grad_norm": 7.390465155200327, + "learning_rate": 4.424244091385006e-06, + "loss": 0.5071884155273437, + "step": 79230 + }, + { + "epoch": 0.6851216158961012, + "grad_norm": 21.683455812324052, + "learning_rate": 4.424064778104868e-06, + "loss": 0.21757354736328124, + "step": 79235 + }, + { + "epoch": 0.6851648494176444, + "grad_norm": 11.927409347329949, + "learning_rate": 4.423885458257069e-06, + "loss": 0.07994918823242188, + "step": 79240 + }, + { + "epoch": 0.6852080829391878, + "grad_norm": 0.6007170365431139, + "learning_rate": 4.423706131842435e-06, + "loss": 0.19586029052734374, + "step": 79245 + }, + { + "epoch": 0.685251316460731, + "grad_norm": 20.372387672942743, + "learning_rate": 4.423526798861795e-06, + "loss": 0.14241485595703124, + "step": 79250 + }, + { + "epoch": 0.6852945499822742, + "grad_norm": 5.796007570310613, + "learning_rate": 4.4233474593159754e-06, + "loss": 0.09360733032226562, + "step": 79255 + }, + { + "epoch": 0.6853377835038175, + "grad_norm": 0.5410467045548384, + "learning_rate": 4.423168113205802e-06, + "loss": 0.18240966796875, + "step": 79260 + }, + { + "epoch": 0.6853810170253608, + "grad_norm": 1.7534599657496186, + "learning_rate": 4.422988760532103e-06, + "loss": 0.053095245361328126, + "step": 79265 + }, + { + "epoch": 0.685424250546904, + "grad_norm": 42.94411992649671, + "learning_rate": 4.422809401295707e-06, + "loss": 0.613592529296875, + "step": 79270 + }, + { + "epoch": 0.6854674840684473, + "grad_norm": 42.14514665570287, + "learning_rate": 4.422630035497439e-06, + "loss": 0.112896728515625, + "step": 79275 + }, + { + "epoch": 0.6855107175899906, + "grad_norm": 10.231440431543719, + "learning_rate": 4.422450663138126e-06, + "loss": 0.09791946411132812, + "step": 79280 + }, + { + "epoch": 0.6855539511115338, + "grad_norm": 20.070720938642662, + "learning_rate": 4.422271284218598e-06, + "loss": 0.11038970947265625, + "step": 79285 + }, + { + "epoch": 0.6855971846330771, + "grad_norm": 4.683741799014605, + "learning_rate": 4.4220918987396804e-06, + "loss": 0.07178878784179688, + "step": 79290 + }, + { + "epoch": 0.6856404181546204, + "grad_norm": 7.4381829301824425, + "learning_rate": 4.421912506702199e-06, + "loss": 0.12197437286376953, + "step": 79295 + }, + { + "epoch": 0.6856836516761636, + "grad_norm": 1.4535214013668207, + "learning_rate": 4.421733108106983e-06, + "loss": 0.27331085205078126, + "step": 79300 + }, + { + "epoch": 0.6857268851977069, + "grad_norm": 37.02914368935304, + "learning_rate": 4.42155370295486e-06, + "loss": 0.34212684631347656, + "step": 79305 + }, + { + "epoch": 0.6857701187192502, + "grad_norm": 3.1191305520550574, + "learning_rate": 4.421374291246658e-06, + "loss": 0.18989486694335939, + "step": 79310 + }, + { + "epoch": 0.6858133522407934, + "grad_norm": 4.848642936115992, + "learning_rate": 4.421194872983202e-06, + "loss": 0.0917510986328125, + "step": 79315 + }, + { + "epoch": 0.6858565857623367, + "grad_norm": 25.531701701841087, + "learning_rate": 4.421015448165322e-06, + "loss": 0.15911102294921875, + "step": 79320 + }, + { + "epoch": 0.68589981928388, + "grad_norm": 4.256785012093481, + "learning_rate": 4.420836016793844e-06, + "loss": 0.3640247344970703, + "step": 79325 + }, + { + "epoch": 0.6859430528054232, + "grad_norm": 39.789259413660936, + "learning_rate": 4.420656578869594e-06, + "loss": 0.27682952880859374, + "step": 79330 + }, + { + "epoch": 0.6859862863269665, + "grad_norm": 8.385330504563894, + "learning_rate": 4.420477134393404e-06, + "loss": 0.02697410583496094, + "step": 79335 + }, + { + "epoch": 0.6860295198485098, + "grad_norm": 8.943127715467632, + "learning_rate": 4.420297683366097e-06, + "loss": 0.037711715698242186, + "step": 79340 + }, + { + "epoch": 0.686072753370053, + "grad_norm": 0.3515722083936109, + "learning_rate": 4.420118225788504e-06, + "loss": 0.10389556884765624, + "step": 79345 + }, + { + "epoch": 0.6861159868915963, + "grad_norm": 4.240071853405783, + "learning_rate": 4.419938761661451e-06, + "loss": 0.2718971252441406, + "step": 79350 + }, + { + "epoch": 0.6861592204131395, + "grad_norm": 3.837827982591462, + "learning_rate": 4.419759290985765e-06, + "loss": 0.319879150390625, + "step": 79355 + }, + { + "epoch": 0.6862024539346828, + "grad_norm": 16.06773403487195, + "learning_rate": 4.419579813762275e-06, + "loss": 0.3690065383911133, + "step": 79360 + }, + { + "epoch": 0.686245687456226, + "grad_norm": 16.50179955263367, + "learning_rate": 4.419400329991809e-06, + "loss": 0.08072509765625, + "step": 79365 + }, + { + "epoch": 0.6862889209777693, + "grad_norm": 19.985248384559135, + "learning_rate": 4.4192208396751935e-06, + "loss": 0.18672714233398438, + "step": 79370 + }, + { + "epoch": 0.6863321544993126, + "grad_norm": 9.497072731705405, + "learning_rate": 4.4190413428132565e-06, + "loss": 0.18584976196289063, + "step": 79375 + }, + { + "epoch": 0.6863753880208558, + "grad_norm": 7.72178944872862, + "learning_rate": 4.418861839406826e-06, + "loss": 0.1009613037109375, + "step": 79380 + }, + { + "epoch": 0.6864186215423991, + "grad_norm": 15.259409287471392, + "learning_rate": 4.4186823294567315e-06, + "loss": 0.09854812622070312, + "step": 79385 + }, + { + "epoch": 0.6864618550639424, + "grad_norm": 6.82507494734533, + "learning_rate": 4.418502812963799e-06, + "loss": 0.22947235107421876, + "step": 79390 + }, + { + "epoch": 0.6865050885854856, + "grad_norm": 1.319131871332491, + "learning_rate": 4.418323289928857e-06, + "loss": 0.098980712890625, + "step": 79395 + }, + { + "epoch": 0.6865483221070289, + "grad_norm": 13.079478257853113, + "learning_rate": 4.418143760352733e-06, + "loss": 0.159625244140625, + "step": 79400 + }, + { + "epoch": 0.6865915556285722, + "grad_norm": 9.042281325124517, + "learning_rate": 4.417964224236254e-06, + "loss": 0.1824951171875, + "step": 79405 + }, + { + "epoch": 0.6866347891501154, + "grad_norm": 4.853537995499292, + "learning_rate": 4.417784681580251e-06, + "loss": 0.10067901611328126, + "step": 79410 + }, + { + "epoch": 0.6866780226716587, + "grad_norm": 19.68375895589187, + "learning_rate": 4.417605132385549e-06, + "loss": 0.21686248779296874, + "step": 79415 + }, + { + "epoch": 0.686721256193202, + "grad_norm": 7.413102398602109, + "learning_rate": 4.417425576652978e-06, + "loss": 0.1746490478515625, + "step": 79420 + }, + { + "epoch": 0.6867644897147452, + "grad_norm": 1.6574070335952837, + "learning_rate": 4.417246014383366e-06, + "loss": 0.0448883056640625, + "step": 79425 + }, + { + "epoch": 0.6868077232362885, + "grad_norm": 1.753317689053433, + "learning_rate": 4.417066445577541e-06, + "loss": 0.0171875, + "step": 79430 + }, + { + "epoch": 0.6868509567578317, + "grad_norm": 2.512371701767158, + "learning_rate": 4.41688687023633e-06, + "loss": 0.05228729248046875, + "step": 79435 + }, + { + "epoch": 0.686894190279375, + "grad_norm": 16.41072326267099, + "learning_rate": 4.416707288360561e-06, + "loss": 0.131243896484375, + "step": 79440 + }, + { + "epoch": 0.6869374238009183, + "grad_norm": 23.202002270984146, + "learning_rate": 4.4165276999510645e-06, + "loss": 0.24652099609375, + "step": 79445 + }, + { + "epoch": 0.6869806573224615, + "grad_norm": 1.624808733115685, + "learning_rate": 4.4163481050086676e-06, + "loss": 0.1241119384765625, + "step": 79450 + }, + { + "epoch": 0.6870238908440048, + "grad_norm": 2.1118451529685154, + "learning_rate": 4.416168503534198e-06, + "loss": 0.11709747314453126, + "step": 79455 + }, + { + "epoch": 0.6870671243655481, + "grad_norm": 8.2670854526768, + "learning_rate": 4.415988895528484e-06, + "loss": 0.08079452514648437, + "step": 79460 + }, + { + "epoch": 0.6871103578870913, + "grad_norm": 4.97789962386485, + "learning_rate": 4.415809280992355e-06, + "loss": 0.05003814697265625, + "step": 79465 + }, + { + "epoch": 0.6871535914086346, + "grad_norm": 14.629041840959207, + "learning_rate": 4.415629659926638e-06, + "loss": 0.09443130493164062, + "step": 79470 + }, + { + "epoch": 0.6871968249301779, + "grad_norm": 18.803775477607278, + "learning_rate": 4.415450032332162e-06, + "loss": 0.33843460083007815, + "step": 79475 + }, + { + "epoch": 0.6872400584517211, + "grad_norm": 18.45462029241031, + "learning_rate": 4.415270398209756e-06, + "loss": 0.1960052490234375, + "step": 79480 + }, + { + "epoch": 0.6872832919732644, + "grad_norm": 27.949766037728956, + "learning_rate": 4.415090757560248e-06, + "loss": 0.18395042419433594, + "step": 79485 + }, + { + "epoch": 0.6873265254948077, + "grad_norm": 8.009735565298973, + "learning_rate": 4.414911110384466e-06, + "loss": 0.25223445892333984, + "step": 79490 + }, + { + "epoch": 0.6873697590163509, + "grad_norm": 3.446567557605477, + "learning_rate": 4.41473145668324e-06, + "loss": 0.38727340698242185, + "step": 79495 + }, + { + "epoch": 0.6874129925378942, + "grad_norm": 7.1406280082641205, + "learning_rate": 4.414551796457396e-06, + "loss": 0.2123554229736328, + "step": 79500 + }, + { + "epoch": 0.6874562260594375, + "grad_norm": 28.40000408911721, + "learning_rate": 4.414372129707765e-06, + "loss": 0.19184532165527343, + "step": 79505 + }, + { + "epoch": 0.6874994595809807, + "grad_norm": 2.5582172089194053, + "learning_rate": 4.414192456435176e-06, + "loss": 0.51763916015625, + "step": 79510 + }, + { + "epoch": 0.687542693102524, + "grad_norm": 3.8626249461868594, + "learning_rate": 4.414012776640454e-06, + "loss": 0.06270675659179688, + "step": 79515 + }, + { + "epoch": 0.6875859266240673, + "grad_norm": 1.8805864757762922, + "learning_rate": 4.413833090324432e-06, + "loss": 0.34761695861816405, + "step": 79520 + }, + { + "epoch": 0.6876291601456105, + "grad_norm": 1.60874501157796, + "learning_rate": 4.413653397487935e-06, + "loss": 0.03283767700195313, + "step": 79525 + }, + { + "epoch": 0.6876723936671537, + "grad_norm": 28.597013790841718, + "learning_rate": 4.413473698131794e-06, + "loss": 0.164398193359375, + "step": 79530 + }, + { + "epoch": 0.687715627188697, + "grad_norm": 7.870168551873204, + "learning_rate": 4.413293992256837e-06, + "loss": 0.1658843994140625, + "step": 79535 + }, + { + "epoch": 0.6877588607102403, + "grad_norm": 5.305374809314256, + "learning_rate": 4.413114279863895e-06, + "loss": 0.1702953338623047, + "step": 79540 + }, + { + "epoch": 0.6878020942317835, + "grad_norm": 41.03947336841876, + "learning_rate": 4.412934560953793e-06, + "loss": 0.21444625854492189, + "step": 79545 + }, + { + "epoch": 0.6878453277533269, + "grad_norm": 4.219110066367743, + "learning_rate": 4.412754835527361e-06, + "loss": 0.1852691650390625, + "step": 79550 + }, + { + "epoch": 0.6878885612748701, + "grad_norm": 2.5470206703824316, + "learning_rate": 4.412575103585429e-06, + "loss": 0.11379165649414062, + "step": 79555 + }, + { + "epoch": 0.6879317947964133, + "grad_norm": 0.1679056589491123, + "learning_rate": 4.412395365128826e-06, + "loss": 0.09196929931640625, + "step": 79560 + }, + { + "epoch": 0.6879750283179566, + "grad_norm": 2.564437567768462, + "learning_rate": 4.41221562015838e-06, + "loss": 0.05916748046875, + "step": 79565 + }, + { + "epoch": 0.6880182618394999, + "grad_norm": 18.243559460759595, + "learning_rate": 4.412035868674921e-06, + "loss": 0.267529296875, + "step": 79570 + }, + { + "epoch": 0.6880614953610431, + "grad_norm": 0.4333793155631662, + "learning_rate": 4.4118561106792775e-06, + "loss": 0.061734771728515624, + "step": 79575 + }, + { + "epoch": 0.6881047288825864, + "grad_norm": 26.90261271143253, + "learning_rate": 4.411676346172276e-06, + "loss": 0.4134002685546875, + "step": 79580 + }, + { + "epoch": 0.6881479624041297, + "grad_norm": 14.837344236956119, + "learning_rate": 4.41149657515475e-06, + "loss": 0.42244720458984375, + "step": 79585 + }, + { + "epoch": 0.6881911959256729, + "grad_norm": 2.9603114321801187, + "learning_rate": 4.411316797627527e-06, + "loss": 0.0963623046875, + "step": 79590 + }, + { + "epoch": 0.6882344294472162, + "grad_norm": 4.618714563854575, + "learning_rate": 4.411137013591434e-06, + "loss": 0.1909881591796875, + "step": 79595 + }, + { + "epoch": 0.6882776629687595, + "grad_norm": 3.7230424034787117, + "learning_rate": 4.4109572230473035e-06, + "loss": 0.2575721740722656, + "step": 79600 + }, + { + "epoch": 0.6883208964903027, + "grad_norm": 3.245932856946656, + "learning_rate": 4.410777425995962e-06, + "loss": 0.14938201904296874, + "step": 79605 + }, + { + "epoch": 0.6883641300118459, + "grad_norm": 1.4284470268174392, + "learning_rate": 4.410597622438239e-06, + "loss": 0.1537628173828125, + "step": 79610 + }, + { + "epoch": 0.6884073635333893, + "grad_norm": 47.90267844911512, + "learning_rate": 4.410417812374965e-06, + "loss": 0.5453826904296875, + "step": 79615 + }, + { + "epoch": 0.6884505970549325, + "grad_norm": 4.0800911278486796, + "learning_rate": 4.41023799580697e-06, + "loss": 0.34625396728515623, + "step": 79620 + }, + { + "epoch": 0.6884938305764757, + "grad_norm": 17.17638313616081, + "learning_rate": 4.410058172735081e-06, + "loss": 0.11629486083984375, + "step": 79625 + }, + { + "epoch": 0.6885370640980191, + "grad_norm": 17.766658827612897, + "learning_rate": 4.409878343160128e-06, + "loss": 0.09478759765625, + "step": 79630 + }, + { + "epoch": 0.6885802976195623, + "grad_norm": 0.8513711751321341, + "learning_rate": 4.409698507082941e-06, + "loss": 0.16925430297851562, + "step": 79635 + }, + { + "epoch": 0.6886235311411055, + "grad_norm": 14.586810608908904, + "learning_rate": 4.409518664504349e-06, + "loss": 0.07858810424804688, + "step": 79640 + }, + { + "epoch": 0.6886667646626489, + "grad_norm": 1.6579036839473225, + "learning_rate": 4.409338815425181e-06, + "loss": 0.2336669921875, + "step": 79645 + }, + { + "epoch": 0.6887099981841921, + "grad_norm": 4.972372140539161, + "learning_rate": 4.409158959846266e-06, + "loss": 0.07065544128417969, + "step": 79650 + }, + { + "epoch": 0.6887532317057353, + "grad_norm": 1.067546790078104, + "learning_rate": 4.408979097768436e-06, + "loss": 0.28070602416992185, + "step": 79655 + }, + { + "epoch": 0.6887964652272787, + "grad_norm": 17.54993217849025, + "learning_rate": 4.408799229192519e-06, + "loss": 0.13788719177246095, + "step": 79660 + }, + { + "epoch": 0.6888396987488219, + "grad_norm": 0.24291884211404616, + "learning_rate": 4.408619354119344e-06, + "loss": 0.07696151733398438, + "step": 79665 + }, + { + "epoch": 0.6888829322703651, + "grad_norm": 11.066533683809698, + "learning_rate": 4.408439472549741e-06, + "loss": 0.130108642578125, + "step": 79670 + }, + { + "epoch": 0.6889261657919085, + "grad_norm": 5.944402846633997, + "learning_rate": 4.408259584484539e-06, + "loss": 0.1083526611328125, + "step": 79675 + }, + { + "epoch": 0.6889693993134517, + "grad_norm": 0.16177220353680052, + "learning_rate": 4.408079689924569e-06, + "loss": 0.04918365478515625, + "step": 79680 + }, + { + "epoch": 0.6890126328349949, + "grad_norm": 3.184702601276275, + "learning_rate": 4.40789978887066e-06, + "loss": 0.034050369262695314, + "step": 79685 + }, + { + "epoch": 0.6890558663565383, + "grad_norm": 8.669507747616017, + "learning_rate": 4.407719881323641e-06, + "loss": 0.054744720458984375, + "step": 79690 + }, + { + "epoch": 0.6890990998780815, + "grad_norm": 31.335602067807425, + "learning_rate": 4.407539967284343e-06, + "loss": 0.1148040771484375, + "step": 79695 + }, + { + "epoch": 0.6891423333996247, + "grad_norm": 37.24071299353892, + "learning_rate": 4.407360046753595e-06, + "loss": 0.2752593994140625, + "step": 79700 + }, + { + "epoch": 0.689185566921168, + "grad_norm": 1.4857485453189694, + "learning_rate": 4.4071801197322264e-06, + "loss": 0.5950408935546875, + "step": 79705 + }, + { + "epoch": 0.6892288004427113, + "grad_norm": 3.7228381783014237, + "learning_rate": 4.407000186221067e-06, + "loss": 0.156085205078125, + "step": 79710 + }, + { + "epoch": 0.6892720339642545, + "grad_norm": 45.24168044090355, + "learning_rate": 4.4068202462209475e-06, + "loss": 0.3298492431640625, + "step": 79715 + }, + { + "epoch": 0.6893152674857977, + "grad_norm": 0.7924256308195903, + "learning_rate": 4.406640299732697e-06, + "loss": 0.038409423828125, + "step": 79720 + }, + { + "epoch": 0.6893585010073411, + "grad_norm": 0.360883685188594, + "learning_rate": 4.406460346757146e-06, + "loss": 0.042650604248046876, + "step": 79725 + }, + { + "epoch": 0.6894017345288843, + "grad_norm": 0.6957566300620491, + "learning_rate": 4.406280387295125e-06, + "loss": 0.307440185546875, + "step": 79730 + }, + { + "epoch": 0.6894449680504275, + "grad_norm": 12.203727387179125, + "learning_rate": 4.406100421347462e-06, + "loss": 0.14582290649414062, + "step": 79735 + }, + { + "epoch": 0.6894882015719709, + "grad_norm": 1.1835701624733062, + "learning_rate": 4.405920448914989e-06, + "loss": 0.09320831298828125, + "step": 79740 + }, + { + "epoch": 0.6895314350935141, + "grad_norm": 19.47081435430692, + "learning_rate": 4.405740469998534e-06, + "loss": 0.1878498077392578, + "step": 79745 + }, + { + "epoch": 0.6895746686150573, + "grad_norm": 1.0416282832748816, + "learning_rate": 4.4055604845989286e-06, + "loss": 0.06822662353515625, + "step": 79750 + }, + { + "epoch": 0.6896179021366007, + "grad_norm": 8.34631452221807, + "learning_rate": 4.4053804927170025e-06, + "loss": 0.34036407470703123, + "step": 79755 + }, + { + "epoch": 0.6896611356581439, + "grad_norm": 0.12369883302850428, + "learning_rate": 4.405200494353586e-06, + "loss": 0.15207672119140625, + "step": 79760 + }, + { + "epoch": 0.6897043691796871, + "grad_norm": 0.6250859316998634, + "learning_rate": 4.405020489509509e-06, + "loss": 0.23799209594726561, + "step": 79765 + }, + { + "epoch": 0.6897476027012305, + "grad_norm": 6.107617234680743, + "learning_rate": 4.4048404781856e-06, + "loss": 0.21576461791992188, + "step": 79770 + }, + { + "epoch": 0.6897908362227737, + "grad_norm": 5.265330338580283, + "learning_rate": 4.404660460382693e-06, + "loss": 0.420794677734375, + "step": 79775 + }, + { + "epoch": 0.6898340697443169, + "grad_norm": 11.840880156878255, + "learning_rate": 4.404480436101616e-06, + "loss": 0.13254432678222655, + "step": 79780 + }, + { + "epoch": 0.6898773032658602, + "grad_norm": 10.994560957261351, + "learning_rate": 4.404300405343198e-06, + "loss": 0.28280029296875, + "step": 79785 + }, + { + "epoch": 0.6899205367874035, + "grad_norm": 12.66246342314059, + "learning_rate": 4.404120368108271e-06, + "loss": 0.07966804504394531, + "step": 79790 + }, + { + "epoch": 0.6899637703089467, + "grad_norm": 12.105290512381465, + "learning_rate": 4.403940324397666e-06, + "loss": 0.10407562255859375, + "step": 79795 + }, + { + "epoch": 0.69000700383049, + "grad_norm": 4.285380067172142, + "learning_rate": 4.403760274212212e-06, + "loss": 0.0293670654296875, + "step": 79800 + }, + { + "epoch": 0.6900502373520333, + "grad_norm": 4.513443501463452, + "learning_rate": 4.403580217552738e-06, + "loss": 0.2481903076171875, + "step": 79805 + }, + { + "epoch": 0.6900934708735765, + "grad_norm": 6.753265430184558, + "learning_rate": 4.403400154420078e-06, + "loss": 0.10242462158203125, + "step": 79810 + }, + { + "epoch": 0.6901367043951198, + "grad_norm": 4.18418748926444, + "learning_rate": 4.40322008481506e-06, + "loss": 0.06407470703125, + "step": 79815 + }, + { + "epoch": 0.6901799379166631, + "grad_norm": 12.453967040975325, + "learning_rate": 4.403040008738514e-06, + "loss": 0.18285751342773438, + "step": 79820 + }, + { + "epoch": 0.6902231714382063, + "grad_norm": 6.503423092123739, + "learning_rate": 4.402859926191273e-06, + "loss": 0.095745849609375, + "step": 79825 + }, + { + "epoch": 0.6902664049597496, + "grad_norm": 5.645780681809807, + "learning_rate": 4.402679837174164e-06, + "loss": 0.346435546875, + "step": 79830 + }, + { + "epoch": 0.6903096384812929, + "grad_norm": 0.19740808239049035, + "learning_rate": 4.402499741688021e-06, + "loss": 0.4401580810546875, + "step": 79835 + }, + { + "epoch": 0.6903528720028361, + "grad_norm": 5.181241618621504, + "learning_rate": 4.402319639733673e-06, + "loss": 0.18500022888183593, + "step": 79840 + }, + { + "epoch": 0.6903961055243794, + "grad_norm": 12.179173322011835, + "learning_rate": 4.4021395313119505e-06, + "loss": 0.3654212951660156, + "step": 79845 + }, + { + "epoch": 0.6904393390459227, + "grad_norm": 10.040509283814655, + "learning_rate": 4.401959416423685e-06, + "loss": 0.16338653564453126, + "step": 79850 + }, + { + "epoch": 0.6904825725674659, + "grad_norm": 4.44416842910343, + "learning_rate": 4.401779295069706e-06, + "loss": 0.027556991577148436, + "step": 79855 + }, + { + "epoch": 0.6905258060890092, + "grad_norm": 3.755601973672619, + "learning_rate": 4.401599167250844e-06, + "loss": 0.07494583129882812, + "step": 79860 + }, + { + "epoch": 0.6905690396105525, + "grad_norm": 1.3763254878318016, + "learning_rate": 4.40141903296793e-06, + "loss": 0.15307540893554689, + "step": 79865 + }, + { + "epoch": 0.6906122731320957, + "grad_norm": 3.6575056805739465, + "learning_rate": 4.401238892221798e-06, + "loss": 0.055831527709960936, + "step": 79870 + }, + { + "epoch": 0.690655506653639, + "grad_norm": 0.19971325725357236, + "learning_rate": 4.4010587450132745e-06, + "loss": 0.1817943572998047, + "step": 79875 + }, + { + "epoch": 0.6906987401751822, + "grad_norm": 2.787050282501064, + "learning_rate": 4.400878591343193e-06, + "loss": 0.08720703125, + "step": 79880 + }, + { + "epoch": 0.6907419736967255, + "grad_norm": 17.428996269218647, + "learning_rate": 4.400698431212381e-06, + "loss": 0.13957061767578124, + "step": 79885 + }, + { + "epoch": 0.6907852072182687, + "grad_norm": 1.8021013411971623, + "learning_rate": 4.400518264621672e-06, + "loss": 0.31690444946289065, + "step": 79890 + }, + { + "epoch": 0.690828440739812, + "grad_norm": 22.70574238725061, + "learning_rate": 4.400338091571897e-06, + "loss": 0.2010162353515625, + "step": 79895 + }, + { + "epoch": 0.6908716742613553, + "grad_norm": 24.412766254948597, + "learning_rate": 4.400157912063887e-06, + "loss": 0.18383560180664063, + "step": 79900 + }, + { + "epoch": 0.6909149077828985, + "grad_norm": 9.010654077587853, + "learning_rate": 4.399977726098472e-06, + "loss": 0.12534713745117188, + "step": 79905 + }, + { + "epoch": 0.6909581413044418, + "grad_norm": 21.022808424739154, + "learning_rate": 4.399797533676485e-06, + "loss": 0.1215118408203125, + "step": 79910 + }, + { + "epoch": 0.6910013748259851, + "grad_norm": 3.430604192472896, + "learning_rate": 4.399617334798753e-06, + "loss": 0.154443359375, + "step": 79915 + }, + { + "epoch": 0.6910446083475283, + "grad_norm": 12.724488586673676, + "learning_rate": 4.3994371294661105e-06, + "loss": 0.07326812744140625, + "step": 79920 + }, + { + "epoch": 0.6910878418690716, + "grad_norm": 2.7868404443937513, + "learning_rate": 4.399256917679387e-06, + "loss": 0.03507080078125, + "step": 79925 + }, + { + "epoch": 0.6911310753906149, + "grad_norm": 46.12314584769832, + "learning_rate": 4.399076699439415e-06, + "loss": 0.47356491088867186, + "step": 79930 + }, + { + "epoch": 0.6911743089121581, + "grad_norm": 3.7113476018916898, + "learning_rate": 4.398896474747025e-06, + "loss": 0.05295486450195312, + "step": 79935 + }, + { + "epoch": 0.6912175424337014, + "grad_norm": 3.011459922833166, + "learning_rate": 4.398716243603048e-06, + "loss": 0.0562957763671875, + "step": 79940 + }, + { + "epoch": 0.6912607759552447, + "grad_norm": 0.21992563332425713, + "learning_rate": 4.398536006008315e-06, + "loss": 0.021150970458984376, + "step": 79945 + }, + { + "epoch": 0.6913040094767879, + "grad_norm": 4.767046634559379, + "learning_rate": 4.3983557619636576e-06, + "loss": 0.07113265991210938, + "step": 79950 + }, + { + "epoch": 0.6913472429983312, + "grad_norm": 1.390923367704698, + "learning_rate": 4.398175511469907e-06, + "loss": 0.29359588623046873, + "step": 79955 + }, + { + "epoch": 0.6913904765198744, + "grad_norm": 0.2294023851887735, + "learning_rate": 4.397995254527895e-06, + "loss": 0.1956512451171875, + "step": 79960 + }, + { + "epoch": 0.6914337100414177, + "grad_norm": 2.214595964495952, + "learning_rate": 4.397814991138451e-06, + "loss": 0.04943008422851562, + "step": 79965 + }, + { + "epoch": 0.691476943562961, + "grad_norm": 8.173192920145421, + "learning_rate": 4.397634721302409e-06, + "loss": 0.128179931640625, + "step": 79970 + }, + { + "epoch": 0.6915201770845042, + "grad_norm": 0.9046369624085191, + "learning_rate": 4.397454445020599e-06, + "loss": 0.06264190673828125, + "step": 79975 + }, + { + "epoch": 0.6915634106060475, + "grad_norm": 1.0965822241292957, + "learning_rate": 4.397274162293851e-06, + "loss": 0.099200439453125, + "step": 79980 + }, + { + "epoch": 0.6916066441275908, + "grad_norm": 15.104649720397227, + "learning_rate": 4.397093873123e-06, + "loss": 0.2029857635498047, + "step": 79985 + }, + { + "epoch": 0.691649877649134, + "grad_norm": 25.442401949729504, + "learning_rate": 4.396913577508875e-06, + "loss": 0.17149658203125, + "step": 79990 + }, + { + "epoch": 0.6916931111706773, + "grad_norm": 12.463555668490372, + "learning_rate": 4.396733275452308e-06, + "loss": 0.1973703384399414, + "step": 79995 + }, + { + "epoch": 0.6917363446922206, + "grad_norm": 14.550485133962635, + "learning_rate": 4.396552966954131e-06, + "loss": 0.097686767578125, + "step": 80000 + }, + { + "epoch": 0.6917363446922206, + "eval_loss": 0.09788688272237778, + "eval_margin": 0.15175552666187286, + "eval_mean_neg": 0.00430450402200222, + "eval_mean_pos": 0.7209447622299194, + "eval_runtime": 19.5877, + "eval_samples_per_second": 11.793, + "eval_steps_per_second": 5.922, + "step": 80000 + }, + { + "epoch": 0.6917795782137638, + "grad_norm": 25.760090669140787, + "learning_rate": 4.396372652015174e-06, + "loss": 0.095318603515625, + "step": 80005 + }, + { + "epoch": 0.6918228117353071, + "grad_norm": 3.0764886446099236, + "learning_rate": 4.39619233063627e-06, + "loss": 0.15178985595703126, + "step": 80010 + }, + { + "epoch": 0.6918660452568504, + "grad_norm": 0.16715373777772355, + "learning_rate": 4.396012002818251e-06, + "loss": 0.3785667419433594, + "step": 80015 + }, + { + "epoch": 0.6919092787783936, + "grad_norm": 1.3927232971037478, + "learning_rate": 4.395831668561948e-06, + "loss": 0.061322021484375, + "step": 80020 + }, + { + "epoch": 0.6919525122999369, + "grad_norm": 26.049206207038015, + "learning_rate": 4.395651327868193e-06, + "loss": 0.21397705078125, + "step": 80025 + }, + { + "epoch": 0.6919957458214802, + "grad_norm": 17.662324038735093, + "learning_rate": 4.3954709807378155e-06, + "loss": 0.11591644287109375, + "step": 80030 + }, + { + "epoch": 0.6920389793430234, + "grad_norm": 4.639139091359825, + "learning_rate": 4.3952906271716504e-06, + "loss": 0.13803596496582032, + "step": 80035 + }, + { + "epoch": 0.6920822128645666, + "grad_norm": 2.459954521082901, + "learning_rate": 4.395110267170529e-06, + "loss": 0.0544830322265625, + "step": 80040 + }, + { + "epoch": 0.69212544638611, + "grad_norm": 13.390362418066601, + "learning_rate": 4.394929900735282e-06, + "loss": 0.328363037109375, + "step": 80045 + }, + { + "epoch": 0.6921686799076532, + "grad_norm": 1.5589521082134012, + "learning_rate": 4.39474952786674e-06, + "loss": 0.11138477325439453, + "step": 80050 + }, + { + "epoch": 0.6922119134291964, + "grad_norm": 8.653971396474207, + "learning_rate": 4.394569148565738e-06, + "loss": 0.04267120361328125, + "step": 80055 + }, + { + "epoch": 0.6922551469507398, + "grad_norm": 5.39554548617643, + "learning_rate": 4.3943887628331055e-06, + "loss": 0.10698394775390625, + "step": 80060 + }, + { + "epoch": 0.692298380472283, + "grad_norm": 11.104895739140508, + "learning_rate": 4.394208370669676e-06, + "loss": 0.06566390991210938, + "step": 80065 + }, + { + "epoch": 0.6923416139938262, + "grad_norm": 10.129176595203933, + "learning_rate": 4.39402797207628e-06, + "loss": 0.11710433959960938, + "step": 80070 + }, + { + "epoch": 0.6923848475153696, + "grad_norm": 22.43882807208922, + "learning_rate": 4.393847567053751e-06, + "loss": 0.49037628173828124, + "step": 80075 + }, + { + "epoch": 0.6924280810369128, + "grad_norm": 2.630873865879285, + "learning_rate": 4.393667155602919e-06, + "loss": 0.28253936767578125, + "step": 80080 + }, + { + "epoch": 0.692471314558456, + "grad_norm": 6.804446795175428, + "learning_rate": 4.393486737724617e-06, + "loss": 0.19211177825927733, + "step": 80085 + }, + { + "epoch": 0.6925145480799993, + "grad_norm": 4.528375451489793, + "learning_rate": 4.393306313419678e-06, + "loss": 0.16987342834472657, + "step": 80090 + }, + { + "epoch": 0.6925577816015426, + "grad_norm": 4.620207663760422, + "learning_rate": 4.393125882688934e-06, + "loss": 0.06054344177246094, + "step": 80095 + }, + { + "epoch": 0.6926010151230858, + "grad_norm": 3.600278827210535, + "learning_rate": 4.392945445533217e-06, + "loss": 0.2383392333984375, + "step": 80100 + }, + { + "epoch": 0.6926442486446291, + "grad_norm": 1.413650042069444, + "learning_rate": 4.392765001953357e-06, + "loss": 0.05276336669921875, + "step": 80105 + }, + { + "epoch": 0.6926874821661724, + "grad_norm": 27.620134035130565, + "learning_rate": 4.3925845519501885e-06, + "loss": 0.10845375061035156, + "step": 80110 + }, + { + "epoch": 0.6927307156877156, + "grad_norm": 1.8326405629579094, + "learning_rate": 4.392404095524544e-06, + "loss": 0.07599945068359375, + "step": 80115 + }, + { + "epoch": 0.6927739492092589, + "grad_norm": 33.11264033697161, + "learning_rate": 4.392223632677253e-06, + "loss": 0.3251045227050781, + "step": 80120 + }, + { + "epoch": 0.6928171827308022, + "grad_norm": 0.9961429193095677, + "learning_rate": 4.392043163409151e-06, + "loss": 0.1105316162109375, + "step": 80125 + }, + { + "epoch": 0.6928604162523454, + "grad_norm": 5.545491718467361, + "learning_rate": 4.391862687721069e-06, + "loss": 0.09194183349609375, + "step": 80130 + }, + { + "epoch": 0.6929036497738886, + "grad_norm": 2.403003961208082, + "learning_rate": 4.391682205613839e-06, + "loss": 0.023264694213867187, + "step": 80135 + }, + { + "epoch": 0.692946883295432, + "grad_norm": 25.129269603728854, + "learning_rate": 4.3915017170882934e-06, + "loss": 0.238800048828125, + "step": 80140 + }, + { + "epoch": 0.6929901168169752, + "grad_norm": 33.004240340451666, + "learning_rate": 4.391321222145264e-06, + "loss": 0.13949432373046874, + "step": 80145 + }, + { + "epoch": 0.6930333503385184, + "grad_norm": 29.273824139085242, + "learning_rate": 4.3911407207855855e-06, + "loss": 0.1360565185546875, + "step": 80150 + }, + { + "epoch": 0.6930765838600618, + "grad_norm": 6.094521821467758, + "learning_rate": 4.390960213010088e-06, + "loss": 0.0562469482421875, + "step": 80155 + }, + { + "epoch": 0.693119817381605, + "grad_norm": 44.85052402283103, + "learning_rate": 4.390779698819606e-06, + "loss": 0.288079833984375, + "step": 80160 + }, + { + "epoch": 0.6931630509031482, + "grad_norm": 0.16259890208400007, + "learning_rate": 4.390599178214971e-06, + "loss": 0.04585189819335937, + "step": 80165 + }, + { + "epoch": 0.6932062844246916, + "grad_norm": 11.961530588858105, + "learning_rate": 4.3904186511970134e-06, + "loss": 0.24109268188476562, + "step": 80170 + }, + { + "epoch": 0.6932495179462348, + "grad_norm": 20.87228002644118, + "learning_rate": 4.390238117766569e-06, + "loss": 0.10719127655029297, + "step": 80175 + }, + { + "epoch": 0.693292751467778, + "grad_norm": 0.9947513644268949, + "learning_rate": 4.390057577924471e-06, + "loss": 0.1978144645690918, + "step": 80180 + }, + { + "epoch": 0.6933359849893214, + "grad_norm": 10.549458778524238, + "learning_rate": 4.3898770316715475e-06, + "loss": 0.0593994140625, + "step": 80185 + }, + { + "epoch": 0.6933792185108646, + "grad_norm": 24.63921089600646, + "learning_rate": 4.389696479008636e-06, + "loss": 0.19571151733398437, + "step": 80190 + }, + { + "epoch": 0.6934224520324078, + "grad_norm": 1.6946343149811047, + "learning_rate": 4.389515919936566e-06, + "loss": 0.08400421142578125, + "step": 80195 + }, + { + "epoch": 0.6934656855539512, + "grad_norm": 4.983989856242595, + "learning_rate": 4.389335354456171e-06, + "loss": 0.2799407958984375, + "step": 80200 + }, + { + "epoch": 0.6935089190754944, + "grad_norm": 0.5544813569551624, + "learning_rate": 4.3891547825682845e-06, + "loss": 0.21232147216796876, + "step": 80205 + }, + { + "epoch": 0.6935521525970376, + "grad_norm": 0.2873587934257675, + "learning_rate": 4.38897420427374e-06, + "loss": 0.2840259552001953, + "step": 80210 + }, + { + "epoch": 0.6935953861185808, + "grad_norm": 18.445331555873118, + "learning_rate": 4.388793619573368e-06, + "loss": 0.12227630615234375, + "step": 80215 + }, + { + "epoch": 0.6936386196401242, + "grad_norm": 1.6961104012214012, + "learning_rate": 4.388613028468003e-06, + "loss": 0.0539581298828125, + "step": 80220 + }, + { + "epoch": 0.6936818531616674, + "grad_norm": 10.613509135031103, + "learning_rate": 4.3884324309584765e-06, + "loss": 0.40103759765625, + "step": 80225 + }, + { + "epoch": 0.6937250866832106, + "grad_norm": 17.1168062550291, + "learning_rate": 4.3882518270456236e-06, + "loss": 0.10294342041015625, + "step": 80230 + }, + { + "epoch": 0.693768320204754, + "grad_norm": 2.263948501652716, + "learning_rate": 4.3880712167302755e-06, + "loss": 0.04701938629150391, + "step": 80235 + }, + { + "epoch": 0.6938115537262972, + "grad_norm": 5.603180645746583, + "learning_rate": 4.387890600013265e-06, + "loss": 0.17346572875976562, + "step": 80240 + }, + { + "epoch": 0.6938547872478404, + "grad_norm": 2.2692266173291706, + "learning_rate": 4.387709976895427e-06, + "loss": 0.127435302734375, + "step": 80245 + }, + { + "epoch": 0.6938980207693838, + "grad_norm": 1.8440848095820186, + "learning_rate": 4.387529347377592e-06, + "loss": 0.023904037475585938, + "step": 80250 + }, + { + "epoch": 0.693941254290927, + "grad_norm": 8.557688607263662, + "learning_rate": 4.387348711460594e-06, + "loss": 0.36151123046875, + "step": 80255 + }, + { + "epoch": 0.6939844878124702, + "grad_norm": 16.384837403964, + "learning_rate": 4.387168069145268e-06, + "loss": 0.08166351318359374, + "step": 80260 + }, + { + "epoch": 0.6940277213340136, + "grad_norm": 44.98766947044041, + "learning_rate": 4.386987420432444e-06, + "loss": 0.45255126953125, + "step": 80265 + }, + { + "epoch": 0.6940709548555568, + "grad_norm": 0.6695050194386268, + "learning_rate": 4.3868067653229575e-06, + "loss": 0.20630645751953125, + "step": 80270 + }, + { + "epoch": 0.6941141883771, + "grad_norm": 34.18019664087713, + "learning_rate": 4.38662610381764e-06, + "loss": 0.2651641845703125, + "step": 80275 + }, + { + "epoch": 0.6941574218986434, + "grad_norm": 0.7034686915493867, + "learning_rate": 4.3864454359173255e-06, + "loss": 0.0471099853515625, + "step": 80280 + }, + { + "epoch": 0.6942006554201866, + "grad_norm": 11.234498918164908, + "learning_rate": 4.386264761622847e-06, + "loss": 0.04134445190429688, + "step": 80285 + }, + { + "epoch": 0.6942438889417298, + "grad_norm": 0.736859249009056, + "learning_rate": 4.386084080935039e-06, + "loss": 0.22433061599731446, + "step": 80290 + }, + { + "epoch": 0.6942871224632732, + "grad_norm": 8.691737271489384, + "learning_rate": 4.385903393854732e-06, + "loss": 0.10727691650390625, + "step": 80295 + }, + { + "epoch": 0.6943303559848164, + "grad_norm": 23.85305523710238, + "learning_rate": 4.385722700382763e-06, + "loss": 0.2954254150390625, + "step": 80300 + }, + { + "epoch": 0.6943735895063596, + "grad_norm": 8.652056771047542, + "learning_rate": 4.385542000519962e-06, + "loss": 0.02991619110107422, + "step": 80305 + }, + { + "epoch": 0.6944168230279029, + "grad_norm": 9.837649003323502, + "learning_rate": 4.385361294267164e-06, + "loss": 0.0426513671875, + "step": 80310 + }, + { + "epoch": 0.6944600565494462, + "grad_norm": 3.2459609075501903, + "learning_rate": 4.385180581625202e-06, + "loss": 0.057930374145507814, + "step": 80315 + }, + { + "epoch": 0.6945032900709894, + "grad_norm": 1.133859283676475, + "learning_rate": 4.38499986259491e-06, + "loss": 0.13835983276367186, + "step": 80320 + }, + { + "epoch": 0.6945465235925327, + "grad_norm": 31.173453989739134, + "learning_rate": 4.384819137177121e-06, + "loss": 0.15821914672851561, + "step": 80325 + }, + { + "epoch": 0.694589757114076, + "grad_norm": 10.546112764427347, + "learning_rate": 4.384638405372668e-06, + "loss": 0.08037261962890625, + "step": 80330 + }, + { + "epoch": 0.6946329906356192, + "grad_norm": 13.180733830055004, + "learning_rate": 4.384457667182386e-06, + "loss": 0.25207061767578126, + "step": 80335 + }, + { + "epoch": 0.6946762241571625, + "grad_norm": 14.797596801876274, + "learning_rate": 4.384276922607106e-06, + "loss": 0.26521453857421873, + "step": 80340 + }, + { + "epoch": 0.6947194576787058, + "grad_norm": 0.7751120504684359, + "learning_rate": 4.384096171647664e-06, + "loss": 0.22791290283203125, + "step": 80345 + }, + { + "epoch": 0.694762691200249, + "grad_norm": 32.22647995166355, + "learning_rate": 4.383915414304893e-06, + "loss": 0.26539955139160154, + "step": 80350 + }, + { + "epoch": 0.6948059247217923, + "grad_norm": 0.18084359319126508, + "learning_rate": 4.383734650579626e-06, + "loss": 0.09528007507324218, + "step": 80355 + }, + { + "epoch": 0.6948491582433356, + "grad_norm": 36.62871072054293, + "learning_rate": 4.383553880472697e-06, + "loss": 0.4185394287109375, + "step": 80360 + }, + { + "epoch": 0.6948923917648788, + "grad_norm": 4.244067887120449, + "learning_rate": 4.383373103984941e-06, + "loss": 0.13158416748046875, + "step": 80365 + }, + { + "epoch": 0.694935625286422, + "grad_norm": 4.116341713127551, + "learning_rate": 4.383192321117188e-06, + "loss": 0.08471412658691406, + "step": 80370 + }, + { + "epoch": 0.6949788588079654, + "grad_norm": 40.940393492023134, + "learning_rate": 4.383011531870275e-06, + "loss": 0.5745414733886719, + "step": 80375 + }, + { + "epoch": 0.6950220923295086, + "grad_norm": 0.4792680564132186, + "learning_rate": 4.382830736245037e-06, + "loss": 0.18038482666015626, + "step": 80380 + }, + { + "epoch": 0.6950653258510519, + "grad_norm": 20.137720975520594, + "learning_rate": 4.382649934242305e-06, + "loss": 0.13983497619628907, + "step": 80385 + }, + { + "epoch": 0.6951085593725951, + "grad_norm": 34.71491064254816, + "learning_rate": 4.382469125862913e-06, + "loss": 0.1213592529296875, + "step": 80390 + }, + { + "epoch": 0.6951517928941384, + "grad_norm": 15.435878443905926, + "learning_rate": 4.382288311107696e-06, + "loss": 0.06557083129882812, + "step": 80395 + }, + { + "epoch": 0.6951950264156816, + "grad_norm": 5.722105618917314, + "learning_rate": 4.382107489977486e-06, + "loss": 0.04581184387207031, + "step": 80400 + }, + { + "epoch": 0.6952382599372249, + "grad_norm": 3.649368202847393, + "learning_rate": 4.38192666247312e-06, + "loss": 0.14638671875, + "step": 80405 + }, + { + "epoch": 0.6952814934587682, + "grad_norm": 10.937375630764594, + "learning_rate": 4.38174582859543e-06, + "loss": 0.32974395751953123, + "step": 80410 + }, + { + "epoch": 0.6953247269803114, + "grad_norm": 7.758973461641776, + "learning_rate": 4.38156498834525e-06, + "loss": 0.3136383056640625, + "step": 80415 + }, + { + "epoch": 0.6953679605018547, + "grad_norm": 60.35106355776779, + "learning_rate": 4.381384141723415e-06, + "loss": 0.3012584686279297, + "step": 80420 + }, + { + "epoch": 0.695411194023398, + "grad_norm": 2.8220821068877187, + "learning_rate": 4.381203288730757e-06, + "loss": 0.109295654296875, + "step": 80425 + }, + { + "epoch": 0.6954544275449412, + "grad_norm": 0.734419089230378, + "learning_rate": 4.381022429368113e-06, + "loss": 0.10237312316894531, + "step": 80430 + }, + { + "epoch": 0.6954976610664845, + "grad_norm": 4.890514732869459, + "learning_rate": 4.3808415636363154e-06, + "loss": 0.2519378662109375, + "step": 80435 + }, + { + "epoch": 0.6955408945880278, + "grad_norm": 17.391955206130902, + "learning_rate": 4.380660691536198e-06, + "loss": 0.07033023834228516, + "step": 80440 + }, + { + "epoch": 0.695584128109571, + "grad_norm": 4.743796951654303, + "learning_rate": 4.3804798130685954e-06, + "loss": 0.12616729736328125, + "step": 80445 + }, + { + "epoch": 0.6956273616311143, + "grad_norm": 1.1523602055880706, + "learning_rate": 4.3802989282343416e-06, + "loss": 0.0281005859375, + "step": 80450 + }, + { + "epoch": 0.6956705951526576, + "grad_norm": 18.580215613672802, + "learning_rate": 4.3801180370342725e-06, + "loss": 0.176507568359375, + "step": 80455 + }, + { + "epoch": 0.6957138286742008, + "grad_norm": 4.497685287501815, + "learning_rate": 4.379937139469219e-06, + "loss": 0.18629417419433594, + "step": 80460 + }, + { + "epoch": 0.6957570621957441, + "grad_norm": 0.7576052550483554, + "learning_rate": 4.37975623554002e-06, + "loss": 0.0635589599609375, + "step": 80465 + }, + { + "epoch": 0.6958002957172874, + "grad_norm": 1.373483123486058, + "learning_rate": 4.3795753252475044e-06, + "loss": 0.28695831298828123, + "step": 80470 + }, + { + "epoch": 0.6958435292388306, + "grad_norm": 2.947729339288054, + "learning_rate": 4.379394408592511e-06, + "loss": 0.16945886611938477, + "step": 80475 + }, + { + "epoch": 0.6958867627603739, + "grad_norm": 1.2030443417708068, + "learning_rate": 4.379213485575872e-06, + "loss": 0.04716682434082031, + "step": 80480 + }, + { + "epoch": 0.6959299962819171, + "grad_norm": 15.690933755895387, + "learning_rate": 4.379032556198422e-06, + "loss": 0.20386028289794922, + "step": 80485 + }, + { + "epoch": 0.6959732298034604, + "grad_norm": 3.6226061575220205, + "learning_rate": 4.378851620460997e-06, + "loss": 0.04309234619140625, + "step": 80490 + }, + { + "epoch": 0.6960164633250037, + "grad_norm": 1.6180397895047707, + "learning_rate": 4.378670678364429e-06, + "loss": 0.02412071228027344, + "step": 80495 + }, + { + "epoch": 0.6960596968465469, + "grad_norm": 5.255315244692986, + "learning_rate": 4.378489729909553e-06, + "loss": 0.041996002197265625, + "step": 80500 + }, + { + "epoch": 0.6961029303680902, + "grad_norm": 2.5789983698460843, + "learning_rate": 4.378308775097206e-06, + "loss": 0.03975410461425781, + "step": 80505 + }, + { + "epoch": 0.6961461638896335, + "grad_norm": 76.31603155383385, + "learning_rate": 4.378127813928219e-06, + "loss": 0.5489532470703125, + "step": 80510 + }, + { + "epoch": 0.6961893974111767, + "grad_norm": 0.9403620393938659, + "learning_rate": 4.377946846403429e-06, + "loss": 0.100714111328125, + "step": 80515 + }, + { + "epoch": 0.69623263093272, + "grad_norm": 27.127159571446963, + "learning_rate": 4.377765872523669e-06, + "loss": 0.088140869140625, + "step": 80520 + }, + { + "epoch": 0.6962758644542633, + "grad_norm": 0.6878605960718276, + "learning_rate": 4.377584892289776e-06, + "loss": 0.11584033966064453, + "step": 80525 + }, + { + "epoch": 0.6963190979758065, + "grad_norm": 4.855682233210517, + "learning_rate": 4.377403905702583e-06, + "loss": 0.12762832641601562, + "step": 80530 + }, + { + "epoch": 0.6963623314973498, + "grad_norm": 3.8351545781643446, + "learning_rate": 4.377222912762925e-06, + "loss": 0.08481597900390625, + "step": 80535 + }, + { + "epoch": 0.6964055650188931, + "grad_norm": 1.696723631851398, + "learning_rate": 4.377041913471635e-06, + "loss": 0.3401275634765625, + "step": 80540 + }, + { + "epoch": 0.6964487985404363, + "grad_norm": 3.450870310662008, + "learning_rate": 4.376860907829551e-06, + "loss": 0.408367919921875, + "step": 80545 + }, + { + "epoch": 0.6964920320619796, + "grad_norm": 1.2372083880600437, + "learning_rate": 4.376679895837505e-06, + "loss": 0.016944122314453126, + "step": 80550 + }, + { + "epoch": 0.6965352655835229, + "grad_norm": 5.6413913032324325, + "learning_rate": 4.376498877496335e-06, + "loss": 0.1143218994140625, + "step": 80555 + }, + { + "epoch": 0.6965784991050661, + "grad_norm": 1.152905986834382, + "learning_rate": 4.376317852806872e-06, + "loss": 0.07795791625976563, + "step": 80560 + }, + { + "epoch": 0.6966217326266093, + "grad_norm": 0.19399237467106065, + "learning_rate": 4.376136821769953e-06, + "loss": 0.2796722412109375, + "step": 80565 + }, + { + "epoch": 0.6966649661481527, + "grad_norm": 2.429335837084522, + "learning_rate": 4.375955784386412e-06, + "loss": 0.26481475830078127, + "step": 80570 + }, + { + "epoch": 0.6967081996696959, + "grad_norm": 0.3843220643476471, + "learning_rate": 4.375774740657086e-06, + "loss": 0.2957038879394531, + "step": 80575 + }, + { + "epoch": 0.6967514331912391, + "grad_norm": 3.1803804759277807, + "learning_rate": 4.375593690582808e-06, + "loss": 0.22648468017578124, + "step": 80580 + }, + { + "epoch": 0.6967946667127825, + "grad_norm": 10.976223033171651, + "learning_rate": 4.375412634164413e-06, + "loss": 0.1790008544921875, + "step": 80585 + }, + { + "epoch": 0.6968379002343257, + "grad_norm": 19.006061320525685, + "learning_rate": 4.375231571402736e-06, + "loss": 0.29530029296875, + "step": 80590 + }, + { + "epoch": 0.6968811337558689, + "grad_norm": 2.3881109852374927, + "learning_rate": 4.375050502298613e-06, + "loss": 0.045886993408203125, + "step": 80595 + }, + { + "epoch": 0.6969243672774122, + "grad_norm": 2.136657861941869, + "learning_rate": 4.3748694268528776e-06, + "loss": 0.042776298522949216, + "step": 80600 + }, + { + "epoch": 0.6969676007989555, + "grad_norm": 14.232306489708671, + "learning_rate": 4.374688345066368e-06, + "loss": 0.5568294525146484, + "step": 80605 + }, + { + "epoch": 0.6970108343204987, + "grad_norm": 9.292403724776282, + "learning_rate": 4.374507256939916e-06, + "loss": 0.17546463012695312, + "step": 80610 + }, + { + "epoch": 0.697054067842042, + "grad_norm": 28.603656646085938, + "learning_rate": 4.374326162474358e-06, + "loss": 0.11335067749023438, + "step": 80615 + }, + { + "epoch": 0.6970973013635853, + "grad_norm": 0.9512720914541609, + "learning_rate": 4.37414506167053e-06, + "loss": 0.29546051025390624, + "step": 80620 + }, + { + "epoch": 0.6971405348851285, + "grad_norm": 9.01624154004321, + "learning_rate": 4.3739639545292654e-06, + "loss": 0.48757286071777345, + "step": 80625 + }, + { + "epoch": 0.6971837684066718, + "grad_norm": 2.460001260631571, + "learning_rate": 4.3737828410514e-06, + "loss": 0.12361183166503906, + "step": 80630 + }, + { + "epoch": 0.6972270019282151, + "grad_norm": 7.624342308594692, + "learning_rate": 4.373601721237771e-06, + "loss": 0.04899606704711914, + "step": 80635 + }, + { + "epoch": 0.6972702354497583, + "grad_norm": 3.982644449218571, + "learning_rate": 4.373420595089212e-06, + "loss": 0.24936676025390625, + "step": 80640 + }, + { + "epoch": 0.6973134689713016, + "grad_norm": 0.19458949295855438, + "learning_rate": 4.373239462606557e-06, + "loss": 0.05046844482421875, + "step": 80645 + }, + { + "epoch": 0.6973567024928449, + "grad_norm": 1.9228888739610914, + "learning_rate": 4.3730583237906445e-06, + "loss": 0.4867767333984375, + "step": 80650 + }, + { + "epoch": 0.6973999360143881, + "grad_norm": 14.940021584452124, + "learning_rate": 4.372877178642308e-06, + "loss": 0.0834442138671875, + "step": 80655 + }, + { + "epoch": 0.6974431695359313, + "grad_norm": 3.039095206749311, + "learning_rate": 4.372696027162384e-06, + "loss": 0.0608332633972168, + "step": 80660 + }, + { + "epoch": 0.6974864030574747, + "grad_norm": 7.923583951861586, + "learning_rate": 4.3725148693517056e-06, + "loss": 0.0987274169921875, + "step": 80665 + }, + { + "epoch": 0.6975296365790179, + "grad_norm": 4.151866186197445, + "learning_rate": 4.372333705211111e-06, + "loss": 0.042791748046875, + "step": 80670 + }, + { + "epoch": 0.6975728701005611, + "grad_norm": 13.270227826546066, + "learning_rate": 4.372152534741434e-06, + "loss": 0.098541259765625, + "step": 80675 + }, + { + "epoch": 0.6976161036221045, + "grad_norm": 6.2231805115933545, + "learning_rate": 4.37197135794351e-06, + "loss": 0.26015625, + "step": 80680 + }, + { + "epoch": 0.6976593371436477, + "grad_norm": 9.122099291501168, + "learning_rate": 4.371790174818177e-06, + "loss": 0.147869873046875, + "step": 80685 + }, + { + "epoch": 0.6977025706651909, + "grad_norm": 31.182322190852307, + "learning_rate": 4.3716089853662686e-06, + "loss": 0.4598186492919922, + "step": 80690 + }, + { + "epoch": 0.6977458041867343, + "grad_norm": 20.498531485592018, + "learning_rate": 4.37142778958862e-06, + "loss": 0.15393142700195311, + "step": 80695 + }, + { + "epoch": 0.6977890377082775, + "grad_norm": 10.85046501291198, + "learning_rate": 4.3712465874860685e-06, + "loss": 0.43834228515625, + "step": 80700 + }, + { + "epoch": 0.6978322712298207, + "grad_norm": 8.568204019797529, + "learning_rate": 4.3710653790594476e-06, + "loss": 0.17032012939453126, + "step": 80705 + }, + { + "epoch": 0.6978755047513641, + "grad_norm": 5.575185403716382, + "learning_rate": 4.370884164309595e-06, + "loss": 0.12352886199951171, + "step": 80710 + }, + { + "epoch": 0.6979187382729073, + "grad_norm": 27.52967982225393, + "learning_rate": 4.370702943237347e-06, + "loss": 0.140411376953125, + "step": 80715 + }, + { + "epoch": 0.6979619717944505, + "grad_norm": 33.069882894913405, + "learning_rate": 4.370521715843537e-06, + "loss": 0.19788284301757814, + "step": 80720 + }, + { + "epoch": 0.6980052053159939, + "grad_norm": 0.43234933602564646, + "learning_rate": 4.370340482129001e-06, + "loss": 0.18298759460449218, + "step": 80725 + }, + { + "epoch": 0.6980484388375371, + "grad_norm": 16.180232134506603, + "learning_rate": 4.3701592420945774e-06, + "loss": 0.11689796447753906, + "step": 80730 + }, + { + "epoch": 0.6980916723590803, + "grad_norm": 10.736346917270811, + "learning_rate": 4.3699779957411e-06, + "loss": 0.27422027587890624, + "step": 80735 + }, + { + "epoch": 0.6981349058806235, + "grad_norm": 10.547758696235416, + "learning_rate": 4.369796743069405e-06, + "loss": 0.16002197265625, + "step": 80740 + }, + { + "epoch": 0.6981781394021669, + "grad_norm": 10.430640981657856, + "learning_rate": 4.369615484080329e-06, + "loss": 0.15510406494140624, + "step": 80745 + }, + { + "epoch": 0.6982213729237101, + "grad_norm": 12.385139898738545, + "learning_rate": 4.369434218774707e-06, + "loss": 0.08232002258300782, + "step": 80750 + }, + { + "epoch": 0.6982646064452533, + "grad_norm": 4.722575632615744, + "learning_rate": 4.369252947153375e-06, + "loss": 0.18407669067382812, + "step": 80755 + }, + { + "epoch": 0.6983078399667967, + "grad_norm": 1.7492990943353908, + "learning_rate": 4.36907166921717e-06, + "loss": 0.13588447570800782, + "step": 80760 + }, + { + "epoch": 0.6983510734883399, + "grad_norm": 7.84975297818509, + "learning_rate": 4.368890384966926e-06, + "loss": 0.1366445541381836, + "step": 80765 + }, + { + "epoch": 0.6983943070098831, + "grad_norm": 21.73094259276337, + "learning_rate": 4.368709094403482e-06, + "loss": 0.15058612823486328, + "step": 80770 + }, + { + "epoch": 0.6984375405314265, + "grad_norm": 1.356046853777832, + "learning_rate": 4.368527797527672e-06, + "loss": 0.07675628662109375, + "step": 80775 + }, + { + "epoch": 0.6984807740529697, + "grad_norm": 1.1002981546158543, + "learning_rate": 4.368346494340333e-06, + "loss": 0.2696014404296875, + "step": 80780 + }, + { + "epoch": 0.6985240075745129, + "grad_norm": 0.03737263523281962, + "learning_rate": 4.368165184842301e-06, + "loss": 0.05733680725097656, + "step": 80785 + }, + { + "epoch": 0.6985672410960563, + "grad_norm": 5.753388958281246, + "learning_rate": 4.367983869034412e-06, + "loss": 0.076605224609375, + "step": 80790 + }, + { + "epoch": 0.6986104746175995, + "grad_norm": 25.461519493938987, + "learning_rate": 4.3678025469175015e-06, + "loss": 0.20672607421875, + "step": 80795 + }, + { + "epoch": 0.6986537081391427, + "grad_norm": 0.3712087841599953, + "learning_rate": 4.367621218492407e-06, + "loss": 0.161541748046875, + "step": 80800 + }, + { + "epoch": 0.6986969416606861, + "grad_norm": 25.306154271472803, + "learning_rate": 4.3674398837599646e-06, + "loss": 0.1252288818359375, + "step": 80805 + }, + { + "epoch": 0.6987401751822293, + "grad_norm": 3.049164840330242, + "learning_rate": 4.36725854272101e-06, + "loss": 0.09427032470703126, + "step": 80810 + }, + { + "epoch": 0.6987834087037725, + "grad_norm": 37.65822085324358, + "learning_rate": 4.36707719537638e-06, + "loss": 0.3368133544921875, + "step": 80815 + }, + { + "epoch": 0.6988266422253159, + "grad_norm": 4.886164003587345, + "learning_rate": 4.366895841726911e-06, + "loss": 0.1961334228515625, + "step": 80820 + }, + { + "epoch": 0.6988698757468591, + "grad_norm": 16.686249021353007, + "learning_rate": 4.366714481773439e-06, + "loss": 0.20936851501464843, + "step": 80825 + }, + { + "epoch": 0.6989131092684023, + "grad_norm": 16.05640277470129, + "learning_rate": 4.366533115516801e-06, + "loss": 0.08395805358886718, + "step": 80830 + }, + { + "epoch": 0.6989563427899456, + "grad_norm": 0.5519327892149667, + "learning_rate": 4.3663517429578315e-06, + "loss": 0.22354202270507811, + "step": 80835 + }, + { + "epoch": 0.6989995763114889, + "grad_norm": 42.287360830190735, + "learning_rate": 4.366170364097369e-06, + "loss": 0.5849200248718261, + "step": 80840 + }, + { + "epoch": 0.6990428098330321, + "grad_norm": 10.331881822542563, + "learning_rate": 4.365988978936251e-06, + "loss": 0.07455368041992187, + "step": 80845 + }, + { + "epoch": 0.6990860433545754, + "grad_norm": 17.6559930919158, + "learning_rate": 4.365807587475311e-06, + "loss": 0.13377685546875, + "step": 80850 + }, + { + "epoch": 0.6991292768761187, + "grad_norm": 23.06250604872629, + "learning_rate": 4.365626189715387e-06, + "loss": 0.2459186553955078, + "step": 80855 + }, + { + "epoch": 0.6991725103976619, + "grad_norm": 0.5290233175860047, + "learning_rate": 4.365444785657317e-06, + "loss": 0.38845367431640626, + "step": 80860 + }, + { + "epoch": 0.6992157439192052, + "grad_norm": 2.900806693202586, + "learning_rate": 4.365263375301936e-06, + "loss": 0.07702808380126953, + "step": 80865 + }, + { + "epoch": 0.6992589774407485, + "grad_norm": 12.077358258588344, + "learning_rate": 4.36508195865008e-06, + "loss": 0.20599365234375, + "step": 80870 + }, + { + "epoch": 0.6993022109622917, + "grad_norm": 7.959377790790982, + "learning_rate": 4.364900535702587e-06, + "loss": 0.111175537109375, + "step": 80875 + }, + { + "epoch": 0.699345444483835, + "grad_norm": 1.09331638779252, + "learning_rate": 4.364719106460293e-06, + "loss": 0.416864013671875, + "step": 80880 + }, + { + "epoch": 0.6993886780053783, + "grad_norm": 2.401733648077829, + "learning_rate": 4.364537670924036e-06, + "loss": 0.096429443359375, + "step": 80885 + }, + { + "epoch": 0.6994319115269215, + "grad_norm": 5.383140405542601, + "learning_rate": 4.3643562290946505e-06, + "loss": 0.13349227905273436, + "step": 80890 + }, + { + "epoch": 0.6994751450484648, + "grad_norm": 31.079211172114835, + "learning_rate": 4.364174780972977e-06, + "loss": 0.17025012969970704, + "step": 80895 + }, + { + "epoch": 0.6995183785700081, + "grad_norm": 17.809092541190076, + "learning_rate": 4.363993326559848e-06, + "loss": 0.164703369140625, + "step": 80900 + }, + { + "epoch": 0.6995616120915513, + "grad_norm": 6.488837306800001, + "learning_rate": 4.363811865856102e-06, + "loss": 0.17033767700195312, + "step": 80905 + }, + { + "epoch": 0.6996048456130946, + "grad_norm": 12.195657082123974, + "learning_rate": 4.363630398862577e-06, + "loss": 0.29298248291015627, + "step": 80910 + }, + { + "epoch": 0.6996480791346378, + "grad_norm": 0.16109740039930676, + "learning_rate": 4.363448925580109e-06, + "loss": 0.20536346435546876, + "step": 80915 + }, + { + "epoch": 0.6996913126561811, + "grad_norm": 15.076502261548725, + "learning_rate": 4.363267446009535e-06, + "loss": 0.107421875, + "step": 80920 + }, + { + "epoch": 0.6997345461777243, + "grad_norm": 3.163652876193587, + "learning_rate": 4.363085960151692e-06, + "loss": 0.052394866943359375, + "step": 80925 + }, + { + "epoch": 0.6997777796992676, + "grad_norm": 9.475792961428327, + "learning_rate": 4.362904468007417e-06, + "loss": 0.15839080810546874, + "step": 80930 + }, + { + "epoch": 0.6998210132208109, + "grad_norm": 9.768928299282335, + "learning_rate": 4.362722969577546e-06, + "loss": 0.33429107666015623, + "step": 80935 + }, + { + "epoch": 0.6998642467423541, + "grad_norm": 1.262629922795859, + "learning_rate": 4.362541464862919e-06, + "loss": 0.256024169921875, + "step": 80940 + }, + { + "epoch": 0.6999074802638974, + "grad_norm": 22.30460753352465, + "learning_rate": 4.362359953864371e-06, + "loss": 0.1268707275390625, + "step": 80945 + }, + { + "epoch": 0.6999507137854407, + "grad_norm": 22.56874815644318, + "learning_rate": 4.362178436582738e-06, + "loss": 0.14808349609375, + "step": 80950 + }, + { + "epoch": 0.6999939473069839, + "grad_norm": 1.9540584760054254, + "learning_rate": 4.361996913018859e-06, + "loss": 0.2290557861328125, + "step": 80955 + }, + { + "epoch": 0.7000371808285272, + "grad_norm": 22.28453603607343, + "learning_rate": 4.361815383173571e-06, + "loss": 0.12570724487304688, + "step": 80960 + }, + { + "epoch": 0.7000804143500705, + "grad_norm": 5.813721892499418, + "learning_rate": 4.36163384704771e-06, + "loss": 0.16096954345703124, + "step": 80965 + }, + { + "epoch": 0.7001236478716137, + "grad_norm": 7.574897002073507, + "learning_rate": 4.361452304642114e-06, + "loss": 0.24718017578125, + "step": 80970 + }, + { + "epoch": 0.700166881393157, + "grad_norm": 9.007664497787477, + "learning_rate": 4.3612707559576215e-06, + "loss": 0.15011768341064452, + "step": 80975 + }, + { + "epoch": 0.7002101149147003, + "grad_norm": 2.205643540004327, + "learning_rate": 4.3610892009950675e-06, + "loss": 0.18792877197265626, + "step": 80980 + }, + { + "epoch": 0.7002533484362435, + "grad_norm": 1.9643604822448757, + "learning_rate": 4.36090763975529e-06, + "loss": 0.205938720703125, + "step": 80985 + }, + { + "epoch": 0.7002965819577868, + "grad_norm": 6.086293454031793, + "learning_rate": 4.360726072239128e-06, + "loss": 0.13241424560546874, + "step": 80990 + }, + { + "epoch": 0.7003398154793301, + "grad_norm": 29.796484988782122, + "learning_rate": 4.360544498447417e-06, + "loss": 0.16619796752929689, + "step": 80995 + }, + { + "epoch": 0.7003830490008733, + "grad_norm": 15.615007404557883, + "learning_rate": 4.360362918380994e-06, + "loss": 0.5532562255859375, + "step": 81000 + }, + { + "epoch": 0.7004262825224166, + "grad_norm": 0.7191749381809232, + "learning_rate": 4.360181332040698e-06, + "loss": 0.19051895141601563, + "step": 81005 + }, + { + "epoch": 0.7004695160439598, + "grad_norm": 33.557808346083874, + "learning_rate": 4.359999739427368e-06, + "loss": 0.3359893798828125, + "step": 81010 + }, + { + "epoch": 0.7005127495655031, + "grad_norm": 4.97430797623683, + "learning_rate": 4.359818140541836e-06, + "loss": 0.16380615234375, + "step": 81015 + }, + { + "epoch": 0.7005559830870464, + "grad_norm": 9.669098798519686, + "learning_rate": 4.359636535384945e-06, + "loss": 0.07519683837890626, + "step": 81020 + }, + { + "epoch": 0.7005992166085896, + "grad_norm": 2.2300879012613035, + "learning_rate": 4.35945492395753e-06, + "loss": 0.046543121337890625, + "step": 81025 + }, + { + "epoch": 0.7006424501301329, + "grad_norm": 3.4699038740490904, + "learning_rate": 4.359273306260428e-06, + "loss": 0.17551612854003906, + "step": 81030 + }, + { + "epoch": 0.7006856836516762, + "grad_norm": 4.128991983091286, + "learning_rate": 4.359091682294479e-06, + "loss": 0.2259124755859375, + "step": 81035 + }, + { + "epoch": 0.7007289171732194, + "grad_norm": 15.69462313047174, + "learning_rate": 4.358910052060519e-06, + "loss": 0.08816146850585938, + "step": 81040 + }, + { + "epoch": 0.7007721506947627, + "grad_norm": 22.765721323169394, + "learning_rate": 4.358728415559386e-06, + "loss": 0.4860679626464844, + "step": 81045 + }, + { + "epoch": 0.700815384216306, + "grad_norm": 3.079763243899565, + "learning_rate": 4.358546772791916e-06, + "loss": 0.08826904296875, + "step": 81050 + }, + { + "epoch": 0.7008586177378492, + "grad_norm": 11.731962922166188, + "learning_rate": 4.35836512375895e-06, + "loss": 0.11154403686523437, + "step": 81055 + }, + { + "epoch": 0.7009018512593925, + "grad_norm": 37.41669040513683, + "learning_rate": 4.358183468461324e-06, + "loss": 0.30728759765625, + "step": 81060 + }, + { + "epoch": 0.7009450847809358, + "grad_norm": 0.5628474067445636, + "learning_rate": 4.3580018068998755e-06, + "loss": 0.057183456420898435, + "step": 81065 + }, + { + "epoch": 0.700988318302479, + "grad_norm": 4.698619759209907, + "learning_rate": 4.357820139075442e-06, + "loss": 0.1576904296875, + "step": 81070 + }, + { + "epoch": 0.7010315518240223, + "grad_norm": 0.46534829629219837, + "learning_rate": 4.357638464988863e-06, + "loss": 0.18508148193359375, + "step": 81075 + }, + { + "epoch": 0.7010747853455656, + "grad_norm": 2.229323307392795, + "learning_rate": 4.357456784640975e-06, + "loss": 0.095703125, + "step": 81080 + }, + { + "epoch": 0.7011180188671088, + "grad_norm": 3.5452451270752174, + "learning_rate": 4.357275098032617e-06, + "loss": 0.18767547607421875, + "step": 81085 + }, + { + "epoch": 0.701161252388652, + "grad_norm": 3.940827570318308, + "learning_rate": 4.357093405164626e-06, + "loss": 0.37387723922729493, + "step": 81090 + }, + { + "epoch": 0.7012044859101954, + "grad_norm": 2.6815952053115315, + "learning_rate": 4.356911706037839e-06, + "loss": 0.02508544921875, + "step": 81095 + }, + { + "epoch": 0.7012477194317386, + "grad_norm": 3.1207687237940087, + "learning_rate": 4.356730000653096e-06, + "loss": 0.13058929443359374, + "step": 81100 + }, + { + "epoch": 0.7012909529532818, + "grad_norm": 11.381131325283864, + "learning_rate": 4.356548289011233e-06, + "loss": 0.060428619384765625, + "step": 81105 + }, + { + "epoch": 0.7013341864748251, + "grad_norm": 7.090643558508803, + "learning_rate": 4.356366571113091e-06, + "loss": 0.17806549072265626, + "step": 81110 + }, + { + "epoch": 0.7013774199963684, + "grad_norm": 0.6532982861376082, + "learning_rate": 4.356184846959505e-06, + "loss": 0.12955322265625, + "step": 81115 + }, + { + "epoch": 0.7014206535179116, + "grad_norm": 19.015048418354866, + "learning_rate": 4.3560031165513144e-06, + "loss": 0.4316802978515625, + "step": 81120 + }, + { + "epoch": 0.701463887039455, + "grad_norm": 7.243346989050628, + "learning_rate": 4.355821379889358e-06, + "loss": 0.045151901245117185, + "step": 81125 + }, + { + "epoch": 0.7015071205609982, + "grad_norm": 9.992287741061357, + "learning_rate": 4.355639636974472e-06, + "loss": 0.201910400390625, + "step": 81130 + }, + { + "epoch": 0.7015503540825414, + "grad_norm": 40.5153250281022, + "learning_rate": 4.355457887807496e-06, + "loss": 0.1672332763671875, + "step": 81135 + }, + { + "epoch": 0.7015935876040847, + "grad_norm": 14.537981693409014, + "learning_rate": 4.355276132389268e-06, + "loss": 0.107110595703125, + "step": 81140 + }, + { + "epoch": 0.701636821125628, + "grad_norm": 1.341222302478872, + "learning_rate": 4.355094370720627e-06, + "loss": 0.1158447265625, + "step": 81145 + }, + { + "epoch": 0.7016800546471712, + "grad_norm": 1.7220536943929432, + "learning_rate": 4.354912602802411e-06, + "loss": 0.12167892456054688, + "step": 81150 + }, + { + "epoch": 0.7017232881687145, + "grad_norm": 29.97859399963512, + "learning_rate": 4.3547308286354556e-06, + "loss": 0.13347091674804687, + "step": 81155 + }, + { + "epoch": 0.7017665216902578, + "grad_norm": 0.05782563440681116, + "learning_rate": 4.354549048220603e-06, + "loss": 0.16803817749023436, + "step": 81160 + }, + { + "epoch": 0.701809755211801, + "grad_norm": 6.564987344603206, + "learning_rate": 4.354367261558689e-06, + "loss": 0.18741607666015625, + "step": 81165 + }, + { + "epoch": 0.7018529887333443, + "grad_norm": 0.4278063902015095, + "learning_rate": 4.3541854686505535e-06, + "loss": 0.13802337646484375, + "step": 81170 + }, + { + "epoch": 0.7018962222548876, + "grad_norm": 29.24888364471813, + "learning_rate": 4.3540036694970345e-06, + "loss": 0.1238311767578125, + "step": 81175 + }, + { + "epoch": 0.7019394557764308, + "grad_norm": 16.03248771149967, + "learning_rate": 4.35382186409897e-06, + "loss": 0.07183837890625, + "step": 81180 + }, + { + "epoch": 0.701982689297974, + "grad_norm": 16.707100281895762, + "learning_rate": 4.353640052457199e-06, + "loss": 0.0624908447265625, + "step": 81185 + }, + { + "epoch": 0.7020259228195174, + "grad_norm": 34.52353305054334, + "learning_rate": 4.3534582345725585e-06, + "loss": 0.21660537719726564, + "step": 81190 + }, + { + "epoch": 0.7020691563410606, + "grad_norm": 0.9665722864916567, + "learning_rate": 4.353276410445889e-06, + "loss": 0.156060791015625, + "step": 81195 + }, + { + "epoch": 0.7021123898626038, + "grad_norm": 15.51079669617745, + "learning_rate": 4.353094580078029e-06, + "loss": 0.08783493041992188, + "step": 81200 + }, + { + "epoch": 0.7021556233841472, + "grad_norm": 0.9889880283236221, + "learning_rate": 4.352912743469815e-06, + "loss": 0.26950225830078123, + "step": 81205 + }, + { + "epoch": 0.7021988569056904, + "grad_norm": 1.391275948538319, + "learning_rate": 4.352730900622088e-06, + "loss": 0.205126953125, + "step": 81210 + }, + { + "epoch": 0.7022420904272336, + "grad_norm": 0.7203235081613452, + "learning_rate": 4.352549051535685e-06, + "loss": 0.0130828857421875, + "step": 81215 + }, + { + "epoch": 0.702285323948777, + "grad_norm": 1.8183168115228514, + "learning_rate": 4.352367196211445e-06, + "loss": 0.0290008544921875, + "step": 81220 + }, + { + "epoch": 0.7023285574703202, + "grad_norm": 0.2355681638710652, + "learning_rate": 4.352185334650208e-06, + "loss": 0.0304443359375, + "step": 81225 + }, + { + "epoch": 0.7023717909918634, + "grad_norm": 3.0746753098148627, + "learning_rate": 4.352003466852811e-06, + "loss": 0.14707040786743164, + "step": 81230 + }, + { + "epoch": 0.7024150245134068, + "grad_norm": 17.47435668063142, + "learning_rate": 4.351821592820094e-06, + "loss": 0.21496505737304689, + "step": 81235 + }, + { + "epoch": 0.70245825803495, + "grad_norm": 2.2750077248512417, + "learning_rate": 4.351639712552895e-06, + "loss": 0.11639690399169922, + "step": 81240 + }, + { + "epoch": 0.7025014915564932, + "grad_norm": 3.998885153956391, + "learning_rate": 4.351457826052053e-06, + "loss": 0.09256591796875, + "step": 81245 + }, + { + "epoch": 0.7025447250780366, + "grad_norm": 19.64930817062829, + "learning_rate": 4.3512759333184065e-06, + "loss": 0.11091690063476563, + "step": 81250 + }, + { + "epoch": 0.7025879585995798, + "grad_norm": 16.79108409132846, + "learning_rate": 4.351094034352795e-06, + "loss": 0.1743438720703125, + "step": 81255 + }, + { + "epoch": 0.702631192121123, + "grad_norm": 25.662653414283987, + "learning_rate": 4.350912129156058e-06, + "loss": 0.2465850830078125, + "step": 81260 + }, + { + "epoch": 0.7026744256426662, + "grad_norm": 1.6593825589483837, + "learning_rate": 4.350730217729033e-06, + "loss": 0.502020263671875, + "step": 81265 + }, + { + "epoch": 0.7027176591642096, + "grad_norm": 1.7549303376265366, + "learning_rate": 4.350548300072559e-06, + "loss": 0.05579986572265625, + "step": 81270 + }, + { + "epoch": 0.7027608926857528, + "grad_norm": 9.720097751116862, + "learning_rate": 4.350366376187476e-06, + "loss": 0.2430694580078125, + "step": 81275 + }, + { + "epoch": 0.702804126207296, + "grad_norm": 40.50323204496384, + "learning_rate": 4.350184446074624e-06, + "loss": 0.365570068359375, + "step": 81280 + }, + { + "epoch": 0.7028473597288394, + "grad_norm": 6.450325597097926, + "learning_rate": 4.350002509734839e-06, + "loss": 0.05517311096191406, + "step": 81285 + }, + { + "epoch": 0.7028905932503826, + "grad_norm": 2.924044812540637, + "learning_rate": 4.349820567168962e-06, + "loss": 0.1450164794921875, + "step": 81290 + }, + { + "epoch": 0.7029338267719258, + "grad_norm": 17.303716895673723, + "learning_rate": 4.349638618377832e-06, + "loss": 0.309100341796875, + "step": 81295 + }, + { + "epoch": 0.7029770602934692, + "grad_norm": 1.3600913708307945, + "learning_rate": 4.349456663362287e-06, + "loss": 0.15582275390625, + "step": 81300 + }, + { + "epoch": 0.7030202938150124, + "grad_norm": 8.81685305962187, + "learning_rate": 4.349274702123168e-06, + "loss": 0.1109283447265625, + "step": 81305 + }, + { + "epoch": 0.7030635273365556, + "grad_norm": 35.579605103500924, + "learning_rate": 4.349092734661314e-06, + "loss": 0.28308639526367185, + "step": 81310 + }, + { + "epoch": 0.703106760858099, + "grad_norm": 6.348044295537209, + "learning_rate": 4.3489107609775625e-06, + "loss": 0.12373199462890624, + "step": 81315 + }, + { + "epoch": 0.7031499943796422, + "grad_norm": 0.7897572235472019, + "learning_rate": 4.348728781072755e-06, + "loss": 0.07214508056640626, + "step": 81320 + }, + { + "epoch": 0.7031932279011854, + "grad_norm": 23.80428129307253, + "learning_rate": 4.3485467949477275e-06, + "loss": 0.14901885986328126, + "step": 81325 + }, + { + "epoch": 0.7032364614227288, + "grad_norm": 4.145821575020574, + "learning_rate": 4.348364802603322e-06, + "loss": 0.0330902099609375, + "step": 81330 + }, + { + "epoch": 0.703279694944272, + "grad_norm": 6.510290619647555, + "learning_rate": 4.348182804040378e-06, + "loss": 0.06252822875976563, + "step": 81335 + }, + { + "epoch": 0.7033229284658152, + "grad_norm": 0.34539599292828016, + "learning_rate": 4.348000799259734e-06, + "loss": 0.0571014404296875, + "step": 81340 + }, + { + "epoch": 0.7033661619873586, + "grad_norm": 0.8803658377359213, + "learning_rate": 4.347818788262229e-06, + "loss": 0.06860389709472656, + "step": 81345 + }, + { + "epoch": 0.7034093955089018, + "grad_norm": 1.4522729577185225, + "learning_rate": 4.347636771048703e-06, + "loss": 0.30257091522216795, + "step": 81350 + }, + { + "epoch": 0.703452629030445, + "grad_norm": 12.484903501890258, + "learning_rate": 4.347454747619995e-06, + "loss": 0.05380058288574219, + "step": 81355 + }, + { + "epoch": 0.7034958625519883, + "grad_norm": 0.9677231994963317, + "learning_rate": 4.347272717976946e-06, + "loss": 0.31254425048828127, + "step": 81360 + }, + { + "epoch": 0.7035390960735316, + "grad_norm": 8.314512310903933, + "learning_rate": 4.347090682120392e-06, + "loss": 0.3976837158203125, + "step": 81365 + }, + { + "epoch": 0.7035823295950748, + "grad_norm": 1.6440605284251941, + "learning_rate": 4.346908640051177e-06, + "loss": 0.1297393798828125, + "step": 81370 + }, + { + "epoch": 0.7036255631166181, + "grad_norm": 5.620394031888474, + "learning_rate": 4.3467265917701374e-06, + "loss": 0.121380615234375, + "step": 81375 + }, + { + "epoch": 0.7036687966381614, + "grad_norm": 24.629230720105543, + "learning_rate": 4.346544537278114e-06, + "loss": 0.11025390625, + "step": 81380 + }, + { + "epoch": 0.7037120301597046, + "grad_norm": 5.534672975599029, + "learning_rate": 4.346362476575945e-06, + "loss": 0.08088932037353516, + "step": 81385 + }, + { + "epoch": 0.7037552636812479, + "grad_norm": 8.900599053201049, + "learning_rate": 4.346180409664472e-06, + "loss": 0.13311920166015626, + "step": 81390 + }, + { + "epoch": 0.7037984972027912, + "grad_norm": 17.619635075818604, + "learning_rate": 4.345998336544534e-06, + "loss": 0.11542015075683594, + "step": 81395 + }, + { + "epoch": 0.7038417307243344, + "grad_norm": 4.38402461360357, + "learning_rate": 4.345816257216972e-06, + "loss": 0.04240875244140625, + "step": 81400 + }, + { + "epoch": 0.7038849642458777, + "grad_norm": 3.5209536885705743, + "learning_rate": 4.345634171682623e-06, + "loss": 0.146527099609375, + "step": 81405 + }, + { + "epoch": 0.703928197767421, + "grad_norm": 3.711332895564667, + "learning_rate": 4.345452079942328e-06, + "loss": 0.1319091796875, + "step": 81410 + }, + { + "epoch": 0.7039714312889642, + "grad_norm": 8.439328689627715, + "learning_rate": 4.345269981996926e-06, + "loss": 0.076904296875, + "step": 81415 + }, + { + "epoch": 0.7040146648105075, + "grad_norm": 0.5879199409767049, + "learning_rate": 4.345087877847259e-06, + "loss": 0.253533935546875, + "step": 81420 + }, + { + "epoch": 0.7040578983320508, + "grad_norm": 2.4580389116962666, + "learning_rate": 4.344905767494166e-06, + "loss": 0.1085845947265625, + "step": 81425 + }, + { + "epoch": 0.704101131853594, + "grad_norm": 7.725383371491828, + "learning_rate": 4.3447236509384855e-06, + "loss": 0.10987968444824218, + "step": 81430 + }, + { + "epoch": 0.7041443653751372, + "grad_norm": 12.735579507381868, + "learning_rate": 4.344541528181059e-06, + "loss": 0.22726707458496093, + "step": 81435 + }, + { + "epoch": 0.7041875988966805, + "grad_norm": 8.255279367652168, + "learning_rate": 4.344359399222725e-06, + "loss": 0.09534530639648438, + "step": 81440 + }, + { + "epoch": 0.7042308324182238, + "grad_norm": 7.984310822633186, + "learning_rate": 4.344177264064324e-06, + "loss": 0.13851318359375, + "step": 81445 + }, + { + "epoch": 0.704274065939767, + "grad_norm": 1.1983724311362631, + "learning_rate": 4.3439951227066975e-06, + "loss": 0.10529899597167969, + "step": 81450 + }, + { + "epoch": 0.7043172994613103, + "grad_norm": 13.372067191257988, + "learning_rate": 4.343812975150684e-06, + "loss": 0.05160598754882813, + "step": 81455 + }, + { + "epoch": 0.7043605329828536, + "grad_norm": 7.607185441754597, + "learning_rate": 4.343630821397122e-06, + "loss": 0.28369979858398436, + "step": 81460 + }, + { + "epoch": 0.7044037665043968, + "grad_norm": 10.554721856620763, + "learning_rate": 4.343448661446856e-06, + "loss": 0.22459869384765624, + "step": 81465 + }, + { + "epoch": 0.7044470000259401, + "grad_norm": 1.5178459272804485, + "learning_rate": 4.343266495300722e-06, + "loss": 0.12223129272460938, + "step": 81470 + }, + { + "epoch": 0.7044902335474834, + "grad_norm": 31.090232544955782, + "learning_rate": 4.343084322959562e-06, + "loss": 0.1586578369140625, + "step": 81475 + }, + { + "epoch": 0.7045334670690266, + "grad_norm": 26.46810606024044, + "learning_rate": 4.342902144424216e-06, + "loss": 0.384747314453125, + "step": 81480 + }, + { + "epoch": 0.7045767005905699, + "grad_norm": 8.575159414599506, + "learning_rate": 4.342719959695523e-06, + "loss": 0.06458511352539062, + "step": 81485 + }, + { + "epoch": 0.7046199341121132, + "grad_norm": 25.436541587165934, + "learning_rate": 4.342537768774325e-06, + "loss": 0.3258209228515625, + "step": 81490 + }, + { + "epoch": 0.7046631676336564, + "grad_norm": 0.7936945952598288, + "learning_rate": 4.342355571661461e-06, + "loss": 0.1305379867553711, + "step": 81495 + }, + { + "epoch": 0.7047064011551997, + "grad_norm": 40.55199613071958, + "learning_rate": 4.342173368357772e-06, + "loss": 0.6939727783203125, + "step": 81500 + }, + { + "epoch": 0.704749634676743, + "grad_norm": 0.6463501333914508, + "learning_rate": 4.3419911588640985e-06, + "loss": 0.16587066650390625, + "step": 81505 + }, + { + "epoch": 0.7047928681982862, + "grad_norm": 12.409211532881057, + "learning_rate": 4.34180894318128e-06, + "loss": 0.3783306121826172, + "step": 81510 + }, + { + "epoch": 0.7048361017198295, + "grad_norm": 0.15391950024004372, + "learning_rate": 4.341626721310157e-06, + "loss": 0.022454071044921874, + "step": 81515 + }, + { + "epoch": 0.7048793352413727, + "grad_norm": 1.7275949155471777, + "learning_rate": 4.34144449325157e-06, + "loss": 0.09818115234375, + "step": 81520 + }, + { + "epoch": 0.704922568762916, + "grad_norm": 2.3043803049363536, + "learning_rate": 4.3412622590063585e-06, + "loss": 0.027685546875, + "step": 81525 + }, + { + "epoch": 0.7049658022844593, + "grad_norm": 1.9403635909724963, + "learning_rate": 4.341080018575366e-06, + "loss": 0.1390453338623047, + "step": 81530 + }, + { + "epoch": 0.7050090358060025, + "grad_norm": 5.128135901513795, + "learning_rate": 4.34089777195943e-06, + "loss": 0.350653076171875, + "step": 81535 + }, + { + "epoch": 0.7050522693275458, + "grad_norm": 47.290710391177655, + "learning_rate": 4.340715519159392e-06, + "loss": 0.38153800964355467, + "step": 81540 + }, + { + "epoch": 0.7050955028490891, + "grad_norm": 0.3214063992130978, + "learning_rate": 4.340533260176093e-06, + "loss": 0.06008453369140625, + "step": 81545 + }, + { + "epoch": 0.7051387363706323, + "grad_norm": 13.71767746447391, + "learning_rate": 4.340350995010373e-06, + "loss": 0.49559326171875, + "step": 81550 + }, + { + "epoch": 0.7051819698921756, + "grad_norm": 6.963247508468867, + "learning_rate": 4.3401687236630715e-06, + "loss": 0.07865047454833984, + "step": 81555 + }, + { + "epoch": 0.7052252034137189, + "grad_norm": 8.168482543221273, + "learning_rate": 4.3399864461350316e-06, + "loss": 0.06783447265625, + "step": 81560 + }, + { + "epoch": 0.7052684369352621, + "grad_norm": 2.3278393164413695, + "learning_rate": 4.339804162427092e-06, + "loss": 0.0990488052368164, + "step": 81565 + }, + { + "epoch": 0.7053116704568054, + "grad_norm": 0.7828741884726024, + "learning_rate": 4.339621872540093e-06, + "loss": 0.2660850524902344, + "step": 81570 + }, + { + "epoch": 0.7053549039783487, + "grad_norm": 2.5716742395998278, + "learning_rate": 4.339439576474879e-06, + "loss": 0.196630859375, + "step": 81575 + }, + { + "epoch": 0.7053981374998919, + "grad_norm": 9.231023491727044, + "learning_rate": 4.339257274232285e-06, + "loss": 0.051409912109375, + "step": 81580 + }, + { + "epoch": 0.7054413710214352, + "grad_norm": 4.020930508892019, + "learning_rate": 4.339074965813156e-06, + "loss": 0.123382568359375, + "step": 81585 + }, + { + "epoch": 0.7054846045429785, + "grad_norm": 2.0805978562372927, + "learning_rate": 4.338892651218333e-06, + "loss": 0.09824066162109375, + "step": 81590 + }, + { + "epoch": 0.7055278380645217, + "grad_norm": 15.149144828580278, + "learning_rate": 4.3387103304486535e-06, + "loss": 0.14720458984375, + "step": 81595 + }, + { + "epoch": 0.705571071586065, + "grad_norm": 13.095266740889802, + "learning_rate": 4.3385280035049605e-06, + "loss": 0.09213790893554688, + "step": 81600 + }, + { + "epoch": 0.7056143051076083, + "grad_norm": 15.153604254421413, + "learning_rate": 4.3383456703880956e-06, + "loss": 0.09677886962890625, + "step": 81605 + }, + { + "epoch": 0.7056575386291515, + "grad_norm": 1.1651148313676112, + "learning_rate": 4.338163331098897e-06, + "loss": 0.12364959716796875, + "step": 81610 + }, + { + "epoch": 0.7057007721506947, + "grad_norm": 25.01556819023241, + "learning_rate": 4.3379809856382094e-06, + "loss": 0.5813735961914063, + "step": 81615 + }, + { + "epoch": 0.705744005672238, + "grad_norm": 5.228228318252446, + "learning_rate": 4.33779863400687e-06, + "loss": 0.07848663330078125, + "step": 81620 + }, + { + "epoch": 0.7057872391937813, + "grad_norm": 6.776637469786549, + "learning_rate": 4.337616276205724e-06, + "loss": 0.281982421875, + "step": 81625 + }, + { + "epoch": 0.7058304727153245, + "grad_norm": 32.32650479122368, + "learning_rate": 4.3374339122356085e-06, + "loss": 0.438470458984375, + "step": 81630 + }, + { + "epoch": 0.7058737062368678, + "grad_norm": 0.37879945476013344, + "learning_rate": 4.3372515420973645e-06, + "loss": 0.09794921875, + "step": 81635 + }, + { + "epoch": 0.7059169397584111, + "grad_norm": 0.27818802380703395, + "learning_rate": 4.337069165791836e-06, + "loss": 0.06729583740234375, + "step": 81640 + }, + { + "epoch": 0.7059601732799543, + "grad_norm": 10.273452246170395, + "learning_rate": 4.3368867833198625e-06, + "loss": 0.23426361083984376, + "step": 81645 + }, + { + "epoch": 0.7060034068014976, + "grad_norm": 1.5031714234743632, + "learning_rate": 4.336704394682285e-06, + "loss": 0.08129043579101562, + "step": 81650 + }, + { + "epoch": 0.7060466403230409, + "grad_norm": 7.847930371796977, + "learning_rate": 4.336521999879946e-06, + "loss": 0.15017414093017578, + "step": 81655 + }, + { + "epoch": 0.7060898738445841, + "grad_norm": 5.058719819456223, + "learning_rate": 4.3363395989136844e-06, + "loss": 0.0547210693359375, + "step": 81660 + }, + { + "epoch": 0.7061331073661274, + "grad_norm": 26.441745941326023, + "learning_rate": 4.3361571917843435e-06, + "loss": 0.12407989501953125, + "step": 81665 + }, + { + "epoch": 0.7061763408876707, + "grad_norm": 1.0936368580014662, + "learning_rate": 4.3359747784927625e-06, + "loss": 0.067364501953125, + "step": 81670 + }, + { + "epoch": 0.7062195744092139, + "grad_norm": 22.53037186036993, + "learning_rate": 4.335792359039785e-06, + "loss": 0.1289306640625, + "step": 81675 + }, + { + "epoch": 0.7062628079307572, + "grad_norm": 20.601080341049084, + "learning_rate": 4.335609933426251e-06, + "loss": 0.2215179443359375, + "step": 81680 + }, + { + "epoch": 0.7063060414523005, + "grad_norm": 0.11801898925680654, + "learning_rate": 4.335427501653001e-06, + "loss": 0.09366312026977539, + "step": 81685 + }, + { + "epoch": 0.7063492749738437, + "grad_norm": 33.8527623414574, + "learning_rate": 4.3352450637208785e-06, + "loss": 0.18310546875, + "step": 81690 + }, + { + "epoch": 0.7063925084953869, + "grad_norm": 2.5251954955994766, + "learning_rate": 4.335062619630723e-06, + "loss": 0.1138885498046875, + "step": 81695 + }, + { + "epoch": 0.7064357420169303, + "grad_norm": 0.8921570917164273, + "learning_rate": 4.334880169383377e-06, + "loss": 0.025232696533203126, + "step": 81700 + }, + { + "epoch": 0.7064789755384735, + "grad_norm": 51.04196640995936, + "learning_rate": 4.334697712979682e-06, + "loss": 0.24441757202148437, + "step": 81705 + }, + { + "epoch": 0.7065222090600167, + "grad_norm": 6.685495947682013, + "learning_rate": 4.334515250420479e-06, + "loss": 0.22819671630859376, + "step": 81710 + }, + { + "epoch": 0.7065654425815601, + "grad_norm": 0.40622918261362767, + "learning_rate": 4.33433278170661e-06, + "loss": 0.08292007446289062, + "step": 81715 + }, + { + "epoch": 0.7066086761031033, + "grad_norm": 3.6224162680156673, + "learning_rate": 4.334150306838914e-06, + "loss": 0.023645973205566405, + "step": 81720 + }, + { + "epoch": 0.7066519096246465, + "grad_norm": 33.46580369579083, + "learning_rate": 4.333967825818237e-06, + "loss": 0.25982131958007815, + "step": 81725 + }, + { + "epoch": 0.7066951431461899, + "grad_norm": 2.1203268590235815, + "learning_rate": 4.333785338645417e-06, + "loss": 0.2947662353515625, + "step": 81730 + }, + { + "epoch": 0.7067383766677331, + "grad_norm": 30.85725380044715, + "learning_rate": 4.333602845321298e-06, + "loss": 0.2822425842285156, + "step": 81735 + }, + { + "epoch": 0.7067816101892763, + "grad_norm": 1.262637955591581, + "learning_rate": 4.333420345846719e-06, + "loss": 0.103350830078125, + "step": 81740 + }, + { + "epoch": 0.7068248437108197, + "grad_norm": 0.559694724866029, + "learning_rate": 4.333237840222524e-06, + "loss": 0.0578125, + "step": 81745 + }, + { + "epoch": 0.7068680772323629, + "grad_norm": 4.586504449417537, + "learning_rate": 4.333055328449554e-06, + "loss": 0.24127655029296874, + "step": 81750 + }, + { + "epoch": 0.7069113107539061, + "grad_norm": 13.464969187263993, + "learning_rate": 4.33287281052865e-06, + "loss": 0.08535690307617187, + "step": 81755 + }, + { + "epoch": 0.7069545442754495, + "grad_norm": 13.776325237901423, + "learning_rate": 4.332690286460655e-06, + "loss": 0.2610313415527344, + "step": 81760 + }, + { + "epoch": 0.7069977777969927, + "grad_norm": 5.659594912263115, + "learning_rate": 4.3325077562464106e-06, + "loss": 0.0563812255859375, + "step": 81765 + }, + { + "epoch": 0.7070410113185359, + "grad_norm": 1.8055074734785157, + "learning_rate": 4.332325219886757e-06, + "loss": 0.1323944091796875, + "step": 81770 + }, + { + "epoch": 0.7070842448400793, + "grad_norm": 7.438287497853295, + "learning_rate": 4.332142677382538e-06, + "loss": 0.16283073425292968, + "step": 81775 + }, + { + "epoch": 0.7071274783616225, + "grad_norm": 3.12467160335954, + "learning_rate": 4.331960128734595e-06, + "loss": 0.23567657470703124, + "step": 81780 + }, + { + "epoch": 0.7071707118831657, + "grad_norm": 10.887331785862509, + "learning_rate": 4.331777573943769e-06, + "loss": 0.0712493896484375, + "step": 81785 + }, + { + "epoch": 0.7072139454047089, + "grad_norm": 15.434477404170865, + "learning_rate": 4.331595013010902e-06, + "loss": 0.13574066162109374, + "step": 81790 + }, + { + "epoch": 0.7072571789262523, + "grad_norm": 9.181068512045588, + "learning_rate": 4.331412445936837e-06, + "loss": 0.0823272705078125, + "step": 81795 + }, + { + "epoch": 0.7073004124477955, + "grad_norm": 0.35178587678159545, + "learning_rate": 4.331229872722416e-06, + "loss": 0.07740478515625, + "step": 81800 + }, + { + "epoch": 0.7073436459693387, + "grad_norm": 4.838383046976751, + "learning_rate": 4.33104729336848e-06, + "loss": 0.14188613891601562, + "step": 81805 + }, + { + "epoch": 0.7073868794908821, + "grad_norm": 2.0137896165535856, + "learning_rate": 4.330864707875871e-06, + "loss": 0.1283355712890625, + "step": 81810 + }, + { + "epoch": 0.7074301130124253, + "grad_norm": 9.742956336849515, + "learning_rate": 4.330682116245432e-06, + "loss": 0.030836868286132812, + "step": 81815 + }, + { + "epoch": 0.7074733465339685, + "grad_norm": 20.034746244199482, + "learning_rate": 4.330499518478005e-06, + "loss": 0.305615234375, + "step": 81820 + }, + { + "epoch": 0.7075165800555119, + "grad_norm": 31.78903453405696, + "learning_rate": 4.3303169145744315e-06, + "loss": 0.3785297393798828, + "step": 81825 + }, + { + "epoch": 0.7075598135770551, + "grad_norm": 4.055761981538845, + "learning_rate": 4.330134304535554e-06, + "loss": 0.14803009033203124, + "step": 81830 + }, + { + "epoch": 0.7076030470985983, + "grad_norm": 5.782101835228106, + "learning_rate": 4.329951688362214e-06, + "loss": 0.185125732421875, + "step": 81835 + }, + { + "epoch": 0.7076462806201417, + "grad_norm": 37.92772974252461, + "learning_rate": 4.329769066055254e-06, + "loss": 0.19947967529296876, + "step": 81840 + }, + { + "epoch": 0.7076895141416849, + "grad_norm": 0.3807613770078174, + "learning_rate": 4.329586437615518e-06, + "loss": 0.0323974609375, + "step": 81845 + }, + { + "epoch": 0.7077327476632281, + "grad_norm": 48.054640723443704, + "learning_rate": 4.329403803043847e-06, + "loss": 0.161370849609375, + "step": 81850 + }, + { + "epoch": 0.7077759811847715, + "grad_norm": 4.586221849313299, + "learning_rate": 4.329221162341081e-06, + "loss": 0.24436073303222655, + "step": 81855 + }, + { + "epoch": 0.7078192147063147, + "grad_norm": 27.06210028699503, + "learning_rate": 4.329038515508067e-06, + "loss": 0.2624248504638672, + "step": 81860 + }, + { + "epoch": 0.7078624482278579, + "grad_norm": 37.56741466194332, + "learning_rate": 4.328855862545643e-06, + "loss": 0.20560760498046876, + "step": 81865 + }, + { + "epoch": 0.7079056817494012, + "grad_norm": 0.28473261292497526, + "learning_rate": 4.328673203454654e-06, + "loss": 0.10724945068359375, + "step": 81870 + }, + { + "epoch": 0.7079489152709445, + "grad_norm": 7.242580119101449, + "learning_rate": 4.3284905382359405e-06, + "loss": 0.10901107788085937, + "step": 81875 + }, + { + "epoch": 0.7079921487924877, + "grad_norm": 5.463911888735756, + "learning_rate": 4.328307866890348e-06, + "loss": 0.216015625, + "step": 81880 + }, + { + "epoch": 0.708035382314031, + "grad_norm": 4.9392594930511216, + "learning_rate": 4.328125189418716e-06, + "loss": 0.5083175659179687, + "step": 81885 + }, + { + "epoch": 0.7080786158355743, + "grad_norm": 13.173030724293872, + "learning_rate": 4.327942505821887e-06, + "loss": 0.17718887329101562, + "step": 81890 + }, + { + "epoch": 0.7081218493571175, + "grad_norm": 18.243601555910754, + "learning_rate": 4.327759816100706e-06, + "loss": 0.26851959228515626, + "step": 81895 + }, + { + "epoch": 0.7081650828786608, + "grad_norm": 2.071922746967601, + "learning_rate": 4.327577120256013e-06, + "loss": 0.13750038146972657, + "step": 81900 + }, + { + "epoch": 0.7082083164002041, + "grad_norm": 4.004604789918113, + "learning_rate": 4.327394418288652e-06, + "loss": 0.1546875, + "step": 81905 + }, + { + "epoch": 0.7082515499217473, + "grad_norm": 31.179350948650818, + "learning_rate": 4.327211710199465e-06, + "loss": 0.19090423583984376, + "step": 81910 + }, + { + "epoch": 0.7082947834432906, + "grad_norm": 6.658971304910045, + "learning_rate": 4.327028995989296e-06, + "loss": 0.4447929382324219, + "step": 81915 + }, + { + "epoch": 0.7083380169648339, + "grad_norm": 4.228206847149128, + "learning_rate": 4.326846275658985e-06, + "loss": 0.16139068603515624, + "step": 81920 + }, + { + "epoch": 0.7083812504863771, + "grad_norm": 1.4948879555347854, + "learning_rate": 4.326663549209377e-06, + "loss": 0.3871246337890625, + "step": 81925 + }, + { + "epoch": 0.7084244840079204, + "grad_norm": 7.507686612927782, + "learning_rate": 4.326480816641314e-06, + "loss": 0.12124099731445312, + "step": 81930 + }, + { + "epoch": 0.7084677175294637, + "grad_norm": 6.832016152894039, + "learning_rate": 4.326298077955638e-06, + "loss": 0.07086105346679687, + "step": 81935 + }, + { + "epoch": 0.7085109510510069, + "grad_norm": 4.711037259042006, + "learning_rate": 4.326115333153194e-06, + "loss": 0.297979736328125, + "step": 81940 + }, + { + "epoch": 0.7085541845725501, + "grad_norm": 25.310042688289023, + "learning_rate": 4.3259325822348214e-06, + "loss": 0.1969268798828125, + "step": 81945 + }, + { + "epoch": 0.7085974180940935, + "grad_norm": 0.8134520144541931, + "learning_rate": 4.325749825201366e-06, + "loss": 0.05483322143554688, + "step": 81950 + }, + { + "epoch": 0.7086406516156367, + "grad_norm": 1.7712003308145534, + "learning_rate": 4.325567062053669e-06, + "loss": 0.04022064208984375, + "step": 81955 + }, + { + "epoch": 0.70868388513718, + "grad_norm": 19.61367776596773, + "learning_rate": 4.325384292792574e-06, + "loss": 0.3135093688964844, + "step": 81960 + }, + { + "epoch": 0.7087271186587232, + "grad_norm": 1.4669235015773086, + "learning_rate": 4.325201517418925e-06, + "loss": 0.09358673095703125, + "step": 81965 + }, + { + "epoch": 0.7087703521802665, + "grad_norm": 6.484788394780266, + "learning_rate": 4.325018735933562e-06, + "loss": 0.1455982208251953, + "step": 81970 + }, + { + "epoch": 0.7088135857018097, + "grad_norm": 55.79039694888006, + "learning_rate": 4.32483594833733e-06, + "loss": 0.316583251953125, + "step": 81975 + }, + { + "epoch": 0.708856819223353, + "grad_norm": 18.126406316249025, + "learning_rate": 4.324653154631072e-06, + "loss": 0.07969970703125, + "step": 81980 + }, + { + "epoch": 0.7089000527448963, + "grad_norm": 19.49398402414753, + "learning_rate": 4.32447035481563e-06, + "loss": 0.13048248291015624, + "step": 81985 + }, + { + "epoch": 0.7089432862664395, + "grad_norm": 7.818609184861271, + "learning_rate": 4.324287548891849e-06, + "loss": 0.31290130615234374, + "step": 81990 + }, + { + "epoch": 0.7089865197879828, + "grad_norm": 13.609233447216916, + "learning_rate": 4.32410473686057e-06, + "loss": 0.0478515625, + "step": 81995 + }, + { + "epoch": 0.7090297533095261, + "grad_norm": 0.2874068077749435, + "learning_rate": 4.323921918722637e-06, + "loss": 0.3507091522216797, + "step": 82000 + }, + { + "epoch": 0.7090729868310693, + "grad_norm": 11.66581842480579, + "learning_rate": 4.323739094478893e-06, + "loss": 0.2323394775390625, + "step": 82005 + }, + { + "epoch": 0.7091162203526126, + "grad_norm": 2.4844360406466666, + "learning_rate": 4.3235562641301815e-06, + "loss": 0.19169540405273439, + "step": 82010 + }, + { + "epoch": 0.7091594538741559, + "grad_norm": 45.889604316813156, + "learning_rate": 4.323373427677345e-06, + "loss": 0.23048553466796876, + "step": 82015 + }, + { + "epoch": 0.7092026873956991, + "grad_norm": 2.8583402570955596, + "learning_rate": 4.323190585121229e-06, + "loss": 0.1207122802734375, + "step": 82020 + }, + { + "epoch": 0.7092459209172424, + "grad_norm": 1.207093118774349, + "learning_rate": 4.323007736462673e-06, + "loss": 0.28186492919921874, + "step": 82025 + }, + { + "epoch": 0.7092891544387857, + "grad_norm": 2.630849837221141, + "learning_rate": 4.3228248817025235e-06, + "loss": 0.045123291015625, + "step": 82030 + }, + { + "epoch": 0.7093323879603289, + "grad_norm": 0.9140448110885017, + "learning_rate": 4.322642020841621e-06, + "loss": 0.09034881591796876, + "step": 82035 + }, + { + "epoch": 0.7093756214818722, + "grad_norm": 13.017931867733992, + "learning_rate": 4.322459153880811e-06, + "loss": 0.24760894775390624, + "step": 82040 + }, + { + "epoch": 0.7094188550034154, + "grad_norm": 17.31920233793031, + "learning_rate": 4.322276280820937e-06, + "loss": 0.5777389526367187, + "step": 82045 + }, + { + "epoch": 0.7094620885249587, + "grad_norm": 37.29406613352645, + "learning_rate": 4.322093401662841e-06, + "loss": 0.5124351501464843, + "step": 82050 + }, + { + "epoch": 0.709505322046502, + "grad_norm": 11.193724276876413, + "learning_rate": 4.3219105164073675e-06, + "loss": 0.1624542236328125, + "step": 82055 + }, + { + "epoch": 0.7095485555680452, + "grad_norm": 1.1593792098084086, + "learning_rate": 4.321727625055359e-06, + "loss": 0.21197547912597656, + "step": 82060 + }, + { + "epoch": 0.7095917890895885, + "grad_norm": 2.9358379472960268, + "learning_rate": 4.321544727607659e-06, + "loss": 0.1241973876953125, + "step": 82065 + }, + { + "epoch": 0.7096350226111318, + "grad_norm": 18.6885716322062, + "learning_rate": 4.3213618240651125e-06, + "loss": 0.23469696044921876, + "step": 82070 + }, + { + "epoch": 0.709678256132675, + "grad_norm": 37.01600436709648, + "learning_rate": 4.321178914428562e-06, + "loss": 0.1511505126953125, + "step": 82075 + }, + { + "epoch": 0.7097214896542183, + "grad_norm": 2.197111918025533, + "learning_rate": 4.32099599869885e-06, + "loss": 0.10929336547851562, + "step": 82080 + }, + { + "epoch": 0.7097647231757616, + "grad_norm": 7.2974356790116905, + "learning_rate": 4.320813076876823e-06, + "loss": 0.32241973876953123, + "step": 82085 + }, + { + "epoch": 0.7098079566973048, + "grad_norm": 0.2597560451212494, + "learning_rate": 4.320630148963321e-06, + "loss": 0.17722015380859374, + "step": 82090 + }, + { + "epoch": 0.7098511902188481, + "grad_norm": 0.06995546925052114, + "learning_rate": 4.32044721495919e-06, + "loss": 0.1800567626953125, + "step": 82095 + }, + { + "epoch": 0.7098944237403914, + "grad_norm": 1.5528731869933494, + "learning_rate": 4.3202642748652735e-06, + "loss": 0.1329559326171875, + "step": 82100 + }, + { + "epoch": 0.7099376572619346, + "grad_norm": 3.9532326809980547, + "learning_rate": 4.320081328682415e-06, + "loss": 0.03762054443359375, + "step": 82105 + }, + { + "epoch": 0.7099808907834779, + "grad_norm": 1.105486190851153, + "learning_rate": 4.319898376411457e-06, + "loss": 0.03188018798828125, + "step": 82110 + }, + { + "epoch": 0.7100241243050212, + "grad_norm": 0.3064256407205273, + "learning_rate": 4.3197154180532455e-06, + "loss": 0.1690725326538086, + "step": 82115 + }, + { + "epoch": 0.7100673578265644, + "grad_norm": 18.344439369907914, + "learning_rate": 4.319532453608623e-06, + "loss": 0.23281478881835938, + "step": 82120 + }, + { + "epoch": 0.7101105913481077, + "grad_norm": 3.0870829294902484, + "learning_rate": 4.319349483078432e-06, + "loss": 0.09567337036132813, + "step": 82125 + }, + { + "epoch": 0.710153824869651, + "grad_norm": 20.863900422719873, + "learning_rate": 4.319166506463519e-06, + "loss": 0.34640016555786135, + "step": 82130 + }, + { + "epoch": 0.7101970583911942, + "grad_norm": 9.674748318144777, + "learning_rate": 4.318983523764727e-06, + "loss": 0.2390716552734375, + "step": 82135 + }, + { + "epoch": 0.7102402919127374, + "grad_norm": 27.730173117016452, + "learning_rate": 4.318800534982898e-06, + "loss": 0.128955078125, + "step": 82140 + }, + { + "epoch": 0.7102835254342807, + "grad_norm": 13.212675887407313, + "learning_rate": 4.318617540118879e-06, + "loss": 0.18624420166015626, + "step": 82145 + }, + { + "epoch": 0.710326758955824, + "grad_norm": 1.264276524585368, + "learning_rate": 4.318434539173511e-06, + "loss": 0.06920547485351562, + "step": 82150 + }, + { + "epoch": 0.7103699924773672, + "grad_norm": 0.6425601484592547, + "learning_rate": 4.318251532147641e-06, + "loss": 0.06679162979125977, + "step": 82155 + }, + { + "epoch": 0.7104132259989105, + "grad_norm": 1.7334550775236899, + "learning_rate": 4.31806851904211e-06, + "loss": 0.19408950805664063, + "step": 82160 + }, + { + "epoch": 0.7104564595204538, + "grad_norm": 2.4254343167194796, + "learning_rate": 4.317885499857765e-06, + "loss": 0.38647308349609377, + "step": 82165 + }, + { + "epoch": 0.710499693041997, + "grad_norm": 1.5724368522200793, + "learning_rate": 4.317702474595447e-06, + "loss": 0.0563323974609375, + "step": 82170 + }, + { + "epoch": 0.7105429265635403, + "grad_norm": 1.5264003386995642, + "learning_rate": 4.317519443256002e-06, + "loss": 0.07252349853515624, + "step": 82175 + }, + { + "epoch": 0.7105861600850836, + "grad_norm": 38.29999561671648, + "learning_rate": 4.317336405840274e-06, + "loss": 0.2320281982421875, + "step": 82180 + }, + { + "epoch": 0.7106293936066268, + "grad_norm": 9.668076845794571, + "learning_rate": 4.317153362349108e-06, + "loss": 0.1568462371826172, + "step": 82185 + }, + { + "epoch": 0.7106726271281701, + "grad_norm": 8.432086969745292, + "learning_rate": 4.316970312783346e-06, + "loss": 0.0741424560546875, + "step": 82190 + }, + { + "epoch": 0.7107158606497134, + "grad_norm": 0.8372726140349832, + "learning_rate": 4.316787257143833e-06, + "loss": 0.049457740783691403, + "step": 82195 + }, + { + "epoch": 0.7107590941712566, + "grad_norm": 6.255379160317158, + "learning_rate": 4.316604195431413e-06, + "loss": 0.0582733154296875, + "step": 82200 + }, + { + "epoch": 0.7108023276927999, + "grad_norm": 9.434217186785261, + "learning_rate": 4.3164211276469315e-06, + "loss": 0.07999496459960938, + "step": 82205 + }, + { + "epoch": 0.7108455612143432, + "grad_norm": 2.9991509931350806, + "learning_rate": 4.316238053791232e-06, + "loss": 0.236492919921875, + "step": 82210 + }, + { + "epoch": 0.7108887947358864, + "grad_norm": 4.734173557624044, + "learning_rate": 4.3160549738651596e-06, + "loss": 0.053263092041015626, + "step": 82215 + }, + { + "epoch": 0.7109320282574296, + "grad_norm": 1.5526344066837885, + "learning_rate": 4.315871887869558e-06, + "loss": 0.21548309326171874, + "step": 82220 + }, + { + "epoch": 0.710975261778973, + "grad_norm": 2.470670715229022, + "learning_rate": 4.31568879580527e-06, + "loss": 0.033374786376953125, + "step": 82225 + }, + { + "epoch": 0.7110184953005162, + "grad_norm": 0.5084094397766812, + "learning_rate": 4.315505697673142e-06, + "loss": 0.33654632568359377, + "step": 82230 + }, + { + "epoch": 0.7110617288220594, + "grad_norm": 9.393145362461148, + "learning_rate": 4.315322593474019e-06, + "loss": 0.07917633056640624, + "step": 82235 + }, + { + "epoch": 0.7111049623436028, + "grad_norm": 3.1898811625472527, + "learning_rate": 4.315139483208743e-06, + "loss": 0.09366302490234375, + "step": 82240 + }, + { + "epoch": 0.711148195865146, + "grad_norm": 0.2765357221283494, + "learning_rate": 4.314956366878162e-06, + "loss": 0.09680309295654296, + "step": 82245 + }, + { + "epoch": 0.7111914293866892, + "grad_norm": 24.508646199093636, + "learning_rate": 4.314773244483117e-06, + "loss": 0.26396484375, + "step": 82250 + }, + { + "epoch": 0.7112346629082326, + "grad_norm": 2.4227977064020214, + "learning_rate": 4.314590116024454e-06, + "loss": 0.0977294921875, + "step": 82255 + }, + { + "epoch": 0.7112778964297758, + "grad_norm": 1.9877718316399442, + "learning_rate": 4.314406981503018e-06, + "loss": 0.20915069580078124, + "step": 82260 + }, + { + "epoch": 0.711321129951319, + "grad_norm": 9.434506995681915, + "learning_rate": 4.314223840919653e-06, + "loss": 0.2693115234375, + "step": 82265 + }, + { + "epoch": 0.7113643634728624, + "grad_norm": 17.504708593184386, + "learning_rate": 4.314040694275203e-06, + "loss": 0.14242172241210938, + "step": 82270 + }, + { + "epoch": 0.7114075969944056, + "grad_norm": 11.304387998344595, + "learning_rate": 4.313857541570514e-06, + "loss": 0.26102218627929685, + "step": 82275 + }, + { + "epoch": 0.7114508305159488, + "grad_norm": 15.237094700664313, + "learning_rate": 4.313674382806431e-06, + "loss": 0.24123687744140626, + "step": 82280 + }, + { + "epoch": 0.7114940640374922, + "grad_norm": 9.60566789584066, + "learning_rate": 4.313491217983797e-06, + "loss": 0.16054306030273438, + "step": 82285 + }, + { + "epoch": 0.7115372975590354, + "grad_norm": 0.9807369066143088, + "learning_rate": 4.3133080471034565e-06, + "loss": 0.18554229736328126, + "step": 82290 + }, + { + "epoch": 0.7115805310805786, + "grad_norm": 8.409552939815468, + "learning_rate": 4.3131248701662565e-06, + "loss": 0.12222518920898437, + "step": 82295 + }, + { + "epoch": 0.711623764602122, + "grad_norm": 27.154869144014615, + "learning_rate": 4.31294168717304e-06, + "loss": 0.1436248779296875, + "step": 82300 + }, + { + "epoch": 0.7116669981236652, + "grad_norm": 19.215264492920554, + "learning_rate": 4.312758498124654e-06, + "loss": 0.16903839111328126, + "step": 82305 + }, + { + "epoch": 0.7117102316452084, + "grad_norm": 11.547491457624549, + "learning_rate": 4.312575303021941e-06, + "loss": 0.08604278564453124, + "step": 82310 + }, + { + "epoch": 0.7117534651667516, + "grad_norm": 8.332473917295005, + "learning_rate": 4.312392101865745e-06, + "loss": 0.34630126953125, + "step": 82315 + }, + { + "epoch": 0.711796698688295, + "grad_norm": 0.9225466162977333, + "learning_rate": 4.312208894656914e-06, + "loss": 0.1357177734375, + "step": 82320 + }, + { + "epoch": 0.7118399322098382, + "grad_norm": 0.7790289086813541, + "learning_rate": 4.312025681396293e-06, + "loss": 0.08071556091308593, + "step": 82325 + }, + { + "epoch": 0.7118831657313814, + "grad_norm": 25.46965953475003, + "learning_rate": 4.311842462084724e-06, + "loss": 0.17348480224609375, + "step": 82330 + }, + { + "epoch": 0.7119263992529248, + "grad_norm": 5.596640413273592, + "learning_rate": 4.311659236723053e-06, + "loss": 0.08999176025390625, + "step": 82335 + }, + { + "epoch": 0.711969632774468, + "grad_norm": 3.3322975217583677, + "learning_rate": 4.311476005312126e-06, + "loss": 0.08661575317382812, + "step": 82340 + }, + { + "epoch": 0.7120128662960112, + "grad_norm": 13.261418080177451, + "learning_rate": 4.311292767852787e-06, + "loss": 0.07976932525634765, + "step": 82345 + }, + { + "epoch": 0.7120560998175546, + "grad_norm": 4.436812099282049, + "learning_rate": 4.311109524345882e-06, + "loss": 0.23317222595214843, + "step": 82350 + }, + { + "epoch": 0.7120993333390978, + "grad_norm": 1.055012039930726, + "learning_rate": 4.310926274792257e-06, + "loss": 0.1455596923828125, + "step": 82355 + }, + { + "epoch": 0.712142566860641, + "grad_norm": 0.36587136363009337, + "learning_rate": 4.310743019192755e-06, + "loss": 0.10603179931640624, + "step": 82360 + }, + { + "epoch": 0.7121858003821844, + "grad_norm": 42.05409108535521, + "learning_rate": 4.31055975754822e-06, + "loss": 0.4377859115600586, + "step": 82365 + }, + { + "epoch": 0.7122290339037276, + "grad_norm": 9.338346884964647, + "learning_rate": 4.310376489859502e-06, + "loss": 0.126751708984375, + "step": 82370 + }, + { + "epoch": 0.7122722674252708, + "grad_norm": 8.106361492241543, + "learning_rate": 4.310193216127443e-06, + "loss": 0.08673152923583985, + "step": 82375 + }, + { + "epoch": 0.7123155009468142, + "grad_norm": 6.587270553361115, + "learning_rate": 4.3100099363528865e-06, + "loss": 0.16417713165283204, + "step": 82380 + }, + { + "epoch": 0.7123587344683574, + "grad_norm": 0.37064445646127786, + "learning_rate": 4.309826650536682e-06, + "loss": 0.115570068359375, + "step": 82385 + }, + { + "epoch": 0.7124019679899006, + "grad_norm": 21.460770309536436, + "learning_rate": 4.309643358679673e-06, + "loss": 0.13326873779296874, + "step": 82390 + }, + { + "epoch": 0.7124452015114439, + "grad_norm": 2.805210448102839, + "learning_rate": 4.309460060782703e-06, + "loss": 0.22562255859375, + "step": 82395 + }, + { + "epoch": 0.7124884350329872, + "grad_norm": 2.4520406130888266, + "learning_rate": 4.30927675684662e-06, + "loss": 0.0621795654296875, + "step": 82400 + }, + { + "epoch": 0.7125316685545304, + "grad_norm": 0.8705800117640028, + "learning_rate": 4.309093446872268e-06, + "loss": 0.37880992889404297, + "step": 82405 + }, + { + "epoch": 0.7125749020760737, + "grad_norm": 5.429716073893446, + "learning_rate": 4.308910130860493e-06, + "loss": 0.07892608642578125, + "step": 82410 + }, + { + "epoch": 0.712618135597617, + "grad_norm": 0.0969396306389603, + "learning_rate": 4.308726808812139e-06, + "loss": 0.14043731689453126, + "step": 82415 + }, + { + "epoch": 0.7126613691191602, + "grad_norm": 2.800354942097625, + "learning_rate": 4.308543480728054e-06, + "loss": 0.026861572265625, + "step": 82420 + }, + { + "epoch": 0.7127046026407035, + "grad_norm": 3.587339231892574, + "learning_rate": 4.308360146609081e-06, + "loss": 0.09652099609375, + "step": 82425 + }, + { + "epoch": 0.7127478361622468, + "grad_norm": 2.6058445985681047, + "learning_rate": 4.3081768064560676e-06, + "loss": 0.248870849609375, + "step": 82430 + }, + { + "epoch": 0.71279106968379, + "grad_norm": 0.14744738930624077, + "learning_rate": 4.307993460269858e-06, + "loss": 0.25818576812744143, + "step": 82435 + }, + { + "epoch": 0.7128343032053333, + "grad_norm": 14.185765029864003, + "learning_rate": 4.307810108051298e-06, + "loss": 0.0830352783203125, + "step": 82440 + }, + { + "epoch": 0.7128775367268766, + "grad_norm": 4.091165431653883, + "learning_rate": 4.307626749801233e-06, + "loss": 0.07466697692871094, + "step": 82445 + }, + { + "epoch": 0.7129207702484198, + "grad_norm": 8.414518528470648, + "learning_rate": 4.307443385520509e-06, + "loss": 0.0724578857421875, + "step": 82450 + }, + { + "epoch": 0.712964003769963, + "grad_norm": 3.0457355489192843, + "learning_rate": 4.307260015209972e-06, + "loss": 0.13575668334960939, + "step": 82455 + }, + { + "epoch": 0.7130072372915064, + "grad_norm": 0.8011802056710882, + "learning_rate": 4.307076638870467e-06, + "loss": 0.03465614318847656, + "step": 82460 + }, + { + "epoch": 0.7130504708130496, + "grad_norm": 15.899823633057798, + "learning_rate": 4.306893256502841e-06, + "loss": 0.1621318817138672, + "step": 82465 + }, + { + "epoch": 0.7130937043345928, + "grad_norm": 1.3099461926048626, + "learning_rate": 4.306709868107938e-06, + "loss": 0.14393692016601561, + "step": 82470 + }, + { + "epoch": 0.7131369378561362, + "grad_norm": 4.930139702378169, + "learning_rate": 4.306526473686604e-06, + "loss": 0.2173126220703125, + "step": 82475 + }, + { + "epoch": 0.7131801713776794, + "grad_norm": 1.3982016211840418, + "learning_rate": 4.306343073239686e-06, + "loss": 0.20580120086669923, + "step": 82480 + }, + { + "epoch": 0.7132234048992226, + "grad_norm": 25.838100125297924, + "learning_rate": 4.30615966676803e-06, + "loss": 0.31906661987304685, + "step": 82485 + }, + { + "epoch": 0.7132666384207659, + "grad_norm": 31.599400948152844, + "learning_rate": 4.30597625427248e-06, + "loss": 0.43394775390625, + "step": 82490 + }, + { + "epoch": 0.7133098719423092, + "grad_norm": 2.0331661683793545, + "learning_rate": 4.305792835753883e-06, + "loss": 0.03538532257080078, + "step": 82495 + }, + { + "epoch": 0.7133531054638524, + "grad_norm": 0.3202254521768337, + "learning_rate": 4.305609411213085e-06, + "loss": 0.2476348876953125, + "step": 82500 + }, + { + "epoch": 0.7133963389853957, + "grad_norm": 45.72630468115058, + "learning_rate": 4.305425980650932e-06, + "loss": 0.20279541015625, + "step": 82505 + }, + { + "epoch": 0.713439572506939, + "grad_norm": 0.6638840754622015, + "learning_rate": 4.305242544068269e-06, + "loss": 0.07807903289794922, + "step": 82510 + }, + { + "epoch": 0.7134828060284822, + "grad_norm": 0.13911007636542685, + "learning_rate": 4.305059101465943e-06, + "loss": 0.14930400848388672, + "step": 82515 + }, + { + "epoch": 0.7135260395500255, + "grad_norm": 49.61581579120243, + "learning_rate": 4.3048756528448e-06, + "loss": 0.22547626495361328, + "step": 82520 + }, + { + "epoch": 0.7135692730715688, + "grad_norm": 4.603477347752737, + "learning_rate": 4.304692198205685e-06, + "loss": 0.10621261596679688, + "step": 82525 + }, + { + "epoch": 0.713612506593112, + "grad_norm": 17.463894745535406, + "learning_rate": 4.304508737549446e-06, + "loss": 0.17841796875, + "step": 82530 + }, + { + "epoch": 0.7136557401146553, + "grad_norm": 30.861953058090013, + "learning_rate": 4.304325270876928e-06, + "loss": 0.37985992431640625, + "step": 82535 + }, + { + "epoch": 0.7136989736361986, + "grad_norm": 39.92001028728759, + "learning_rate": 4.304141798188976e-06, + "loss": 0.3870595932006836, + "step": 82540 + }, + { + "epoch": 0.7137422071577418, + "grad_norm": 3.4150025738812193, + "learning_rate": 4.303958319486438e-06, + "loss": 0.02510223388671875, + "step": 82545 + }, + { + "epoch": 0.7137854406792851, + "grad_norm": 21.8781003020204, + "learning_rate": 4.303774834770159e-06, + "loss": 0.14384613037109376, + "step": 82550 + }, + { + "epoch": 0.7138286742008284, + "grad_norm": 2.4622154550876365, + "learning_rate": 4.303591344040986e-06, + "loss": 0.0984375, + "step": 82555 + }, + { + "epoch": 0.7138719077223716, + "grad_norm": 36.418278548435765, + "learning_rate": 4.303407847299765e-06, + "loss": 0.26156158447265626, + "step": 82560 + }, + { + "epoch": 0.7139151412439149, + "grad_norm": 34.63637955556247, + "learning_rate": 4.303224344547342e-06, + "loss": 0.367340087890625, + "step": 82565 + }, + { + "epoch": 0.7139583747654581, + "grad_norm": 5.710880065301414, + "learning_rate": 4.3030408357845636e-06, + "loss": 0.10696144104003906, + "step": 82570 + }, + { + "epoch": 0.7140016082870014, + "grad_norm": 0.5823200958661865, + "learning_rate": 4.302857321012276e-06, + "loss": 0.3474296569824219, + "step": 82575 + }, + { + "epoch": 0.7140448418085447, + "grad_norm": 40.720324857728336, + "learning_rate": 4.302673800231326e-06, + "loss": 0.37005043029785156, + "step": 82580 + }, + { + "epoch": 0.7140880753300879, + "grad_norm": 30.531112298800267, + "learning_rate": 4.302490273442559e-06, + "loss": 0.2359100341796875, + "step": 82585 + }, + { + "epoch": 0.7141313088516312, + "grad_norm": 17.22261362596865, + "learning_rate": 4.302306740646822e-06, + "loss": 0.19005470275878905, + "step": 82590 + }, + { + "epoch": 0.7141745423731745, + "grad_norm": 0.5688709676453236, + "learning_rate": 4.302123201844961e-06, + "loss": 0.30011749267578125, + "step": 82595 + }, + { + "epoch": 0.7142177758947177, + "grad_norm": 2.9181474670726186, + "learning_rate": 4.301939657037823e-06, + "loss": 0.06727733612060546, + "step": 82600 + }, + { + "epoch": 0.714261009416261, + "grad_norm": 33.54233063636516, + "learning_rate": 4.301756106226254e-06, + "loss": 0.2188385009765625, + "step": 82605 + }, + { + "epoch": 0.7143042429378043, + "grad_norm": 29.022890749341244, + "learning_rate": 4.301572549411102e-06, + "loss": 0.15737228393554686, + "step": 82610 + }, + { + "epoch": 0.7143474764593475, + "grad_norm": 26.1956185062915, + "learning_rate": 4.301388986593212e-06, + "loss": 0.1864593505859375, + "step": 82615 + }, + { + "epoch": 0.7143907099808908, + "grad_norm": 5.388433866644799, + "learning_rate": 4.301205417773431e-06, + "loss": 0.09671630859375, + "step": 82620 + }, + { + "epoch": 0.714433943502434, + "grad_norm": 2.3026328967820455, + "learning_rate": 4.301021842952605e-06, + "loss": 0.212689208984375, + "step": 82625 + }, + { + "epoch": 0.7144771770239773, + "grad_norm": 1.8344005164226045, + "learning_rate": 4.300838262131582e-06, + "loss": 0.111663818359375, + "step": 82630 + }, + { + "epoch": 0.7145204105455206, + "grad_norm": 33.026607971578514, + "learning_rate": 4.300654675311207e-06, + "loss": 0.07455978393554688, + "step": 82635 + }, + { + "epoch": 0.7145636440670639, + "grad_norm": 7.725338052947847, + "learning_rate": 4.300471082492328e-06, + "loss": 0.18158283233642578, + "step": 82640 + }, + { + "epoch": 0.7146068775886071, + "grad_norm": 2.5432893203060534, + "learning_rate": 4.300287483675792e-06, + "loss": 0.0353607177734375, + "step": 82645 + }, + { + "epoch": 0.7146501111101504, + "grad_norm": 4.064497466673464, + "learning_rate": 4.300103878862444e-06, + "loss": 0.22977447509765625, + "step": 82650 + }, + { + "epoch": 0.7146933446316937, + "grad_norm": 3.5924799648996006, + "learning_rate": 4.299920268053132e-06, + "loss": 0.04288978576660156, + "step": 82655 + }, + { + "epoch": 0.7147365781532369, + "grad_norm": 6.525327490723307, + "learning_rate": 4.299736651248703e-06, + "loss": 0.38621368408203127, + "step": 82660 + }, + { + "epoch": 0.7147798116747801, + "grad_norm": 1.6235497150695284, + "learning_rate": 4.299553028450003e-06, + "loss": 0.045355224609375, + "step": 82665 + }, + { + "epoch": 0.7148230451963234, + "grad_norm": 12.176727745245554, + "learning_rate": 4.299369399657879e-06, + "loss": 0.2023529052734375, + "step": 82670 + }, + { + "epoch": 0.7148662787178667, + "grad_norm": 5.284350139108507, + "learning_rate": 4.29918576487318e-06, + "loss": 0.1009765625, + "step": 82675 + }, + { + "epoch": 0.7149095122394099, + "grad_norm": 5.042920604631324, + "learning_rate": 4.2990021240967484e-06, + "loss": 0.19462013244628906, + "step": 82680 + }, + { + "epoch": 0.7149527457609532, + "grad_norm": 0.28168937401785, + "learning_rate": 4.298818477329435e-06, + "loss": 0.08445587158203124, + "step": 82685 + }, + { + "epoch": 0.7149959792824965, + "grad_norm": 6.672958328783442, + "learning_rate": 4.298634824572086e-06, + "loss": 0.16978912353515624, + "step": 82690 + }, + { + "epoch": 0.7150392128040397, + "grad_norm": 9.223892556691034, + "learning_rate": 4.298451165825548e-06, + "loss": 0.21236572265625, + "step": 82695 + }, + { + "epoch": 0.715082446325583, + "grad_norm": 10.111668431559853, + "learning_rate": 4.298267501090667e-06, + "loss": 0.28549995422363283, + "step": 82700 + }, + { + "epoch": 0.7151256798471263, + "grad_norm": 9.435257941697538, + "learning_rate": 4.298083830368292e-06, + "loss": 0.09721298217773437, + "step": 82705 + }, + { + "epoch": 0.7151689133686695, + "grad_norm": 0.7776319206957543, + "learning_rate": 4.297900153659269e-06, + "loss": 0.33036556243896487, + "step": 82710 + }, + { + "epoch": 0.7152121468902128, + "grad_norm": 31.92074633393248, + "learning_rate": 4.297716470964444e-06, + "loss": 0.13884544372558594, + "step": 82715 + }, + { + "epoch": 0.7152553804117561, + "grad_norm": 8.688913183681537, + "learning_rate": 4.297532782284666e-06, + "loss": 0.1249267578125, + "step": 82720 + }, + { + "epoch": 0.7152986139332993, + "grad_norm": 1.0540143270012248, + "learning_rate": 4.297349087620781e-06, + "loss": 0.28087158203125, + "step": 82725 + }, + { + "epoch": 0.7153418474548426, + "grad_norm": 1.1837955659247985, + "learning_rate": 4.297165386973637e-06, + "loss": 0.035205078125, + "step": 82730 + }, + { + "epoch": 0.7153850809763859, + "grad_norm": 5.104069502284281, + "learning_rate": 4.296981680344082e-06, + "loss": 0.10881805419921875, + "step": 82735 + }, + { + "epoch": 0.7154283144979291, + "grad_norm": 5.4166212243887, + "learning_rate": 4.29679796773296e-06, + "loss": 0.0861968994140625, + "step": 82740 + }, + { + "epoch": 0.7154715480194723, + "grad_norm": 1.4248311404298555, + "learning_rate": 4.296614249141122e-06, + "loss": 0.03858261108398438, + "step": 82745 + }, + { + "epoch": 0.7155147815410157, + "grad_norm": 4.761962652094467, + "learning_rate": 4.296430524569413e-06, + "loss": 0.16759490966796875, + "step": 82750 + }, + { + "epoch": 0.7155580150625589, + "grad_norm": 19.745575772167705, + "learning_rate": 4.296246794018681e-06, + "loss": 0.1324859619140625, + "step": 82755 + }, + { + "epoch": 0.7156012485841021, + "grad_norm": 5.247444367167276, + "learning_rate": 4.2960630574897735e-06, + "loss": 0.02015533447265625, + "step": 82760 + }, + { + "epoch": 0.7156444821056455, + "grad_norm": 22.631122795331127, + "learning_rate": 4.295879314983537e-06, + "loss": 0.2964111328125, + "step": 82765 + }, + { + "epoch": 0.7156877156271887, + "grad_norm": 1.2651587165893925, + "learning_rate": 4.2956955665008205e-06, + "loss": 0.0837738037109375, + "step": 82770 + }, + { + "epoch": 0.7157309491487319, + "grad_norm": 1.9024722442195097, + "learning_rate": 4.29551181204247e-06, + "loss": 0.16840667724609376, + "step": 82775 + }, + { + "epoch": 0.7157741826702753, + "grad_norm": 1.5995444123972975, + "learning_rate": 4.295328051609333e-06, + "loss": 0.12062454223632812, + "step": 82780 + }, + { + "epoch": 0.7158174161918185, + "grad_norm": 0.2562607636574223, + "learning_rate": 4.295144285202259e-06, + "loss": 0.2490570068359375, + "step": 82785 + }, + { + "epoch": 0.7158606497133617, + "grad_norm": 1.8618399961354084, + "learning_rate": 4.294960512822093e-06, + "loss": 0.02685546875, + "step": 82790 + }, + { + "epoch": 0.7159038832349051, + "grad_norm": 23.417557975719305, + "learning_rate": 4.294776734469684e-06, + "loss": 0.1475311279296875, + "step": 82795 + }, + { + "epoch": 0.7159471167564483, + "grad_norm": 0.12539041457220232, + "learning_rate": 4.294592950145879e-06, + "loss": 0.4943084716796875, + "step": 82800 + }, + { + "epoch": 0.7159903502779915, + "grad_norm": 27.983334105006445, + "learning_rate": 4.294409159851525e-06, + "loss": 0.3513214111328125, + "step": 82805 + }, + { + "epoch": 0.7160335837995349, + "grad_norm": 40.68909102618218, + "learning_rate": 4.294225363587471e-06, + "loss": 0.18581695556640626, + "step": 82810 + }, + { + "epoch": 0.7160768173210781, + "grad_norm": 55.106053393636145, + "learning_rate": 4.294041561354564e-06, + "loss": 0.29752540588378906, + "step": 82815 + }, + { + "epoch": 0.7161200508426213, + "grad_norm": 5.766446406795815, + "learning_rate": 4.293857753153652e-06, + "loss": 0.16754837036132814, + "step": 82820 + }, + { + "epoch": 0.7161632843641647, + "grad_norm": 28.639981031139268, + "learning_rate": 4.293673938985581e-06, + "loss": 0.4516998291015625, + "step": 82825 + }, + { + "epoch": 0.7162065178857079, + "grad_norm": 10.885139426542588, + "learning_rate": 4.293490118851202e-06, + "loss": 0.06124677658081055, + "step": 82830 + }, + { + "epoch": 0.7162497514072511, + "grad_norm": 0.27912680780192006, + "learning_rate": 4.293306292751359e-06, + "loss": 0.16890487670898438, + "step": 82835 + }, + { + "epoch": 0.7162929849287943, + "grad_norm": 1.3718707944497515, + "learning_rate": 4.293122460686903e-06, + "loss": 0.20642833709716796, + "step": 82840 + }, + { + "epoch": 0.7163362184503377, + "grad_norm": 5.342531883342633, + "learning_rate": 4.2929386226586805e-06, + "loss": 0.10613212585449219, + "step": 82845 + }, + { + "epoch": 0.7163794519718809, + "grad_norm": 0.710982425272788, + "learning_rate": 4.292754778667537e-06, + "loss": 0.26618194580078125, + "step": 82850 + }, + { + "epoch": 0.7164226854934241, + "grad_norm": 13.804927276605016, + "learning_rate": 4.2925709287143255e-06, + "loss": 0.19145965576171875, + "step": 82855 + }, + { + "epoch": 0.7164659190149675, + "grad_norm": 8.834668918692994, + "learning_rate": 4.29238707279989e-06, + "loss": 0.25624923706054686, + "step": 82860 + }, + { + "epoch": 0.7165091525365107, + "grad_norm": 1.9205611590089995, + "learning_rate": 4.29220321092508e-06, + "loss": 0.064605712890625, + "step": 82865 + }, + { + "epoch": 0.7165523860580539, + "grad_norm": 30.05852417583906, + "learning_rate": 4.292019343090743e-06, + "loss": 0.13023872375488282, + "step": 82870 + }, + { + "epoch": 0.7165956195795973, + "grad_norm": 6.665512909999818, + "learning_rate": 4.291835469297726e-06, + "loss": 0.27019195556640624, + "step": 82875 + }, + { + "epoch": 0.7166388531011405, + "grad_norm": 1.7414131994729465, + "learning_rate": 4.291651589546878e-06, + "loss": 0.23564453125, + "step": 82880 + }, + { + "epoch": 0.7166820866226837, + "grad_norm": 22.362326549573243, + "learning_rate": 4.2914677038390486e-06, + "loss": 0.210162353515625, + "step": 82885 + }, + { + "epoch": 0.7167253201442271, + "grad_norm": 1.5064744534447503, + "learning_rate": 4.291283812175082e-06, + "loss": 0.077294921875, + "step": 82890 + }, + { + "epoch": 0.7167685536657703, + "grad_norm": 11.660261135529813, + "learning_rate": 4.29109991455583e-06, + "loss": 0.183319091796875, + "step": 82895 + }, + { + "epoch": 0.7168117871873135, + "grad_norm": 16.214422085461173, + "learning_rate": 4.29091601098214e-06, + "loss": 0.15021286010742188, + "step": 82900 + }, + { + "epoch": 0.7168550207088569, + "grad_norm": 3.6123723680688222, + "learning_rate": 4.290732101454857e-06, + "loss": 0.075433349609375, + "step": 82905 + }, + { + "epoch": 0.7168982542304001, + "grad_norm": 2.232973090404064, + "learning_rate": 4.290548185974834e-06, + "loss": 0.12104873657226563, + "step": 82910 + }, + { + "epoch": 0.7169414877519433, + "grad_norm": 1.94875644156894, + "learning_rate": 4.2903642645429155e-06, + "loss": 0.14562835693359374, + "step": 82915 + }, + { + "epoch": 0.7169847212734866, + "grad_norm": 2.166294211580369, + "learning_rate": 4.290180337159951e-06, + "loss": 0.3638603210449219, + "step": 82920 + }, + { + "epoch": 0.7170279547950299, + "grad_norm": 2.285496821025736, + "learning_rate": 4.289996403826789e-06, + "loss": 0.09877700805664062, + "step": 82925 + }, + { + "epoch": 0.7170711883165731, + "grad_norm": 0.42673910165656387, + "learning_rate": 4.289812464544278e-06, + "loss": 0.38142242431640627, + "step": 82930 + }, + { + "epoch": 0.7171144218381164, + "grad_norm": 15.660666816842342, + "learning_rate": 4.289628519313266e-06, + "loss": 0.08325996398925781, + "step": 82935 + }, + { + "epoch": 0.7171576553596597, + "grad_norm": 0.5824502067163444, + "learning_rate": 4.2894445681346e-06, + "loss": 0.040692138671875, + "step": 82940 + }, + { + "epoch": 0.7172008888812029, + "grad_norm": 15.686612594944554, + "learning_rate": 4.289260611009131e-06, + "loss": 0.1323974609375, + "step": 82945 + }, + { + "epoch": 0.7172441224027462, + "grad_norm": 5.667341982869684, + "learning_rate": 4.289076647937705e-06, + "loss": 0.18762588500976562, + "step": 82950 + }, + { + "epoch": 0.7172873559242895, + "grad_norm": 11.829294015934183, + "learning_rate": 4.2888926789211725e-06, + "loss": 0.20192375183105468, + "step": 82955 + }, + { + "epoch": 0.7173305894458327, + "grad_norm": 26.227645645295805, + "learning_rate": 4.28870870396038e-06, + "loss": 0.1626190185546875, + "step": 82960 + }, + { + "epoch": 0.717373822967376, + "grad_norm": 24.474817064686267, + "learning_rate": 4.2885247230561775e-06, + "loss": 0.0564361572265625, + "step": 82965 + }, + { + "epoch": 0.7174170564889193, + "grad_norm": 6.23554604346003, + "learning_rate": 4.288340736209411e-06, + "loss": 0.0736053466796875, + "step": 82970 + }, + { + "epoch": 0.7174602900104625, + "grad_norm": 15.024803734666284, + "learning_rate": 4.288156743420932e-06, + "loss": 0.11178665161132813, + "step": 82975 + }, + { + "epoch": 0.7175035235320057, + "grad_norm": 7.425820505526284, + "learning_rate": 4.287972744691589e-06, + "loss": 0.11376953125, + "step": 82980 + }, + { + "epoch": 0.7175467570535491, + "grad_norm": 13.82821654592251, + "learning_rate": 4.287788740022229e-06, + "loss": 0.16610870361328126, + "step": 82985 + }, + { + "epoch": 0.7175899905750923, + "grad_norm": 8.333059420926835, + "learning_rate": 4.2876047294137e-06, + "loss": 0.12650146484375, + "step": 82990 + }, + { + "epoch": 0.7176332240966355, + "grad_norm": 8.397928188120753, + "learning_rate": 4.287420712866852e-06, + "loss": 0.16515579223632812, + "step": 82995 + }, + { + "epoch": 0.7176764576181788, + "grad_norm": 1.41595060510517, + "learning_rate": 4.2872366903825354e-06, + "loss": 0.10499801635742187, + "step": 83000 + }, + { + "epoch": 0.7177196911397221, + "grad_norm": 4.854742111700291, + "learning_rate": 4.287052661961594e-06, + "loss": 0.0347381591796875, + "step": 83005 + }, + { + "epoch": 0.7177629246612653, + "grad_norm": 0.3279802467160487, + "learning_rate": 4.286868627604882e-06, + "loss": 0.04510574340820313, + "step": 83010 + }, + { + "epoch": 0.7178061581828086, + "grad_norm": 3.593131717536672, + "learning_rate": 4.286684587313244e-06, + "loss": 0.11128082275390624, + "step": 83015 + }, + { + "epoch": 0.7178493917043519, + "grad_norm": 1.5502317133446786, + "learning_rate": 4.28650054108753e-06, + "loss": 0.07571868896484375, + "step": 83020 + }, + { + "epoch": 0.7178926252258951, + "grad_norm": 1.9769041785410246, + "learning_rate": 4.286316488928591e-06, + "loss": 0.2524131774902344, + "step": 83025 + }, + { + "epoch": 0.7179358587474384, + "grad_norm": 0.9811484020181578, + "learning_rate": 4.286132430837273e-06, + "loss": 0.0307098388671875, + "step": 83030 + }, + { + "epoch": 0.7179790922689817, + "grad_norm": 0.8935281843851247, + "learning_rate": 4.285948366814426e-06, + "loss": 0.029357528686523436, + "step": 83035 + }, + { + "epoch": 0.7180223257905249, + "grad_norm": 13.020115123918721, + "learning_rate": 4.285764296860899e-06, + "loss": 0.078753662109375, + "step": 83040 + }, + { + "epoch": 0.7180655593120682, + "grad_norm": 1.7395837521153332, + "learning_rate": 4.2855802209775395e-06, + "loss": 0.059332275390625, + "step": 83045 + }, + { + "epoch": 0.7181087928336115, + "grad_norm": 5.447724622043259, + "learning_rate": 4.285396139165199e-06, + "loss": 0.3390201568603516, + "step": 83050 + }, + { + "epoch": 0.7181520263551547, + "grad_norm": 7.615676191816369, + "learning_rate": 4.285212051424724e-06, + "loss": 0.30774459838867185, + "step": 83055 + }, + { + "epoch": 0.718195259876698, + "grad_norm": 6.12302593663077, + "learning_rate": 4.285027957756966e-06, + "loss": 0.14091796875, + "step": 83060 + }, + { + "epoch": 0.7182384933982413, + "grad_norm": 5.531350105489183, + "learning_rate": 4.2848438581627715e-06, + "loss": 0.519659423828125, + "step": 83065 + }, + { + "epoch": 0.7182817269197845, + "grad_norm": 0.8954296976801757, + "learning_rate": 4.2846597526429915e-06, + "loss": 0.0910064697265625, + "step": 83070 + }, + { + "epoch": 0.7183249604413278, + "grad_norm": 7.4349042863747705, + "learning_rate": 4.284475641198473e-06, + "loss": 0.25330429077148436, + "step": 83075 + }, + { + "epoch": 0.7183681939628711, + "grad_norm": 10.666171053941024, + "learning_rate": 4.2842915238300665e-06, + "loss": 0.0489715576171875, + "step": 83080 + }, + { + "epoch": 0.7184114274844143, + "grad_norm": 18.29464166452458, + "learning_rate": 4.284107400538621e-06, + "loss": 0.13472747802734375, + "step": 83085 + }, + { + "epoch": 0.7184546610059576, + "grad_norm": 3.3739929991654463, + "learning_rate": 4.283923271324988e-06, + "loss": 0.0892791748046875, + "step": 83090 + }, + { + "epoch": 0.7184978945275008, + "grad_norm": 5.280029159353305, + "learning_rate": 4.283739136190012e-06, + "loss": 0.27668609619140627, + "step": 83095 + }, + { + "epoch": 0.7185411280490441, + "grad_norm": 2.563995284847684, + "learning_rate": 4.283554995134545e-06, + "loss": 0.39148101806640623, + "step": 83100 + }, + { + "epoch": 0.7185843615705874, + "grad_norm": 0.08901542362314274, + "learning_rate": 4.283370848159435e-06, + "loss": 0.1652996063232422, + "step": 83105 + }, + { + "epoch": 0.7186275950921306, + "grad_norm": 4.004809442587146, + "learning_rate": 4.2831866952655334e-06, + "loss": 0.20381622314453124, + "step": 83110 + }, + { + "epoch": 0.7186708286136739, + "grad_norm": 0.6860814742744372, + "learning_rate": 4.283002536453687e-06, + "loss": 0.3008720397949219, + "step": 83115 + }, + { + "epoch": 0.7187140621352172, + "grad_norm": 4.263758587549024, + "learning_rate": 4.2828183717247465e-06, + "loss": 0.1561859130859375, + "step": 83120 + }, + { + "epoch": 0.7187572956567604, + "grad_norm": 5.931803221385293, + "learning_rate": 4.282634201079563e-06, + "loss": 0.05452651977539062, + "step": 83125 + }, + { + "epoch": 0.7188005291783037, + "grad_norm": 16.605072266645806, + "learning_rate": 4.282450024518981e-06, + "loss": 0.1493743896484375, + "step": 83130 + }, + { + "epoch": 0.718843762699847, + "grad_norm": 3.059501096587703, + "learning_rate": 4.2822658420438535e-06, + "loss": 0.073602294921875, + "step": 83135 + }, + { + "epoch": 0.7188869962213902, + "grad_norm": 0.5147861466659313, + "learning_rate": 4.282081653655031e-06, + "loss": 0.2383544921875, + "step": 83140 + }, + { + "epoch": 0.7189302297429335, + "grad_norm": 2.112107420372575, + "learning_rate": 4.281897459353359e-06, + "loss": 0.05539398193359375, + "step": 83145 + }, + { + "epoch": 0.7189734632644768, + "grad_norm": 7.47000251759952, + "learning_rate": 4.281713259139691e-06, + "loss": 0.17033348083496094, + "step": 83150 + }, + { + "epoch": 0.71901669678602, + "grad_norm": 5.871252483052953, + "learning_rate": 4.281529053014874e-06, + "loss": 0.082720947265625, + "step": 83155 + }, + { + "epoch": 0.7190599303075633, + "grad_norm": 1.9477528326194242, + "learning_rate": 4.281344840979758e-06, + "loss": 0.0588897705078125, + "step": 83160 + }, + { + "epoch": 0.7191031638291066, + "grad_norm": 33.239487845402095, + "learning_rate": 4.281160623035192e-06, + "loss": 0.30925445556640624, + "step": 83165 + }, + { + "epoch": 0.7191463973506498, + "grad_norm": 26.565834766199206, + "learning_rate": 4.280976399182028e-06, + "loss": 0.41852455139160155, + "step": 83170 + }, + { + "epoch": 0.719189630872193, + "grad_norm": 12.12726831416927, + "learning_rate": 4.280792169421114e-06, + "loss": 0.1353271484375, + "step": 83175 + }, + { + "epoch": 0.7192328643937363, + "grad_norm": 1.2012816592263424, + "learning_rate": 4.280607933753299e-06, + "loss": 0.29242706298828125, + "step": 83180 + }, + { + "epoch": 0.7192760979152796, + "grad_norm": 2.6018707045291394, + "learning_rate": 4.2804236921794334e-06, + "loss": 0.10505599975585937, + "step": 83185 + }, + { + "epoch": 0.7193193314368228, + "grad_norm": 7.761019444558062, + "learning_rate": 4.280239444700366e-06, + "loss": 0.11170883178710937, + "step": 83190 + }, + { + "epoch": 0.7193625649583661, + "grad_norm": 0.5840261056280209, + "learning_rate": 4.280055191316948e-06, + "loss": 0.28312225341796876, + "step": 83195 + }, + { + "epoch": 0.7194057984799094, + "grad_norm": 4.105251611275978, + "learning_rate": 4.279870932030029e-06, + "loss": 0.10407447814941406, + "step": 83200 + }, + { + "epoch": 0.7194490320014526, + "grad_norm": 5.082435406056391, + "learning_rate": 4.279686666840459e-06, + "loss": 0.04357757568359375, + "step": 83205 + }, + { + "epoch": 0.7194922655229959, + "grad_norm": 18.464325620369145, + "learning_rate": 4.279502395749085e-06, + "loss": 0.31471710205078124, + "step": 83210 + }, + { + "epoch": 0.7195354990445392, + "grad_norm": 1.5417906179525425, + "learning_rate": 4.279318118756761e-06, + "loss": 0.024648284912109374, + "step": 83215 + }, + { + "epoch": 0.7195787325660824, + "grad_norm": 1.1380675067896517, + "learning_rate": 4.279133835864334e-06, + "loss": 0.11003036499023437, + "step": 83220 + }, + { + "epoch": 0.7196219660876257, + "grad_norm": 8.954055702612756, + "learning_rate": 4.278949547072655e-06, + "loss": 0.2646484375, + "step": 83225 + }, + { + "epoch": 0.719665199609169, + "grad_norm": 23.610289515938582, + "learning_rate": 4.2787652523825745e-06, + "loss": 0.2153858184814453, + "step": 83230 + }, + { + "epoch": 0.7197084331307122, + "grad_norm": 4.095464353514593, + "learning_rate": 4.278580951794941e-06, + "loss": 0.023532867431640625, + "step": 83235 + }, + { + "epoch": 0.7197516666522555, + "grad_norm": 6.364191423573313, + "learning_rate": 4.278396645310605e-06, + "loss": 0.2904205322265625, + "step": 83240 + }, + { + "epoch": 0.7197949001737988, + "grad_norm": 0.6434330966129521, + "learning_rate": 4.278212332930416e-06, + "loss": 0.2226348876953125, + "step": 83245 + }, + { + "epoch": 0.719838133695342, + "grad_norm": 101.82928314686855, + "learning_rate": 4.2780280146552254e-06, + "loss": 0.36634979248046873, + "step": 83250 + }, + { + "epoch": 0.7198813672168853, + "grad_norm": 20.873191528602973, + "learning_rate": 4.2778436904858825e-06, + "loss": 0.1676544189453125, + "step": 83255 + }, + { + "epoch": 0.7199246007384286, + "grad_norm": 11.719317925022331, + "learning_rate": 4.2776593604232375e-06, + "loss": 0.2126178741455078, + "step": 83260 + }, + { + "epoch": 0.7199678342599718, + "grad_norm": 11.803731955946237, + "learning_rate": 4.277475024468141e-06, + "loss": 0.17663536071777344, + "step": 83265 + }, + { + "epoch": 0.720011067781515, + "grad_norm": 107.15044722686545, + "learning_rate": 4.277290682621441e-06, + "loss": 0.37757568359375, + "step": 83270 + }, + { + "epoch": 0.7200543013030584, + "grad_norm": 0.1411229347181891, + "learning_rate": 4.27710633488399e-06, + "loss": 0.06272506713867188, + "step": 83275 + }, + { + "epoch": 0.7200975348246016, + "grad_norm": 31.963161859193622, + "learning_rate": 4.276921981256638e-06, + "loss": 0.4836669921875, + "step": 83280 + }, + { + "epoch": 0.7201407683461448, + "grad_norm": 7.254605542073886, + "learning_rate": 4.276737621740234e-06, + "loss": 0.22542037963867187, + "step": 83285 + }, + { + "epoch": 0.7201840018676882, + "grad_norm": 2.6733254680389082, + "learning_rate": 4.276553256335629e-06, + "loss": 0.3048683166503906, + "step": 83290 + }, + { + "epoch": 0.7202272353892314, + "grad_norm": 1.644445206627681, + "learning_rate": 4.276368885043673e-06, + "loss": 0.21849822998046875, + "step": 83295 + }, + { + "epoch": 0.7202704689107746, + "grad_norm": 8.84831382507366, + "learning_rate": 4.276184507865217e-06, + "loss": 0.073681640625, + "step": 83300 + }, + { + "epoch": 0.720313702432318, + "grad_norm": 19.715961236516822, + "learning_rate": 4.2760001248011105e-06, + "loss": 0.12639312744140624, + "step": 83305 + }, + { + "epoch": 0.7203569359538612, + "grad_norm": 31.99478169483562, + "learning_rate": 4.275815735852204e-06, + "loss": 0.29297866821289065, + "step": 83310 + }, + { + "epoch": 0.7204001694754044, + "grad_norm": 5.489063231768492, + "learning_rate": 4.275631341019348e-06, + "loss": 0.2225311279296875, + "step": 83315 + }, + { + "epoch": 0.7204434029969478, + "grad_norm": 11.457568631582554, + "learning_rate": 4.275446940303394e-06, + "loss": 0.15850334167480468, + "step": 83320 + }, + { + "epoch": 0.720486636518491, + "grad_norm": 1.1912923660408579, + "learning_rate": 4.275262533705191e-06, + "loss": 0.2492586135864258, + "step": 83325 + }, + { + "epoch": 0.7205298700400342, + "grad_norm": 3.0884388572024957, + "learning_rate": 4.275078121225589e-06, + "loss": 0.19576416015625, + "step": 83330 + }, + { + "epoch": 0.7205731035615776, + "grad_norm": 4.732217765940248, + "learning_rate": 4.2748937028654396e-06, + "loss": 0.10774078369140624, + "step": 83335 + }, + { + "epoch": 0.7206163370831208, + "grad_norm": 2.448907964085193, + "learning_rate": 4.274709278625594e-06, + "loss": 0.05752487182617187, + "step": 83340 + }, + { + "epoch": 0.720659570604664, + "grad_norm": 2.021036954904385, + "learning_rate": 4.274524848506901e-06, + "loss": 0.0591094970703125, + "step": 83345 + }, + { + "epoch": 0.7207028041262072, + "grad_norm": 17.122874863256932, + "learning_rate": 4.274340412510212e-06, + "loss": 0.130572509765625, + "step": 83350 + }, + { + "epoch": 0.7207460376477506, + "grad_norm": 6.174322738473592, + "learning_rate": 4.274155970636378e-06, + "loss": 0.11925086975097657, + "step": 83355 + }, + { + "epoch": 0.7207892711692938, + "grad_norm": 0.7396376056239167, + "learning_rate": 4.27397152288625e-06, + "loss": 0.12271881103515625, + "step": 83360 + }, + { + "epoch": 0.720832504690837, + "grad_norm": 6.447781594216949, + "learning_rate": 4.273787069260676e-06, + "loss": 0.20905799865722657, + "step": 83365 + }, + { + "epoch": 0.7208757382123804, + "grad_norm": 2.2543289075247595, + "learning_rate": 4.27360260976051e-06, + "loss": 0.2658344268798828, + "step": 83370 + }, + { + "epoch": 0.7209189717339236, + "grad_norm": 6.507408612894203, + "learning_rate": 4.273418144386601e-06, + "loss": 0.10706787109375, + "step": 83375 + }, + { + "epoch": 0.7209622052554668, + "grad_norm": 0.20710746103588681, + "learning_rate": 4.2732336731398e-06, + "loss": 0.05135498046875, + "step": 83380 + }, + { + "epoch": 0.7210054387770102, + "grad_norm": 0.5516131861801133, + "learning_rate": 4.273049196020958e-06, + "loss": 0.07557220458984375, + "step": 83385 + }, + { + "epoch": 0.7210486722985534, + "grad_norm": 0.3711595593469142, + "learning_rate": 4.272864713030925e-06, + "loss": 0.21085205078125, + "step": 83390 + }, + { + "epoch": 0.7210919058200966, + "grad_norm": 12.72622801095193, + "learning_rate": 4.2726802241705535e-06, + "loss": 0.1457366943359375, + "step": 83395 + }, + { + "epoch": 0.72113513934164, + "grad_norm": 1.6099322222721606, + "learning_rate": 4.272495729440692e-06, + "loss": 0.07821025848388671, + "step": 83400 + }, + { + "epoch": 0.7211783728631832, + "grad_norm": 15.454983222934557, + "learning_rate": 4.272311228842193e-06, + "loss": 0.24576683044433595, + "step": 83405 + }, + { + "epoch": 0.7212216063847264, + "grad_norm": 1.4775378404742783, + "learning_rate": 4.272126722375908e-06, + "loss": 0.13079071044921875, + "step": 83410 + }, + { + "epoch": 0.7212648399062698, + "grad_norm": 1.2937585760384986, + "learning_rate": 4.271942210042685e-06, + "loss": 0.27803955078125, + "step": 83415 + }, + { + "epoch": 0.721308073427813, + "grad_norm": 11.119798200356161, + "learning_rate": 4.271757691843378e-06, + "loss": 0.041412353515625, + "step": 83420 + }, + { + "epoch": 0.7213513069493562, + "grad_norm": 0.2707179520907113, + "learning_rate": 4.271573167778838e-06, + "loss": 0.08903541564941406, + "step": 83425 + }, + { + "epoch": 0.7213945404708996, + "grad_norm": 7.783457048570428, + "learning_rate": 4.271388637849913e-06, + "loss": 0.05983409881591797, + "step": 83430 + }, + { + "epoch": 0.7214377739924428, + "grad_norm": 1.4804170461292527, + "learning_rate": 4.271204102057457e-06, + "loss": 0.08402290344238281, + "step": 83435 + }, + { + "epoch": 0.721481007513986, + "grad_norm": 2.7083968643491776, + "learning_rate": 4.2710195604023196e-06, + "loss": 0.16559982299804688, + "step": 83440 + }, + { + "epoch": 0.7215242410355293, + "grad_norm": 3.9492039883902628, + "learning_rate": 4.270835012885354e-06, + "loss": 0.05680694580078125, + "step": 83445 + }, + { + "epoch": 0.7215674745570726, + "grad_norm": 2.0264797662327942, + "learning_rate": 4.270650459507407e-06, + "loss": 0.1075958251953125, + "step": 83450 + }, + { + "epoch": 0.7216107080786158, + "grad_norm": 0.4690303928561812, + "learning_rate": 4.270465900269333e-06, + "loss": 0.035748291015625, + "step": 83455 + }, + { + "epoch": 0.721653941600159, + "grad_norm": 0.4651208391894143, + "learning_rate": 4.270281335171984e-06, + "loss": 0.051381683349609374, + "step": 83460 + }, + { + "epoch": 0.7216971751217024, + "grad_norm": 5.002720070805769, + "learning_rate": 4.270096764216209e-06, + "loss": 0.15367279052734376, + "step": 83465 + }, + { + "epoch": 0.7217404086432456, + "grad_norm": 6.846810611722094, + "learning_rate": 4.269912187402859e-06, + "loss": 0.4101585388183594, + "step": 83470 + }, + { + "epoch": 0.7217836421647889, + "grad_norm": 7.650821023466303, + "learning_rate": 4.269727604732787e-06, + "loss": 0.2926826477050781, + "step": 83475 + }, + { + "epoch": 0.7218268756863322, + "grad_norm": 5.301087402170309, + "learning_rate": 4.2695430162068435e-06, + "loss": 0.16665172576904297, + "step": 83480 + }, + { + "epoch": 0.7218701092078754, + "grad_norm": 7.477096107624505, + "learning_rate": 4.26935842182588e-06, + "loss": 0.06351318359375, + "step": 83485 + }, + { + "epoch": 0.7219133427294187, + "grad_norm": 12.110224858963607, + "learning_rate": 4.269173821590748e-06, + "loss": 0.10377273559570313, + "step": 83490 + }, + { + "epoch": 0.721956576250962, + "grad_norm": 4.673974478869529, + "learning_rate": 4.268989215502297e-06, + "loss": 0.1884796142578125, + "step": 83495 + }, + { + "epoch": 0.7219998097725052, + "grad_norm": 29.811995833881454, + "learning_rate": 4.26880460356138e-06, + "loss": 0.1991241455078125, + "step": 83500 + }, + { + "epoch": 0.7220430432940484, + "grad_norm": 3.3822792150992074, + "learning_rate": 4.268619985768849e-06, + "loss": 0.3902439117431641, + "step": 83505 + }, + { + "epoch": 0.7220862768155918, + "grad_norm": 18.224474677710663, + "learning_rate": 4.2684353621255555e-06, + "loss": 0.45353240966796876, + "step": 83510 + }, + { + "epoch": 0.722129510337135, + "grad_norm": 4.216237486186528, + "learning_rate": 4.26825073263235e-06, + "loss": 0.020578765869140626, + "step": 83515 + }, + { + "epoch": 0.7221727438586782, + "grad_norm": 31.274979267553878, + "learning_rate": 4.2680660972900825e-06, + "loss": 0.34396209716796877, + "step": 83520 + }, + { + "epoch": 0.7222159773802215, + "grad_norm": 7.093111303890063, + "learning_rate": 4.267881456099608e-06, + "loss": 0.050787353515625, + "step": 83525 + }, + { + "epoch": 0.7222592109017648, + "grad_norm": 42.52931759449287, + "learning_rate": 4.2676968090617745e-06, + "loss": 0.1317047119140625, + "step": 83530 + }, + { + "epoch": 0.722302444423308, + "grad_norm": 0.08099071969165679, + "learning_rate": 4.267512156177437e-06, + "loss": 0.04294166564941406, + "step": 83535 + }, + { + "epoch": 0.7223456779448513, + "grad_norm": 64.43843933583743, + "learning_rate": 4.267327497447445e-06, + "loss": 0.2934722900390625, + "step": 83540 + }, + { + "epoch": 0.7223889114663946, + "grad_norm": 38.34344386284737, + "learning_rate": 4.2671428328726495e-06, + "loss": 0.2171539306640625, + "step": 83545 + }, + { + "epoch": 0.7224321449879378, + "grad_norm": 3.4582403789988394, + "learning_rate": 4.266958162453905e-06, + "loss": 0.074359130859375, + "step": 83550 + }, + { + "epoch": 0.7224753785094811, + "grad_norm": 1.9084468819700346, + "learning_rate": 4.266773486192061e-06, + "loss": 0.1510761260986328, + "step": 83555 + }, + { + "epoch": 0.7225186120310244, + "grad_norm": 16.16260615270764, + "learning_rate": 4.266588804087968e-06, + "loss": 0.17591094970703125, + "step": 83560 + }, + { + "epoch": 0.7225618455525676, + "grad_norm": 2.827089747867661, + "learning_rate": 4.266404116142481e-06, + "loss": 0.19447021484375, + "step": 83565 + }, + { + "epoch": 0.7226050790741109, + "grad_norm": 1.6417608460613256, + "learning_rate": 4.2662194223564495e-06, + "loss": 0.244976806640625, + "step": 83570 + }, + { + "epoch": 0.7226483125956542, + "grad_norm": 28.320254278708326, + "learning_rate": 4.266034722730726e-06, + "loss": 0.19554557800292968, + "step": 83575 + }, + { + "epoch": 0.7226915461171974, + "grad_norm": 26.57204207647679, + "learning_rate": 4.265850017266162e-06, + "loss": 0.08645706176757813, + "step": 83580 + }, + { + "epoch": 0.7227347796387407, + "grad_norm": 0.11505249912149565, + "learning_rate": 4.26566530596361e-06, + "loss": 0.1762237548828125, + "step": 83585 + }, + { + "epoch": 0.722778013160284, + "grad_norm": 2.522168863291161, + "learning_rate": 4.265480588823921e-06, + "loss": 0.1426219940185547, + "step": 83590 + }, + { + "epoch": 0.7228212466818272, + "grad_norm": 3.549745449879666, + "learning_rate": 4.265295865847947e-06, + "loss": 0.0675079345703125, + "step": 83595 + }, + { + "epoch": 0.7228644802033705, + "grad_norm": 14.612126990437842, + "learning_rate": 4.265111137036542e-06, + "loss": 0.128594970703125, + "step": 83600 + }, + { + "epoch": 0.7229077137249138, + "grad_norm": 0.19911765118009228, + "learning_rate": 4.2649264023905545e-06, + "loss": 0.04078216552734375, + "step": 83605 + }, + { + "epoch": 0.722950947246457, + "grad_norm": 2.0733885667614924, + "learning_rate": 4.2647416619108385e-06, + "loss": 0.06103591918945313, + "step": 83610 + }, + { + "epoch": 0.7229941807680003, + "grad_norm": 2.491219889021065, + "learning_rate": 4.264556915598246e-06, + "loss": 0.270562744140625, + "step": 83615 + }, + { + "epoch": 0.7230374142895435, + "grad_norm": 0.13494743297061237, + "learning_rate": 4.264372163453629e-06, + "loss": 0.16216278076171875, + "step": 83620 + }, + { + "epoch": 0.7230806478110868, + "grad_norm": 11.281960222179785, + "learning_rate": 4.264187405477839e-06, + "loss": 0.3336189270019531, + "step": 83625 + }, + { + "epoch": 0.7231238813326301, + "grad_norm": 10.29118900032558, + "learning_rate": 4.264002641671729e-06, + "loss": 0.1022979736328125, + "step": 83630 + }, + { + "epoch": 0.7231671148541733, + "grad_norm": 21.54360683056263, + "learning_rate": 4.263817872036149e-06, + "loss": 0.323126220703125, + "step": 83635 + }, + { + "epoch": 0.7232103483757166, + "grad_norm": 13.22684532899637, + "learning_rate": 4.263633096571953e-06, + "loss": 0.22825469970703124, + "step": 83640 + }, + { + "epoch": 0.7232535818972599, + "grad_norm": 2.28885118486056, + "learning_rate": 4.263448315279994e-06, + "loss": 0.1415264129638672, + "step": 83645 + }, + { + "epoch": 0.7232968154188031, + "grad_norm": 5.932942874184703, + "learning_rate": 4.263263528161123e-06, + "loss": 0.127349853515625, + "step": 83650 + }, + { + "epoch": 0.7233400489403464, + "grad_norm": 3.4275346071028188, + "learning_rate": 4.263078735216191e-06, + "loss": 0.06560897827148438, + "step": 83655 + }, + { + "epoch": 0.7233832824618897, + "grad_norm": 3.296052230623242, + "learning_rate": 4.262893936446053e-06, + "loss": 0.28267059326171873, + "step": 83660 + }, + { + "epoch": 0.7234265159834329, + "grad_norm": 3.8440449104926153, + "learning_rate": 4.262709131851558e-06, + "loss": 0.24228515625, + "step": 83665 + }, + { + "epoch": 0.7234697495049762, + "grad_norm": 4.123573805787013, + "learning_rate": 4.2625243214335605e-06, + "loss": 0.10122413635253906, + "step": 83670 + }, + { + "epoch": 0.7235129830265195, + "grad_norm": 10.846163206487011, + "learning_rate": 4.262339505192913e-06, + "loss": 0.1226470947265625, + "step": 83675 + }, + { + "epoch": 0.7235562165480627, + "grad_norm": 2.9899870760146716, + "learning_rate": 4.262154683130467e-06, + "loss": 0.233575439453125, + "step": 83680 + }, + { + "epoch": 0.723599450069606, + "grad_norm": 37.91849072055342, + "learning_rate": 4.261969855247075e-06, + "loss": 0.3006500244140625, + "step": 83685 + }, + { + "epoch": 0.7236426835911492, + "grad_norm": 8.28641128708355, + "learning_rate": 4.26178502154359e-06, + "loss": 0.02668609619140625, + "step": 83690 + }, + { + "epoch": 0.7236859171126925, + "grad_norm": 22.411914986919317, + "learning_rate": 4.261600182020863e-06, + "loss": 0.14203262329101562, + "step": 83695 + }, + { + "epoch": 0.7237291506342357, + "grad_norm": 6.869757128876408, + "learning_rate": 4.261415336679749e-06, + "loss": 0.6893402099609375, + "step": 83700 + }, + { + "epoch": 0.723772384155779, + "grad_norm": 2.609129645646317, + "learning_rate": 4.2612304855210975e-06, + "loss": 0.029213714599609374, + "step": 83705 + }, + { + "epoch": 0.7238156176773223, + "grad_norm": 7.189377611117482, + "learning_rate": 4.261045628545763e-06, + "loss": 0.2597923278808594, + "step": 83710 + }, + { + "epoch": 0.7238588511988655, + "grad_norm": 1.460017405548987, + "learning_rate": 4.260860765754599e-06, + "loss": 0.359716796875, + "step": 83715 + }, + { + "epoch": 0.7239020847204088, + "grad_norm": 7.278219215127159, + "learning_rate": 4.260675897148454e-06, + "loss": 0.10433540344238282, + "step": 83720 + }, + { + "epoch": 0.7239453182419521, + "grad_norm": 5.731116466580788, + "learning_rate": 4.260491022728184e-06, + "loss": 0.19917984008789064, + "step": 83725 + }, + { + "epoch": 0.7239885517634953, + "grad_norm": 0.5299689963106804, + "learning_rate": 4.260306142494641e-06, + "loss": 0.04079360961914062, + "step": 83730 + }, + { + "epoch": 0.7240317852850386, + "grad_norm": 0.9294462979912831, + "learning_rate": 4.260121256448678e-06, + "loss": 0.2201251983642578, + "step": 83735 + }, + { + "epoch": 0.7240750188065819, + "grad_norm": 9.545557160749436, + "learning_rate": 4.259936364591146e-06, + "loss": 0.16157684326171876, + "step": 83740 + }, + { + "epoch": 0.7241182523281251, + "grad_norm": 6.67215870042354, + "learning_rate": 4.2597514669229e-06, + "loss": 0.23767242431640626, + "step": 83745 + }, + { + "epoch": 0.7241614858496684, + "grad_norm": 3.6866115369054286, + "learning_rate": 4.25956656344479e-06, + "loss": 0.17256011962890624, + "step": 83750 + }, + { + "epoch": 0.7242047193712117, + "grad_norm": 6.205477935365689, + "learning_rate": 4.259381654157671e-06, + "loss": 0.30458984375, + "step": 83755 + }, + { + "epoch": 0.7242479528927549, + "grad_norm": 22.470201996071985, + "learning_rate": 4.259196739062396e-06, + "loss": 0.21880168914794923, + "step": 83760 + }, + { + "epoch": 0.7242911864142982, + "grad_norm": 1.5148894173023282, + "learning_rate": 4.259011818159816e-06, + "loss": 0.10556526184082031, + "step": 83765 + }, + { + "epoch": 0.7243344199358415, + "grad_norm": 14.535512901549364, + "learning_rate": 4.258826891450785e-06, + "loss": 0.15438690185546874, + "step": 83770 + }, + { + "epoch": 0.7243776534573847, + "grad_norm": 2.4311411463775396, + "learning_rate": 4.258641958936156e-06, + "loss": 0.07741546630859375, + "step": 83775 + }, + { + "epoch": 0.724420886978928, + "grad_norm": 0.8007119207164002, + "learning_rate": 4.25845702061678e-06, + "loss": 0.08251876831054687, + "step": 83780 + }, + { + "epoch": 0.7244641205004713, + "grad_norm": 20.617951548645166, + "learning_rate": 4.258272076493513e-06, + "loss": 0.283978271484375, + "step": 83785 + }, + { + "epoch": 0.7245073540220145, + "grad_norm": 1.2953448986613298, + "learning_rate": 4.258087126567206e-06, + "loss": 0.2373668670654297, + "step": 83790 + }, + { + "epoch": 0.7245505875435577, + "grad_norm": 5.765596438687675, + "learning_rate": 4.257902170838712e-06, + "loss": 0.08555831909179687, + "step": 83795 + }, + { + "epoch": 0.7245938210651011, + "grad_norm": 4.414272610082724, + "learning_rate": 4.257717209308885e-06, + "loss": 0.3908355712890625, + "step": 83800 + }, + { + "epoch": 0.7246370545866443, + "grad_norm": 6.466396335217968, + "learning_rate": 4.257532241978577e-06, + "loss": 0.101470947265625, + "step": 83805 + }, + { + "epoch": 0.7246802881081875, + "grad_norm": 2.617054193566902, + "learning_rate": 4.2573472688486404e-06, + "loss": 0.097772216796875, + "step": 83810 + }, + { + "epoch": 0.7247235216297309, + "grad_norm": 33.89456074273089, + "learning_rate": 4.257162289919931e-06, + "loss": 0.130792236328125, + "step": 83815 + }, + { + "epoch": 0.7247667551512741, + "grad_norm": 3.9633830064821303, + "learning_rate": 4.256977305193299e-06, + "loss": 0.0480072021484375, + "step": 83820 + }, + { + "epoch": 0.7248099886728173, + "grad_norm": 0.3379362956674923, + "learning_rate": 4.256792314669599e-06, + "loss": 0.166796875, + "step": 83825 + }, + { + "epoch": 0.7248532221943607, + "grad_norm": 0.7259519790700418, + "learning_rate": 4.256607318349684e-06, + "loss": 0.2518341064453125, + "step": 83830 + }, + { + "epoch": 0.7248964557159039, + "grad_norm": 0.9986437193629554, + "learning_rate": 4.256422316234407e-06, + "loss": 0.08576202392578125, + "step": 83835 + }, + { + "epoch": 0.7249396892374471, + "grad_norm": 1.1170563132282156, + "learning_rate": 4.256237308324622e-06, + "loss": 0.0967864990234375, + "step": 83840 + }, + { + "epoch": 0.7249829227589905, + "grad_norm": 2.20843922428221, + "learning_rate": 4.256052294621181e-06, + "loss": 0.01209087371826172, + "step": 83845 + }, + { + "epoch": 0.7250261562805337, + "grad_norm": 11.886113592114608, + "learning_rate": 4.2558672751249375e-06, + "loss": 0.14703445434570311, + "step": 83850 + }, + { + "epoch": 0.7250693898020769, + "grad_norm": 0.6301002765631346, + "learning_rate": 4.255682249836745e-06, + "loss": 0.05995941162109375, + "step": 83855 + }, + { + "epoch": 0.7251126233236203, + "grad_norm": 2.1267990040318607, + "learning_rate": 4.2554972187574575e-06, + "loss": 0.10950698852539062, + "step": 83860 + }, + { + "epoch": 0.7251558568451635, + "grad_norm": 14.673113021689899, + "learning_rate": 4.255312181887927e-06, + "loss": 0.10567703247070312, + "step": 83865 + }, + { + "epoch": 0.7251990903667067, + "grad_norm": 7.407860553270111, + "learning_rate": 4.2551271392290085e-06, + "loss": 0.22977943420410157, + "step": 83870 + }, + { + "epoch": 0.7252423238882499, + "grad_norm": 2.455780760280907, + "learning_rate": 4.2549420907815545e-06, + "loss": 0.23494415283203124, + "step": 83875 + }, + { + "epoch": 0.7252855574097933, + "grad_norm": 37.899468023416375, + "learning_rate": 4.2547570365464175e-06, + "loss": 0.5481536865234375, + "step": 83880 + }, + { + "epoch": 0.7253287909313365, + "grad_norm": 0.22442540065493427, + "learning_rate": 4.254571976524453e-06, + "loss": 0.33703155517578126, + "step": 83885 + }, + { + "epoch": 0.7253720244528797, + "grad_norm": 1.3983524575374313, + "learning_rate": 4.254386910716512e-06, + "loss": 0.082891845703125, + "step": 83890 + }, + { + "epoch": 0.7254152579744231, + "grad_norm": 5.472263839872116, + "learning_rate": 4.254201839123451e-06, + "loss": 0.13149795532226563, + "step": 83895 + }, + { + "epoch": 0.7254584914959663, + "grad_norm": 30.52144655081904, + "learning_rate": 4.254016761746121e-06, + "loss": 0.26107330322265626, + "step": 83900 + }, + { + "epoch": 0.7255017250175095, + "grad_norm": 30.958715508651252, + "learning_rate": 4.253831678585378e-06, + "loss": 0.23492202758789063, + "step": 83905 + }, + { + "epoch": 0.7255449585390529, + "grad_norm": 2.4198534536206595, + "learning_rate": 4.253646589642072e-06, + "loss": 0.015382957458496094, + "step": 83910 + }, + { + "epoch": 0.7255881920605961, + "grad_norm": 6.859481505841653, + "learning_rate": 4.253461494917061e-06, + "loss": 0.13634986877441407, + "step": 83915 + }, + { + "epoch": 0.7256314255821393, + "grad_norm": 2.5223083128451003, + "learning_rate": 4.253276394411194e-06, + "loss": 0.08877182006835938, + "step": 83920 + }, + { + "epoch": 0.7256746591036827, + "grad_norm": 0.8754990031824113, + "learning_rate": 4.2530912881253276e-06, + "loss": 0.353631591796875, + "step": 83925 + }, + { + "epoch": 0.7257178926252259, + "grad_norm": 4.602240890339762, + "learning_rate": 4.2529061760603155e-06, + "loss": 0.062078857421875, + "step": 83930 + }, + { + "epoch": 0.7257611261467691, + "grad_norm": 0.8706964094701963, + "learning_rate": 4.252721058217011e-06, + "loss": 0.24890899658203125, + "step": 83935 + }, + { + "epoch": 0.7258043596683125, + "grad_norm": 6.474978541163824, + "learning_rate": 4.252535934596267e-06, + "loss": 0.01450347900390625, + "step": 83940 + }, + { + "epoch": 0.7258475931898557, + "grad_norm": 16.70666369266751, + "learning_rate": 4.252350805198939e-06, + "loss": 0.22363052368164063, + "step": 83945 + }, + { + "epoch": 0.7258908267113989, + "grad_norm": 26.52243632145259, + "learning_rate": 4.252165670025878e-06, + "loss": 0.25145587921142576, + "step": 83950 + }, + { + "epoch": 0.7259340602329423, + "grad_norm": 4.275105405464428, + "learning_rate": 4.251980529077941e-06, + "loss": 0.15537261962890625, + "step": 83955 + }, + { + "epoch": 0.7259772937544855, + "grad_norm": 0.6830413245959811, + "learning_rate": 4.251795382355979e-06, + "loss": 0.166717529296875, + "step": 83960 + }, + { + "epoch": 0.7260205272760287, + "grad_norm": 7.943973558085221, + "learning_rate": 4.2516102298608496e-06, + "loss": 0.12809600830078124, + "step": 83965 + }, + { + "epoch": 0.726063760797572, + "grad_norm": 4.489896086299559, + "learning_rate": 4.251425071593403e-06, + "loss": 0.20390777587890624, + "step": 83970 + }, + { + "epoch": 0.7261069943191153, + "grad_norm": 6.187214806836528, + "learning_rate": 4.251239907554495e-06, + "loss": 0.2121795654296875, + "step": 83975 + }, + { + "epoch": 0.7261502278406585, + "grad_norm": 39.607936969025296, + "learning_rate": 4.251054737744978e-06, + "loss": 0.3336669921875, + "step": 83980 + }, + { + "epoch": 0.7261934613622018, + "grad_norm": 42.25640786135254, + "learning_rate": 4.250869562165708e-06, + "loss": 0.5181350708007812, + "step": 83985 + }, + { + "epoch": 0.7262366948837451, + "grad_norm": 3.823510408869257, + "learning_rate": 4.250684380817538e-06, + "loss": 0.25753173828125, + "step": 83990 + }, + { + "epoch": 0.7262799284052883, + "grad_norm": 11.684044701517278, + "learning_rate": 4.250499193701321e-06, + "loss": 0.09215660095214843, + "step": 83995 + }, + { + "epoch": 0.7263231619268316, + "grad_norm": 10.711886453020652, + "learning_rate": 4.250314000817914e-06, + "loss": 0.1873779296875, + "step": 84000 + }, + { + "epoch": 0.7263663954483749, + "grad_norm": 0.646007751011855, + "learning_rate": 4.2501288021681685e-06, + "loss": 0.07654571533203125, + "step": 84005 + }, + { + "epoch": 0.7264096289699181, + "grad_norm": 13.830015527076636, + "learning_rate": 4.24994359775294e-06, + "loss": 0.09295196533203125, + "step": 84010 + }, + { + "epoch": 0.7264528624914613, + "grad_norm": 13.284026854119356, + "learning_rate": 4.249758387573082e-06, + "loss": 0.28648033142089846, + "step": 84015 + }, + { + "epoch": 0.7264960960130047, + "grad_norm": 8.589019417251762, + "learning_rate": 4.249573171629448e-06, + "loss": 0.29635086059570315, + "step": 84020 + }, + { + "epoch": 0.7265393295345479, + "grad_norm": 8.694752573578276, + "learning_rate": 4.249387949922893e-06, + "loss": 0.3803955078125, + "step": 84025 + }, + { + "epoch": 0.7265825630560911, + "grad_norm": 0.7925115866151112, + "learning_rate": 4.249202722454272e-06, + "loss": 0.1545989990234375, + "step": 84030 + }, + { + "epoch": 0.7266257965776345, + "grad_norm": 22.114588526066985, + "learning_rate": 4.249017489224437e-06, + "loss": 0.2475505828857422, + "step": 84035 + }, + { + "epoch": 0.7266690300991777, + "grad_norm": 3.113275280024159, + "learning_rate": 4.248832250234245e-06, + "loss": 0.18948898315429688, + "step": 84040 + }, + { + "epoch": 0.7267122636207209, + "grad_norm": 3.225154475019402, + "learning_rate": 4.248647005484548e-06, + "loss": 0.2398193359375, + "step": 84045 + }, + { + "epoch": 0.7267554971422642, + "grad_norm": 26.032620176413396, + "learning_rate": 4.248461754976203e-06, + "loss": 0.1292724609375, + "step": 84050 + }, + { + "epoch": 0.7267987306638075, + "grad_norm": 2.035556013516875, + "learning_rate": 4.248276498710061e-06, + "loss": 0.06427230834960937, + "step": 84055 + }, + { + "epoch": 0.7268419641853507, + "grad_norm": 0.4353340436171339, + "learning_rate": 4.2480912366869785e-06, + "loss": 0.025200653076171874, + "step": 84060 + }, + { + "epoch": 0.726885197706894, + "grad_norm": 4.256592371807554, + "learning_rate": 4.24790596890781e-06, + "loss": 0.274725341796875, + "step": 84065 + }, + { + "epoch": 0.7269284312284373, + "grad_norm": 2.48634251927284, + "learning_rate": 4.247720695373409e-06, + "loss": 0.5948394775390625, + "step": 84070 + }, + { + "epoch": 0.7269716647499805, + "grad_norm": 34.39437851121331, + "learning_rate": 4.247535416084631e-06, + "loss": 0.2929046630859375, + "step": 84075 + }, + { + "epoch": 0.7270148982715238, + "grad_norm": 9.412499516666482, + "learning_rate": 4.24735013104233e-06, + "loss": 0.09070358276367188, + "step": 84080 + }, + { + "epoch": 0.7270581317930671, + "grad_norm": 4.6779246038752165, + "learning_rate": 4.247164840247359e-06, + "loss": 0.07815017700195312, + "step": 84085 + }, + { + "epoch": 0.7271013653146103, + "grad_norm": 8.028350737800817, + "learning_rate": 4.246979543700575e-06, + "loss": 0.11791763305664063, + "step": 84090 + }, + { + "epoch": 0.7271445988361536, + "grad_norm": 8.305639632899597, + "learning_rate": 4.246794241402831e-06, + "loss": 0.46358299255371094, + "step": 84095 + }, + { + "epoch": 0.7271878323576969, + "grad_norm": 17.440461753314292, + "learning_rate": 4.246608933354983e-06, + "loss": 0.21133270263671874, + "step": 84100 + }, + { + "epoch": 0.7272310658792401, + "grad_norm": 0.9309985504267717, + "learning_rate": 4.246423619557885e-06, + "loss": 0.12033653259277344, + "step": 84105 + }, + { + "epoch": 0.7272742994007834, + "grad_norm": 3.1925659339893353, + "learning_rate": 4.24623830001239e-06, + "loss": 0.232415771484375, + "step": 84110 + }, + { + "epoch": 0.7273175329223267, + "grad_norm": 0.8666631690951782, + "learning_rate": 4.246052974719355e-06, + "loss": 0.049163818359375, + "step": 84115 + }, + { + "epoch": 0.7273607664438699, + "grad_norm": 1.4008568233128496, + "learning_rate": 4.245867643679633e-06, + "loss": 0.2993156433105469, + "step": 84120 + }, + { + "epoch": 0.7274039999654132, + "grad_norm": 31.17164291441279, + "learning_rate": 4.24568230689408e-06, + "loss": 0.31982421875, + "step": 84125 + }, + { + "epoch": 0.7274472334869565, + "grad_norm": 3.0748126869586145, + "learning_rate": 4.245496964363551e-06, + "loss": 0.11826324462890625, + "step": 84130 + }, + { + "epoch": 0.7274904670084997, + "grad_norm": 9.634885566094574, + "learning_rate": 4.2453116160889e-06, + "loss": 0.15536575317382811, + "step": 84135 + }, + { + "epoch": 0.727533700530043, + "grad_norm": 27.140186224838722, + "learning_rate": 4.2451262620709805e-06, + "loss": 0.1512054443359375, + "step": 84140 + }, + { + "epoch": 0.7275769340515862, + "grad_norm": 1.8051917729312728, + "learning_rate": 4.2449409023106495e-06, + "loss": 0.44119873046875, + "step": 84145 + }, + { + "epoch": 0.7276201675731295, + "grad_norm": 1.193583542987602, + "learning_rate": 4.244755536808761e-06, + "loss": 0.10908012390136719, + "step": 84150 + }, + { + "epoch": 0.7276634010946728, + "grad_norm": 4.6390835314879695, + "learning_rate": 4.2445701655661704e-06, + "loss": 0.05146942138671875, + "step": 84155 + }, + { + "epoch": 0.727706634616216, + "grad_norm": 1.197052817206225, + "learning_rate": 4.244384788583732e-06, + "loss": 0.2186309814453125, + "step": 84160 + }, + { + "epoch": 0.7277498681377593, + "grad_norm": 0.6344631221058874, + "learning_rate": 4.2441994058623e-06, + "loss": 0.18036041259765626, + "step": 84165 + }, + { + "epoch": 0.7277931016593026, + "grad_norm": 9.16292734961286, + "learning_rate": 4.244014017402731e-06, + "loss": 0.11030693054199218, + "step": 84170 + }, + { + "epoch": 0.7278363351808458, + "grad_norm": 1.5663749296997216, + "learning_rate": 4.243828623205879e-06, + "loss": 0.11447257995605468, + "step": 84175 + }, + { + "epoch": 0.7278795687023891, + "grad_norm": 0.9663583621852527, + "learning_rate": 4.243643223272599e-06, + "loss": 0.061894989013671874, + "step": 84180 + }, + { + "epoch": 0.7279228022239324, + "grad_norm": 0.7365967625759573, + "learning_rate": 4.243457817603747e-06, + "loss": 0.15886764526367186, + "step": 84185 + }, + { + "epoch": 0.7279660357454756, + "grad_norm": 3.789577651098886, + "learning_rate": 4.243272406200177e-06, + "loss": 0.20223541259765626, + "step": 84190 + }, + { + "epoch": 0.7280092692670189, + "grad_norm": 53.92449031553241, + "learning_rate": 4.243086989062746e-06, + "loss": 0.13629913330078125, + "step": 84195 + }, + { + "epoch": 0.7280525027885622, + "grad_norm": 64.5409060201135, + "learning_rate": 4.242901566192306e-06, + "loss": 0.628167724609375, + "step": 84200 + }, + { + "epoch": 0.7280957363101054, + "grad_norm": 12.01803878805349, + "learning_rate": 4.242716137589713e-06, + "loss": 0.13508071899414062, + "step": 84205 + }, + { + "epoch": 0.7281389698316487, + "grad_norm": 0.4421820181898576, + "learning_rate": 4.242530703255824e-06, + "loss": 0.07575244903564453, + "step": 84210 + }, + { + "epoch": 0.728182203353192, + "grad_norm": 28.45948190975162, + "learning_rate": 4.242345263191495e-06, + "loss": 0.18738250732421874, + "step": 84215 + }, + { + "epoch": 0.7282254368747352, + "grad_norm": 11.209744552608164, + "learning_rate": 4.242159817397578e-06, + "loss": 0.1592864990234375, + "step": 84220 + }, + { + "epoch": 0.7282686703962784, + "grad_norm": 36.423107115173636, + "learning_rate": 4.241974365874929e-06, + "loss": 0.5447990417480468, + "step": 84225 + }, + { + "epoch": 0.7283119039178217, + "grad_norm": 2.3248903235849503, + "learning_rate": 4.2417889086244045e-06, + "loss": 0.033282470703125, + "step": 84230 + }, + { + "epoch": 0.728355137439365, + "grad_norm": 0.6312310590751623, + "learning_rate": 4.2416034456468594e-06, + "loss": 0.249542236328125, + "step": 84235 + }, + { + "epoch": 0.7283983709609082, + "grad_norm": 1.7366138485077316, + "learning_rate": 4.241417976943149e-06, + "loss": 0.05434417724609375, + "step": 84240 + }, + { + "epoch": 0.7284416044824515, + "grad_norm": 28.820787708591865, + "learning_rate": 4.2412325025141275e-06, + "loss": 0.197113037109375, + "step": 84245 + }, + { + "epoch": 0.7284848380039948, + "grad_norm": 15.21940309636743, + "learning_rate": 4.2410470223606525e-06, + "loss": 0.2097198486328125, + "step": 84250 + }, + { + "epoch": 0.728528071525538, + "grad_norm": 25.654889409350858, + "learning_rate": 4.2408615364835785e-06, + "loss": 0.39983062744140624, + "step": 84255 + }, + { + "epoch": 0.7285713050470813, + "grad_norm": 6.1813958306273555, + "learning_rate": 4.240676044883759e-06, + "loss": 0.09600830078125, + "step": 84260 + }, + { + "epoch": 0.7286145385686246, + "grad_norm": 3.66890192856096, + "learning_rate": 4.240490547562053e-06, + "loss": 0.05359344482421875, + "step": 84265 + }, + { + "epoch": 0.7286577720901678, + "grad_norm": 0.37902845949057434, + "learning_rate": 4.240305044519313e-06, + "loss": 0.058705902099609374, + "step": 84270 + }, + { + "epoch": 0.7287010056117111, + "grad_norm": 0.4222795949605815, + "learning_rate": 4.240119535756397e-06, + "loss": 0.1283782958984375, + "step": 84275 + }, + { + "epoch": 0.7287442391332544, + "grad_norm": 0.8478599834936946, + "learning_rate": 4.2399340212741595e-06, + "loss": 0.14512481689453124, + "step": 84280 + }, + { + "epoch": 0.7287874726547976, + "grad_norm": 27.54982134865894, + "learning_rate": 4.239748501073455e-06, + "loss": 0.11795654296875, + "step": 84285 + }, + { + "epoch": 0.7288307061763409, + "grad_norm": 6.088326895330389, + "learning_rate": 4.239562975155138e-06, + "loss": 0.117767333984375, + "step": 84290 + }, + { + "epoch": 0.7288739396978842, + "grad_norm": 4.300240726891603, + "learning_rate": 4.239377443520069e-06, + "loss": 0.27310028076171877, + "step": 84295 + }, + { + "epoch": 0.7289171732194274, + "grad_norm": 0.29892646986487564, + "learning_rate": 4.2391919061691e-06, + "loss": 0.15272216796875, + "step": 84300 + }, + { + "epoch": 0.7289604067409707, + "grad_norm": 1.7214913697856, + "learning_rate": 4.239006363103087e-06, + "loss": 0.02724456787109375, + "step": 84305 + }, + { + "epoch": 0.729003640262514, + "grad_norm": 24.10673942894621, + "learning_rate": 4.238820814322887e-06, + "loss": 0.17634429931640624, + "step": 84310 + }, + { + "epoch": 0.7290468737840572, + "grad_norm": 37.34332154909239, + "learning_rate": 4.238635259829354e-06, + "loss": 0.15858535766601561, + "step": 84315 + }, + { + "epoch": 0.7290901073056004, + "grad_norm": 6.150395020647746, + "learning_rate": 4.238449699623345e-06, + "loss": 0.13323841094970704, + "step": 84320 + }, + { + "epoch": 0.7291333408271438, + "grad_norm": 0.1036414132597783, + "learning_rate": 4.238264133705715e-06, + "loss": 0.07196578979492188, + "step": 84325 + }, + { + "epoch": 0.729176574348687, + "grad_norm": 16.62961483077196, + "learning_rate": 4.238078562077321e-06, + "loss": 0.156488037109375, + "step": 84330 + }, + { + "epoch": 0.7292198078702302, + "grad_norm": 35.38042975584637, + "learning_rate": 4.237892984739017e-06, + "loss": 0.22403717041015625, + "step": 84335 + }, + { + "epoch": 0.7292630413917736, + "grad_norm": 4.516194248775158, + "learning_rate": 4.237707401691661e-06, + "loss": 0.10970611572265625, + "step": 84340 + }, + { + "epoch": 0.7293062749133168, + "grad_norm": 26.11398928447353, + "learning_rate": 4.237521812936107e-06, + "loss": 0.5223594665527344, + "step": 84345 + }, + { + "epoch": 0.72934950843486, + "grad_norm": 6.017721536963525, + "learning_rate": 4.237336218473213e-06, + "loss": 0.11381416320800782, + "step": 84350 + }, + { + "epoch": 0.7293927419564034, + "grad_norm": 7.930732824076691, + "learning_rate": 4.237150618303832e-06, + "loss": 0.2372406005859375, + "step": 84355 + }, + { + "epoch": 0.7294359754779466, + "grad_norm": 18.41478610434044, + "learning_rate": 4.236965012428822e-06, + "loss": 0.16429977416992186, + "step": 84360 + }, + { + "epoch": 0.7294792089994898, + "grad_norm": 29.569368055671166, + "learning_rate": 4.23677940084904e-06, + "loss": 0.1687591552734375, + "step": 84365 + }, + { + "epoch": 0.7295224425210332, + "grad_norm": 2.3020649395545645, + "learning_rate": 4.23659378356534e-06, + "loss": 0.29993896484375, + "step": 84370 + }, + { + "epoch": 0.7295656760425764, + "grad_norm": 17.62964137574456, + "learning_rate": 4.236408160578578e-06, + "loss": 0.2454132080078125, + "step": 84375 + }, + { + "epoch": 0.7296089095641196, + "grad_norm": 16.210865113954895, + "learning_rate": 4.2362225318896115e-06, + "loss": 0.5902755737304688, + "step": 84380 + }, + { + "epoch": 0.729652143085663, + "grad_norm": 41.276111678549384, + "learning_rate": 4.2360368974992955e-06, + "loss": 0.42918243408203127, + "step": 84385 + }, + { + "epoch": 0.7296953766072062, + "grad_norm": 8.328668950023726, + "learning_rate": 4.235851257408487e-06, + "loss": 0.2185588836669922, + "step": 84390 + }, + { + "epoch": 0.7297386101287494, + "grad_norm": 0.42863874865421553, + "learning_rate": 4.235665611618042e-06, + "loss": 0.07691459655761719, + "step": 84395 + }, + { + "epoch": 0.7297818436502926, + "grad_norm": 42.59907117003878, + "learning_rate": 4.235479960128816e-06, + "loss": 0.2853370666503906, + "step": 84400 + }, + { + "epoch": 0.729825077171836, + "grad_norm": 11.955445846469969, + "learning_rate": 4.235294302941665e-06, + "loss": 0.21484832763671874, + "step": 84405 + }, + { + "epoch": 0.7298683106933792, + "grad_norm": 6.972058247153089, + "learning_rate": 4.235108640057446e-06, + "loss": 0.0618804931640625, + "step": 84410 + }, + { + "epoch": 0.7299115442149224, + "grad_norm": 1.6470391815844305, + "learning_rate": 4.234922971477016e-06, + "loss": 0.06138038635253906, + "step": 84415 + }, + { + "epoch": 0.7299547777364658, + "grad_norm": 2.332520284580805, + "learning_rate": 4.234737297201229e-06, + "loss": 0.16098709106445314, + "step": 84420 + }, + { + "epoch": 0.729998011258009, + "grad_norm": 6.149047648723553, + "learning_rate": 4.234551617230945e-06, + "loss": 0.052630615234375, + "step": 84425 + }, + { + "epoch": 0.7300412447795522, + "grad_norm": 37.25467754046997, + "learning_rate": 4.2343659315670155e-06, + "loss": 0.1545948028564453, + "step": 84430 + }, + { + "epoch": 0.7300844783010956, + "grad_norm": 5.019904533586117, + "learning_rate": 4.2341802402103e-06, + "loss": 0.06952438354492188, + "step": 84435 + }, + { + "epoch": 0.7301277118226388, + "grad_norm": 3.7141197615460864, + "learning_rate": 4.233994543161655e-06, + "loss": 0.060955047607421875, + "step": 84440 + }, + { + "epoch": 0.730170945344182, + "grad_norm": 3.717368323556138, + "learning_rate": 4.233808840421936e-06, + "loss": 0.0428253173828125, + "step": 84445 + }, + { + "epoch": 0.7302141788657254, + "grad_norm": 1.2966349141038234, + "learning_rate": 4.233623131991999e-06, + "loss": 0.13477020263671874, + "step": 84450 + }, + { + "epoch": 0.7302574123872686, + "grad_norm": 6.797390396539649, + "learning_rate": 4.233437417872702e-06, + "loss": 0.08897552490234376, + "step": 84455 + }, + { + "epoch": 0.7303006459088118, + "grad_norm": 1.9618935159343345, + "learning_rate": 4.2332516980649e-06, + "loss": 0.20350875854492187, + "step": 84460 + }, + { + "epoch": 0.7303438794303552, + "grad_norm": 5.072370765752823, + "learning_rate": 4.23306597256945e-06, + "loss": 0.3709678649902344, + "step": 84465 + }, + { + "epoch": 0.7303871129518984, + "grad_norm": 14.36345824289482, + "learning_rate": 4.23288024138721e-06, + "loss": 0.0668243408203125, + "step": 84470 + }, + { + "epoch": 0.7304303464734416, + "grad_norm": 27.019493769722146, + "learning_rate": 4.232694504519034e-06, + "loss": 0.40690460205078127, + "step": 84475 + }, + { + "epoch": 0.7304735799949849, + "grad_norm": 10.485300694992718, + "learning_rate": 4.232508761965781e-06, + "loss": 0.10232734680175781, + "step": 84480 + }, + { + "epoch": 0.7305168135165282, + "grad_norm": 7.692075434405819, + "learning_rate": 4.232323013728305e-06, + "loss": 0.12431793212890625, + "step": 84485 + }, + { + "epoch": 0.7305600470380714, + "grad_norm": 0.8327481627691707, + "learning_rate": 4.232137259807465e-06, + "loss": 0.07005290985107422, + "step": 84490 + }, + { + "epoch": 0.7306032805596147, + "grad_norm": 32.610897912763335, + "learning_rate": 4.231951500204117e-06, + "loss": 0.19680938720703126, + "step": 84495 + }, + { + "epoch": 0.730646514081158, + "grad_norm": 16.16816892834761, + "learning_rate": 4.231765734919117e-06, + "loss": 0.09533939361572266, + "step": 84500 + }, + { + "epoch": 0.7306897476027012, + "grad_norm": 22.674725965293177, + "learning_rate": 4.2315799639533226e-06, + "loss": 0.1954681396484375, + "step": 84505 + }, + { + "epoch": 0.7307329811242445, + "grad_norm": 2.778595868576687, + "learning_rate": 4.231394187307591e-06, + "loss": 0.28274078369140626, + "step": 84510 + }, + { + "epoch": 0.7307762146457878, + "grad_norm": 18.949742190253097, + "learning_rate": 4.2312084049827766e-06, + "loss": 0.3318639755249023, + "step": 84515 + }, + { + "epoch": 0.730819448167331, + "grad_norm": 3.78929725467462, + "learning_rate": 4.231022616979739e-06, + "loss": 0.10584716796875, + "step": 84520 + }, + { + "epoch": 0.7308626816888742, + "grad_norm": 3.6637468645247475, + "learning_rate": 4.2308368232993335e-06, + "loss": 0.16651840209960939, + "step": 84525 + }, + { + "epoch": 0.7309059152104176, + "grad_norm": 8.404637566083617, + "learning_rate": 4.230651023942417e-06, + "loss": 0.06157989501953125, + "step": 84530 + }, + { + "epoch": 0.7309491487319608, + "grad_norm": 13.943801148266175, + "learning_rate": 4.2304652189098474e-06, + "loss": 0.06962242126464843, + "step": 84535 + }, + { + "epoch": 0.730992382253504, + "grad_norm": 0.20637988135660884, + "learning_rate": 4.23027940820248e-06, + "loss": 0.08662986755371094, + "step": 84540 + }, + { + "epoch": 0.7310356157750474, + "grad_norm": 6.637268091780927, + "learning_rate": 4.230093591821173e-06, + "loss": 0.607061767578125, + "step": 84545 + }, + { + "epoch": 0.7310788492965906, + "grad_norm": 1.499517673639293, + "learning_rate": 4.229907769766783e-06, + "loss": 0.5429718017578125, + "step": 84550 + }, + { + "epoch": 0.7311220828181338, + "grad_norm": 1.3753754912635685, + "learning_rate": 4.229721942040168e-06, + "loss": 0.36233062744140626, + "step": 84555 + }, + { + "epoch": 0.7311653163396772, + "grad_norm": 1.167840902300939, + "learning_rate": 4.229536108642183e-06, + "loss": 0.17586517333984375, + "step": 84560 + }, + { + "epoch": 0.7312085498612204, + "grad_norm": 30.677678712672815, + "learning_rate": 4.229350269573687e-06, + "loss": 0.24127578735351562, + "step": 84565 + }, + { + "epoch": 0.7312517833827636, + "grad_norm": 55.96114201814945, + "learning_rate": 4.229164424835536e-06, + "loss": 0.48255462646484376, + "step": 84570 + }, + { + "epoch": 0.7312950169043069, + "grad_norm": 9.01747440860128, + "learning_rate": 4.228978574428586e-06, + "loss": 0.06499481201171875, + "step": 84575 + }, + { + "epoch": 0.7313382504258502, + "grad_norm": 20.234441153120315, + "learning_rate": 4.2287927183536965e-06, + "loss": 0.403839111328125, + "step": 84580 + }, + { + "epoch": 0.7313814839473934, + "grad_norm": 8.336891116184018, + "learning_rate": 4.228606856611724e-06, + "loss": 0.1190765380859375, + "step": 84585 + }, + { + "epoch": 0.7314247174689367, + "grad_norm": 4.891247610803181, + "learning_rate": 4.228420989203525e-06, + "loss": 0.19469451904296875, + "step": 84590 + }, + { + "epoch": 0.73146795099048, + "grad_norm": 2.1873050229088395, + "learning_rate": 4.2282351161299555e-06, + "loss": 0.1021754264831543, + "step": 84595 + }, + { + "epoch": 0.7315111845120232, + "grad_norm": 0.6919177339794907, + "learning_rate": 4.228049237391875e-06, + "loss": 0.15350875854492188, + "step": 84600 + }, + { + "epoch": 0.7315544180335665, + "grad_norm": 0.6093281318984936, + "learning_rate": 4.2278633529901415e-06, + "loss": 0.042791748046875, + "step": 84605 + }, + { + "epoch": 0.7315976515551098, + "grad_norm": 3.4530537779474564, + "learning_rate": 4.227677462925609e-06, + "loss": 0.31472015380859375, + "step": 84610 + }, + { + "epoch": 0.731640885076653, + "grad_norm": 1.008094090039079, + "learning_rate": 4.227491567199137e-06, + "loss": 0.1613300323486328, + "step": 84615 + }, + { + "epoch": 0.7316841185981963, + "grad_norm": 1.0406546792826712, + "learning_rate": 4.227305665811583e-06, + "loss": 0.2556312561035156, + "step": 84620 + }, + { + "epoch": 0.7317273521197396, + "grad_norm": 1.3615454409684955, + "learning_rate": 4.227119758763802e-06, + "loss": 0.094952392578125, + "step": 84625 + }, + { + "epoch": 0.7317705856412828, + "grad_norm": 0.42309464526003276, + "learning_rate": 4.226933846056654e-06, + "loss": 0.30537109375, + "step": 84630 + }, + { + "epoch": 0.7318138191628261, + "grad_norm": 2.863765135846331, + "learning_rate": 4.226747927690996e-06, + "loss": 0.0481170654296875, + "step": 84635 + }, + { + "epoch": 0.7318570526843694, + "grad_norm": 0.8486962909569518, + "learning_rate": 4.2265620036676845e-06, + "loss": 0.08644561767578125, + "step": 84640 + }, + { + "epoch": 0.7319002862059126, + "grad_norm": 17.34267485485547, + "learning_rate": 4.2263760739875775e-06, + "loss": 0.5615234375, + "step": 84645 + }, + { + "epoch": 0.7319435197274559, + "grad_norm": 9.013604210461768, + "learning_rate": 4.2261901386515325e-06, + "loss": 0.19283065795898438, + "step": 84650 + }, + { + "epoch": 0.7319867532489991, + "grad_norm": 29.350771035264593, + "learning_rate": 4.226004197660407e-06, + "loss": 0.17650318145751953, + "step": 84655 + }, + { + "epoch": 0.7320299867705424, + "grad_norm": 20.74954995475334, + "learning_rate": 4.225818251015057e-06, + "loss": 0.34817657470703123, + "step": 84660 + }, + { + "epoch": 0.7320732202920857, + "grad_norm": 3.3847773285883793, + "learning_rate": 4.225632298716344e-06, + "loss": 0.09912147521972656, + "step": 84665 + }, + { + "epoch": 0.7321164538136289, + "grad_norm": 0.43693765643808397, + "learning_rate": 4.2254463407651225e-06, + "loss": 0.2721435546875, + "step": 84670 + }, + { + "epoch": 0.7321596873351722, + "grad_norm": 28.262733568422078, + "learning_rate": 4.22526037716225e-06, + "loss": 0.24478302001953126, + "step": 84675 + }, + { + "epoch": 0.7322029208567155, + "grad_norm": 4.469023392654252, + "learning_rate": 4.225074407908585e-06, + "loss": 0.14249343872070314, + "step": 84680 + }, + { + "epoch": 0.7322461543782587, + "grad_norm": 0.394000093204043, + "learning_rate": 4.2248884330049855e-06, + "loss": 0.0833892822265625, + "step": 84685 + }, + { + "epoch": 0.732289387899802, + "grad_norm": 13.794529787145285, + "learning_rate": 4.224702452452308e-06, + "loss": 0.3736927032470703, + "step": 84690 + }, + { + "epoch": 0.7323326214213453, + "grad_norm": 4.336977672940649, + "learning_rate": 4.224516466251412e-06, + "loss": 0.046544647216796874, + "step": 84695 + }, + { + "epoch": 0.7323758549428885, + "grad_norm": 0.9006709077859371, + "learning_rate": 4.224330474403154e-06, + "loss": 0.5019630432128906, + "step": 84700 + }, + { + "epoch": 0.7324190884644318, + "grad_norm": 19.011139529620436, + "learning_rate": 4.224144476908391e-06, + "loss": 0.11392822265625, + "step": 84705 + }, + { + "epoch": 0.732462321985975, + "grad_norm": 2.6225413100266737, + "learning_rate": 4.223958473767983e-06, + "loss": 0.02679290771484375, + "step": 84710 + }, + { + "epoch": 0.7325055555075183, + "grad_norm": 0.6075720418482047, + "learning_rate": 4.223772464982786e-06, + "loss": 0.09916229248046875, + "step": 84715 + }, + { + "epoch": 0.7325487890290616, + "grad_norm": 0.8887460390608654, + "learning_rate": 4.223586450553659e-06, + "loss": 0.09921875, + "step": 84720 + }, + { + "epoch": 0.7325920225506048, + "grad_norm": 25.014199353176597, + "learning_rate": 4.22340043048146e-06, + "loss": 0.26551361083984376, + "step": 84725 + }, + { + "epoch": 0.7326352560721481, + "grad_norm": 1.1205928440766324, + "learning_rate": 4.223214404767046e-06, + "loss": 0.12209625244140625, + "step": 84730 + }, + { + "epoch": 0.7326784895936914, + "grad_norm": 1.8692212052575372, + "learning_rate": 4.223028373411274e-06, + "loss": 0.042508697509765624, + "step": 84735 + }, + { + "epoch": 0.7327217231152346, + "grad_norm": 52.49028071671379, + "learning_rate": 4.222842336415004e-06, + "loss": 0.32745018005371096, + "step": 84740 + }, + { + "epoch": 0.7327649566367779, + "grad_norm": 1.1802559826955863, + "learning_rate": 4.222656293779093e-06, + "loss": 0.2757083892822266, + "step": 84745 + }, + { + "epoch": 0.7328081901583211, + "grad_norm": 13.470374861382464, + "learning_rate": 4.2224702455044e-06, + "loss": 0.1548736572265625, + "step": 84750 + }, + { + "epoch": 0.7328514236798644, + "grad_norm": 1.069399134521909, + "learning_rate": 4.2222841915917806e-06, + "loss": 0.683837890625, + "step": 84755 + }, + { + "epoch": 0.7328946572014077, + "grad_norm": 3.2207310016881623, + "learning_rate": 4.222098132042096e-06, + "loss": 0.30956497192382815, + "step": 84760 + }, + { + "epoch": 0.7329378907229509, + "grad_norm": 13.096364088488889, + "learning_rate": 4.221912066856203e-06, + "loss": 0.07706146240234375, + "step": 84765 + }, + { + "epoch": 0.7329811242444942, + "grad_norm": 12.860984161874196, + "learning_rate": 4.221725996034958e-06, + "loss": 0.20364532470703126, + "step": 84770 + }, + { + "epoch": 0.7330243577660375, + "grad_norm": 3.5499164018815295, + "learning_rate": 4.221539919579222e-06, + "loss": 0.31245956420898435, + "step": 84775 + }, + { + "epoch": 0.7330675912875807, + "grad_norm": 22.24898667613334, + "learning_rate": 4.221353837489852e-06, + "loss": 0.2131011962890625, + "step": 84780 + }, + { + "epoch": 0.733110824809124, + "grad_norm": 8.320825287349244, + "learning_rate": 4.221167749767705e-06, + "loss": 0.10880889892578124, + "step": 84785 + }, + { + "epoch": 0.7331540583306673, + "grad_norm": 16.70764348382996, + "learning_rate": 4.220981656413641e-06, + "loss": 0.3207859039306641, + "step": 84790 + }, + { + "epoch": 0.7331972918522105, + "grad_norm": 4.598823626924354, + "learning_rate": 4.220795557428517e-06, + "loss": 0.21815261840820313, + "step": 84795 + }, + { + "epoch": 0.7332405253737538, + "grad_norm": 2.0016312663411235, + "learning_rate": 4.220609452813192e-06, + "loss": 0.12303466796875, + "step": 84800 + }, + { + "epoch": 0.7332837588952971, + "grad_norm": 21.396869655964384, + "learning_rate": 4.220423342568524e-06, + "loss": 0.24690475463867187, + "step": 84805 + }, + { + "epoch": 0.7333269924168403, + "grad_norm": 1.5620445072249693, + "learning_rate": 4.220237226695371e-06, + "loss": 0.04062662124633789, + "step": 84810 + }, + { + "epoch": 0.7333702259383836, + "grad_norm": 2.371806108206899, + "learning_rate": 4.220051105194593e-06, + "loss": 0.0687103271484375, + "step": 84815 + }, + { + "epoch": 0.7334134594599269, + "grad_norm": 2.0970927873896157, + "learning_rate": 4.219864978067046e-06, + "loss": 0.07117156982421875, + "step": 84820 + }, + { + "epoch": 0.7334566929814701, + "grad_norm": 8.367159925663648, + "learning_rate": 4.219678845313589e-06, + "loss": 0.1067291259765625, + "step": 84825 + }, + { + "epoch": 0.7334999265030133, + "grad_norm": 0.37043139554214816, + "learning_rate": 4.2194927069350825e-06, + "loss": 0.10085792541503906, + "step": 84830 + }, + { + "epoch": 0.7335431600245567, + "grad_norm": 30.532266375063625, + "learning_rate": 4.219306562932382e-06, + "loss": 0.2832965850830078, + "step": 84835 + }, + { + "epoch": 0.7335863935460999, + "grad_norm": 0.5688634127603264, + "learning_rate": 4.219120413306348e-06, + "loss": 0.18505859375, + "step": 84840 + }, + { + "epoch": 0.7336296270676431, + "grad_norm": 69.5283950706316, + "learning_rate": 4.218934258057839e-06, + "loss": 0.2821063995361328, + "step": 84845 + }, + { + "epoch": 0.7336728605891865, + "grad_norm": 2.4949834505910773, + "learning_rate": 4.218748097187712e-06, + "loss": 0.2761688232421875, + "step": 84850 + }, + { + "epoch": 0.7337160941107297, + "grad_norm": 20.823890606669032, + "learning_rate": 4.2185619306968255e-06, + "loss": 0.1384765625, + "step": 84855 + }, + { + "epoch": 0.7337593276322729, + "grad_norm": 24.14613078496927, + "learning_rate": 4.2183757585860406e-06, + "loss": 0.15248184204101561, + "step": 84860 + }, + { + "epoch": 0.7338025611538163, + "grad_norm": 44.17159079843773, + "learning_rate": 4.218189580856214e-06, + "loss": 0.23032569885253906, + "step": 84865 + }, + { + "epoch": 0.7338457946753595, + "grad_norm": 21.40881405125596, + "learning_rate": 4.218003397508205e-06, + "loss": 0.20450439453125, + "step": 84870 + }, + { + "epoch": 0.7338890281969027, + "grad_norm": 2.2812702218222896, + "learning_rate": 4.217817208542872e-06, + "loss": 0.0794830322265625, + "step": 84875 + }, + { + "epoch": 0.733932261718446, + "grad_norm": 2.115709504101234, + "learning_rate": 4.217631013961072e-06, + "loss": 0.21196212768554687, + "step": 84880 + }, + { + "epoch": 0.7339754952399893, + "grad_norm": 9.617984769134603, + "learning_rate": 4.217444813763667e-06, + "loss": 0.38827972412109374, + "step": 84885 + }, + { + "epoch": 0.7340187287615325, + "grad_norm": 8.838541354956813, + "learning_rate": 4.217258607951514e-06, + "loss": 0.13236465454101562, + "step": 84890 + }, + { + "epoch": 0.7340619622830759, + "grad_norm": 1.02715522797999, + "learning_rate": 4.217072396525471e-06, + "loss": 0.317547607421875, + "step": 84895 + }, + { + "epoch": 0.7341051958046191, + "grad_norm": 4.386274125237071, + "learning_rate": 4.216886179486399e-06, + "loss": 0.0441864013671875, + "step": 84900 + }, + { + "epoch": 0.7341484293261623, + "grad_norm": 6.182795003520763, + "learning_rate": 4.216699956835154e-06, + "loss": 0.0792144775390625, + "step": 84905 + }, + { + "epoch": 0.7341916628477057, + "grad_norm": 0.8971404938641288, + "learning_rate": 4.216513728572598e-06, + "loss": 0.15880126953125, + "step": 84910 + }, + { + "epoch": 0.7342348963692489, + "grad_norm": 14.457478271385417, + "learning_rate": 4.216327494699586e-06, + "loss": 0.1565582275390625, + "step": 84915 + }, + { + "epoch": 0.7342781298907921, + "grad_norm": 10.743076716630299, + "learning_rate": 4.2161412552169815e-06, + "loss": 0.08974151611328125, + "step": 84920 + }, + { + "epoch": 0.7343213634123353, + "grad_norm": 15.774113001888091, + "learning_rate": 4.2159550101256396e-06, + "loss": 0.12317352294921875, + "step": 84925 + }, + { + "epoch": 0.7343645969338787, + "grad_norm": 4.448761194027551, + "learning_rate": 4.21576875942642e-06, + "loss": 0.0381988525390625, + "step": 84930 + }, + { + "epoch": 0.7344078304554219, + "grad_norm": 5.488937238709093, + "learning_rate": 4.2155825031201835e-06, + "loss": 0.6163864135742188, + "step": 84935 + }, + { + "epoch": 0.7344510639769651, + "grad_norm": 1.5821624600877955, + "learning_rate": 4.215396241207787e-06, + "loss": 0.17851791381835938, + "step": 84940 + }, + { + "epoch": 0.7344942974985085, + "grad_norm": 19.096762373170797, + "learning_rate": 4.215209973690092e-06, + "loss": 0.1361175537109375, + "step": 84945 + }, + { + "epoch": 0.7345375310200517, + "grad_norm": 21.099526202034145, + "learning_rate": 4.2150237005679545e-06, + "loss": 0.17171745300292968, + "step": 84950 + }, + { + "epoch": 0.7345807645415949, + "grad_norm": 2.2906291402296195, + "learning_rate": 4.214837421842236e-06, + "loss": 0.07509307861328125, + "step": 84955 + }, + { + "epoch": 0.7346239980631383, + "grad_norm": 6.153921151680609, + "learning_rate": 4.214651137513794e-06, + "loss": 0.09506893157958984, + "step": 84960 + }, + { + "epoch": 0.7346672315846815, + "grad_norm": 4.976263787579256, + "learning_rate": 4.214464847583488e-06, + "loss": 0.19196624755859376, + "step": 84965 + }, + { + "epoch": 0.7347104651062247, + "grad_norm": 0.026486674851587566, + "learning_rate": 4.214278552052178e-06, + "loss": 0.2668478012084961, + "step": 84970 + }, + { + "epoch": 0.7347536986277681, + "grad_norm": 13.507113157251185, + "learning_rate": 4.214092250920722e-06, + "loss": 0.284832763671875, + "step": 84975 + }, + { + "epoch": 0.7347969321493113, + "grad_norm": 2.7710511560664735, + "learning_rate": 4.21390594418998e-06, + "loss": 0.24014892578125, + "step": 84980 + }, + { + "epoch": 0.7348401656708545, + "grad_norm": 1.612082909512949, + "learning_rate": 4.213719631860812e-06, + "loss": 0.25174102783203123, + "step": 84985 + }, + { + "epoch": 0.7348833991923979, + "grad_norm": 0.5023850650000301, + "learning_rate": 4.213533313934075e-06, + "loss": 0.022522735595703124, + "step": 84990 + }, + { + "epoch": 0.7349266327139411, + "grad_norm": 7.963612663492367, + "learning_rate": 4.21334699041063e-06, + "loss": 0.21880216598510743, + "step": 84995 + }, + { + "epoch": 0.7349698662354843, + "grad_norm": 2.976425129500343, + "learning_rate": 4.213160661291336e-06, + "loss": 0.4348804473876953, + "step": 85000 + }, + { + "epoch": 0.7350130997570276, + "grad_norm": 1.7600668394621226, + "learning_rate": 4.2129743265770535e-06, + "loss": 0.22443618774414062, + "step": 85005 + }, + { + "epoch": 0.7350563332785709, + "grad_norm": 1.4376003562856154, + "learning_rate": 4.212787986268639e-06, + "loss": 0.06662750244140625, + "step": 85010 + }, + { + "epoch": 0.7350995668001141, + "grad_norm": 8.73816176229515, + "learning_rate": 4.212601640366954e-06, + "loss": 0.30009765625, + "step": 85015 + }, + { + "epoch": 0.7351428003216574, + "grad_norm": 19.07638477903776, + "learning_rate": 4.2124152888728565e-06, + "loss": 0.062841796875, + "step": 85020 + }, + { + "epoch": 0.7351860338432007, + "grad_norm": 11.12227536341906, + "learning_rate": 4.212228931787208e-06, + "loss": 0.06998748779296875, + "step": 85025 + }, + { + "epoch": 0.7352292673647439, + "grad_norm": 25.151638446534406, + "learning_rate": 4.212042569110866e-06, + "loss": 0.3422737121582031, + "step": 85030 + }, + { + "epoch": 0.7352725008862872, + "grad_norm": 1.846901234614829, + "learning_rate": 4.2118562008446915e-06, + "loss": 0.05770263671875, + "step": 85035 + }, + { + "epoch": 0.7353157344078305, + "grad_norm": 3.1558569726959784, + "learning_rate": 4.211669826989542e-06, + "loss": 0.09407958984375, + "step": 85040 + }, + { + "epoch": 0.7353589679293737, + "grad_norm": 18.26975015898733, + "learning_rate": 4.21148344754628e-06, + "loss": 0.17505836486816406, + "step": 85045 + }, + { + "epoch": 0.735402201450917, + "grad_norm": 24.05794099295921, + "learning_rate": 4.211297062515763e-06, + "loss": 0.18872356414794922, + "step": 85050 + }, + { + "epoch": 0.7354454349724603, + "grad_norm": 0.6752077466699845, + "learning_rate": 4.21111067189885e-06, + "loss": 0.05340576171875, + "step": 85055 + }, + { + "epoch": 0.7354886684940035, + "grad_norm": 0.9210523703051186, + "learning_rate": 4.210924275696402e-06, + "loss": 0.31389503479003905, + "step": 85060 + }, + { + "epoch": 0.7355319020155467, + "grad_norm": 2.080431905896346, + "learning_rate": 4.210737873909279e-06, + "loss": 0.015625762939453124, + "step": 85065 + }, + { + "epoch": 0.7355751355370901, + "grad_norm": 2.575545938717458, + "learning_rate": 4.210551466538339e-06, + "loss": 0.10521240234375, + "step": 85070 + }, + { + "epoch": 0.7356183690586333, + "grad_norm": 19.06410812031101, + "learning_rate": 4.210365053584443e-06, + "loss": 0.12866973876953125, + "step": 85075 + }, + { + "epoch": 0.7356616025801765, + "grad_norm": 5.04730718811967, + "learning_rate": 4.21017863504845e-06, + "loss": 0.06499176025390625, + "step": 85080 + }, + { + "epoch": 0.7357048361017199, + "grad_norm": 21.539206227388217, + "learning_rate": 4.209992210931221e-06, + "loss": 0.09638900756835937, + "step": 85085 + }, + { + "epoch": 0.7357480696232631, + "grad_norm": 35.09605431737974, + "learning_rate": 4.209805781233614e-06, + "loss": 0.3890899658203125, + "step": 85090 + }, + { + "epoch": 0.7357913031448063, + "grad_norm": 1.061607483142542, + "learning_rate": 4.2096193459564904e-06, + "loss": 0.083294677734375, + "step": 85095 + }, + { + "epoch": 0.7358345366663496, + "grad_norm": 6.601675238140127, + "learning_rate": 4.209432905100708e-06, + "loss": 0.13037109375, + "step": 85100 + }, + { + "epoch": 0.7358777701878929, + "grad_norm": 1.8077638545522088, + "learning_rate": 4.209246458667128e-06, + "loss": 0.1125274658203125, + "step": 85105 + }, + { + "epoch": 0.7359210037094361, + "grad_norm": 10.920522508876722, + "learning_rate": 4.2090600066566115e-06, + "loss": 0.1216339111328125, + "step": 85110 + }, + { + "epoch": 0.7359642372309794, + "grad_norm": 5.378251613724262, + "learning_rate": 4.208873549070017e-06, + "loss": 0.26207275390625, + "step": 85115 + }, + { + "epoch": 0.7360074707525227, + "grad_norm": 14.239748638382228, + "learning_rate": 4.208687085908203e-06, + "loss": 0.0927642822265625, + "step": 85120 + }, + { + "epoch": 0.7360507042740659, + "grad_norm": 47.592454342741334, + "learning_rate": 4.208500617172032e-06, + "loss": 0.8566364288330078, + "step": 85125 + }, + { + "epoch": 0.7360939377956092, + "grad_norm": 15.777194299346146, + "learning_rate": 4.208314142862362e-06, + "loss": 0.29021873474121096, + "step": 85130 + }, + { + "epoch": 0.7361371713171525, + "grad_norm": 6.130989719951991, + "learning_rate": 4.2081276629800546e-06, + "loss": 0.1365234375, + "step": 85135 + }, + { + "epoch": 0.7361804048386957, + "grad_norm": 3.804189314541226, + "learning_rate": 4.20794117752597e-06, + "loss": 0.1540740966796875, + "step": 85140 + }, + { + "epoch": 0.736223638360239, + "grad_norm": 30.2080703599434, + "learning_rate": 4.2077546865009665e-06, + "loss": 0.10758399963378906, + "step": 85145 + }, + { + "epoch": 0.7362668718817823, + "grad_norm": 32.315530176363616, + "learning_rate": 4.207568189905906e-06, + "loss": 0.1585418701171875, + "step": 85150 + }, + { + "epoch": 0.7363101054033255, + "grad_norm": 3.5827519921399174, + "learning_rate": 4.207381687741647e-06, + "loss": 0.0541412353515625, + "step": 85155 + }, + { + "epoch": 0.7363533389248688, + "grad_norm": 60.27469003781032, + "learning_rate": 4.20719518000905e-06, + "loss": 0.3616813659667969, + "step": 85160 + }, + { + "epoch": 0.7363965724464121, + "grad_norm": 1.9986553063551031, + "learning_rate": 4.207008666708975e-06, + "loss": 0.19809513092041015, + "step": 85165 + }, + { + "epoch": 0.7364398059679553, + "grad_norm": 2.21677865560416, + "learning_rate": 4.206822147842284e-06, + "loss": 0.49593505859375, + "step": 85170 + }, + { + "epoch": 0.7364830394894986, + "grad_norm": 1.9997398712285193, + "learning_rate": 4.206635623409836e-06, + "loss": 0.04060287475585937, + "step": 85175 + }, + { + "epoch": 0.7365262730110418, + "grad_norm": 27.547745828240366, + "learning_rate": 4.20644909341249e-06, + "loss": 0.2191558837890625, + "step": 85180 + }, + { + "epoch": 0.7365695065325851, + "grad_norm": 28.751980059708572, + "learning_rate": 4.2062625578511075e-06, + "loss": 0.13500823974609374, + "step": 85185 + }, + { + "epoch": 0.7366127400541284, + "grad_norm": 0.3364284176795268, + "learning_rate": 4.206076016726549e-06, + "loss": 0.165826416015625, + "step": 85190 + }, + { + "epoch": 0.7366559735756716, + "grad_norm": 38.074745805974956, + "learning_rate": 4.205889470039676e-06, + "loss": 0.20742015838623046, + "step": 85195 + }, + { + "epoch": 0.7366992070972149, + "grad_norm": 6.373511065050166, + "learning_rate": 4.205702917791345e-06, + "loss": 0.07497596740722656, + "step": 85200 + }, + { + "epoch": 0.7367424406187582, + "grad_norm": 36.587133499252566, + "learning_rate": 4.20551635998242e-06, + "loss": 0.23169784545898436, + "step": 85205 + }, + { + "epoch": 0.7367856741403014, + "grad_norm": 2.184320985964636, + "learning_rate": 4.2053297966137595e-06, + "loss": 0.0410888671875, + "step": 85210 + }, + { + "epoch": 0.7368289076618447, + "grad_norm": 0.5326494185574132, + "learning_rate": 4.2051432276862246e-06, + "loss": 0.1193939208984375, + "step": 85215 + }, + { + "epoch": 0.736872141183388, + "grad_norm": 43.0118097058009, + "learning_rate": 4.204956653200675e-06, + "loss": 0.26697845458984376, + "step": 85220 + }, + { + "epoch": 0.7369153747049312, + "grad_norm": 13.613238391493288, + "learning_rate": 4.2047700731579735e-06, + "loss": 0.12857093811035156, + "step": 85225 + }, + { + "epoch": 0.7369586082264745, + "grad_norm": 41.26334469752486, + "learning_rate": 4.204583487558977e-06, + "loss": 0.3560943603515625, + "step": 85230 + }, + { + "epoch": 0.7370018417480177, + "grad_norm": 8.176512957179972, + "learning_rate": 4.204396896404549e-06, + "loss": 0.332086181640625, + "step": 85235 + }, + { + "epoch": 0.737045075269561, + "grad_norm": 6.152353421684419, + "learning_rate": 4.204210299695549e-06, + "loss": 0.1060394287109375, + "step": 85240 + }, + { + "epoch": 0.7370883087911043, + "grad_norm": 3.1352909076744644, + "learning_rate": 4.204023697432837e-06, + "loss": 0.09970474243164062, + "step": 85245 + }, + { + "epoch": 0.7371315423126475, + "grad_norm": 5.9476361911766515, + "learning_rate": 4.203837089617274e-06, + "loss": 0.060660934448242186, + "step": 85250 + }, + { + "epoch": 0.7371747758341908, + "grad_norm": 12.13998964792482, + "learning_rate": 4.203650476249721e-06, + "loss": 0.19753570556640626, + "step": 85255 + }, + { + "epoch": 0.7372180093557341, + "grad_norm": 4.5187617366187185, + "learning_rate": 4.203463857331038e-06, + "loss": 0.09166526794433594, + "step": 85260 + }, + { + "epoch": 0.7372612428772773, + "grad_norm": 1.8531467277841753, + "learning_rate": 4.203277232862087e-06, + "loss": 0.10064678192138672, + "step": 85265 + }, + { + "epoch": 0.7373044763988206, + "grad_norm": 1.2721850084019137, + "learning_rate": 4.203090602843727e-06, + "loss": 0.0937896728515625, + "step": 85270 + }, + { + "epoch": 0.7373477099203638, + "grad_norm": 1.4866666923083227, + "learning_rate": 4.202903967276819e-06, + "loss": 0.17871856689453125, + "step": 85275 + }, + { + "epoch": 0.7373909434419071, + "grad_norm": 1.3251184199184907, + "learning_rate": 4.202717326162225e-06, + "loss": 0.11210479736328124, + "step": 85280 + }, + { + "epoch": 0.7374341769634504, + "grad_norm": 1.6782119223922762, + "learning_rate": 4.202530679500805e-06, + "loss": 0.17375411987304687, + "step": 85285 + }, + { + "epoch": 0.7374774104849936, + "grad_norm": 4.8431913854770965, + "learning_rate": 4.202344027293419e-06, + "loss": 0.22406234741210937, + "step": 85290 + }, + { + "epoch": 0.7375206440065369, + "grad_norm": 4.988963168793822, + "learning_rate": 4.20215736954093e-06, + "loss": 0.12262191772460937, + "step": 85295 + }, + { + "epoch": 0.7375638775280802, + "grad_norm": 5.5033759170064265, + "learning_rate": 4.201970706244196e-06, + "loss": 0.1309112548828125, + "step": 85300 + }, + { + "epoch": 0.7376071110496234, + "grad_norm": 1.8243987857396053, + "learning_rate": 4.201784037404079e-06, + "loss": 0.04072113037109375, + "step": 85305 + }, + { + "epoch": 0.7376503445711667, + "grad_norm": 13.063587783925207, + "learning_rate": 4.201597363021441e-06, + "loss": 0.1414886474609375, + "step": 85310 + }, + { + "epoch": 0.73769357809271, + "grad_norm": 27.951570867767256, + "learning_rate": 4.201410683097143e-06, + "loss": 0.050351715087890624, + "step": 85315 + }, + { + "epoch": 0.7377368116142532, + "grad_norm": 4.080298340144235, + "learning_rate": 4.2012239976320435e-06, + "loss": 0.2671318054199219, + "step": 85320 + }, + { + "epoch": 0.7377800451357965, + "grad_norm": 36.06133394341363, + "learning_rate": 4.201037306627006e-06, + "loss": 0.4202779769897461, + "step": 85325 + }, + { + "epoch": 0.7378232786573398, + "grad_norm": 35.47652499836889, + "learning_rate": 4.20085061008289e-06, + "loss": 0.1895111083984375, + "step": 85330 + }, + { + "epoch": 0.737866512178883, + "grad_norm": 29.678846056060422, + "learning_rate": 4.200663908000558e-06, + "loss": 0.1735870361328125, + "step": 85335 + }, + { + "epoch": 0.7379097457004263, + "grad_norm": 1.9923022707773852, + "learning_rate": 4.200477200380869e-06, + "loss": 0.18898468017578124, + "step": 85340 + }, + { + "epoch": 0.7379529792219696, + "grad_norm": 5.988897837986307, + "learning_rate": 4.200290487224687e-06, + "loss": 0.0634979248046875, + "step": 85345 + }, + { + "epoch": 0.7379962127435128, + "grad_norm": 29.30125576593559, + "learning_rate": 4.200103768532869e-06, + "loss": 0.14344635009765624, + "step": 85350 + }, + { + "epoch": 0.738039446265056, + "grad_norm": 14.030074192108257, + "learning_rate": 4.19991704430628e-06, + "loss": 0.11669921875, + "step": 85355 + }, + { + "epoch": 0.7380826797865994, + "grad_norm": 16.58042564755201, + "learning_rate": 4.199730314545779e-06, + "loss": 0.319952392578125, + "step": 85360 + }, + { + "epoch": 0.7381259133081426, + "grad_norm": 18.160908677386328, + "learning_rate": 4.199543579252228e-06, + "loss": 0.47599258422851565, + "step": 85365 + }, + { + "epoch": 0.7381691468296858, + "grad_norm": 0.43432571865394565, + "learning_rate": 4.199356838426489e-06, + "loss": 0.08044662475585937, + "step": 85370 + }, + { + "epoch": 0.7382123803512292, + "grad_norm": 5.100068677062647, + "learning_rate": 4.19917009206942e-06, + "loss": 0.1284912109375, + "step": 85375 + }, + { + "epoch": 0.7382556138727724, + "grad_norm": 13.147254252547768, + "learning_rate": 4.198983340181887e-06, + "loss": 0.143804931640625, + "step": 85380 + }, + { + "epoch": 0.7382988473943156, + "grad_norm": 1.591468004951329, + "learning_rate": 4.198796582764746e-06, + "loss": 0.2910442352294922, + "step": 85385 + }, + { + "epoch": 0.738342080915859, + "grad_norm": 6.617833416870797, + "learning_rate": 4.198609819818863e-06, + "loss": 0.10694122314453125, + "step": 85390 + }, + { + "epoch": 0.7383853144374022, + "grad_norm": 0.51356806647009, + "learning_rate": 4.1984230513450975e-06, + "loss": 0.2920867919921875, + "step": 85395 + }, + { + "epoch": 0.7384285479589454, + "grad_norm": 2.4899163508538074, + "learning_rate": 4.1982362773443105e-06, + "loss": 0.1957935333251953, + "step": 85400 + }, + { + "epoch": 0.7384717814804888, + "grad_norm": 12.673875202029592, + "learning_rate": 4.198049497817364e-06, + "loss": 0.06436614990234375, + "step": 85405 + }, + { + "epoch": 0.738515015002032, + "grad_norm": 0.3396100187018671, + "learning_rate": 4.1978627127651184e-06, + "loss": 0.16867599487304688, + "step": 85410 + }, + { + "epoch": 0.7385582485235752, + "grad_norm": 5.434987013411536, + "learning_rate": 4.197675922188435e-06, + "loss": 0.0981658935546875, + "step": 85415 + }, + { + "epoch": 0.7386014820451186, + "grad_norm": 1.1588553358913791, + "learning_rate": 4.197489126088177e-06, + "loss": 0.158660888671875, + "step": 85420 + }, + { + "epoch": 0.7386447155666618, + "grad_norm": 46.72197078237622, + "learning_rate": 4.197302324465206e-06, + "loss": 0.3024555206298828, + "step": 85425 + }, + { + "epoch": 0.738687949088205, + "grad_norm": 12.339248391365757, + "learning_rate": 4.197115517320381e-06, + "loss": 0.124017333984375, + "step": 85430 + }, + { + "epoch": 0.7387311826097483, + "grad_norm": 1.3408121910669448, + "learning_rate": 4.196928704654565e-06, + "loss": 0.09393310546875, + "step": 85435 + }, + { + "epoch": 0.7387744161312916, + "grad_norm": 7.483300281847848, + "learning_rate": 4.1967418864686215e-06, + "loss": 0.09131317138671875, + "step": 85440 + }, + { + "epoch": 0.7388176496528348, + "grad_norm": 0.865633132807852, + "learning_rate": 4.196555062763408e-06, + "loss": 0.039779281616210936, + "step": 85445 + }, + { + "epoch": 0.738860883174378, + "grad_norm": 0.6542203659692691, + "learning_rate": 4.196368233539789e-06, + "loss": 0.19764537811279298, + "step": 85450 + }, + { + "epoch": 0.7389041166959214, + "grad_norm": 34.02649030783514, + "learning_rate": 4.196181398798626e-06, + "loss": 0.3023529052734375, + "step": 85455 + }, + { + "epoch": 0.7389473502174646, + "grad_norm": 41.06396448841012, + "learning_rate": 4.19599455854078e-06, + "loss": 0.362353515625, + "step": 85460 + }, + { + "epoch": 0.7389905837390078, + "grad_norm": 24.134417392053063, + "learning_rate": 4.195807712767112e-06, + "loss": 0.21756439208984374, + "step": 85465 + }, + { + "epoch": 0.7390338172605512, + "grad_norm": 0.368227128147041, + "learning_rate": 4.195620861478484e-06, + "loss": 0.054498291015625, + "step": 85470 + }, + { + "epoch": 0.7390770507820944, + "grad_norm": 8.332571841029894, + "learning_rate": 4.195434004675759e-06, + "loss": 0.0932281494140625, + "step": 85475 + }, + { + "epoch": 0.7391202843036376, + "grad_norm": 42.78414624599149, + "learning_rate": 4.195247142359799e-06, + "loss": 0.20977783203125, + "step": 85480 + }, + { + "epoch": 0.739163517825181, + "grad_norm": 1.5384305451283657, + "learning_rate": 4.195060274531463e-06, + "loss": 0.05513458251953125, + "step": 85485 + }, + { + "epoch": 0.7392067513467242, + "grad_norm": 5.621860848795191, + "learning_rate": 4.194873401191616e-06, + "loss": 0.28180732727050783, + "step": 85490 + }, + { + "epoch": 0.7392499848682674, + "grad_norm": 55.036124649928524, + "learning_rate": 4.194686522341118e-06, + "loss": 0.3642402648925781, + "step": 85495 + }, + { + "epoch": 0.7392932183898108, + "grad_norm": 1.9498692496028696, + "learning_rate": 4.194499637980831e-06, + "loss": 0.0656005859375, + "step": 85500 + }, + { + "epoch": 0.739336451911354, + "grad_norm": 5.659612849990857, + "learning_rate": 4.194312748111617e-06, + "loss": 0.051943206787109376, + "step": 85505 + }, + { + "epoch": 0.7393796854328972, + "grad_norm": 28.898301660557607, + "learning_rate": 4.194125852734339e-06, + "loss": 0.3081512451171875, + "step": 85510 + }, + { + "epoch": 0.7394229189544406, + "grad_norm": 0.4029405635849502, + "learning_rate": 4.193938951849857e-06, + "loss": 0.037578582763671875, + "step": 85515 + }, + { + "epoch": 0.7394661524759838, + "grad_norm": 39.1213718391735, + "learning_rate": 4.193752045459036e-06, + "loss": 0.12483978271484375, + "step": 85520 + }, + { + "epoch": 0.739509385997527, + "grad_norm": 0.16514394938756774, + "learning_rate": 4.1935651335627335e-06, + "loss": 0.3622161865234375, + "step": 85525 + }, + { + "epoch": 0.7395526195190703, + "grad_norm": 20.60142840715932, + "learning_rate": 4.193378216161816e-06, + "loss": 0.20938568115234374, + "step": 85530 + }, + { + "epoch": 0.7395958530406136, + "grad_norm": 4.017583357730669, + "learning_rate": 4.193191293257143e-06, + "loss": 0.2776020050048828, + "step": 85535 + }, + { + "epoch": 0.7396390865621568, + "grad_norm": 0.2070227305925609, + "learning_rate": 4.193004364849577e-06, + "loss": 0.09813385009765625, + "step": 85540 + }, + { + "epoch": 0.7396823200837, + "grad_norm": 1.8131259144356422, + "learning_rate": 4.192817430939981e-06, + "loss": 0.05610809326171875, + "step": 85545 + }, + { + "epoch": 0.7397255536052434, + "grad_norm": 21.49069669915236, + "learning_rate": 4.192630491529215e-06, + "loss": 0.2191650390625, + "step": 85550 + }, + { + "epoch": 0.7397687871267866, + "grad_norm": 9.548519868116296, + "learning_rate": 4.1924435466181435e-06, + "loss": 0.16561031341552734, + "step": 85555 + }, + { + "epoch": 0.7398120206483298, + "grad_norm": 20.182698993340207, + "learning_rate": 4.1922565962076274e-06, + "loss": 0.17648487091064452, + "step": 85560 + }, + { + "epoch": 0.7398552541698732, + "grad_norm": 1.3663738179613303, + "learning_rate": 4.19206964029853e-06, + "loss": 0.05367908477783203, + "step": 85565 + }, + { + "epoch": 0.7398984876914164, + "grad_norm": 15.559416614475525, + "learning_rate": 4.191882678891713e-06, + "loss": 0.11465950012207031, + "step": 85570 + }, + { + "epoch": 0.7399417212129596, + "grad_norm": 10.716756753610097, + "learning_rate": 4.191695711988037e-06, + "loss": 0.34005126953125, + "step": 85575 + }, + { + "epoch": 0.739984954734503, + "grad_norm": 8.397372794824268, + "learning_rate": 4.191508739588366e-06, + "loss": 0.05738372802734375, + "step": 85580 + }, + { + "epoch": 0.7400281882560462, + "grad_norm": 4.017729701632622, + "learning_rate": 4.191321761693562e-06, + "loss": 0.03910140991210938, + "step": 85585 + }, + { + "epoch": 0.7400714217775894, + "grad_norm": 0.2050683252154956, + "learning_rate": 4.191134778304488e-06, + "loss": 0.20430908203125, + "step": 85590 + }, + { + "epoch": 0.7401146552991328, + "grad_norm": 41.10336001858673, + "learning_rate": 4.190947789422004e-06, + "loss": 0.20036544799804687, + "step": 85595 + }, + { + "epoch": 0.740157888820676, + "grad_norm": 5.861952755780574, + "learning_rate": 4.190760795046976e-06, + "loss": 0.05694427490234375, + "step": 85600 + }, + { + "epoch": 0.7402011223422192, + "grad_norm": 7.547034519162328, + "learning_rate": 4.190573795180264e-06, + "loss": 0.0371429443359375, + "step": 85605 + }, + { + "epoch": 0.7402443558637626, + "grad_norm": 11.933574225035915, + "learning_rate": 4.19038678982273e-06, + "loss": 0.0775146484375, + "step": 85610 + }, + { + "epoch": 0.7402875893853058, + "grad_norm": 17.44903552658708, + "learning_rate": 4.190199778975238e-06, + "loss": 0.12620162963867188, + "step": 85615 + }, + { + "epoch": 0.740330822906849, + "grad_norm": 0.5339316178197823, + "learning_rate": 4.190012762638649e-06, + "loss": 0.09925689697265624, + "step": 85620 + }, + { + "epoch": 0.7403740564283923, + "grad_norm": 15.860168409007414, + "learning_rate": 4.189825740813828e-06, + "loss": 0.14983291625976564, + "step": 85625 + }, + { + "epoch": 0.7404172899499356, + "grad_norm": 12.860010650752654, + "learning_rate": 4.189638713501635e-06, + "loss": 0.16473617553710937, + "step": 85630 + }, + { + "epoch": 0.7404605234714788, + "grad_norm": 10.981910566159945, + "learning_rate": 4.189451680702933e-06, + "loss": 0.08895339965820312, + "step": 85635 + }, + { + "epoch": 0.7405037569930221, + "grad_norm": 12.928259215539075, + "learning_rate": 4.189264642418584e-06, + "loss": 0.46320037841796874, + "step": 85640 + }, + { + "epoch": 0.7405469905145654, + "grad_norm": 89.95810804575132, + "learning_rate": 4.189077598649453e-06, + "loss": 0.5575668334960937, + "step": 85645 + }, + { + "epoch": 0.7405902240361086, + "grad_norm": 0.10326595618701473, + "learning_rate": 4.1888905493964006e-06, + "loss": 0.02917327880859375, + "step": 85650 + }, + { + "epoch": 0.7406334575576519, + "grad_norm": 1.7131311713965587, + "learning_rate": 4.1887034946602905e-06, + "loss": 0.03876190185546875, + "step": 85655 + }, + { + "epoch": 0.7406766910791952, + "grad_norm": 4.815601788407156, + "learning_rate": 4.188516434441984e-06, + "loss": 0.1040283203125, + "step": 85660 + }, + { + "epoch": 0.7407199246007384, + "grad_norm": 15.437626067477947, + "learning_rate": 4.1883293687423465e-06, + "loss": 0.24755172729492186, + "step": 85665 + }, + { + "epoch": 0.7407631581222817, + "grad_norm": 0.0693049665088619, + "learning_rate": 4.188142297562237e-06, + "loss": 0.16251754760742188, + "step": 85670 + }, + { + "epoch": 0.740806391643825, + "grad_norm": 4.794944861242935, + "learning_rate": 4.187955220902521e-06, + "loss": 0.09524345397949219, + "step": 85675 + }, + { + "epoch": 0.7408496251653682, + "grad_norm": 0.7594807976141325, + "learning_rate": 4.187768138764061e-06, + "loss": 0.10894622802734374, + "step": 85680 + }, + { + "epoch": 0.7408928586869115, + "grad_norm": 0.09358451101411155, + "learning_rate": 4.1875810511477195e-06, + "loss": 0.2637825012207031, + "step": 85685 + }, + { + "epoch": 0.7409360922084548, + "grad_norm": 0.6045742536525764, + "learning_rate": 4.187393958054358e-06, + "loss": 0.06366405487060547, + "step": 85690 + }, + { + "epoch": 0.740979325729998, + "grad_norm": 4.325796708346382, + "learning_rate": 4.187206859484841e-06, + "loss": 0.1151153564453125, + "step": 85695 + }, + { + "epoch": 0.7410225592515413, + "grad_norm": 29.78120413251435, + "learning_rate": 4.187019755440031e-06, + "loss": 0.17525253295898438, + "step": 85700 + }, + { + "epoch": 0.7410657927730845, + "grad_norm": 33.412101465220374, + "learning_rate": 4.18683264592079e-06, + "loss": 0.07723579406738282, + "step": 85705 + }, + { + "epoch": 0.7411090262946278, + "grad_norm": 10.159054862333111, + "learning_rate": 4.186645530927983e-06, + "loss": 0.068939208984375, + "step": 85710 + }, + { + "epoch": 0.741152259816171, + "grad_norm": 4.874490248376691, + "learning_rate": 4.186458410462471e-06, + "loss": 0.0462005615234375, + "step": 85715 + }, + { + "epoch": 0.7411954933377143, + "grad_norm": 21.368497323192983, + "learning_rate": 4.186271284525118e-06, + "loss": 0.14168548583984375, + "step": 85720 + }, + { + "epoch": 0.7412387268592576, + "grad_norm": 0.44165821563258306, + "learning_rate": 4.186084153116785e-06, + "loss": 0.16781158447265626, + "step": 85725 + }, + { + "epoch": 0.7412819603808009, + "grad_norm": 36.24161339125894, + "learning_rate": 4.1858970162383375e-06, + "loss": 0.288134765625, + "step": 85730 + }, + { + "epoch": 0.7413251939023441, + "grad_norm": 1.8543634668572304, + "learning_rate": 4.185709873890639e-06, + "loss": 0.3261604309082031, + "step": 85735 + }, + { + "epoch": 0.7413684274238874, + "grad_norm": 0.7937266767639172, + "learning_rate": 4.1855227260745495e-06, + "loss": 0.08538665771484374, + "step": 85740 + }, + { + "epoch": 0.7414116609454307, + "grad_norm": 4.878221699991963, + "learning_rate": 4.185335572790936e-06, + "loss": 0.0501617431640625, + "step": 85745 + }, + { + "epoch": 0.7414548944669739, + "grad_norm": 11.356180437646792, + "learning_rate": 4.185148414040658e-06, + "loss": 0.328607177734375, + "step": 85750 + }, + { + "epoch": 0.7414981279885172, + "grad_norm": 4.046269074550881, + "learning_rate": 4.184961249824581e-06, + "loss": 0.13604068756103516, + "step": 85755 + }, + { + "epoch": 0.7415413615100604, + "grad_norm": 7.644098819602781, + "learning_rate": 4.184774080143567e-06, + "loss": 0.02690887451171875, + "step": 85760 + }, + { + "epoch": 0.7415845950316037, + "grad_norm": 3.3353779772698937, + "learning_rate": 4.1845869049984795e-06, + "loss": 0.16831207275390625, + "step": 85765 + }, + { + "epoch": 0.741627828553147, + "grad_norm": 12.035792941296185, + "learning_rate": 4.184399724390183e-06, + "loss": 0.09154052734375, + "step": 85770 + }, + { + "epoch": 0.7416710620746902, + "grad_norm": 3.0555938262551705, + "learning_rate": 4.184212538319539e-06, + "loss": 0.02053070068359375, + "step": 85775 + }, + { + "epoch": 0.7417142955962335, + "grad_norm": 5.784643515520763, + "learning_rate": 4.184025346787411e-06, + "loss": 0.1182403564453125, + "step": 85780 + }, + { + "epoch": 0.7417575291177768, + "grad_norm": 6.627550888590399, + "learning_rate": 4.1838381497946625e-06, + "loss": 0.1132568359375, + "step": 85785 + }, + { + "epoch": 0.74180076263932, + "grad_norm": 7.13653827970006, + "learning_rate": 4.183650947342158e-06, + "loss": 0.19003067016601563, + "step": 85790 + }, + { + "epoch": 0.7418439961608633, + "grad_norm": 2.0747505188397635, + "learning_rate": 4.183463739430759e-06, + "loss": 0.028763580322265624, + "step": 85795 + }, + { + "epoch": 0.7418872296824065, + "grad_norm": 26.98890110664896, + "learning_rate": 4.183276526061331e-06, + "loss": 0.378033447265625, + "step": 85800 + }, + { + "epoch": 0.7419304632039498, + "grad_norm": 58.971415846110574, + "learning_rate": 4.183089307234735e-06, + "loss": 0.224224853515625, + "step": 85805 + }, + { + "epoch": 0.7419736967254931, + "grad_norm": 17.532530180778668, + "learning_rate": 4.182902082951836e-06, + "loss": 0.2361572265625, + "step": 85810 + }, + { + "epoch": 0.7420169302470363, + "grad_norm": 19.28112302089749, + "learning_rate": 4.182714853213497e-06, + "loss": 0.1582733154296875, + "step": 85815 + }, + { + "epoch": 0.7420601637685796, + "grad_norm": 36.944159314319016, + "learning_rate": 4.182527618020581e-06, + "loss": 0.23109893798828124, + "step": 85820 + }, + { + "epoch": 0.7421033972901229, + "grad_norm": 15.178969759930817, + "learning_rate": 4.182340377373954e-06, + "loss": 0.3395263671875, + "step": 85825 + }, + { + "epoch": 0.7421466308116661, + "grad_norm": 0.47078197531170674, + "learning_rate": 4.1821531312744775e-06, + "loss": 0.08306045532226562, + "step": 85830 + }, + { + "epoch": 0.7421898643332094, + "grad_norm": 5.128989017083138, + "learning_rate": 4.181965879723013e-06, + "loss": 0.21251449584960938, + "step": 85835 + }, + { + "epoch": 0.7422330978547527, + "grad_norm": 1.080120370616402, + "learning_rate": 4.181778622720427e-06, + "loss": 0.04507522583007813, + "step": 85840 + }, + { + "epoch": 0.7422763313762959, + "grad_norm": 3.198978065755641, + "learning_rate": 4.181591360267584e-06, + "loss": 0.0824493408203125, + "step": 85845 + }, + { + "epoch": 0.7423195648978392, + "grad_norm": 33.63524761172036, + "learning_rate": 4.181404092365344e-06, + "loss": 0.254925537109375, + "step": 85850 + }, + { + "epoch": 0.7423627984193825, + "grad_norm": 21.376872418242492, + "learning_rate": 4.181216819014575e-06, + "loss": 0.1338134765625, + "step": 85855 + }, + { + "epoch": 0.7424060319409257, + "grad_norm": 3.453367164909741, + "learning_rate": 4.181029540216138e-06, + "loss": 0.056708908081054686, + "step": 85860 + }, + { + "epoch": 0.742449265462469, + "grad_norm": 57.05257740451872, + "learning_rate": 4.180842255970896e-06, + "loss": 0.203948974609375, + "step": 85865 + }, + { + "epoch": 0.7424924989840123, + "grad_norm": 8.622180499318164, + "learning_rate": 4.180654966279713e-06, + "loss": 0.2388702392578125, + "step": 85870 + }, + { + "epoch": 0.7425357325055555, + "grad_norm": 3.7713457804166333, + "learning_rate": 4.180467671143455e-06, + "loss": 0.07740306854248047, + "step": 85875 + }, + { + "epoch": 0.7425789660270987, + "grad_norm": 36.115374645194706, + "learning_rate": 4.180280370562985e-06, + "loss": 0.21477737426757812, + "step": 85880 + }, + { + "epoch": 0.7426221995486421, + "grad_norm": 5.955892247988357, + "learning_rate": 4.180093064539165e-06, + "loss": 0.09137725830078125, + "step": 85885 + }, + { + "epoch": 0.7426654330701853, + "grad_norm": 0.05414892071516142, + "learning_rate": 4.1799057530728605e-06, + "loss": 0.07565135955810547, + "step": 85890 + }, + { + "epoch": 0.7427086665917285, + "grad_norm": 14.465568280385861, + "learning_rate": 4.179718436164935e-06, + "loss": 0.11909637451171876, + "step": 85895 + }, + { + "epoch": 0.7427519001132719, + "grad_norm": 25.812687251253735, + "learning_rate": 4.179531113816252e-06, + "loss": 0.1685516357421875, + "step": 85900 + }, + { + "epoch": 0.7427951336348151, + "grad_norm": 36.51432523627017, + "learning_rate": 4.179343786027676e-06, + "loss": 0.6815673828125, + "step": 85905 + }, + { + "epoch": 0.7428383671563583, + "grad_norm": 8.023972417838339, + "learning_rate": 4.179156452800071e-06, + "loss": 0.3111530303955078, + "step": 85910 + }, + { + "epoch": 0.7428816006779017, + "grad_norm": 50.475552436776816, + "learning_rate": 4.1789691141343e-06, + "loss": 0.8684814453125, + "step": 85915 + }, + { + "epoch": 0.7429248341994449, + "grad_norm": 5.889551491058872, + "learning_rate": 4.178781770031229e-06, + "loss": 0.06172599792480469, + "step": 85920 + }, + { + "epoch": 0.7429680677209881, + "grad_norm": 1.2684850769278235, + "learning_rate": 4.1785944204917185e-06, + "loss": 0.05396881103515625, + "step": 85925 + }, + { + "epoch": 0.7430113012425315, + "grad_norm": 5.580356984076044, + "learning_rate": 4.178407065516635e-06, + "loss": 0.14629440307617186, + "step": 85930 + }, + { + "epoch": 0.7430545347640747, + "grad_norm": 0.2748937035577271, + "learning_rate": 4.178219705106843e-06, + "loss": 0.0159271240234375, + "step": 85935 + }, + { + "epoch": 0.7430977682856179, + "grad_norm": 25.269940632610805, + "learning_rate": 4.178032339263206e-06, + "loss": 0.20466079711914062, + "step": 85940 + }, + { + "epoch": 0.7431410018071612, + "grad_norm": 2.902958157849841, + "learning_rate": 4.177844967986588e-06, + "loss": 0.08743743896484375, + "step": 85945 + }, + { + "epoch": 0.7431842353287045, + "grad_norm": 25.547245960940344, + "learning_rate": 4.177657591277852e-06, + "loss": 0.125750732421875, + "step": 85950 + }, + { + "epoch": 0.7432274688502477, + "grad_norm": 7.354500907145441, + "learning_rate": 4.177470209137865e-06, + "loss": 0.17019805908203126, + "step": 85955 + }, + { + "epoch": 0.7432707023717909, + "grad_norm": 4.358502863935871, + "learning_rate": 4.177282821567488e-06, + "loss": 0.35324630737304685, + "step": 85960 + }, + { + "epoch": 0.7433139358933343, + "grad_norm": 9.283216041960001, + "learning_rate": 4.177095428567587e-06, + "loss": 0.06037445068359375, + "step": 85965 + }, + { + "epoch": 0.7433571694148775, + "grad_norm": 1.564200561988183, + "learning_rate": 4.176908030139026e-06, + "loss": 0.040810012817382814, + "step": 85970 + }, + { + "epoch": 0.7434004029364207, + "grad_norm": 28.342119217704425, + "learning_rate": 4.1767206262826695e-06, + "loss": 0.0795989990234375, + "step": 85975 + }, + { + "epoch": 0.7434436364579641, + "grad_norm": 0.36221106547998194, + "learning_rate": 4.176533216999381e-06, + "loss": 0.043927764892578124, + "step": 85980 + }, + { + "epoch": 0.7434868699795073, + "grad_norm": 40.092217378350284, + "learning_rate": 4.176345802290025e-06, + "loss": 0.1092529296875, + "step": 85985 + }, + { + "epoch": 0.7435301035010505, + "grad_norm": 0.23858346235081546, + "learning_rate": 4.176158382155467e-06, + "loss": 0.15194244384765626, + "step": 85990 + }, + { + "epoch": 0.7435733370225939, + "grad_norm": 32.5085145691647, + "learning_rate": 4.17597095659657e-06, + "loss": 0.39964599609375, + "step": 85995 + }, + { + "epoch": 0.7436165705441371, + "grad_norm": 0.36021267737410256, + "learning_rate": 4.175783525614199e-06, + "loss": 0.012014007568359375, + "step": 86000 + }, + { + "epoch": 0.7436598040656803, + "grad_norm": 11.874848741120973, + "learning_rate": 4.175596089209218e-06, + "loss": 0.14393310546875, + "step": 86005 + }, + { + "epoch": 0.7437030375872237, + "grad_norm": 5.241644526562773, + "learning_rate": 4.175408647382492e-06, + "loss": 0.05434684753417969, + "step": 86010 + }, + { + "epoch": 0.7437462711087669, + "grad_norm": 16.46101287593678, + "learning_rate": 4.175221200134885e-06, + "loss": 0.080364990234375, + "step": 86015 + }, + { + "epoch": 0.7437895046303101, + "grad_norm": 7.7598653734594585, + "learning_rate": 4.175033747467262e-06, + "loss": 0.15843505859375, + "step": 86020 + }, + { + "epoch": 0.7438327381518535, + "grad_norm": 7.553561504672671, + "learning_rate": 4.174846289380486e-06, + "loss": 0.353057861328125, + "step": 86025 + }, + { + "epoch": 0.7438759716733967, + "grad_norm": 2.6078116929964392, + "learning_rate": 4.174658825875424e-06, + "loss": 0.07360916137695313, + "step": 86030 + }, + { + "epoch": 0.7439192051949399, + "grad_norm": 1.1071559795029478, + "learning_rate": 4.174471356952939e-06, + "loss": 0.43463287353515623, + "step": 86035 + }, + { + "epoch": 0.7439624387164833, + "grad_norm": 6.008838000361411, + "learning_rate": 4.1742838826138945e-06, + "loss": 0.09102020263671876, + "step": 86040 + }, + { + "epoch": 0.7440056722380265, + "grad_norm": 1.5007907229151725, + "learning_rate": 4.174096402859158e-06, + "loss": 0.06967926025390625, + "step": 86045 + }, + { + "epoch": 0.7440489057595697, + "grad_norm": 6.410530504673818, + "learning_rate": 4.173908917689592e-06, + "loss": 0.022837448120117187, + "step": 86050 + }, + { + "epoch": 0.744092139281113, + "grad_norm": 0.9290270983744735, + "learning_rate": 4.173721427106063e-06, + "loss": 0.14060630798339843, + "step": 86055 + }, + { + "epoch": 0.7441353728026563, + "grad_norm": 0.6851145614793728, + "learning_rate": 4.1735339311094336e-06, + "loss": 0.026947975158691406, + "step": 86060 + }, + { + "epoch": 0.7441786063241995, + "grad_norm": 20.10040428646671, + "learning_rate": 4.17334642970057e-06, + "loss": 0.05526161193847656, + "step": 86065 + }, + { + "epoch": 0.7442218398457427, + "grad_norm": 1.4584815756588632, + "learning_rate": 4.173158922880336e-06, + "loss": 0.06766204833984375, + "step": 86070 + }, + { + "epoch": 0.7442650733672861, + "grad_norm": 0.6084009426026997, + "learning_rate": 4.1729714106495954e-06, + "loss": 0.13315887451171876, + "step": 86075 + }, + { + "epoch": 0.7443083068888293, + "grad_norm": 22.939803454389565, + "learning_rate": 4.1727838930092166e-06, + "loss": 0.4608802795410156, + "step": 86080 + }, + { + "epoch": 0.7443515404103725, + "grad_norm": 42.209514911023085, + "learning_rate": 4.1725963699600605e-06, + "loss": 0.3020050048828125, + "step": 86085 + }, + { + "epoch": 0.7443947739319159, + "grad_norm": 0.20762522489108232, + "learning_rate": 4.172408841502994e-06, + "loss": 0.068878173828125, + "step": 86090 + }, + { + "epoch": 0.7444380074534591, + "grad_norm": 14.7061838332285, + "learning_rate": 4.1722213076388805e-06, + "loss": 0.3988555908203125, + "step": 86095 + }, + { + "epoch": 0.7444812409750023, + "grad_norm": 0.5377955087244839, + "learning_rate": 4.172033768368588e-06, + "loss": 0.014814138412475586, + "step": 86100 + }, + { + "epoch": 0.7445244744965457, + "grad_norm": 22.89517057272061, + "learning_rate": 4.171846223692977e-06, + "loss": 0.1217010498046875, + "step": 86105 + }, + { + "epoch": 0.7445677080180889, + "grad_norm": 9.67788675043155, + "learning_rate": 4.171658673612916e-06, + "loss": 0.23391494750976563, + "step": 86110 + }, + { + "epoch": 0.7446109415396321, + "grad_norm": 1.874159894308713, + "learning_rate": 4.17147111812927e-06, + "loss": 0.16860733032226563, + "step": 86115 + }, + { + "epoch": 0.7446541750611755, + "grad_norm": 2.4638846314881744, + "learning_rate": 4.171283557242901e-06, + "loss": 0.05516891479492188, + "step": 86120 + }, + { + "epoch": 0.7446974085827187, + "grad_norm": 0.3536853346611453, + "learning_rate": 4.171095990954676e-06, + "loss": 0.12211761474609376, + "step": 86125 + }, + { + "epoch": 0.7447406421042619, + "grad_norm": 23.72037796958185, + "learning_rate": 4.1709084192654604e-06, + "loss": 0.2730892181396484, + "step": 86130 + }, + { + "epoch": 0.7447838756258052, + "grad_norm": 12.4409307527273, + "learning_rate": 4.170720842176118e-06, + "loss": 0.10974082946777344, + "step": 86135 + }, + { + "epoch": 0.7448271091473485, + "grad_norm": 2.8582923303250882, + "learning_rate": 4.170533259687514e-06, + "loss": 0.09728889465332032, + "step": 86140 + }, + { + "epoch": 0.7448703426688917, + "grad_norm": 2.3303570583870172, + "learning_rate": 4.170345671800516e-06, + "loss": 0.030397796630859376, + "step": 86145 + }, + { + "epoch": 0.744913576190435, + "grad_norm": 25.296928536158216, + "learning_rate": 4.170158078515985e-06, + "loss": 0.15233154296875, + "step": 86150 + }, + { + "epoch": 0.7449568097119783, + "grad_norm": 3.789410172502339, + "learning_rate": 4.169970479834789e-06, + "loss": 0.16849365234375, + "step": 86155 + }, + { + "epoch": 0.7450000432335215, + "grad_norm": 2.4251600577773003, + "learning_rate": 4.169782875757794e-06, + "loss": 0.2193836212158203, + "step": 86160 + }, + { + "epoch": 0.7450432767550648, + "grad_norm": 6.428653593687252, + "learning_rate": 4.169595266285862e-06, + "loss": 0.05625, + "step": 86165 + }, + { + "epoch": 0.7450865102766081, + "grad_norm": 4.080693863478584, + "learning_rate": 4.16940765141986e-06, + "loss": 0.22249832153320312, + "step": 86170 + }, + { + "epoch": 0.7451297437981513, + "grad_norm": 1.2956816277067709, + "learning_rate": 4.1692200311606535e-06, + "loss": 0.16520004272460936, + "step": 86175 + }, + { + "epoch": 0.7451729773196946, + "grad_norm": 19.339719887737733, + "learning_rate": 4.169032405509108e-06, + "loss": 0.07537841796875, + "step": 86180 + }, + { + "epoch": 0.7452162108412379, + "grad_norm": 3.2907037548367284, + "learning_rate": 4.168844774466088e-06, + "loss": 0.10957088470458984, + "step": 86185 + }, + { + "epoch": 0.7452594443627811, + "grad_norm": 0.5432781982583583, + "learning_rate": 4.16865713803246e-06, + "loss": 0.054627609252929685, + "step": 86190 + }, + { + "epoch": 0.7453026778843244, + "grad_norm": 0.6087001073390952, + "learning_rate": 4.168469496209088e-06, + "loss": 0.1438995361328125, + "step": 86195 + }, + { + "epoch": 0.7453459114058677, + "grad_norm": 2.3929934800432147, + "learning_rate": 4.168281848996836e-06, + "loss": 0.23953704833984374, + "step": 86200 + }, + { + "epoch": 0.7453891449274109, + "grad_norm": 33.91126668249374, + "learning_rate": 4.1680941963965745e-06, + "loss": 0.19542236328125, + "step": 86205 + }, + { + "epoch": 0.7454323784489542, + "grad_norm": 6.1240440903106865, + "learning_rate": 4.1679065384091645e-06, + "loss": 0.141961669921875, + "step": 86210 + }, + { + "epoch": 0.7454756119704975, + "grad_norm": 0.4378961148487985, + "learning_rate": 4.167718875035472e-06, + "loss": 0.1748016357421875, + "step": 86215 + }, + { + "epoch": 0.7455188454920407, + "grad_norm": 1.396940512824273, + "learning_rate": 4.167531206276364e-06, + "loss": 0.34184722900390624, + "step": 86220 + }, + { + "epoch": 0.745562079013584, + "grad_norm": 6.581701426214985, + "learning_rate": 4.167343532132705e-06, + "loss": 0.20865249633789062, + "step": 86225 + }, + { + "epoch": 0.7456053125351272, + "grad_norm": 20.026242206951945, + "learning_rate": 4.167155852605361e-06, + "loss": 0.3001991271972656, + "step": 86230 + }, + { + "epoch": 0.7456485460566705, + "grad_norm": 0.5927733657401248, + "learning_rate": 4.166968167695197e-06, + "loss": 0.1534149169921875, + "step": 86235 + }, + { + "epoch": 0.7456917795782138, + "grad_norm": 4.232350064472784, + "learning_rate": 4.166780477403079e-06, + "loss": 0.09883804321289062, + "step": 86240 + }, + { + "epoch": 0.745735013099757, + "grad_norm": 0.5291937506032577, + "learning_rate": 4.166592781729873e-06, + "loss": 0.20297813415527344, + "step": 86245 + }, + { + "epoch": 0.7457782466213003, + "grad_norm": 1.2488311423509388, + "learning_rate": 4.166405080676444e-06, + "loss": 0.09080047607421875, + "step": 86250 + }, + { + "epoch": 0.7458214801428436, + "grad_norm": 5.858961644448077, + "learning_rate": 4.166217374243658e-06, + "loss": 0.12697296142578124, + "step": 86255 + }, + { + "epoch": 0.7458647136643868, + "grad_norm": 2.086234897252883, + "learning_rate": 4.16602966243238e-06, + "loss": 0.06592559814453125, + "step": 86260 + }, + { + "epoch": 0.7459079471859301, + "grad_norm": 19.709263470973294, + "learning_rate": 4.165841945243475e-06, + "loss": 0.08218555450439453, + "step": 86265 + }, + { + "epoch": 0.7459511807074733, + "grad_norm": 21.21475083805183, + "learning_rate": 4.165654222677813e-06, + "loss": 0.30600738525390625, + "step": 86270 + }, + { + "epoch": 0.7459944142290166, + "grad_norm": 6.88533876105179, + "learning_rate": 4.1654664947362546e-06, + "loss": 0.09671554565429688, + "step": 86275 + }, + { + "epoch": 0.7460376477505599, + "grad_norm": 12.881603768532804, + "learning_rate": 4.1652787614196685e-06, + "loss": 0.221099853515625, + "step": 86280 + }, + { + "epoch": 0.7460808812721031, + "grad_norm": 24.885036022018404, + "learning_rate": 4.165091022728919e-06, + "loss": 0.119134521484375, + "step": 86285 + }, + { + "epoch": 0.7461241147936464, + "grad_norm": 5.203332136834062, + "learning_rate": 4.164903278664873e-06, + "loss": 0.1118804931640625, + "step": 86290 + }, + { + "epoch": 0.7461673483151897, + "grad_norm": 0.05702065414851712, + "learning_rate": 4.1647155292283965e-06, + "loss": 0.017847442626953126, + "step": 86295 + }, + { + "epoch": 0.746210581836733, + "grad_norm": 1.8009293932180195, + "learning_rate": 4.164527774420354e-06, + "loss": 0.37917251586914064, + "step": 86300 + }, + { + "epoch": 0.7462538153582762, + "grad_norm": 8.459300360941146, + "learning_rate": 4.164340014241613e-06, + "loss": 0.0706268310546875, + "step": 86305 + }, + { + "epoch": 0.7462970488798194, + "grad_norm": 5.068274138152662, + "learning_rate": 4.164152248693039e-06, + "loss": 0.17057952880859376, + "step": 86310 + }, + { + "epoch": 0.7463402824013627, + "grad_norm": 2.3728315573939534, + "learning_rate": 4.163964477775498e-06, + "loss": 0.030328941345214844, + "step": 86315 + }, + { + "epoch": 0.746383515922906, + "grad_norm": 0.9179782196953542, + "learning_rate": 4.163776701489854e-06, + "loss": 0.0929931640625, + "step": 86320 + }, + { + "epoch": 0.7464267494444492, + "grad_norm": 22.62966351688704, + "learning_rate": 4.163588919836976e-06, + "loss": 0.09401569366455079, + "step": 86325 + }, + { + "epoch": 0.7464699829659925, + "grad_norm": 10.032398966221342, + "learning_rate": 4.163401132817727e-06, + "loss": 0.048246002197265624, + "step": 86330 + }, + { + "epoch": 0.7465132164875358, + "grad_norm": 4.600326194193162, + "learning_rate": 4.163213340432977e-06, + "loss": 0.12650909423828124, + "step": 86335 + }, + { + "epoch": 0.746556450009079, + "grad_norm": 11.922669737132237, + "learning_rate": 4.16302554268359e-06, + "loss": 0.0802215576171875, + "step": 86340 + }, + { + "epoch": 0.7465996835306223, + "grad_norm": 14.553558869550098, + "learning_rate": 4.16283773957043e-06, + "loss": 0.05710945129394531, + "step": 86345 + }, + { + "epoch": 0.7466429170521656, + "grad_norm": 2.8260766250069937, + "learning_rate": 4.162649931094366e-06, + "loss": 0.3454242706298828, + "step": 86350 + }, + { + "epoch": 0.7466861505737088, + "grad_norm": 45.567551260268, + "learning_rate": 4.162462117256263e-06, + "loss": 0.2907524108886719, + "step": 86355 + }, + { + "epoch": 0.7467293840952521, + "grad_norm": 4.282174327168123, + "learning_rate": 4.162274298056987e-06, + "loss": 0.044720458984375, + "step": 86360 + }, + { + "epoch": 0.7467726176167954, + "grad_norm": 5.931164622748046, + "learning_rate": 4.1620864734974064e-06, + "loss": 0.07211837768554688, + "step": 86365 + }, + { + "epoch": 0.7468158511383386, + "grad_norm": 1.7562817959522883, + "learning_rate": 4.161898643578384e-06, + "loss": 0.06246337890625, + "step": 86370 + }, + { + "epoch": 0.7468590846598819, + "grad_norm": 52.798660776660256, + "learning_rate": 4.161710808300789e-06, + "loss": 0.40054931640625, + "step": 86375 + }, + { + "epoch": 0.7469023181814252, + "grad_norm": 3.3178460057689527, + "learning_rate": 4.161522967665485e-06, + "loss": 0.097259521484375, + "step": 86380 + }, + { + "epoch": 0.7469455517029684, + "grad_norm": 12.751785014597038, + "learning_rate": 4.161335121673341e-06, + "loss": 0.20885982513427734, + "step": 86385 + }, + { + "epoch": 0.7469887852245117, + "grad_norm": 1.555199915022431, + "learning_rate": 4.161147270325221e-06, + "loss": 0.165972900390625, + "step": 86390 + }, + { + "epoch": 0.747032018746055, + "grad_norm": 13.368313110437693, + "learning_rate": 4.160959413621993e-06, + "loss": 0.13193359375, + "step": 86395 + }, + { + "epoch": 0.7470752522675982, + "grad_norm": 12.87035825168922, + "learning_rate": 4.160771551564522e-06, + "loss": 0.1416473388671875, + "step": 86400 + }, + { + "epoch": 0.7471184857891414, + "grad_norm": 86.92619127272215, + "learning_rate": 4.1605836841536765e-06, + "loss": 0.3357666015625, + "step": 86405 + }, + { + "epoch": 0.7471617193106848, + "grad_norm": 9.293246335940175, + "learning_rate": 4.1603958113903206e-06, + "loss": 0.08543930053710938, + "step": 86410 + }, + { + "epoch": 0.747204952832228, + "grad_norm": 41.68015128671953, + "learning_rate": 4.160207933275322e-06, + "loss": 0.1239532470703125, + "step": 86415 + }, + { + "epoch": 0.7472481863537712, + "grad_norm": 14.32399197323688, + "learning_rate": 4.160020049809548e-06, + "loss": 0.22090644836425782, + "step": 86420 + }, + { + "epoch": 0.7472914198753146, + "grad_norm": 9.708564403495284, + "learning_rate": 4.159832160993862e-06, + "loss": 0.2555545806884766, + "step": 86425 + }, + { + "epoch": 0.7473346533968578, + "grad_norm": 0.7501000083902168, + "learning_rate": 4.159644266829133e-06, + "loss": 0.12542800903320311, + "step": 86430 + }, + { + "epoch": 0.747377886918401, + "grad_norm": 0.019618365111586628, + "learning_rate": 4.159456367316228e-06, + "loss": 0.39189720153808594, + "step": 86435 + }, + { + "epoch": 0.7474211204399444, + "grad_norm": 8.02910954492936, + "learning_rate": 4.159268462456012e-06, + "loss": 0.20890464782714843, + "step": 86440 + }, + { + "epoch": 0.7474643539614876, + "grad_norm": 8.709559537393838, + "learning_rate": 4.159080552249354e-06, + "loss": 0.04397125244140625, + "step": 86445 + }, + { + "epoch": 0.7475075874830308, + "grad_norm": 4.077779378907635, + "learning_rate": 4.1588926366971175e-06, + "loss": 0.18798980712890626, + "step": 86450 + }, + { + "epoch": 0.7475508210045742, + "grad_norm": 30.122048029653136, + "learning_rate": 4.15870471580017e-06, + "loss": 0.501461410522461, + "step": 86455 + }, + { + "epoch": 0.7475940545261174, + "grad_norm": 10.871706735161576, + "learning_rate": 4.15851678955938e-06, + "loss": 0.07414932250976562, + "step": 86460 + }, + { + "epoch": 0.7476372880476606, + "grad_norm": 8.213285245011454, + "learning_rate": 4.158328857975611e-06, + "loss": 0.0603057861328125, + "step": 86465 + }, + { + "epoch": 0.747680521569204, + "grad_norm": 7.4622626655798925, + "learning_rate": 4.158140921049734e-06, + "loss": 0.07300853729248047, + "step": 86470 + }, + { + "epoch": 0.7477237550907472, + "grad_norm": 0.06809088005696019, + "learning_rate": 4.157952978782612e-06, + "loss": 0.30060863494873047, + "step": 86475 + }, + { + "epoch": 0.7477669886122904, + "grad_norm": 21.456302236799498, + "learning_rate": 4.157765031175114e-06, + "loss": 0.14334945678710936, + "step": 86480 + }, + { + "epoch": 0.7478102221338336, + "grad_norm": 1.8193867725601194, + "learning_rate": 4.157577078228105e-06, + "loss": 0.07652740478515625, + "step": 86485 + }, + { + "epoch": 0.747853455655377, + "grad_norm": 7.672503929117214, + "learning_rate": 4.157389119942453e-06, + "loss": 0.2126220703125, + "step": 86490 + }, + { + "epoch": 0.7478966891769202, + "grad_norm": 25.437918184091245, + "learning_rate": 4.157201156319025e-06, + "loss": 0.1249786376953125, + "step": 86495 + }, + { + "epoch": 0.7479399226984634, + "grad_norm": 9.402093390029167, + "learning_rate": 4.157013187358689e-06, + "loss": 0.09195671081542969, + "step": 86500 + }, + { + "epoch": 0.7479831562200068, + "grad_norm": 28.165864222700083, + "learning_rate": 4.156825213062308e-06, + "loss": 0.1006988525390625, + "step": 86505 + }, + { + "epoch": 0.74802638974155, + "grad_norm": 33.22585201252008, + "learning_rate": 4.156637233430753e-06, + "loss": 0.36728515625, + "step": 86510 + }, + { + "epoch": 0.7480696232630932, + "grad_norm": 3.793920753004979, + "learning_rate": 4.156449248464888e-06, + "loss": 0.07946624755859374, + "step": 86515 + }, + { + "epoch": 0.7481128567846366, + "grad_norm": 3.2835580793822685, + "learning_rate": 4.156261258165581e-06, + "loss": 0.15620498657226561, + "step": 86520 + }, + { + "epoch": 0.7481560903061798, + "grad_norm": 1.1998902629570374, + "learning_rate": 4.156073262533701e-06, + "loss": 0.16828994750976561, + "step": 86525 + }, + { + "epoch": 0.748199323827723, + "grad_norm": 23.67710520695194, + "learning_rate": 4.155885261570112e-06, + "loss": 0.16303768157958984, + "step": 86530 + }, + { + "epoch": 0.7482425573492664, + "grad_norm": 2.1851497816742858, + "learning_rate": 4.155697255275682e-06, + "loss": 0.3014190673828125, + "step": 86535 + }, + { + "epoch": 0.7482857908708096, + "grad_norm": 1.638549439013079, + "learning_rate": 4.1555092436512794e-06, + "loss": 0.08758201599121093, + "step": 86540 + }, + { + "epoch": 0.7483290243923528, + "grad_norm": 0.11628612877211739, + "learning_rate": 4.155321226697769e-06, + "loss": 0.09853477478027343, + "step": 86545 + }, + { + "epoch": 0.7483722579138962, + "grad_norm": 29.34559822933684, + "learning_rate": 4.15513320441602e-06, + "loss": 0.13950538635253906, + "step": 86550 + }, + { + "epoch": 0.7484154914354394, + "grad_norm": 13.52987809793618, + "learning_rate": 4.1549451768069e-06, + "loss": 0.16335601806640626, + "step": 86555 + }, + { + "epoch": 0.7484587249569826, + "grad_norm": 11.214390374643633, + "learning_rate": 4.154757143871273e-06, + "loss": 0.33894500732421873, + "step": 86560 + }, + { + "epoch": 0.748501958478526, + "grad_norm": 1.1748978499006764, + "learning_rate": 4.154569105610009e-06, + "loss": 0.04590911865234375, + "step": 86565 + }, + { + "epoch": 0.7485451920000692, + "grad_norm": 3.1838897273648454, + "learning_rate": 4.154381062023974e-06, + "loss": 0.10846786499023438, + "step": 86570 + }, + { + "epoch": 0.7485884255216124, + "grad_norm": 46.23689074552767, + "learning_rate": 4.154193013114035e-06, + "loss": 0.23340988159179688, + "step": 86575 + }, + { + "epoch": 0.7486316590431557, + "grad_norm": 2.893754005972654, + "learning_rate": 4.154004958881061e-06, + "loss": 0.08474502563476563, + "step": 86580 + }, + { + "epoch": 0.748674892564699, + "grad_norm": 25.138137594269285, + "learning_rate": 4.153816899325917e-06, + "loss": 0.28529052734375, + "step": 86585 + }, + { + "epoch": 0.7487181260862422, + "grad_norm": 1.630334268431898, + "learning_rate": 4.153628834449473e-06, + "loss": 0.15872802734375, + "step": 86590 + }, + { + "epoch": 0.7487613596077854, + "grad_norm": 1.6341186944576063, + "learning_rate": 4.153440764252593e-06, + "loss": 0.3922740936279297, + "step": 86595 + }, + { + "epoch": 0.7488045931293288, + "grad_norm": 0.576024641676508, + "learning_rate": 4.153252688736145e-06, + "loss": 0.0712799072265625, + "step": 86600 + }, + { + "epoch": 0.748847826650872, + "grad_norm": 25.366617094665347, + "learning_rate": 4.153064607901001e-06, + "loss": 0.25693511962890625, + "step": 86605 + }, + { + "epoch": 0.7488910601724152, + "grad_norm": 35.873091535425054, + "learning_rate": 4.152876521748023e-06, + "loss": 0.1281951904296875, + "step": 86610 + }, + { + "epoch": 0.7489342936939586, + "grad_norm": 0.5883518552358091, + "learning_rate": 4.152688430278081e-06, + "loss": 0.07355270385742188, + "step": 86615 + }, + { + "epoch": 0.7489775272155018, + "grad_norm": 2.851062262578317, + "learning_rate": 4.152500333492041e-06, + "loss": 0.050651168823242186, + "step": 86620 + }, + { + "epoch": 0.749020760737045, + "grad_norm": 11.304665122831624, + "learning_rate": 4.152312231390771e-06, + "loss": 0.16793975830078126, + "step": 86625 + }, + { + "epoch": 0.7490639942585884, + "grad_norm": 31.693643898003216, + "learning_rate": 4.152124123975139e-06, + "loss": 0.42571773529052737, + "step": 86630 + }, + { + "epoch": 0.7491072277801316, + "grad_norm": 3.964016761534703, + "learning_rate": 4.151936011246013e-06, + "loss": 0.15406341552734376, + "step": 86635 + }, + { + "epoch": 0.7491504613016748, + "grad_norm": 4.023387962920305, + "learning_rate": 4.151747893204261e-06, + "loss": 0.15364532470703124, + "step": 86640 + }, + { + "epoch": 0.7491936948232182, + "grad_norm": 2.505270302530748, + "learning_rate": 4.1515597698507475e-06, + "loss": 0.06204147338867187, + "step": 86645 + }, + { + "epoch": 0.7492369283447614, + "grad_norm": 14.811802196018798, + "learning_rate": 4.1513716411863436e-06, + "loss": 0.17982177734375, + "step": 86650 + }, + { + "epoch": 0.7492801618663046, + "grad_norm": 40.40101964421521, + "learning_rate": 4.151183507211914e-06, + "loss": 0.31733551025390627, + "step": 86655 + }, + { + "epoch": 0.7493233953878479, + "grad_norm": 0.2667825878200438, + "learning_rate": 4.150995367928328e-06, + "loss": 0.013623046875, + "step": 86660 + }, + { + "epoch": 0.7493666289093912, + "grad_norm": 3.692784178963595, + "learning_rate": 4.150807223336454e-06, + "loss": 0.03040008544921875, + "step": 86665 + }, + { + "epoch": 0.7494098624309344, + "grad_norm": 6.217220844810119, + "learning_rate": 4.1506190734371585e-06, + "loss": 0.16190643310546876, + "step": 86670 + }, + { + "epoch": 0.7494530959524777, + "grad_norm": 3.214279544406209, + "learning_rate": 4.150430918231309e-06, + "loss": 0.1839630126953125, + "step": 86675 + }, + { + "epoch": 0.749496329474021, + "grad_norm": 0.4562215888174895, + "learning_rate": 4.150242757719774e-06, + "loss": 0.2007720947265625, + "step": 86680 + }, + { + "epoch": 0.7495395629955642, + "grad_norm": 1.1215429706254987, + "learning_rate": 4.15005459190342e-06, + "loss": 0.057879638671875, + "step": 86685 + }, + { + "epoch": 0.7495827965171075, + "grad_norm": 44.91022584863214, + "learning_rate": 4.149866420783118e-06, + "loss": 0.667437744140625, + "step": 86690 + }, + { + "epoch": 0.7496260300386508, + "grad_norm": 6.316642153296231, + "learning_rate": 4.149678244359732e-06, + "loss": 0.05531463623046875, + "step": 86695 + }, + { + "epoch": 0.749669263560194, + "grad_norm": 3.5325219398645915, + "learning_rate": 4.1494900626341325e-06, + "loss": 0.03817825317382813, + "step": 86700 + }, + { + "epoch": 0.7497124970817373, + "grad_norm": 14.689001874613373, + "learning_rate": 4.149301875607187e-06, + "loss": 0.16568603515625, + "step": 86705 + }, + { + "epoch": 0.7497557306032806, + "grad_norm": 1.0959761300904938, + "learning_rate": 4.149113683279761e-06, + "loss": 0.3960836410522461, + "step": 86710 + }, + { + "epoch": 0.7497989641248238, + "grad_norm": 30.74848304652641, + "learning_rate": 4.148925485652725e-06, + "loss": 0.20557403564453125, + "step": 86715 + }, + { + "epoch": 0.7498421976463671, + "grad_norm": 0.43906063874923834, + "learning_rate": 4.148737282726946e-06, + "loss": 0.09730110168457032, + "step": 86720 + }, + { + "epoch": 0.7498854311679104, + "grad_norm": 1.171695803467683, + "learning_rate": 4.148549074503292e-06, + "loss": 0.4861602783203125, + "step": 86725 + }, + { + "epoch": 0.7499286646894536, + "grad_norm": 15.869814427460925, + "learning_rate": 4.148360860982632e-06, + "loss": 0.30378265380859376, + "step": 86730 + }, + { + "epoch": 0.7499718982109969, + "grad_norm": 10.222881700833073, + "learning_rate": 4.148172642165833e-06, + "loss": 0.34899368286132815, + "step": 86735 + }, + { + "epoch": 0.7500151317325402, + "grad_norm": 26.18486936179846, + "learning_rate": 4.147984418053762e-06, + "loss": 0.12708740234375, + "step": 86740 + }, + { + "epoch": 0.7500583652540834, + "grad_norm": 10.111612142439151, + "learning_rate": 4.1477961886472895e-06, + "loss": 0.062481689453125, + "step": 86745 + }, + { + "epoch": 0.7501015987756267, + "grad_norm": 52.38324289085845, + "learning_rate": 4.147607953947282e-06, + "loss": 0.536404037475586, + "step": 86750 + }, + { + "epoch": 0.7501448322971699, + "grad_norm": 4.080618011569881, + "learning_rate": 4.147419713954609e-06, + "loss": 0.2371826171875, + "step": 86755 + }, + { + "epoch": 0.7501880658187132, + "grad_norm": 4.2443300364499414, + "learning_rate": 4.147231468670136e-06, + "loss": 0.117022705078125, + "step": 86760 + }, + { + "epoch": 0.7502312993402565, + "grad_norm": 4.402257696093449, + "learning_rate": 4.147043218094734e-06, + "loss": 0.09253692626953125, + "step": 86765 + }, + { + "epoch": 0.7502745328617997, + "grad_norm": 52.17511368440454, + "learning_rate": 4.14685496222927e-06, + "loss": 0.2949485778808594, + "step": 86770 + }, + { + "epoch": 0.750317766383343, + "grad_norm": 18.262995404587553, + "learning_rate": 4.146666701074611e-06, + "loss": 0.1692718505859375, + "step": 86775 + }, + { + "epoch": 0.7503609999048863, + "grad_norm": 0.2717573510411089, + "learning_rate": 4.146478434631628e-06, + "loss": 0.29234619140625, + "step": 86780 + }, + { + "epoch": 0.7504042334264295, + "grad_norm": 12.915377495205755, + "learning_rate": 4.146290162901187e-06, + "loss": 0.25333251953125, + "step": 86785 + }, + { + "epoch": 0.7504474669479728, + "grad_norm": 28.124503105408923, + "learning_rate": 4.1461018858841575e-06, + "loss": 0.16540184020996093, + "step": 86790 + }, + { + "epoch": 0.750490700469516, + "grad_norm": 20.373009512733773, + "learning_rate": 4.145913603581407e-06, + "loss": 0.45638580322265626, + "step": 86795 + }, + { + "epoch": 0.7505339339910593, + "grad_norm": 34.07252624502705, + "learning_rate": 4.145725315993804e-06, + "loss": 0.22300262451171876, + "step": 86800 + }, + { + "epoch": 0.7505771675126026, + "grad_norm": 12.452885151281937, + "learning_rate": 4.145537023122217e-06, + "loss": 0.16058425903320311, + "step": 86805 + }, + { + "epoch": 0.7506204010341458, + "grad_norm": 11.624384084928014, + "learning_rate": 4.145348724967515e-06, + "loss": 0.2609382629394531, + "step": 86810 + }, + { + "epoch": 0.7506636345556891, + "grad_norm": 0.509790125638261, + "learning_rate": 4.145160421530567e-06, + "loss": 0.25077285766601565, + "step": 86815 + }, + { + "epoch": 0.7507068680772324, + "grad_norm": 8.827571813389074, + "learning_rate": 4.144972112812238e-06, + "loss": 0.07624359130859375, + "step": 86820 + }, + { + "epoch": 0.7507501015987756, + "grad_norm": 2.527081526157638, + "learning_rate": 4.144783798813399e-06, + "loss": 0.09406661987304688, + "step": 86825 + }, + { + "epoch": 0.7507933351203189, + "grad_norm": 0.12107773400388612, + "learning_rate": 4.14459547953492e-06, + "loss": 0.20338668823242187, + "step": 86830 + }, + { + "epoch": 0.7508365686418621, + "grad_norm": 1.1667641756854783, + "learning_rate": 4.1444071549776674e-06, + "loss": 0.03405303955078125, + "step": 86835 + }, + { + "epoch": 0.7508798021634054, + "grad_norm": 34.12209832217845, + "learning_rate": 4.144218825142509e-06, + "loss": 0.16655349731445312, + "step": 86840 + }, + { + "epoch": 0.7509230356849487, + "grad_norm": 0.45693072693688247, + "learning_rate": 4.144030490030316e-06, + "loss": 0.04239349365234375, + "step": 86845 + }, + { + "epoch": 0.7509662692064919, + "grad_norm": 4.190338987320019, + "learning_rate": 4.143842149641954e-06, + "loss": 0.059864234924316403, + "step": 86850 + }, + { + "epoch": 0.7510095027280352, + "grad_norm": 31.251454774519402, + "learning_rate": 4.1436538039782935e-06, + "loss": 0.12537384033203125, + "step": 86855 + }, + { + "epoch": 0.7510527362495785, + "grad_norm": 2.623643499517929, + "learning_rate": 4.143465453040203e-06, + "loss": 0.17010269165039063, + "step": 86860 + }, + { + "epoch": 0.7510959697711217, + "grad_norm": 1.7707191751029083, + "learning_rate": 4.143277096828551e-06, + "loss": 0.209893798828125, + "step": 86865 + }, + { + "epoch": 0.751139203292665, + "grad_norm": 38.229895755259136, + "learning_rate": 4.143088735344206e-06, + "loss": 0.11828498840332032, + "step": 86870 + }, + { + "epoch": 0.7511824368142083, + "grad_norm": 13.170335260945214, + "learning_rate": 4.142900368588036e-06, + "loss": 0.12598876953125, + "step": 86875 + }, + { + "epoch": 0.7512256703357515, + "grad_norm": 13.913872946157877, + "learning_rate": 4.142711996560911e-06, + "loss": 0.06672439575195313, + "step": 86880 + }, + { + "epoch": 0.7512689038572948, + "grad_norm": 0.61014964998869, + "learning_rate": 4.1425236192637e-06, + "loss": 0.15780029296875, + "step": 86885 + }, + { + "epoch": 0.7513121373788381, + "grad_norm": 16.825213922948226, + "learning_rate": 4.14233523669727e-06, + "loss": 0.5296417236328125, + "step": 86890 + }, + { + "epoch": 0.7513553709003813, + "grad_norm": 3.616532933292272, + "learning_rate": 4.1421468488624915e-06, + "loss": 0.044174575805664064, + "step": 86895 + }, + { + "epoch": 0.7513986044219246, + "grad_norm": 0.5009661587570082, + "learning_rate": 4.141958455760232e-06, + "loss": 0.0715780258178711, + "step": 86900 + }, + { + "epoch": 0.7514418379434679, + "grad_norm": 22.42384802713741, + "learning_rate": 4.141770057391361e-06, + "loss": 0.160028076171875, + "step": 86905 + }, + { + "epoch": 0.7514850714650111, + "grad_norm": 0.32718861396700716, + "learning_rate": 4.141581653756748e-06, + "loss": 0.18749542236328126, + "step": 86910 + }, + { + "epoch": 0.7515283049865544, + "grad_norm": 0.5993749445662756, + "learning_rate": 4.14139324485726e-06, + "loss": 0.01104278564453125, + "step": 86915 + }, + { + "epoch": 0.7515715385080977, + "grad_norm": 4.191111814242021, + "learning_rate": 4.141204830693769e-06, + "loss": 0.14899444580078125, + "step": 86920 + }, + { + "epoch": 0.7516147720296409, + "grad_norm": 27.4403805024297, + "learning_rate": 4.141016411267142e-06, + "loss": 0.2933784484863281, + "step": 86925 + }, + { + "epoch": 0.7516580055511841, + "grad_norm": 37.73273089087591, + "learning_rate": 4.140827986578248e-06, + "loss": 0.3001270294189453, + "step": 86930 + }, + { + "epoch": 0.7517012390727275, + "grad_norm": 2.116033027986699, + "learning_rate": 4.140639556627955e-06, + "loss": 0.253900146484375, + "step": 86935 + }, + { + "epoch": 0.7517444725942707, + "grad_norm": 8.192174498127324, + "learning_rate": 4.140451121417133e-06, + "loss": 0.23665618896484375, + "step": 86940 + }, + { + "epoch": 0.7517877061158139, + "grad_norm": 44.388550316435875, + "learning_rate": 4.140262680946653e-06, + "loss": 0.153436279296875, + "step": 86945 + }, + { + "epoch": 0.7518309396373573, + "grad_norm": 17.637968530200062, + "learning_rate": 4.14007423521738e-06, + "loss": 0.0686981201171875, + "step": 86950 + }, + { + "epoch": 0.7518741731589005, + "grad_norm": 5.367707274921029, + "learning_rate": 4.139885784230187e-06, + "loss": 0.403515625, + "step": 86955 + }, + { + "epoch": 0.7519174066804437, + "grad_norm": 13.223803022409736, + "learning_rate": 4.139697327985942e-06, + "loss": 0.1165252685546875, + "step": 86960 + }, + { + "epoch": 0.751960640201987, + "grad_norm": 97.95387531221294, + "learning_rate": 4.139508866485512e-06, + "loss": 0.5506599426269532, + "step": 86965 + }, + { + "epoch": 0.7520038737235303, + "grad_norm": 2.5256161163395485, + "learning_rate": 4.139320399729768e-06, + "loss": 0.283868408203125, + "step": 86970 + }, + { + "epoch": 0.7520471072450735, + "grad_norm": 5.409304141354786, + "learning_rate": 4.139131927719581e-06, + "loss": 0.2480175018310547, + "step": 86975 + }, + { + "epoch": 0.7520903407666168, + "grad_norm": 14.418430462563471, + "learning_rate": 4.138943450455816e-06, + "loss": 0.102581787109375, + "step": 86980 + }, + { + "epoch": 0.7521335742881601, + "grad_norm": 4.392774758770661, + "learning_rate": 4.138754967939345e-06, + "loss": 0.043994140625, + "step": 86985 + }, + { + "epoch": 0.7521768078097033, + "grad_norm": 0.0968148214780277, + "learning_rate": 4.138566480171037e-06, + "loss": 0.0521270751953125, + "step": 86990 + }, + { + "epoch": 0.7522200413312466, + "grad_norm": 6.292772213561931, + "learning_rate": 4.13837798715176e-06, + "loss": 0.145263671875, + "step": 86995 + }, + { + "epoch": 0.7522632748527899, + "grad_norm": 0.793607351990162, + "learning_rate": 4.138189488882386e-06, + "loss": 0.09802627563476562, + "step": 87000 + }, + { + "epoch": 0.7523065083743331, + "grad_norm": 3.506119287343912, + "learning_rate": 4.138000985363782e-06, + "loss": 0.227496337890625, + "step": 87005 + }, + { + "epoch": 0.7523497418958763, + "grad_norm": 10.426317858296116, + "learning_rate": 4.137812476596818e-06, + "loss": 0.08487205505371094, + "step": 87010 + }, + { + "epoch": 0.7523929754174197, + "grad_norm": 5.045565633955442, + "learning_rate": 4.137623962582363e-06, + "loss": 0.08267860412597657, + "step": 87015 + }, + { + "epoch": 0.7524362089389629, + "grad_norm": 13.125105252136699, + "learning_rate": 4.137435443321287e-06, + "loss": 0.2689910888671875, + "step": 87020 + }, + { + "epoch": 0.7524794424605061, + "grad_norm": 4.316607041611107, + "learning_rate": 4.137246918814459e-06, + "loss": 0.24339599609375, + "step": 87025 + }, + { + "epoch": 0.7525226759820495, + "grad_norm": 8.401762227812231, + "learning_rate": 4.1370583890627485e-06, + "loss": 0.43743438720703126, + "step": 87030 + }, + { + "epoch": 0.7525659095035927, + "grad_norm": 1.7801763501595256, + "learning_rate": 4.136869854067026e-06, + "loss": 0.06302032470703126, + "step": 87035 + }, + { + "epoch": 0.7526091430251359, + "grad_norm": 0.8502173046040358, + "learning_rate": 4.13668131382816e-06, + "loss": 0.285626220703125, + "step": 87040 + }, + { + "epoch": 0.7526523765466793, + "grad_norm": 21.804515487738747, + "learning_rate": 4.13649276834702e-06, + "loss": 0.11019287109375, + "step": 87045 + }, + { + "epoch": 0.7526956100682225, + "grad_norm": 3.209058973355902, + "learning_rate": 4.136304217624476e-06, + "loss": 0.088616943359375, + "step": 87050 + }, + { + "epoch": 0.7527388435897657, + "grad_norm": 16.869703165197674, + "learning_rate": 4.136115661661397e-06, + "loss": 0.4343994140625, + "step": 87055 + }, + { + "epoch": 0.7527820771113091, + "grad_norm": 52.664048263189436, + "learning_rate": 4.135927100458653e-06, + "loss": 0.22548751831054686, + "step": 87060 + }, + { + "epoch": 0.7528253106328523, + "grad_norm": 12.12571545063261, + "learning_rate": 4.135738534017114e-06, + "loss": 0.11226806640625, + "step": 87065 + }, + { + "epoch": 0.7528685441543955, + "grad_norm": 21.089789435907523, + "learning_rate": 4.13554996233765e-06, + "loss": 0.19652023315429687, + "step": 87070 + }, + { + "epoch": 0.7529117776759389, + "grad_norm": 4.251970053201964, + "learning_rate": 4.135361385421128e-06, + "loss": 0.10286216735839844, + "step": 87075 + }, + { + "epoch": 0.7529550111974821, + "grad_norm": 130.26489473186737, + "learning_rate": 4.13517280326842e-06, + "loss": 0.5508893013000489, + "step": 87080 + }, + { + "epoch": 0.7529982447190253, + "grad_norm": 0.18571159811011925, + "learning_rate": 4.134984215880396e-06, + "loss": 0.015439605712890625, + "step": 87085 + }, + { + "epoch": 0.7530414782405687, + "grad_norm": 2.176044452236998, + "learning_rate": 4.134795623257925e-06, + "loss": 0.15414962768554688, + "step": 87090 + }, + { + "epoch": 0.7530847117621119, + "grad_norm": 8.480131993720965, + "learning_rate": 4.134607025401877e-06, + "loss": 0.0978759765625, + "step": 87095 + }, + { + "epoch": 0.7531279452836551, + "grad_norm": 4.324124575215095, + "learning_rate": 4.134418422313122e-06, + "loss": 0.19559783935546876, + "step": 87100 + }, + { + "epoch": 0.7531711788051983, + "grad_norm": 1.8175781824267585, + "learning_rate": 4.1342298139925284e-06, + "loss": 0.07924957275390625, + "step": 87105 + }, + { + "epoch": 0.7532144123267417, + "grad_norm": 2.256963662425557, + "learning_rate": 4.134041200440967e-06, + "loss": 0.188446044921875, + "step": 87110 + }, + { + "epoch": 0.7532576458482849, + "grad_norm": 6.235890093655849, + "learning_rate": 4.1338525816593094e-06, + "loss": 0.24298248291015626, + "step": 87115 + }, + { + "epoch": 0.7533008793698281, + "grad_norm": 5.104508049976248, + "learning_rate": 4.1336639576484236e-06, + "loss": 0.0878509521484375, + "step": 87120 + }, + { + "epoch": 0.7533441128913715, + "grad_norm": 0.6901392706911523, + "learning_rate": 4.1334753284091785e-06, + "loss": 0.10653800964355468, + "step": 87125 + }, + { + "epoch": 0.7533873464129147, + "grad_norm": 2.050809576750728, + "learning_rate": 4.133286693942445e-06, + "loss": 0.06568603515625, + "step": 87130 + }, + { + "epoch": 0.753430579934458, + "grad_norm": 4.7810891725292795, + "learning_rate": 4.133098054249095e-06, + "loss": 0.051055908203125, + "step": 87135 + }, + { + "epoch": 0.7534738134560013, + "grad_norm": 2.357817474191496, + "learning_rate": 4.132909409329996e-06, + "loss": 0.10389328002929688, + "step": 87140 + }, + { + "epoch": 0.7535170469775445, + "grad_norm": 3.7782107245403393, + "learning_rate": 4.132720759186019e-06, + "loss": 0.08883819580078126, + "step": 87145 + }, + { + "epoch": 0.7535602804990877, + "grad_norm": 6.279109372481647, + "learning_rate": 4.132532103818035e-06, + "loss": 0.1830047607421875, + "step": 87150 + }, + { + "epoch": 0.7536035140206311, + "grad_norm": 0.043277008446505574, + "learning_rate": 4.132343443226912e-06, + "loss": 0.3522144317626953, + "step": 87155 + }, + { + "epoch": 0.7536467475421743, + "grad_norm": 20.40230009306368, + "learning_rate": 4.132154777413521e-06, + "loss": 0.17954483032226562, + "step": 87160 + }, + { + "epoch": 0.7536899810637175, + "grad_norm": 7.726884569418477, + "learning_rate": 4.131966106378732e-06, + "loss": 0.29624786376953127, + "step": 87165 + }, + { + "epoch": 0.7537332145852609, + "grad_norm": 2.919138807446465, + "learning_rate": 4.131777430123416e-06, + "loss": 0.048464202880859376, + "step": 87170 + }, + { + "epoch": 0.7537764481068041, + "grad_norm": 2.854546886806803, + "learning_rate": 4.1315887486484425e-06, + "loss": 0.1147491455078125, + "step": 87175 + }, + { + "epoch": 0.7538196816283473, + "grad_norm": 10.693371101366735, + "learning_rate": 4.131400061954682e-06, + "loss": 0.06273345947265625, + "step": 87180 + }, + { + "epoch": 0.7538629151498906, + "grad_norm": 1.8595379901652114, + "learning_rate": 4.131211370043003e-06, + "loss": 0.013710784912109374, + "step": 87185 + }, + { + "epoch": 0.7539061486714339, + "grad_norm": 5.388792285618186, + "learning_rate": 4.131022672914278e-06, + "loss": 0.3714569091796875, + "step": 87190 + }, + { + "epoch": 0.7539493821929771, + "grad_norm": 2.742957720924458, + "learning_rate": 4.130833970569375e-06, + "loss": 0.09087677001953125, + "step": 87195 + }, + { + "epoch": 0.7539926157145204, + "grad_norm": 9.617204767101809, + "learning_rate": 4.1306452630091686e-06, + "loss": 0.1494720458984375, + "step": 87200 + }, + { + "epoch": 0.7540358492360637, + "grad_norm": 1.6990609100529537, + "learning_rate": 4.130456550234524e-06, + "loss": 0.13287353515625, + "step": 87205 + }, + { + "epoch": 0.7540790827576069, + "grad_norm": 4.956347059456797, + "learning_rate": 4.130267832246314e-06, + "loss": 0.14415969848632812, + "step": 87210 + }, + { + "epoch": 0.7541223162791502, + "grad_norm": 0.9949715004071894, + "learning_rate": 4.130079109045409e-06, + "loss": 0.037322998046875, + "step": 87215 + }, + { + "epoch": 0.7541655498006935, + "grad_norm": 0.8848818831744587, + "learning_rate": 4.129890380632678e-06, + "loss": 0.09047698974609375, + "step": 87220 + }, + { + "epoch": 0.7542087833222367, + "grad_norm": 2.4007975824395347, + "learning_rate": 4.129701647008994e-06, + "loss": 0.02993621826171875, + "step": 87225 + }, + { + "epoch": 0.75425201684378, + "grad_norm": 2.712356649876676, + "learning_rate": 4.129512908175225e-06, + "loss": 0.07207794189453125, + "step": 87230 + }, + { + "epoch": 0.7542952503653233, + "grad_norm": 12.660835043829058, + "learning_rate": 4.129324164132242e-06, + "loss": 0.27523040771484375, + "step": 87235 + }, + { + "epoch": 0.7543384838868665, + "grad_norm": 7.637512561401792, + "learning_rate": 4.129135414880917e-06, + "loss": 0.3451869964599609, + "step": 87240 + }, + { + "epoch": 0.7543817174084098, + "grad_norm": 6.946051554083539, + "learning_rate": 4.128946660422118e-06, + "loss": 0.07064247131347656, + "step": 87245 + }, + { + "epoch": 0.7544249509299531, + "grad_norm": 19.016130961979854, + "learning_rate": 4.128757900756716e-06, + "loss": 0.1189056396484375, + "step": 87250 + }, + { + "epoch": 0.7544681844514963, + "grad_norm": 9.703983746520327, + "learning_rate": 4.128569135885584e-06, + "loss": 0.25740737915039064, + "step": 87255 + }, + { + "epoch": 0.7545114179730396, + "grad_norm": 3.4584820139423313, + "learning_rate": 4.12838036580959e-06, + "loss": 0.0783599853515625, + "step": 87260 + }, + { + "epoch": 0.7545546514945828, + "grad_norm": 7.471208696286041, + "learning_rate": 4.128191590529606e-06, + "loss": 0.08387298583984375, + "step": 87265 + }, + { + "epoch": 0.7545978850161261, + "grad_norm": 1.7762381275067423, + "learning_rate": 4.128002810046502e-06, + "loss": 0.064617919921875, + "step": 87270 + }, + { + "epoch": 0.7546411185376694, + "grad_norm": 1.717155883635366, + "learning_rate": 4.127814024361148e-06, + "loss": 0.25458297729492185, + "step": 87275 + }, + { + "epoch": 0.7546843520592126, + "grad_norm": 2.9484923297630155, + "learning_rate": 4.127625233474417e-06, + "loss": 0.1215423583984375, + "step": 87280 + }, + { + "epoch": 0.7547275855807559, + "grad_norm": 0.22097137205179745, + "learning_rate": 4.127436437387176e-06, + "loss": 0.127825927734375, + "step": 87285 + }, + { + "epoch": 0.7547708191022992, + "grad_norm": 4.823201308759952, + "learning_rate": 4.1272476361002985e-06, + "loss": 0.192425537109375, + "step": 87290 + }, + { + "epoch": 0.7548140526238424, + "grad_norm": 0.14710688092265628, + "learning_rate": 4.127058829614656e-06, + "loss": 0.08120880126953126, + "step": 87295 + }, + { + "epoch": 0.7548572861453857, + "grad_norm": 0.342905392566264, + "learning_rate": 4.126870017931116e-06, + "loss": 0.06571502685546875, + "step": 87300 + }, + { + "epoch": 0.754900519666929, + "grad_norm": 20.877606874159373, + "learning_rate": 4.126681201050551e-06, + "loss": 0.1403339385986328, + "step": 87305 + }, + { + "epoch": 0.7549437531884722, + "grad_norm": 5.014815279474688, + "learning_rate": 4.126492378973832e-06, + "loss": 0.1324798583984375, + "step": 87310 + }, + { + "epoch": 0.7549869867100155, + "grad_norm": 0.5991179761387478, + "learning_rate": 4.12630355170183e-06, + "loss": 0.166070556640625, + "step": 87315 + }, + { + "epoch": 0.7550302202315587, + "grad_norm": 51.116951734848335, + "learning_rate": 4.126114719235416e-06, + "loss": 0.5015140533447265, + "step": 87320 + }, + { + "epoch": 0.755073453753102, + "grad_norm": 1.8318589912398515, + "learning_rate": 4.12592588157546e-06, + "loss": 0.03546333312988281, + "step": 87325 + }, + { + "epoch": 0.7551166872746453, + "grad_norm": 37.597309637370365, + "learning_rate": 4.125737038722833e-06, + "loss": 0.23475875854492187, + "step": 87330 + }, + { + "epoch": 0.7551599207961885, + "grad_norm": 4.742936093081202, + "learning_rate": 4.125548190678407e-06, + "loss": 0.44572906494140624, + "step": 87335 + }, + { + "epoch": 0.7552031543177318, + "grad_norm": 4.502188830795132, + "learning_rate": 4.125359337443052e-06, + "loss": 0.1483783721923828, + "step": 87340 + }, + { + "epoch": 0.7552463878392751, + "grad_norm": 11.199807569553794, + "learning_rate": 4.125170479017638e-06, + "loss": 0.15859375, + "step": 87345 + }, + { + "epoch": 0.7552896213608183, + "grad_norm": 3.9878204111890234, + "learning_rate": 4.124981615403039e-06, + "loss": 0.2435638427734375, + "step": 87350 + }, + { + "epoch": 0.7553328548823616, + "grad_norm": 14.036629555140125, + "learning_rate": 4.124792746600123e-06, + "loss": 0.09242095947265624, + "step": 87355 + }, + { + "epoch": 0.7553760884039048, + "grad_norm": 1.3438763726516003, + "learning_rate": 4.124603872609762e-06, + "loss": 0.4350870132446289, + "step": 87360 + }, + { + "epoch": 0.7554193219254481, + "grad_norm": 16.59584827525914, + "learning_rate": 4.124414993432828e-06, + "loss": 0.1520721435546875, + "step": 87365 + }, + { + "epoch": 0.7554625554469914, + "grad_norm": 33.54384455025707, + "learning_rate": 4.124226109070191e-06, + "loss": 0.13839302062988282, + "step": 87370 + }, + { + "epoch": 0.7555057889685346, + "grad_norm": 20.017421710757656, + "learning_rate": 4.124037219522724e-06, + "loss": 0.18670921325683593, + "step": 87375 + }, + { + "epoch": 0.7555490224900779, + "grad_norm": 0.09912552892777736, + "learning_rate": 4.1238483247912946e-06, + "loss": 0.22368392944335938, + "step": 87380 + }, + { + "epoch": 0.7555922560116212, + "grad_norm": 13.058076738358338, + "learning_rate": 4.1236594248767776e-06, + "loss": 0.0995208740234375, + "step": 87385 + }, + { + "epoch": 0.7556354895331644, + "grad_norm": 1.5710175394667423, + "learning_rate": 4.123470519780042e-06, + "loss": 0.026031494140625, + "step": 87390 + }, + { + "epoch": 0.7556787230547077, + "grad_norm": 25.508813611710405, + "learning_rate": 4.123281609501959e-06, + "loss": 0.1255828857421875, + "step": 87395 + }, + { + "epoch": 0.755721956576251, + "grad_norm": 0.8289621573810992, + "learning_rate": 4.1230926940434015e-06, + "loss": 0.16100082397460938, + "step": 87400 + }, + { + "epoch": 0.7557651900977942, + "grad_norm": 56.86973995871997, + "learning_rate": 4.122903773405239e-06, + "loss": 0.3432472229003906, + "step": 87405 + }, + { + "epoch": 0.7558084236193375, + "grad_norm": 3.483042355464892, + "learning_rate": 4.122714847588344e-06, + "loss": 0.1183563232421875, + "step": 87410 + }, + { + "epoch": 0.7558516571408808, + "grad_norm": 3.915849954792469, + "learning_rate": 4.122525916593587e-06, + "loss": 0.18530311584472656, + "step": 87415 + }, + { + "epoch": 0.755894890662424, + "grad_norm": 25.413016858382797, + "learning_rate": 4.1223369804218396e-06, + "loss": 0.22599639892578124, + "step": 87420 + }, + { + "epoch": 0.7559381241839673, + "grad_norm": 31.274107537140456, + "learning_rate": 4.122148039073973e-06, + "loss": 0.1658966064453125, + "step": 87425 + }, + { + "epoch": 0.7559813577055106, + "grad_norm": 5.4137401114757, + "learning_rate": 4.121959092550859e-06, + "loss": 0.1536083221435547, + "step": 87430 + }, + { + "epoch": 0.7560245912270538, + "grad_norm": 6.756471221050331, + "learning_rate": 4.12177014085337e-06, + "loss": 0.19347763061523438, + "step": 87435 + }, + { + "epoch": 0.756067824748597, + "grad_norm": 24.336581698641634, + "learning_rate": 4.121581183982375e-06, + "loss": 0.11084537506103516, + "step": 87440 + }, + { + "epoch": 0.7561110582701404, + "grad_norm": 14.3084599635627, + "learning_rate": 4.121392221938746e-06, + "loss": 0.288079833984375, + "step": 87445 + }, + { + "epoch": 0.7561542917916836, + "grad_norm": 5.904942298401907, + "learning_rate": 4.121203254723357e-06, + "loss": 0.18040618896484376, + "step": 87450 + }, + { + "epoch": 0.7561975253132268, + "grad_norm": 2.571170594992171, + "learning_rate": 4.121014282337076e-06, + "loss": 0.132220458984375, + "step": 87455 + }, + { + "epoch": 0.7562407588347702, + "grad_norm": 6.287679170139308, + "learning_rate": 4.120825304780777e-06, + "loss": 0.12131824493408203, + "step": 87460 + }, + { + "epoch": 0.7562839923563134, + "grad_norm": 12.13472309916441, + "learning_rate": 4.120636322055331e-06, + "loss": 0.36223602294921875, + "step": 87465 + }, + { + "epoch": 0.7563272258778566, + "grad_norm": 15.919545839746478, + "learning_rate": 4.120447334161609e-06, + "loss": 0.2624382019042969, + "step": 87470 + }, + { + "epoch": 0.7563704593994, + "grad_norm": 3.5598303194691403, + "learning_rate": 4.1202583411004815e-06, + "loss": 0.10702590942382813, + "step": 87475 + }, + { + "epoch": 0.7564136929209432, + "grad_norm": 0.710035624944718, + "learning_rate": 4.120069342872824e-06, + "loss": 0.15772590637207032, + "step": 87480 + }, + { + "epoch": 0.7564569264424864, + "grad_norm": 24.430213548413505, + "learning_rate": 4.119880339479505e-06, + "loss": 0.14805908203125, + "step": 87485 + }, + { + "epoch": 0.7565001599640298, + "grad_norm": 26.45683894811189, + "learning_rate": 4.119691330921396e-06, + "loss": 0.1592193603515625, + "step": 87490 + }, + { + "epoch": 0.756543393485573, + "grad_norm": 14.136899437249635, + "learning_rate": 4.1195023171993695e-06, + "loss": 0.14869441986083984, + "step": 87495 + }, + { + "epoch": 0.7565866270071162, + "grad_norm": 1.46578364584968, + "learning_rate": 4.119313298314298e-06, + "loss": 0.04840850830078125, + "step": 87500 + }, + { + "epoch": 0.7566298605286595, + "grad_norm": 0.7892019503899628, + "learning_rate": 4.1191242742670515e-06, + "loss": 0.2992236614227295, + "step": 87505 + }, + { + "epoch": 0.7566730940502028, + "grad_norm": 5.921727211163373, + "learning_rate": 4.118935245058504e-06, + "loss": 0.0730499267578125, + "step": 87510 + }, + { + "epoch": 0.756716327571746, + "grad_norm": 6.2883926628038145, + "learning_rate": 4.118746210689525e-06, + "loss": 0.16572418212890624, + "step": 87515 + }, + { + "epoch": 0.7567595610932893, + "grad_norm": 2.843783552813995, + "learning_rate": 4.1185571711609875e-06, + "loss": 0.3915271759033203, + "step": 87520 + }, + { + "epoch": 0.7568027946148326, + "grad_norm": 16.334627567503475, + "learning_rate": 4.118368126473764e-06, + "loss": 0.303125, + "step": 87525 + }, + { + "epoch": 0.7568460281363758, + "grad_norm": 24.523681145585957, + "learning_rate": 4.118179076628724e-06, + "loss": 0.14946212768554687, + "step": 87530 + }, + { + "epoch": 0.756889261657919, + "grad_norm": 1.364178289606804, + "learning_rate": 4.117990021626743e-06, + "loss": 0.65330810546875, + "step": 87535 + }, + { + "epoch": 0.7569324951794624, + "grad_norm": 4.948957437309477, + "learning_rate": 4.117800961468688e-06, + "loss": 0.07932510375976562, + "step": 87540 + }, + { + "epoch": 0.7569757287010056, + "grad_norm": 21.518456065030442, + "learning_rate": 4.117611896155437e-06, + "loss": 0.1686492919921875, + "step": 87545 + }, + { + "epoch": 0.7570189622225488, + "grad_norm": 0.24521202812251655, + "learning_rate": 4.117422825687856e-06, + "loss": 0.24055938720703124, + "step": 87550 + }, + { + "epoch": 0.7570621957440922, + "grad_norm": 2.1802581939629184, + "learning_rate": 4.117233750066821e-06, + "loss": 0.08880500793457032, + "step": 87555 + }, + { + "epoch": 0.7571054292656354, + "grad_norm": 1.0609597431891944, + "learning_rate": 4.117044669293201e-06, + "loss": 0.176629638671875, + "step": 87560 + }, + { + "epoch": 0.7571486627871786, + "grad_norm": 7.440603841688025, + "learning_rate": 4.116855583367872e-06, + "loss": 0.28964385986328123, + "step": 87565 + }, + { + "epoch": 0.757191896308722, + "grad_norm": 0.42658443373807314, + "learning_rate": 4.116666492291702e-06, + "loss": 0.10877838134765624, + "step": 87570 + }, + { + "epoch": 0.7572351298302652, + "grad_norm": 38.44905086103207, + "learning_rate": 4.116477396065566e-06, + "loss": 0.22830657958984374, + "step": 87575 + }, + { + "epoch": 0.7572783633518084, + "grad_norm": 0.7912757950157056, + "learning_rate": 4.116288294690335e-06, + "loss": 0.238238525390625, + "step": 87580 + }, + { + "epoch": 0.7573215968733518, + "grad_norm": 3.0919866702481618, + "learning_rate": 4.116099188166879e-06, + "loss": 0.08048095703125, + "step": 87585 + }, + { + "epoch": 0.757364830394895, + "grad_norm": 9.773965453560436, + "learning_rate": 4.1159100764960746e-06, + "loss": 0.12336196899414062, + "step": 87590 + }, + { + "epoch": 0.7574080639164382, + "grad_norm": 16.30888706815507, + "learning_rate": 4.115720959678791e-06, + "loss": 0.14986801147460938, + "step": 87595 + }, + { + "epoch": 0.7574512974379816, + "grad_norm": 9.078011249310299, + "learning_rate": 4.1155318377159e-06, + "loss": 0.052567481994628906, + "step": 87600 + }, + { + "epoch": 0.7574945309595248, + "grad_norm": 6.318176229620599, + "learning_rate": 4.115342710608275e-06, + "loss": 0.04819183349609375, + "step": 87605 + }, + { + "epoch": 0.757537764481068, + "grad_norm": 13.873368983373453, + "learning_rate": 4.115153578356789e-06, + "loss": 0.09530296325683593, + "step": 87610 + }, + { + "epoch": 0.7575809980026113, + "grad_norm": 1.596281280516417, + "learning_rate": 4.114964440962312e-06, + "loss": 0.07265777587890625, + "step": 87615 + }, + { + "epoch": 0.7576242315241546, + "grad_norm": 15.001119908259415, + "learning_rate": 4.114775298425718e-06, + "loss": 0.2163970947265625, + "step": 87620 + }, + { + "epoch": 0.7576674650456978, + "grad_norm": 6.731470448795407, + "learning_rate": 4.114586150747879e-06, + "loss": 0.0897003173828125, + "step": 87625 + }, + { + "epoch": 0.757710698567241, + "grad_norm": 30.84561887742745, + "learning_rate": 4.114396997929667e-06, + "loss": 0.3876808166503906, + "step": 87630 + }, + { + "epoch": 0.7577539320887844, + "grad_norm": 0.8619218987782173, + "learning_rate": 4.114207839971954e-06, + "loss": 0.076226806640625, + "step": 87635 + }, + { + "epoch": 0.7577971656103276, + "grad_norm": 11.66818176877291, + "learning_rate": 4.114018676875613e-06, + "loss": 0.0711761474609375, + "step": 87640 + }, + { + "epoch": 0.7578403991318708, + "grad_norm": 17.673006989548405, + "learning_rate": 4.113829508641517e-06, + "loss": 0.16627197265625, + "step": 87645 + }, + { + "epoch": 0.7578836326534142, + "grad_norm": 18.67695787391542, + "learning_rate": 4.113640335270537e-06, + "loss": 0.23389511108398436, + "step": 87650 + }, + { + "epoch": 0.7579268661749574, + "grad_norm": 1.3875488688401947, + "learning_rate": 4.113451156763547e-06, + "loss": 0.10894775390625, + "step": 87655 + }, + { + "epoch": 0.7579700996965006, + "grad_norm": 0.16326111745861766, + "learning_rate": 4.113261973121418e-06, + "loss": 0.40727920532226564, + "step": 87660 + }, + { + "epoch": 0.758013333218044, + "grad_norm": 8.07377434298047, + "learning_rate": 4.113072784345023e-06, + "loss": 0.06578216552734376, + "step": 87665 + }, + { + "epoch": 0.7580565667395872, + "grad_norm": 3.7240490807032995, + "learning_rate": 4.112883590435235e-06, + "loss": 0.10087890625, + "step": 87670 + }, + { + "epoch": 0.7580998002611304, + "grad_norm": 51.62239411201953, + "learning_rate": 4.112694391392927e-06, + "loss": 0.3349601745605469, + "step": 87675 + }, + { + "epoch": 0.7581430337826738, + "grad_norm": 1.4350140097919852, + "learning_rate": 4.112505187218969e-06, + "loss": 0.04262809753417969, + "step": 87680 + }, + { + "epoch": 0.758186267304217, + "grad_norm": 6.11368454627595, + "learning_rate": 4.112315977914237e-06, + "loss": 0.17142257690429688, + "step": 87685 + }, + { + "epoch": 0.7582295008257602, + "grad_norm": 12.346275732607065, + "learning_rate": 4.112126763479602e-06, + "loss": 0.1134613037109375, + "step": 87690 + }, + { + "epoch": 0.7582727343473036, + "grad_norm": 2.853643552420332, + "learning_rate": 4.111937543915936e-06, + "loss": 0.3211027145385742, + "step": 87695 + }, + { + "epoch": 0.7583159678688468, + "grad_norm": 0.6745339294460306, + "learning_rate": 4.111748319224112e-06, + "loss": 0.30676841735839844, + "step": 87700 + }, + { + "epoch": 0.75835920139039, + "grad_norm": 6.54369000849581, + "learning_rate": 4.111559089405004e-06, + "loss": 0.07781333923339843, + "step": 87705 + }, + { + "epoch": 0.7584024349119333, + "grad_norm": 4.430391623542247, + "learning_rate": 4.111369854459482e-06, + "loss": 0.0985382080078125, + "step": 87710 + }, + { + "epoch": 0.7584456684334766, + "grad_norm": 3.7406971023208175, + "learning_rate": 4.111180614388421e-06, + "loss": 0.13963470458984376, + "step": 87715 + }, + { + "epoch": 0.7584889019550198, + "grad_norm": 8.316962704742942, + "learning_rate": 4.1109913691926945e-06, + "loss": 0.1924560546875, + "step": 87720 + }, + { + "epoch": 0.7585321354765631, + "grad_norm": 7.220782049037121, + "learning_rate": 4.110802118873172e-06, + "loss": 0.377545166015625, + "step": 87725 + }, + { + "epoch": 0.7585753689981064, + "grad_norm": 2.956219269911191, + "learning_rate": 4.11061286343073e-06, + "loss": 0.36701011657714844, + "step": 87730 + }, + { + "epoch": 0.7586186025196496, + "grad_norm": 24.25320770018308, + "learning_rate": 4.110423602866239e-06, + "loss": 0.196728515625, + "step": 87735 + }, + { + "epoch": 0.7586618360411929, + "grad_norm": 17.440244570083824, + "learning_rate": 4.110234337180573e-06, + "loss": 0.05897789001464844, + "step": 87740 + }, + { + "epoch": 0.7587050695627362, + "grad_norm": 1.7045371954451345, + "learning_rate": 4.110045066374603e-06, + "loss": 0.11354827880859375, + "step": 87745 + }, + { + "epoch": 0.7587483030842794, + "grad_norm": 1.17756806348863, + "learning_rate": 4.109855790449205e-06, + "loss": 0.113214111328125, + "step": 87750 + }, + { + "epoch": 0.7587915366058227, + "grad_norm": 4.812266321267527, + "learning_rate": 4.109666509405249e-06, + "loss": 0.0572296142578125, + "step": 87755 + }, + { + "epoch": 0.758834770127366, + "grad_norm": 30.420410567839426, + "learning_rate": 4.109477223243608e-06, + "loss": 0.2860107421875, + "step": 87760 + }, + { + "epoch": 0.7588780036489092, + "grad_norm": 2.906583412362509, + "learning_rate": 4.109287931965158e-06, + "loss": 0.15416297912597657, + "step": 87765 + }, + { + "epoch": 0.7589212371704525, + "grad_norm": 13.213379390414131, + "learning_rate": 4.109098635570769e-06, + "loss": 0.03807525634765625, + "step": 87770 + }, + { + "epoch": 0.7589644706919958, + "grad_norm": 0.7384373364148438, + "learning_rate": 4.108909334061316e-06, + "loss": 0.034368896484375, + "step": 87775 + }, + { + "epoch": 0.759007704213539, + "grad_norm": 2.320601843085421, + "learning_rate": 4.1087200274376705e-06, + "loss": 0.1204010009765625, + "step": 87780 + }, + { + "epoch": 0.7590509377350823, + "grad_norm": 1.5221200640866128, + "learning_rate": 4.108530715700707e-06, + "loss": 0.0580230712890625, + "step": 87785 + }, + { + "epoch": 0.7590941712566255, + "grad_norm": 8.132141703072202, + "learning_rate": 4.108341398851297e-06, + "loss": 0.209942626953125, + "step": 87790 + }, + { + "epoch": 0.7591374047781688, + "grad_norm": 1.4909129462986628, + "learning_rate": 4.108152076890315e-06, + "loss": 0.017693328857421874, + "step": 87795 + }, + { + "epoch": 0.759180638299712, + "grad_norm": 5.3450132666158074, + "learning_rate": 4.107962749818633e-06, + "loss": 0.2912773132324219, + "step": 87800 + }, + { + "epoch": 0.7592238718212553, + "grad_norm": 2.006446942759243, + "learning_rate": 4.107773417637125e-06, + "loss": 0.18449935913085938, + "step": 87805 + }, + { + "epoch": 0.7592671053427986, + "grad_norm": 36.827553661018385, + "learning_rate": 4.107584080346663e-06, + "loss": 0.5679031372070312, + "step": 87810 + }, + { + "epoch": 0.7593103388643418, + "grad_norm": 18.18627418740021, + "learning_rate": 4.107394737948122e-06, + "loss": 0.35564193725585935, + "step": 87815 + }, + { + "epoch": 0.7593535723858851, + "grad_norm": 3.0145218508286207, + "learning_rate": 4.1072053904423745e-06, + "loss": 0.1885009765625, + "step": 87820 + }, + { + "epoch": 0.7593968059074284, + "grad_norm": 5.242884796028356, + "learning_rate": 4.1070160378302925e-06, + "loss": 0.08758544921875, + "step": 87825 + }, + { + "epoch": 0.7594400394289716, + "grad_norm": 26.29734881380694, + "learning_rate": 4.1068266801127515e-06, + "loss": 0.28177490234375, + "step": 87830 + }, + { + "epoch": 0.7594832729505149, + "grad_norm": 32.31817525468004, + "learning_rate": 4.106637317290624e-06, + "loss": 0.15689697265625, + "step": 87835 + }, + { + "epoch": 0.7595265064720582, + "grad_norm": 0.6859976828147555, + "learning_rate": 4.106447949364782e-06, + "loss": 0.08041191101074219, + "step": 87840 + }, + { + "epoch": 0.7595697399936014, + "grad_norm": 6.153938286735018, + "learning_rate": 4.106258576336099e-06, + "loss": 0.0568450927734375, + "step": 87845 + }, + { + "epoch": 0.7596129735151447, + "grad_norm": 11.871945785775639, + "learning_rate": 4.10606919820545e-06, + "loss": 0.185174560546875, + "step": 87850 + }, + { + "epoch": 0.759656207036688, + "grad_norm": 1.506328071690533, + "learning_rate": 4.105879814973708e-06, + "loss": 0.1901031494140625, + "step": 87855 + }, + { + "epoch": 0.7596994405582312, + "grad_norm": 2.9721724955716895, + "learning_rate": 4.105690426641746e-06, + "loss": 0.0743408203125, + "step": 87860 + }, + { + "epoch": 0.7597426740797745, + "grad_norm": 0.32858337475515337, + "learning_rate": 4.105501033210437e-06, + "loss": 0.17546234130859376, + "step": 87865 + }, + { + "epoch": 0.7597859076013178, + "grad_norm": 43.1948269883902, + "learning_rate": 4.105311634680655e-06, + "loss": 0.41693572998046874, + "step": 87870 + }, + { + "epoch": 0.759829141122861, + "grad_norm": 8.272934537079456, + "learning_rate": 4.105122231053274e-06, + "loss": 0.1694976806640625, + "step": 87875 + }, + { + "epoch": 0.7598723746444043, + "grad_norm": 10.12784245574341, + "learning_rate": 4.1049328223291665e-06, + "loss": 0.03522186279296875, + "step": 87880 + }, + { + "epoch": 0.7599156081659475, + "grad_norm": 14.148130347704937, + "learning_rate": 4.104743408509207e-06, + "loss": 0.23330078125, + "step": 87885 + }, + { + "epoch": 0.7599588416874908, + "grad_norm": 12.056077609442555, + "learning_rate": 4.104553989594268e-06, + "loss": 0.17357025146484376, + "step": 87890 + }, + { + "epoch": 0.7600020752090341, + "grad_norm": 3.0703425306345298, + "learning_rate": 4.104364565585223e-06, + "loss": 0.0365631103515625, + "step": 87895 + }, + { + "epoch": 0.7600453087305773, + "grad_norm": 1.1464114830860357, + "learning_rate": 4.104175136482947e-06, + "loss": 0.13207550048828126, + "step": 87900 + }, + { + "epoch": 0.7600885422521206, + "grad_norm": 30.24788305994352, + "learning_rate": 4.103985702288311e-06, + "loss": 0.4962018966674805, + "step": 87905 + }, + { + "epoch": 0.7601317757736639, + "grad_norm": 27.669511781816446, + "learning_rate": 4.103796263002193e-06, + "loss": 0.170721435546875, + "step": 87910 + }, + { + "epoch": 0.7601750092952071, + "grad_norm": 3.111884878512903, + "learning_rate": 4.103606818625464e-06, + "loss": 0.032031822204589847, + "step": 87915 + }, + { + "epoch": 0.7602182428167504, + "grad_norm": 38.818858575957115, + "learning_rate": 4.103417369158997e-06, + "loss": 0.5868728637695313, + "step": 87920 + }, + { + "epoch": 0.7602614763382937, + "grad_norm": 2.9736650444931367, + "learning_rate": 4.103227914603666e-06, + "loss": 0.27208423614501953, + "step": 87925 + }, + { + "epoch": 0.7603047098598369, + "grad_norm": 12.736900730177396, + "learning_rate": 4.103038454960347e-06, + "loss": 0.07763824462890626, + "step": 87930 + }, + { + "epoch": 0.7603479433813802, + "grad_norm": 1.6285990257622047, + "learning_rate": 4.102848990229911e-06, + "loss": 0.1072021484375, + "step": 87935 + }, + { + "epoch": 0.7603911769029235, + "grad_norm": 0.9666787310372714, + "learning_rate": 4.102659520413233e-06, + "loss": 0.11776885986328126, + "step": 87940 + }, + { + "epoch": 0.7604344104244667, + "grad_norm": 0.7867391587126693, + "learning_rate": 4.1024700455111875e-06, + "loss": 0.0624053955078125, + "step": 87945 + }, + { + "epoch": 0.76047764394601, + "grad_norm": 0.5278893872450539, + "learning_rate": 4.1022805655246465e-06, + "loss": 0.090057373046875, + "step": 87950 + }, + { + "epoch": 0.7605208774675533, + "grad_norm": 12.58132328367526, + "learning_rate": 4.102091080454485e-06, + "loss": 0.07779788970947266, + "step": 87955 + }, + { + "epoch": 0.7605641109890965, + "grad_norm": 29.29412497083463, + "learning_rate": 4.101901590301577e-06, + "loss": 0.08189659118652344, + "step": 87960 + }, + { + "epoch": 0.7606073445106397, + "grad_norm": 4.70929496911521, + "learning_rate": 4.101712095066797e-06, + "loss": 0.0866729736328125, + "step": 87965 + }, + { + "epoch": 0.7606505780321831, + "grad_norm": 1.9571688685039734, + "learning_rate": 4.101522594751017e-06, + "loss": 0.1546173095703125, + "step": 87970 + }, + { + "epoch": 0.7606938115537263, + "grad_norm": 9.39208533614935, + "learning_rate": 4.101333089355113e-06, + "loss": 0.1713836669921875, + "step": 87975 + }, + { + "epoch": 0.7607370450752695, + "grad_norm": 3.4439020186821274, + "learning_rate": 4.101143578879957e-06, + "loss": 0.156170654296875, + "step": 87980 + }, + { + "epoch": 0.7607802785968129, + "grad_norm": 5.232244476529405, + "learning_rate": 4.100954063326425e-06, + "loss": 0.40659027099609374, + "step": 87985 + }, + { + "epoch": 0.7608235121183561, + "grad_norm": 6.646667867277535, + "learning_rate": 4.1007645426953904e-06, + "loss": 0.14412384033203124, + "step": 87990 + }, + { + "epoch": 0.7608667456398993, + "grad_norm": 7.08287752507795, + "learning_rate": 4.100575016987726e-06, + "loss": 0.1655853271484375, + "step": 87995 + }, + { + "epoch": 0.7609099791614427, + "grad_norm": 3.767110479634651, + "learning_rate": 4.1003854862043076e-06, + "loss": 0.03341178894042969, + "step": 88000 + }, + { + "epoch": 0.7609532126829859, + "grad_norm": 22.36347355319353, + "learning_rate": 4.100195950346009e-06, + "loss": 0.20470476150512695, + "step": 88005 + }, + { + "epoch": 0.7609964462045291, + "grad_norm": 2.8372624987872697, + "learning_rate": 4.100006409413702e-06, + "loss": 0.2013824462890625, + "step": 88010 + }, + { + "epoch": 0.7610396797260724, + "grad_norm": 8.321387995477117, + "learning_rate": 4.099816863408265e-06, + "loss": 0.2278289794921875, + "step": 88015 + }, + { + "epoch": 0.7610829132476157, + "grad_norm": 15.509737330653394, + "learning_rate": 4.099627312330569e-06, + "loss": 0.1106048583984375, + "step": 88020 + }, + { + "epoch": 0.7611261467691589, + "grad_norm": 2.586580293725584, + "learning_rate": 4.099437756181489e-06, + "loss": 0.23514862060546876, + "step": 88025 + }, + { + "epoch": 0.7611693802907022, + "grad_norm": 4.745949689614772, + "learning_rate": 4.099248194961898e-06, + "loss": 0.36161651611328127, + "step": 88030 + }, + { + "epoch": 0.7612126138122455, + "grad_norm": 0.3851243122717502, + "learning_rate": 4.099058628672673e-06, + "loss": 0.03640670776367187, + "step": 88035 + }, + { + "epoch": 0.7612558473337887, + "grad_norm": 2.853234434206487, + "learning_rate": 4.098869057314686e-06, + "loss": 0.025574874877929688, + "step": 88040 + }, + { + "epoch": 0.761299080855332, + "grad_norm": 0.4412696473405313, + "learning_rate": 4.098679480888812e-06, + "loss": 0.05905609130859375, + "step": 88045 + }, + { + "epoch": 0.7613423143768753, + "grad_norm": 0.18026931011843234, + "learning_rate": 4.098489899395926e-06, + "loss": 0.0444854736328125, + "step": 88050 + }, + { + "epoch": 0.7613855478984185, + "grad_norm": 27.705358202896928, + "learning_rate": 4.098300312836901e-06, + "loss": 0.11858596801757812, + "step": 88055 + }, + { + "epoch": 0.7614287814199617, + "grad_norm": 3.3155479330392947, + "learning_rate": 4.098110721212613e-06, + "loss": 0.45902328491210936, + "step": 88060 + }, + { + "epoch": 0.7614720149415051, + "grad_norm": 6.96293750570868, + "learning_rate": 4.097921124523933e-06, + "loss": 0.11871414184570313, + "step": 88065 + }, + { + "epoch": 0.7615152484630483, + "grad_norm": 7.83657920860511, + "learning_rate": 4.09773152277174e-06, + "loss": 0.050304412841796875, + "step": 88070 + }, + { + "epoch": 0.7615584819845915, + "grad_norm": 20.36093726170591, + "learning_rate": 4.097541915956906e-06, + "loss": 0.2036958694458008, + "step": 88075 + }, + { + "epoch": 0.7616017155061349, + "grad_norm": 5.20711079616148, + "learning_rate": 4.097352304080304e-06, + "loss": 0.2089752197265625, + "step": 88080 + }, + { + "epoch": 0.7616449490276781, + "grad_norm": 25.233589296389987, + "learning_rate": 4.0971626871428125e-06, + "loss": 0.14832191467285155, + "step": 88085 + }, + { + "epoch": 0.7616881825492213, + "grad_norm": 1.4803921732259202, + "learning_rate": 4.096973065145302e-06, + "loss": 0.20292892456054687, + "step": 88090 + }, + { + "epoch": 0.7617314160707647, + "grad_norm": 30.654793698990545, + "learning_rate": 4.0967834380886485e-06, + "loss": 0.11517105102539063, + "step": 88095 + }, + { + "epoch": 0.7617746495923079, + "grad_norm": 18.306501014065333, + "learning_rate": 4.096593805973727e-06, + "loss": 0.09654998779296875, + "step": 88100 + }, + { + "epoch": 0.7618178831138511, + "grad_norm": 11.345081933220818, + "learning_rate": 4.096404168801414e-06, + "loss": 0.13851318359375, + "step": 88105 + }, + { + "epoch": 0.7618611166353945, + "grad_norm": 1.397810290726582, + "learning_rate": 4.096214526572578e-06, + "loss": 0.1206146240234375, + "step": 88110 + }, + { + "epoch": 0.7619043501569377, + "grad_norm": 19.894625334559013, + "learning_rate": 4.0960248792881e-06, + "loss": 0.12807445526123046, + "step": 88115 + }, + { + "epoch": 0.7619475836784809, + "grad_norm": 10.118734012871618, + "learning_rate": 4.095835226948852e-06, + "loss": 0.45457763671875, + "step": 88120 + }, + { + "epoch": 0.7619908172000243, + "grad_norm": 3.630736882154413, + "learning_rate": 4.095645569555708e-06, + "loss": 0.1465057373046875, + "step": 88125 + }, + { + "epoch": 0.7620340507215675, + "grad_norm": 58.91631643975708, + "learning_rate": 4.095455907109544e-06, + "loss": 0.2608772277832031, + "step": 88130 + }, + { + "epoch": 0.7620772842431107, + "grad_norm": 7.204833878790414, + "learning_rate": 4.095266239611234e-06, + "loss": 0.11872940063476563, + "step": 88135 + }, + { + "epoch": 0.762120517764654, + "grad_norm": 0.938737968619357, + "learning_rate": 4.095076567061653e-06, + "loss": 0.0274658203125, + "step": 88140 + }, + { + "epoch": 0.7621637512861973, + "grad_norm": 0.5782105284011337, + "learning_rate": 4.094886889461675e-06, + "loss": 0.3177825927734375, + "step": 88145 + }, + { + "epoch": 0.7622069848077405, + "grad_norm": 18.7071138440466, + "learning_rate": 4.094697206812175e-06, + "loss": 0.10093612670898437, + "step": 88150 + }, + { + "epoch": 0.7622502183292837, + "grad_norm": 8.89103623914524, + "learning_rate": 4.0945075191140295e-06, + "loss": 0.07777938842773438, + "step": 88155 + }, + { + "epoch": 0.7622934518508271, + "grad_norm": 2.312646752699992, + "learning_rate": 4.094317826368111e-06, + "loss": 0.26107940673828123, + "step": 88160 + }, + { + "epoch": 0.7623366853723703, + "grad_norm": 1.3264203919848607, + "learning_rate": 4.094128128575296e-06, + "loss": 0.23668441772460938, + "step": 88165 + }, + { + "epoch": 0.7623799188939135, + "grad_norm": 11.1197743192118, + "learning_rate": 4.093938425736459e-06, + "loss": 0.23857498168945312, + "step": 88170 + }, + { + "epoch": 0.7624231524154569, + "grad_norm": 2.8032020323389446, + "learning_rate": 4.093748717852474e-06, + "loss": 0.14988632202148439, + "step": 88175 + }, + { + "epoch": 0.7624663859370001, + "grad_norm": 0.05472685793293271, + "learning_rate": 4.093559004924217e-06, + "loss": 0.17306938171386718, + "step": 88180 + }, + { + "epoch": 0.7625096194585433, + "grad_norm": 48.17472555749264, + "learning_rate": 4.093369286952563e-06, + "loss": 0.5504280090332031, + "step": 88185 + }, + { + "epoch": 0.7625528529800867, + "grad_norm": 0.047948152422368905, + "learning_rate": 4.093179563938385e-06, + "loss": 0.11340141296386719, + "step": 88190 + }, + { + "epoch": 0.7625960865016299, + "grad_norm": 15.0113019284496, + "learning_rate": 4.092989835882561e-06, + "loss": 0.4784088134765625, + "step": 88195 + }, + { + "epoch": 0.7626393200231731, + "grad_norm": 7.31004097249342, + "learning_rate": 4.092800102785964e-06, + "loss": 0.02872772216796875, + "step": 88200 + }, + { + "epoch": 0.7626825535447165, + "grad_norm": 25.677290082233554, + "learning_rate": 4.092610364649469e-06, + "loss": 0.121392822265625, + "step": 88205 + }, + { + "epoch": 0.7627257870662597, + "grad_norm": 16.667360252600997, + "learning_rate": 4.0924206214739515e-06, + "loss": 0.3907470703125, + "step": 88210 + }, + { + "epoch": 0.7627690205878029, + "grad_norm": 4.1205774178401136, + "learning_rate": 4.0922308732602865e-06, + "loss": 0.203997802734375, + "step": 88215 + }, + { + "epoch": 0.7628122541093463, + "grad_norm": 25.929873875036446, + "learning_rate": 4.09204112000935e-06, + "loss": 0.23928565979003907, + "step": 88220 + }, + { + "epoch": 0.7628554876308895, + "grad_norm": 12.85074259867361, + "learning_rate": 4.091851361722016e-06, + "loss": 0.120513916015625, + "step": 88225 + }, + { + "epoch": 0.7628987211524327, + "grad_norm": 4.131064812646353, + "learning_rate": 4.091661598399161e-06, + "loss": 0.33769378662109373, + "step": 88230 + }, + { + "epoch": 0.762941954673976, + "grad_norm": 0.9887374570647189, + "learning_rate": 4.091471830041657e-06, + "loss": 0.208123779296875, + "step": 88235 + }, + { + "epoch": 0.7629851881955193, + "grad_norm": 1.2144977439925775, + "learning_rate": 4.091282056650383e-06, + "loss": 0.12266082763671875, + "step": 88240 + }, + { + "epoch": 0.7630284217170625, + "grad_norm": 26.5288890900108, + "learning_rate": 4.091092278226212e-06, + "loss": 0.2036651611328125, + "step": 88245 + }, + { + "epoch": 0.7630716552386058, + "grad_norm": 1.1477511306165593, + "learning_rate": 4.09090249477002e-06, + "loss": 0.16252670288085938, + "step": 88250 + }, + { + "epoch": 0.7631148887601491, + "grad_norm": 1.0042096623764938, + "learning_rate": 4.090712706282681e-06, + "loss": 0.25631027221679686, + "step": 88255 + }, + { + "epoch": 0.7631581222816923, + "grad_norm": 1.9179715338829493, + "learning_rate": 4.090522912765073e-06, + "loss": 0.02263641357421875, + "step": 88260 + }, + { + "epoch": 0.7632013558032356, + "grad_norm": 0.04466159983478387, + "learning_rate": 4.090333114218069e-06, + "loss": 0.17094078063964843, + "step": 88265 + }, + { + "epoch": 0.7632445893247789, + "grad_norm": 2.512345315985398, + "learning_rate": 4.090143310642545e-06, + "loss": 0.1156158447265625, + "step": 88270 + }, + { + "epoch": 0.7632878228463221, + "grad_norm": 0.4523114392767762, + "learning_rate": 4.089953502039376e-06, + "loss": 0.06665802001953125, + "step": 88275 + }, + { + "epoch": 0.7633310563678654, + "grad_norm": 35.52014339979111, + "learning_rate": 4.0897636884094386e-06, + "loss": 0.1726470947265625, + "step": 88280 + }, + { + "epoch": 0.7633742898894087, + "grad_norm": 13.556639373823037, + "learning_rate": 4.089573869753607e-06, + "loss": 0.20626506805419922, + "step": 88285 + }, + { + "epoch": 0.7634175234109519, + "grad_norm": 1.0583721517506792, + "learning_rate": 4.089384046072757e-06, + "loss": 0.0501495361328125, + "step": 88290 + }, + { + "epoch": 0.7634607569324952, + "grad_norm": 1.1060657897943003, + "learning_rate": 4.089194217367763e-06, + "loss": 0.37361679077148435, + "step": 88295 + }, + { + "epoch": 0.7635039904540385, + "grad_norm": 0.8543765405969937, + "learning_rate": 4.089004383639503e-06, + "loss": 0.04571380615234375, + "step": 88300 + }, + { + "epoch": 0.7635472239755817, + "grad_norm": 12.416954416378559, + "learning_rate": 4.088814544888851e-06, + "loss": 0.49027099609375, + "step": 88305 + }, + { + "epoch": 0.763590457497125, + "grad_norm": 23.023468350203686, + "learning_rate": 4.088624701116682e-06, + "loss": 0.30175628662109377, + "step": 88310 + }, + { + "epoch": 0.7636336910186682, + "grad_norm": 5.772129219392071, + "learning_rate": 4.088434852323872e-06, + "loss": 0.063592529296875, + "step": 88315 + }, + { + "epoch": 0.7636769245402115, + "grad_norm": 23.47910091317815, + "learning_rate": 4.088244998511296e-06, + "loss": 0.1917510986328125, + "step": 88320 + }, + { + "epoch": 0.7637201580617548, + "grad_norm": 0.6989253166063701, + "learning_rate": 4.088055139679832e-06, + "loss": 0.01125640869140625, + "step": 88325 + }, + { + "epoch": 0.763763391583298, + "grad_norm": 22.3773018118117, + "learning_rate": 4.087865275830352e-06, + "loss": 0.239154052734375, + "step": 88330 + }, + { + "epoch": 0.7638066251048413, + "grad_norm": 3.8295957467381223, + "learning_rate": 4.087675406963734e-06, + "loss": 0.16855621337890625, + "step": 88335 + }, + { + "epoch": 0.7638498586263845, + "grad_norm": 10.25491710706629, + "learning_rate": 4.0874855330808546e-06, + "loss": 0.15001792907714845, + "step": 88340 + }, + { + "epoch": 0.7638930921479278, + "grad_norm": 3.297502409254878, + "learning_rate": 4.087295654182586e-06, + "loss": 0.028066253662109374, + "step": 88345 + }, + { + "epoch": 0.7639363256694711, + "grad_norm": 13.583235456127948, + "learning_rate": 4.087105770269807e-06, + "loss": 0.08200759887695312, + "step": 88350 + }, + { + "epoch": 0.7639795591910143, + "grad_norm": 2.8084548893549703, + "learning_rate": 4.086915881343392e-06, + "loss": 0.1956512451171875, + "step": 88355 + }, + { + "epoch": 0.7640227927125576, + "grad_norm": 16.82705870773304, + "learning_rate": 4.086725987404217e-06, + "loss": 0.06440887451171876, + "step": 88360 + }, + { + "epoch": 0.7640660262341009, + "grad_norm": 4.198197082685626, + "learning_rate": 4.086536088453158e-06, + "loss": 0.20842208862304687, + "step": 88365 + }, + { + "epoch": 0.7641092597556441, + "grad_norm": 9.189068791045985, + "learning_rate": 4.08634618449109e-06, + "loss": 0.111627197265625, + "step": 88370 + }, + { + "epoch": 0.7641524932771874, + "grad_norm": 14.377681405987692, + "learning_rate": 4.08615627551889e-06, + "loss": 0.06991500854492187, + "step": 88375 + }, + { + "epoch": 0.7641957267987307, + "grad_norm": 15.430427342075552, + "learning_rate": 4.085966361537432e-06, + "loss": 0.0946441650390625, + "step": 88380 + }, + { + "epoch": 0.7642389603202739, + "grad_norm": 0.5839747594214372, + "learning_rate": 4.085776442547595e-06, + "loss": 0.0386688232421875, + "step": 88385 + }, + { + "epoch": 0.7642821938418172, + "grad_norm": 13.697364259131582, + "learning_rate": 4.085586518550252e-06, + "loss": 0.34368743896484377, + "step": 88390 + }, + { + "epoch": 0.7643254273633605, + "grad_norm": 2.9509589865029513, + "learning_rate": 4.08539658954628e-06, + "loss": 0.15479888916015624, + "step": 88395 + }, + { + "epoch": 0.7643686608849037, + "grad_norm": 0.7832256505727672, + "learning_rate": 4.085206655536554e-06, + "loss": 0.046876144409179685, + "step": 88400 + }, + { + "epoch": 0.764411894406447, + "grad_norm": 4.222565817385386, + "learning_rate": 4.085016716521952e-06, + "loss": 0.07853660583496094, + "step": 88405 + }, + { + "epoch": 0.7644551279279902, + "grad_norm": 9.592700319212822, + "learning_rate": 4.084826772503349e-06, + "loss": 0.220513916015625, + "step": 88410 + }, + { + "epoch": 0.7644983614495335, + "grad_norm": 52.70691124545338, + "learning_rate": 4.084636823481619e-06, + "loss": 0.2763916015625, + "step": 88415 + }, + { + "epoch": 0.7645415949710768, + "grad_norm": 5.103617129061796, + "learning_rate": 4.084446869457642e-06, + "loss": 0.0603485107421875, + "step": 88420 + }, + { + "epoch": 0.76458482849262, + "grad_norm": 9.63511041952545, + "learning_rate": 4.084256910432291e-06, + "loss": 0.27801361083984377, + "step": 88425 + }, + { + "epoch": 0.7646280620141633, + "grad_norm": 22.540420703515455, + "learning_rate": 4.0840669464064426e-06, + "loss": 0.2219940185546875, + "step": 88430 + }, + { + "epoch": 0.7646712955357066, + "grad_norm": 0.7116449641027637, + "learning_rate": 4.083876977380972e-06, + "loss": 0.06632270812988281, + "step": 88435 + }, + { + "epoch": 0.7647145290572498, + "grad_norm": 6.622223383110608, + "learning_rate": 4.083687003356759e-06, + "loss": 0.159405517578125, + "step": 88440 + }, + { + "epoch": 0.7647577625787931, + "grad_norm": 6.811804694472518, + "learning_rate": 4.083497024334676e-06, + "loss": 0.43880157470703124, + "step": 88445 + }, + { + "epoch": 0.7648009961003364, + "grad_norm": 0.1823433594520553, + "learning_rate": 4.083307040315601e-06, + "loss": 0.03882293701171875, + "step": 88450 + }, + { + "epoch": 0.7648442296218796, + "grad_norm": 23.902081469454483, + "learning_rate": 4.083117051300409e-06, + "loss": 0.2495880126953125, + "step": 88455 + }, + { + "epoch": 0.7648874631434229, + "grad_norm": 6.54549460022931, + "learning_rate": 4.082927057289977e-06, + "loss": 0.06724357604980469, + "step": 88460 + }, + { + "epoch": 0.7649306966649662, + "grad_norm": 0.22568586258779894, + "learning_rate": 4.082737058285181e-06, + "loss": 0.3123291015625, + "step": 88465 + }, + { + "epoch": 0.7649739301865094, + "grad_norm": 0.26688101830009386, + "learning_rate": 4.082547054286899e-06, + "loss": 0.0333648681640625, + "step": 88470 + }, + { + "epoch": 0.7650171637080527, + "grad_norm": 2.2707968686449016, + "learning_rate": 4.082357045296003e-06, + "loss": 0.12317008972167968, + "step": 88475 + }, + { + "epoch": 0.765060397229596, + "grad_norm": 7.894526285220393, + "learning_rate": 4.082167031313373e-06, + "loss": 0.07462654113769532, + "step": 88480 + }, + { + "epoch": 0.7651036307511392, + "grad_norm": 9.63688045407826, + "learning_rate": 4.081977012339886e-06, + "loss": 0.03615264892578125, + "step": 88485 + }, + { + "epoch": 0.7651468642726824, + "grad_norm": 6.865884601413144, + "learning_rate": 4.081786988376414e-06, + "loss": 0.059112548828125, + "step": 88490 + }, + { + "epoch": 0.7651900977942258, + "grad_norm": 6.810304550027398, + "learning_rate": 4.081596959423836e-06, + "loss": 0.11797103881835938, + "step": 88495 + }, + { + "epoch": 0.765233331315769, + "grad_norm": 15.033456475008395, + "learning_rate": 4.081406925483031e-06, + "loss": 0.06833877563476562, + "step": 88500 + }, + { + "epoch": 0.7652765648373122, + "grad_norm": 3.6975996051656805, + "learning_rate": 4.0812168865548714e-06, + "loss": 0.04427623748779297, + "step": 88505 + }, + { + "epoch": 0.7653197983588556, + "grad_norm": 1.8108756965613413, + "learning_rate": 4.081026842640235e-06, + "loss": 0.1518646240234375, + "step": 88510 + }, + { + "epoch": 0.7653630318803988, + "grad_norm": 8.148736238385633, + "learning_rate": 4.0808367937399975e-06, + "loss": 0.143316650390625, + "step": 88515 + }, + { + "epoch": 0.765406265401942, + "grad_norm": 25.45441736685452, + "learning_rate": 4.080646739855038e-06, + "loss": 0.4404327392578125, + "step": 88520 + }, + { + "epoch": 0.7654494989234853, + "grad_norm": 0.2719219055727268, + "learning_rate": 4.08045668098623e-06, + "loss": 0.058481597900390626, + "step": 88525 + }, + { + "epoch": 0.7654927324450286, + "grad_norm": 7.200652462181781, + "learning_rate": 4.080266617134451e-06, + "loss": 0.03913459777832031, + "step": 88530 + }, + { + "epoch": 0.7655359659665718, + "grad_norm": 10.887944934740894, + "learning_rate": 4.080076548300579e-06, + "loss": 0.43805389404296874, + "step": 88535 + }, + { + "epoch": 0.7655791994881151, + "grad_norm": 0.5122324000720735, + "learning_rate": 4.079886474485488e-06, + "loss": 0.026507568359375, + "step": 88540 + }, + { + "epoch": 0.7656224330096584, + "grad_norm": 1.5256424166513523, + "learning_rate": 4.079696395690056e-06, + "loss": 0.0793304443359375, + "step": 88545 + }, + { + "epoch": 0.7656656665312016, + "grad_norm": 4.293838467329688, + "learning_rate": 4.079506311915162e-06, + "loss": 0.19877243041992188, + "step": 88550 + }, + { + "epoch": 0.765708900052745, + "grad_norm": 8.820863889892257, + "learning_rate": 4.0793162231616776e-06, + "loss": 0.1054962158203125, + "step": 88555 + }, + { + "epoch": 0.7657521335742882, + "grad_norm": 3.7830184044743413, + "learning_rate": 4.079126129430484e-06, + "loss": 0.29591522216796873, + "step": 88560 + }, + { + "epoch": 0.7657953670958314, + "grad_norm": 3.3599506293816526, + "learning_rate": 4.078936030722455e-06, + "loss": 0.070697021484375, + "step": 88565 + }, + { + "epoch": 0.7658386006173747, + "grad_norm": 0.7774471665652, + "learning_rate": 4.078745927038469e-06, + "loss": 0.6363418579101563, + "step": 88570 + }, + { + "epoch": 0.765881834138918, + "grad_norm": 24.15832978297043, + "learning_rate": 4.0785558183794e-06, + "loss": 0.2882511138916016, + "step": 88575 + }, + { + "epoch": 0.7659250676604612, + "grad_norm": 0.51887695317654, + "learning_rate": 4.07836570474613e-06, + "loss": 0.1802398681640625, + "step": 88580 + }, + { + "epoch": 0.7659683011820044, + "grad_norm": 3.70422776362004, + "learning_rate": 4.078175586139531e-06, + "loss": 0.1060546875, + "step": 88585 + }, + { + "epoch": 0.7660115347035478, + "grad_norm": 3.8316765277700715, + "learning_rate": 4.0779854625604814e-06, + "loss": 0.08932609558105468, + "step": 88590 + }, + { + "epoch": 0.766054768225091, + "grad_norm": 3.666254818165526, + "learning_rate": 4.077795334009859e-06, + "loss": 0.5143692016601562, + "step": 88595 + }, + { + "epoch": 0.7660980017466342, + "grad_norm": 2.982577550202131, + "learning_rate": 4.077605200488538e-06, + "loss": 0.235986328125, + "step": 88600 + }, + { + "epoch": 0.7661412352681776, + "grad_norm": 9.429647004692216, + "learning_rate": 4.077415061997399e-06, + "loss": 0.05883331298828125, + "step": 88605 + }, + { + "epoch": 0.7661844687897208, + "grad_norm": 1.3180932239689516, + "learning_rate": 4.077224918537316e-06, + "loss": 0.038698577880859376, + "step": 88610 + }, + { + "epoch": 0.766227702311264, + "grad_norm": 4.386876535955838, + "learning_rate": 4.077034770109167e-06, + "loss": 0.03957939147949219, + "step": 88615 + }, + { + "epoch": 0.7662709358328074, + "grad_norm": 18.84361513503119, + "learning_rate": 4.076844616713829e-06, + "loss": 0.07990226745605469, + "step": 88620 + }, + { + "epoch": 0.7663141693543506, + "grad_norm": 2.6599235665501126, + "learning_rate": 4.0766544583521786e-06, + "loss": 0.09790802001953125, + "step": 88625 + }, + { + "epoch": 0.7663574028758938, + "grad_norm": 13.98465304444216, + "learning_rate": 4.076464295025093e-06, + "loss": 0.08612747192382812, + "step": 88630 + }, + { + "epoch": 0.7664006363974372, + "grad_norm": 0.451712621273286, + "learning_rate": 4.076274126733448e-06, + "loss": 0.20446243286132812, + "step": 88635 + }, + { + "epoch": 0.7664438699189804, + "grad_norm": 9.540176509660041, + "learning_rate": 4.076083953478123e-06, + "loss": 0.21952667236328124, + "step": 88640 + }, + { + "epoch": 0.7664871034405236, + "grad_norm": 7.716946879912023, + "learning_rate": 4.075893775259994e-06, + "loss": 0.0956451416015625, + "step": 88645 + }, + { + "epoch": 0.766530336962067, + "grad_norm": 2.060944606217714, + "learning_rate": 4.075703592079938e-06, + "loss": 0.11698036193847657, + "step": 88650 + }, + { + "epoch": 0.7665735704836102, + "grad_norm": 1.4036622672688857, + "learning_rate": 4.0755134039388325e-06, + "loss": 0.10289306640625, + "step": 88655 + }, + { + "epoch": 0.7666168040051534, + "grad_norm": 2.9264483681666267, + "learning_rate": 4.075323210837553e-06, + "loss": 0.11362800598144532, + "step": 88660 + }, + { + "epoch": 0.7666600375266966, + "grad_norm": 0.16484105333640145, + "learning_rate": 4.075133012776979e-06, + "loss": 0.08588104248046875, + "step": 88665 + }, + { + "epoch": 0.76670327104824, + "grad_norm": 70.56549973503687, + "learning_rate": 4.0749428097579855e-06, + "loss": 0.34661102294921875, + "step": 88670 + }, + { + "epoch": 0.7667465045697832, + "grad_norm": 10.879324182568608, + "learning_rate": 4.074752601781451e-06, + "loss": 0.06640892028808594, + "step": 88675 + }, + { + "epoch": 0.7667897380913264, + "grad_norm": 15.043220665578678, + "learning_rate": 4.074562388848254e-06, + "loss": 0.2690338134765625, + "step": 88680 + }, + { + "epoch": 0.7668329716128698, + "grad_norm": 22.254717460329736, + "learning_rate": 4.074372170959268e-06, + "loss": 0.3881988525390625, + "step": 88685 + }, + { + "epoch": 0.766876205134413, + "grad_norm": 4.298740266267062, + "learning_rate": 4.0741819481153736e-06, + "loss": 0.18646469116210937, + "step": 88690 + }, + { + "epoch": 0.7669194386559562, + "grad_norm": 3.523940179607365, + "learning_rate": 4.073991720317447e-06, + "loss": 0.04113922119140625, + "step": 88695 + }, + { + "epoch": 0.7669626721774996, + "grad_norm": 3.7030639001331407, + "learning_rate": 4.073801487566365e-06, + "loss": 0.07197151184082032, + "step": 88700 + }, + { + "epoch": 0.7670059056990428, + "grad_norm": 1.2705457671142182, + "learning_rate": 4.073611249863005e-06, + "loss": 0.1175323486328125, + "step": 88705 + }, + { + "epoch": 0.767049139220586, + "grad_norm": 0.821921961526278, + "learning_rate": 4.073421007208247e-06, + "loss": 0.20909194946289061, + "step": 88710 + }, + { + "epoch": 0.7670923727421294, + "grad_norm": 45.53459360630102, + "learning_rate": 4.0732307596029635e-06, + "loss": 0.5385696411132812, + "step": 88715 + }, + { + "epoch": 0.7671356062636726, + "grad_norm": 11.333787240803852, + "learning_rate": 4.073040507048036e-06, + "loss": 0.2838565826416016, + "step": 88720 + }, + { + "epoch": 0.7671788397852158, + "grad_norm": 0.6332459250677049, + "learning_rate": 4.0728502495443405e-06, + "loss": 0.052356719970703125, + "step": 88725 + }, + { + "epoch": 0.7672220733067592, + "grad_norm": 0.8242145741852579, + "learning_rate": 4.072659987092753e-06, + "loss": 0.16197547912597657, + "step": 88730 + }, + { + "epoch": 0.7672653068283024, + "grad_norm": 0.5400451839729956, + "learning_rate": 4.072469719694155e-06, + "loss": 0.10502052307128906, + "step": 88735 + }, + { + "epoch": 0.7673085403498456, + "grad_norm": 9.648511815718722, + "learning_rate": 4.072279447349419e-06, + "loss": 0.27523651123046877, + "step": 88740 + }, + { + "epoch": 0.7673517738713889, + "grad_norm": 0.33754444744405127, + "learning_rate": 4.072089170059426e-06, + "loss": 0.019792938232421876, + "step": 88745 + }, + { + "epoch": 0.7673950073929322, + "grad_norm": 2.7901568638805636, + "learning_rate": 4.071898887825052e-06, + "loss": 0.08518199920654297, + "step": 88750 + }, + { + "epoch": 0.7674382409144754, + "grad_norm": 5.903747070781686, + "learning_rate": 4.0717086006471766e-06, + "loss": 0.08018951416015625, + "step": 88755 + }, + { + "epoch": 0.7674814744360187, + "grad_norm": 56.9880419972909, + "learning_rate": 4.071518308526675e-06, + "loss": 0.3866889953613281, + "step": 88760 + }, + { + "epoch": 0.767524707957562, + "grad_norm": 3.9381929769013877, + "learning_rate": 4.071328011464424e-06, + "loss": 0.14537353515625, + "step": 88765 + }, + { + "epoch": 0.7675679414791052, + "grad_norm": 3.4489850118667906, + "learning_rate": 4.071137709461305e-06, + "loss": 0.2313446044921875, + "step": 88770 + }, + { + "epoch": 0.7676111750006485, + "grad_norm": 5.302392699647011, + "learning_rate": 4.070947402518193e-06, + "loss": 0.27090911865234374, + "step": 88775 + }, + { + "epoch": 0.7676544085221918, + "grad_norm": 41.067038013649075, + "learning_rate": 4.070757090635965e-06, + "loss": 0.2809356689453125, + "step": 88780 + }, + { + "epoch": 0.767697642043735, + "grad_norm": 2.3434282318305604, + "learning_rate": 4.070566773815501e-06, + "loss": 0.15230865478515626, + "step": 88785 + }, + { + "epoch": 0.7677408755652783, + "grad_norm": 2.0888211139492574, + "learning_rate": 4.0703764520576784e-06, + "loss": 0.09104232788085938, + "step": 88790 + }, + { + "epoch": 0.7677841090868216, + "grad_norm": 1.0393515905285853, + "learning_rate": 4.070186125363373e-06, + "loss": 0.11739540100097656, + "step": 88795 + }, + { + "epoch": 0.7678273426083648, + "grad_norm": 1.7329122440826663, + "learning_rate": 4.069995793733464e-06, + "loss": 0.13266448974609374, + "step": 88800 + }, + { + "epoch": 0.7678705761299081, + "grad_norm": 8.790055236034934, + "learning_rate": 4.069805457168829e-06, + "loss": 0.06142120361328125, + "step": 88805 + }, + { + "epoch": 0.7679138096514514, + "grad_norm": 19.090453529539328, + "learning_rate": 4.069615115670346e-06, + "loss": 0.03917999267578125, + "step": 88810 + }, + { + "epoch": 0.7679570431729946, + "grad_norm": 21.593239933797697, + "learning_rate": 4.069424769238892e-06, + "loss": 0.225714111328125, + "step": 88815 + }, + { + "epoch": 0.7680002766945379, + "grad_norm": 0.505743659025484, + "learning_rate": 4.069234417875346e-06, + "loss": 0.0912240982055664, + "step": 88820 + }, + { + "epoch": 0.7680435102160812, + "grad_norm": 1.0410751888563732, + "learning_rate": 4.069044061580585e-06, + "loss": 0.09217529296875, + "step": 88825 + }, + { + "epoch": 0.7680867437376244, + "grad_norm": 20.875681954389634, + "learning_rate": 4.068853700355487e-06, + "loss": 0.34829254150390626, + "step": 88830 + }, + { + "epoch": 0.7681299772591677, + "grad_norm": 61.886971578677105, + "learning_rate": 4.0686633342009305e-06, + "loss": 0.323345947265625, + "step": 88835 + }, + { + "epoch": 0.7681732107807109, + "grad_norm": 8.357360528412139, + "learning_rate": 4.068472963117794e-06, + "loss": 0.0918670654296875, + "step": 88840 + }, + { + "epoch": 0.7682164443022542, + "grad_norm": 0.484329014935575, + "learning_rate": 4.068282587106953e-06, + "loss": 0.07001113891601562, + "step": 88845 + }, + { + "epoch": 0.7682596778237974, + "grad_norm": 3.8549070042320612, + "learning_rate": 4.068092206169289e-06, + "loss": 0.16617202758789062, + "step": 88850 + }, + { + "epoch": 0.7683029113453407, + "grad_norm": 0.31573677940520073, + "learning_rate": 4.067901820305676e-06, + "loss": 0.3488361358642578, + "step": 88855 + }, + { + "epoch": 0.768346144866884, + "grad_norm": 11.970281377206241, + "learning_rate": 4.067711429516995e-06, + "loss": 0.08647842407226562, + "step": 88860 + }, + { + "epoch": 0.7683893783884272, + "grad_norm": 2.8474887221811462, + "learning_rate": 4.067521033804124e-06, + "loss": 0.13642349243164062, + "step": 88865 + }, + { + "epoch": 0.7684326119099705, + "grad_norm": 7.8240421003329494, + "learning_rate": 4.06733063316794e-06, + "loss": 0.09572677612304688, + "step": 88870 + }, + { + "epoch": 0.7684758454315138, + "grad_norm": 2.0089954740671723, + "learning_rate": 4.067140227609321e-06, + "loss": 0.1959014892578125, + "step": 88875 + }, + { + "epoch": 0.768519078953057, + "grad_norm": 1.8439998574045229, + "learning_rate": 4.066949817129146e-06, + "loss": 0.16513099670410156, + "step": 88880 + }, + { + "epoch": 0.7685623124746003, + "grad_norm": 9.681202933898081, + "learning_rate": 4.066759401728292e-06, + "loss": 0.29974365234375, + "step": 88885 + }, + { + "epoch": 0.7686055459961436, + "grad_norm": 6.116667800153793, + "learning_rate": 4.066568981407638e-06, + "loss": 0.12253303527832031, + "step": 88890 + }, + { + "epoch": 0.7686487795176868, + "grad_norm": 53.955323783659814, + "learning_rate": 4.0663785561680635e-06, + "loss": 0.28920440673828124, + "step": 88895 + }, + { + "epoch": 0.7686920130392301, + "grad_norm": 29.38039922292704, + "learning_rate": 4.066188126010444e-06, + "loss": 0.2875640869140625, + "step": 88900 + }, + { + "epoch": 0.7687352465607734, + "grad_norm": 0.06890283773420544, + "learning_rate": 4.065997690935659e-06, + "loss": 0.11976737976074218, + "step": 88905 + }, + { + "epoch": 0.7687784800823166, + "grad_norm": 1.0532519504819402, + "learning_rate": 4.065807250944587e-06, + "loss": 0.052575302124023435, + "step": 88910 + }, + { + "epoch": 0.7688217136038599, + "grad_norm": 20.909069070665144, + "learning_rate": 4.065616806038106e-06, + "loss": 0.09281349182128906, + "step": 88915 + }, + { + "epoch": 0.7688649471254031, + "grad_norm": 7.0344289102834585, + "learning_rate": 4.065426356217096e-06, + "loss": 0.30728683471679685, + "step": 88920 + }, + { + "epoch": 0.7689081806469464, + "grad_norm": 0.07101874564593313, + "learning_rate": 4.065235901482432e-06, + "loss": 0.19368972778320312, + "step": 88925 + }, + { + "epoch": 0.7689514141684897, + "grad_norm": 1.3523786139315617, + "learning_rate": 4.065045441834995e-06, + "loss": 0.02066497802734375, + "step": 88930 + }, + { + "epoch": 0.7689946476900329, + "grad_norm": 29.708022539863105, + "learning_rate": 4.064854977275662e-06, + "loss": 0.28450736999511717, + "step": 88935 + }, + { + "epoch": 0.7690378812115762, + "grad_norm": 1.2531955990764825, + "learning_rate": 4.064664507805312e-06, + "loss": 0.06057815551757813, + "step": 88940 + }, + { + "epoch": 0.7690811147331195, + "grad_norm": 2.648166639248711, + "learning_rate": 4.064474033424824e-06, + "loss": 0.053461456298828126, + "step": 88945 + }, + { + "epoch": 0.7691243482546627, + "grad_norm": 1.765610309414859, + "learning_rate": 4.064283554135076e-06, + "loss": 0.2899810791015625, + "step": 88950 + }, + { + "epoch": 0.769167581776206, + "grad_norm": 0.08895257500691489, + "learning_rate": 4.0640930699369446e-06, + "loss": 0.11270523071289062, + "step": 88955 + }, + { + "epoch": 0.7692108152977493, + "grad_norm": 2.334609031774952, + "learning_rate": 4.063902580831312e-06, + "loss": 0.06326446533203126, + "step": 88960 + }, + { + "epoch": 0.7692540488192925, + "grad_norm": 22.948190781791826, + "learning_rate": 4.0637120868190535e-06, + "loss": 0.13563690185546876, + "step": 88965 + }, + { + "epoch": 0.7692972823408358, + "grad_norm": 5.648767070796486, + "learning_rate": 4.063521587901048e-06, + "loss": 0.13037109375, + "step": 88970 + }, + { + "epoch": 0.7693405158623791, + "grad_norm": 2.4772550493606253, + "learning_rate": 4.063331084078176e-06, + "loss": 0.21497802734375, + "step": 88975 + }, + { + "epoch": 0.7693837493839223, + "grad_norm": 11.401300511951234, + "learning_rate": 4.063140575351316e-06, + "loss": 0.1205596923828125, + "step": 88980 + }, + { + "epoch": 0.7694269829054656, + "grad_norm": 8.742344971959763, + "learning_rate": 4.062950061721344e-06, + "loss": 0.1582763671875, + "step": 88985 + }, + { + "epoch": 0.7694702164270089, + "grad_norm": 6.96381776795118, + "learning_rate": 4.062759543189141e-06, + "loss": 0.0802978515625, + "step": 88990 + }, + { + "epoch": 0.7695134499485521, + "grad_norm": 17.37366851098706, + "learning_rate": 4.062569019755585e-06, + "loss": 0.24535369873046875, + "step": 88995 + }, + { + "epoch": 0.7695566834700954, + "grad_norm": 11.929182507880526, + "learning_rate": 4.062378491421554e-06, + "loss": 0.10734100341796875, + "step": 89000 + }, + { + "epoch": 0.7695999169916387, + "grad_norm": 6.547497724376033, + "learning_rate": 4.062187958187927e-06, + "loss": 0.17220458984375, + "step": 89005 + }, + { + "epoch": 0.7696431505131819, + "grad_norm": 1.0849474655144475, + "learning_rate": 4.061997420055585e-06, + "loss": 0.33078460693359374, + "step": 89010 + }, + { + "epoch": 0.7696863840347251, + "grad_norm": 1.2528022214757508, + "learning_rate": 4.061806877025403e-06, + "loss": 0.128704833984375, + "step": 89015 + }, + { + "epoch": 0.7697296175562685, + "grad_norm": 0.1663511800272005, + "learning_rate": 4.061616329098262e-06, + "loss": 0.13615875244140624, + "step": 89020 + }, + { + "epoch": 0.7697728510778117, + "grad_norm": 7.100897270237209, + "learning_rate": 4.061425776275039e-06, + "loss": 0.058075332641601564, + "step": 89025 + }, + { + "epoch": 0.7698160845993549, + "grad_norm": 21.964201325932574, + "learning_rate": 4.0612352185566165e-06, + "loss": 0.17750778198242187, + "step": 89030 + }, + { + "epoch": 0.7698593181208983, + "grad_norm": 5.248728618147769, + "learning_rate": 4.06104465594387e-06, + "loss": 0.147088623046875, + "step": 89035 + }, + { + "epoch": 0.7699025516424415, + "grad_norm": 25.06521169700159, + "learning_rate": 4.060854088437679e-06, + "loss": 0.21502265930175782, + "step": 89040 + }, + { + "epoch": 0.7699457851639847, + "grad_norm": 1.8556328973406047, + "learning_rate": 4.060663516038922e-06, + "loss": 0.04215316772460938, + "step": 89045 + }, + { + "epoch": 0.769989018685528, + "grad_norm": 8.363957695184244, + "learning_rate": 4.060472938748479e-06, + "loss": 0.05966644287109375, + "step": 89050 + }, + { + "epoch": 0.7700322522070713, + "grad_norm": 10.091681310004331, + "learning_rate": 4.060282356567228e-06, + "loss": 0.1268035888671875, + "step": 89055 + }, + { + "epoch": 0.7700754857286145, + "grad_norm": 12.430198138274754, + "learning_rate": 4.06009176949605e-06, + "loss": 0.1457122802734375, + "step": 89060 + }, + { + "epoch": 0.7701187192501578, + "grad_norm": 3.9287439696470203, + "learning_rate": 4.059901177535821e-06, + "loss": 0.10858612060546875, + "step": 89065 + }, + { + "epoch": 0.7701619527717011, + "grad_norm": 0.3935550174158176, + "learning_rate": 4.059710580687423e-06, + "loss": 0.062245750427246095, + "step": 89070 + }, + { + "epoch": 0.7702051862932443, + "grad_norm": 3.4652224237399296, + "learning_rate": 4.059519978951733e-06, + "loss": 0.18287429809570313, + "step": 89075 + }, + { + "epoch": 0.7702484198147876, + "grad_norm": 1.735072023155438, + "learning_rate": 4.059329372329629e-06, + "loss": 0.43524169921875, + "step": 89080 + }, + { + "epoch": 0.7702916533363309, + "grad_norm": 0.12408812028463778, + "learning_rate": 4.059138760821993e-06, + "loss": 0.154656982421875, + "step": 89085 + }, + { + "epoch": 0.7703348868578741, + "grad_norm": 2.444557444719117, + "learning_rate": 4.058948144429702e-06, + "loss": 0.0528533935546875, + "step": 89090 + }, + { + "epoch": 0.7703781203794173, + "grad_norm": 8.926060016156582, + "learning_rate": 4.058757523153637e-06, + "loss": 0.1529205322265625, + "step": 89095 + }, + { + "epoch": 0.7704213539009607, + "grad_norm": 30.97960745809664, + "learning_rate": 4.058566896994674e-06, + "loss": 0.12233161926269531, + "step": 89100 + }, + { + "epoch": 0.7704645874225039, + "grad_norm": 43.286417748582835, + "learning_rate": 4.058376265953695e-06, + "loss": 0.26270294189453125, + "step": 89105 + }, + { + "epoch": 0.7705078209440471, + "grad_norm": 4.734321840157398, + "learning_rate": 4.0581856300315775e-06, + "loss": 0.10637969970703125, + "step": 89110 + }, + { + "epoch": 0.7705510544655905, + "grad_norm": 7.9428306276483855, + "learning_rate": 4.0579949892292015e-06, + "loss": 0.0763641357421875, + "step": 89115 + }, + { + "epoch": 0.7705942879871337, + "grad_norm": 60.85277092620337, + "learning_rate": 4.057804343547446e-06, + "loss": 0.2989166259765625, + "step": 89120 + }, + { + "epoch": 0.7706375215086769, + "grad_norm": 3.078818042269713, + "learning_rate": 4.0576136929871915e-06, + "loss": 0.2439697265625, + "step": 89125 + }, + { + "epoch": 0.7706807550302203, + "grad_norm": 8.714254779571935, + "learning_rate": 4.057423037549315e-06, + "loss": 0.2098252296447754, + "step": 89130 + }, + { + "epoch": 0.7707239885517635, + "grad_norm": 7.131845745507535, + "learning_rate": 4.057232377234697e-06, + "loss": 0.04615707397460937, + "step": 89135 + }, + { + "epoch": 0.7707672220733067, + "grad_norm": 2.457008417886658, + "learning_rate": 4.057041712044217e-06, + "loss": 0.138262939453125, + "step": 89140 + }, + { + "epoch": 0.7708104555948501, + "grad_norm": 4.178373824110165, + "learning_rate": 4.056851041978754e-06, + "loss": 0.12503509521484374, + "step": 89145 + }, + { + "epoch": 0.7708536891163933, + "grad_norm": 2.533114875822806, + "learning_rate": 4.0566603670391875e-06, + "loss": 0.3158290863037109, + "step": 89150 + }, + { + "epoch": 0.7708969226379365, + "grad_norm": 39.934760854690495, + "learning_rate": 4.056469687226397e-06, + "loss": 0.1641326904296875, + "step": 89155 + }, + { + "epoch": 0.7709401561594799, + "grad_norm": 45.053640082156, + "learning_rate": 4.05627900254126e-06, + "loss": 0.4195526123046875, + "step": 89160 + }, + { + "epoch": 0.7709833896810231, + "grad_norm": 24.22945496021944, + "learning_rate": 4.05608831298466e-06, + "loss": 0.3908817291259766, + "step": 89165 + }, + { + "epoch": 0.7710266232025663, + "grad_norm": 3.477527299357147, + "learning_rate": 4.055897618557472e-06, + "loss": 0.0916259765625, + "step": 89170 + }, + { + "epoch": 0.7710698567241097, + "grad_norm": 8.731346337855905, + "learning_rate": 4.055706919260578e-06, + "loss": 0.07755126953125, + "step": 89175 + }, + { + "epoch": 0.7711130902456529, + "grad_norm": 0.045420730935175894, + "learning_rate": 4.055516215094858e-06, + "loss": 0.016263389587402345, + "step": 89180 + }, + { + "epoch": 0.7711563237671961, + "grad_norm": 5.206502090921397, + "learning_rate": 4.0553255060611904e-06, + "loss": 0.0439300537109375, + "step": 89185 + }, + { + "epoch": 0.7711995572887393, + "grad_norm": 4.372918044921832, + "learning_rate": 4.055134792160454e-06, + "loss": 0.026497650146484374, + "step": 89190 + }, + { + "epoch": 0.7712427908102827, + "grad_norm": 2.973387617889132, + "learning_rate": 4.054944073393529e-06, + "loss": 0.1108062744140625, + "step": 89195 + }, + { + "epoch": 0.7712860243318259, + "grad_norm": 24.100701599410545, + "learning_rate": 4.054753349761295e-06, + "loss": 0.44046669006347655, + "step": 89200 + }, + { + "epoch": 0.7713292578533691, + "grad_norm": 5.823376485936311, + "learning_rate": 4.054562621264633e-06, + "loss": 0.0376373291015625, + "step": 89205 + }, + { + "epoch": 0.7713724913749125, + "grad_norm": 26.92169696123405, + "learning_rate": 4.05437188790442e-06, + "loss": 0.247998046875, + "step": 89210 + }, + { + "epoch": 0.7714157248964557, + "grad_norm": 17.200180022932948, + "learning_rate": 4.054181149681538e-06, + "loss": 0.08901824951171874, + "step": 89215 + }, + { + "epoch": 0.7714589584179989, + "grad_norm": 26.347322584782155, + "learning_rate": 4.053990406596866e-06, + "loss": 0.09766845703125, + "step": 89220 + }, + { + "epoch": 0.7715021919395423, + "grad_norm": 32.309872871696875, + "learning_rate": 4.053799658651281e-06, + "loss": 0.49254150390625, + "step": 89225 + }, + { + "epoch": 0.7715454254610855, + "grad_norm": 5.262234795827126, + "learning_rate": 4.0536089058456676e-06, + "loss": 0.2433563232421875, + "step": 89230 + }, + { + "epoch": 0.7715886589826287, + "grad_norm": 6.085850887211275, + "learning_rate": 4.053418148180903e-06, + "loss": 0.04717559814453125, + "step": 89235 + }, + { + "epoch": 0.7716318925041721, + "grad_norm": 3.5987850036381674, + "learning_rate": 4.053227385657865e-06, + "loss": 0.2049652099609375, + "step": 89240 + }, + { + "epoch": 0.7716751260257153, + "grad_norm": 9.17152838588526, + "learning_rate": 4.053036618277438e-06, + "loss": 0.12301177978515625, + "step": 89245 + }, + { + "epoch": 0.7717183595472585, + "grad_norm": 4.272214850528067, + "learning_rate": 4.052845846040497e-06, + "loss": 0.1455169677734375, + "step": 89250 + }, + { + "epoch": 0.7717615930688019, + "grad_norm": 4.1695394781855075, + "learning_rate": 4.052655068947924e-06, + "loss": 0.08811569213867188, + "step": 89255 + }, + { + "epoch": 0.7718048265903451, + "grad_norm": 11.128187206724142, + "learning_rate": 4.0524642870006e-06, + "loss": 0.17962703704833985, + "step": 89260 + }, + { + "epoch": 0.7718480601118883, + "grad_norm": 25.68175858318674, + "learning_rate": 4.0522735001994036e-06, + "loss": 0.19988861083984374, + "step": 89265 + }, + { + "epoch": 0.7718912936334316, + "grad_norm": 1.0813378523335322, + "learning_rate": 4.052082708545215e-06, + "loss": 0.09633522033691407, + "step": 89270 + }, + { + "epoch": 0.7719345271549749, + "grad_norm": 4.97149878928036, + "learning_rate": 4.051891912038912e-06, + "loss": 0.06049957275390625, + "step": 89275 + }, + { + "epoch": 0.7719777606765181, + "grad_norm": 0.9330174628829386, + "learning_rate": 4.051701110681378e-06, + "loss": 0.1423095703125, + "step": 89280 + }, + { + "epoch": 0.7720209941980614, + "grad_norm": 25.232979216610175, + "learning_rate": 4.051510304473492e-06, + "loss": 0.15855331420898439, + "step": 89285 + }, + { + "epoch": 0.7720642277196047, + "grad_norm": 2.1190626759451914, + "learning_rate": 4.051319493416132e-06, + "loss": 0.03086700439453125, + "step": 89290 + }, + { + "epoch": 0.7721074612411479, + "grad_norm": 3.390526921921936, + "learning_rate": 4.0511286775101804e-06, + "loss": 0.0373870849609375, + "step": 89295 + }, + { + "epoch": 0.7721506947626912, + "grad_norm": 32.38842096707518, + "learning_rate": 4.050937856756516e-06, + "loss": 0.20147781372070311, + "step": 89300 + }, + { + "epoch": 0.7721939282842345, + "grad_norm": 34.62595397969164, + "learning_rate": 4.0507470311560184e-06, + "loss": 0.19531421661376952, + "step": 89305 + }, + { + "epoch": 0.7722371618057777, + "grad_norm": 2.4959359329893984, + "learning_rate": 4.050556200709569e-06, + "loss": 0.17042617797851561, + "step": 89310 + }, + { + "epoch": 0.772280395327321, + "grad_norm": 2.2632204571500862, + "learning_rate": 4.050365365418047e-06, + "loss": 0.20966339111328125, + "step": 89315 + }, + { + "epoch": 0.7723236288488643, + "grad_norm": 0.7996620221192157, + "learning_rate": 4.050174525282333e-06, + "loss": 0.22619895935058593, + "step": 89320 + }, + { + "epoch": 0.7723668623704075, + "grad_norm": 4.3399320043444725, + "learning_rate": 4.049983680303307e-06, + "loss": 0.04087905883789063, + "step": 89325 + }, + { + "epoch": 0.7724100958919508, + "grad_norm": 4.249110983878906, + "learning_rate": 4.049792830481849e-06, + "loss": 0.22786903381347656, + "step": 89330 + }, + { + "epoch": 0.7724533294134941, + "grad_norm": 13.326583639726946, + "learning_rate": 4.049601975818838e-06, + "loss": 0.10020599365234376, + "step": 89335 + }, + { + "epoch": 0.7724965629350373, + "grad_norm": 6.981619175032935, + "learning_rate": 4.049411116315157e-06, + "loss": 0.09132957458496094, + "step": 89340 + }, + { + "epoch": 0.7725397964565806, + "grad_norm": 27.898079628915724, + "learning_rate": 4.049220251971684e-06, + "loss": 0.3253021240234375, + "step": 89345 + }, + { + "epoch": 0.7725830299781239, + "grad_norm": 26.392037319856062, + "learning_rate": 4.0490293827893e-06, + "loss": 0.19853878021240234, + "step": 89350 + }, + { + "epoch": 0.7726262634996671, + "grad_norm": 16.20365405587738, + "learning_rate": 4.0488385087688844e-06, + "loss": 0.095086669921875, + "step": 89355 + }, + { + "epoch": 0.7726694970212103, + "grad_norm": 0.3359048816687715, + "learning_rate": 4.04864762991132e-06, + "loss": 0.1130828857421875, + "step": 89360 + }, + { + "epoch": 0.7727127305427536, + "grad_norm": 1.750505387637951, + "learning_rate": 4.048456746217484e-06, + "loss": 0.16826324462890624, + "step": 89365 + }, + { + "epoch": 0.7727559640642969, + "grad_norm": 0.16892942724338297, + "learning_rate": 4.048265857688258e-06, + "loss": 0.12897815704345703, + "step": 89370 + }, + { + "epoch": 0.7727991975858401, + "grad_norm": 13.586919856655385, + "learning_rate": 4.048074964324523e-06, + "loss": 0.20617475509643554, + "step": 89375 + }, + { + "epoch": 0.7728424311073834, + "grad_norm": 8.662584765088242, + "learning_rate": 4.04788406612716e-06, + "loss": 0.238134765625, + "step": 89380 + }, + { + "epoch": 0.7728856646289267, + "grad_norm": 0.052479397731685765, + "learning_rate": 4.047693163097047e-06, + "loss": 0.14777145385742188, + "step": 89385 + }, + { + "epoch": 0.77292889815047, + "grad_norm": 16.481770866333964, + "learning_rate": 4.047502255235065e-06, + "loss": 0.12277069091796874, + "step": 89390 + }, + { + "epoch": 0.7729721316720132, + "grad_norm": 5.049183273659607, + "learning_rate": 4.047311342542096e-06, + "loss": 0.049869537353515625, + "step": 89395 + }, + { + "epoch": 0.7730153651935565, + "grad_norm": 1.0732173230548883, + "learning_rate": 4.04712042501902e-06, + "loss": 0.06610260009765626, + "step": 89400 + }, + { + "epoch": 0.7730585987150997, + "grad_norm": 50.00787290048067, + "learning_rate": 4.046929502666717e-06, + "loss": 0.26668720245361327, + "step": 89405 + }, + { + "epoch": 0.773101832236643, + "grad_norm": 1.9059837488786677, + "learning_rate": 4.046738575486069e-06, + "loss": 0.1215484619140625, + "step": 89410 + }, + { + "epoch": 0.7731450657581863, + "grad_norm": 0.9592551318426336, + "learning_rate": 4.046547643477952e-06, + "loss": 0.04926166534423828, + "step": 89415 + }, + { + "epoch": 0.7731882992797295, + "grad_norm": 0.07034366008447182, + "learning_rate": 4.0463567066432514e-06, + "loss": 0.19003219604492189, + "step": 89420 + }, + { + "epoch": 0.7732315328012728, + "grad_norm": 0.9914744251720683, + "learning_rate": 4.0461657649828464e-06, + "loss": 0.17730712890625, + "step": 89425 + }, + { + "epoch": 0.7732747663228161, + "grad_norm": 0.1788276186187289, + "learning_rate": 4.045974818497616e-06, + "loss": 0.04573822021484375, + "step": 89430 + }, + { + "epoch": 0.7733179998443593, + "grad_norm": 0.22065654001032337, + "learning_rate": 4.045783867188444e-06, + "loss": 0.12324447631835937, + "step": 89435 + }, + { + "epoch": 0.7733612333659026, + "grad_norm": 0.3986677273278298, + "learning_rate": 4.045592911056209e-06, + "loss": 0.08403396606445312, + "step": 89440 + }, + { + "epoch": 0.7734044668874458, + "grad_norm": 12.819917437816471, + "learning_rate": 4.04540195010179e-06, + "loss": 0.110888671875, + "step": 89445 + }, + { + "epoch": 0.7734477004089891, + "grad_norm": 57.90710094836365, + "learning_rate": 4.045210984326071e-06, + "loss": 0.177593994140625, + "step": 89450 + }, + { + "epoch": 0.7734909339305324, + "grad_norm": 1.8094769456863857, + "learning_rate": 4.045020013729931e-06, + "loss": 0.2259613037109375, + "step": 89455 + }, + { + "epoch": 0.7735341674520756, + "grad_norm": 17.631899411240443, + "learning_rate": 4.044829038314252e-06, + "loss": 0.608270263671875, + "step": 89460 + }, + { + "epoch": 0.7735774009736189, + "grad_norm": 34.29401372540524, + "learning_rate": 4.0446380580799125e-06, + "loss": 0.224029541015625, + "step": 89465 + }, + { + "epoch": 0.7736206344951622, + "grad_norm": 8.7575377470859, + "learning_rate": 4.044447073027795e-06, + "loss": 0.20949478149414064, + "step": 89470 + }, + { + "epoch": 0.7736638680167054, + "grad_norm": 4.904438915745895, + "learning_rate": 4.044256083158781e-06, + "loss": 0.05249481201171875, + "step": 89475 + }, + { + "epoch": 0.7737071015382487, + "grad_norm": 12.042833304103944, + "learning_rate": 4.044065088473748e-06, + "loss": 0.16230621337890624, + "step": 89480 + }, + { + "epoch": 0.773750335059792, + "grad_norm": 16.255008796972522, + "learning_rate": 4.043874088973581e-06, + "loss": 0.08937263488769531, + "step": 89485 + }, + { + "epoch": 0.7737935685813352, + "grad_norm": 61.947658081953854, + "learning_rate": 4.0436830846591585e-06, + "loss": 0.42072525024414065, + "step": 89490 + }, + { + "epoch": 0.7738368021028785, + "grad_norm": 0.3526761329389765, + "learning_rate": 4.043492075531361e-06, + "loss": 0.1810699462890625, + "step": 89495 + }, + { + "epoch": 0.7738800356244218, + "grad_norm": 15.904479057744673, + "learning_rate": 4.043301061591071e-06, + "loss": 0.4623558044433594, + "step": 89500 + }, + { + "epoch": 0.773923269145965, + "grad_norm": 13.124323200729336, + "learning_rate": 4.043110042839168e-06, + "loss": 0.03603630065917969, + "step": 89505 + }, + { + "epoch": 0.7739665026675083, + "grad_norm": 8.703048764200126, + "learning_rate": 4.042919019276535e-06, + "loss": 0.11440696716308593, + "step": 89510 + }, + { + "epoch": 0.7740097361890516, + "grad_norm": 19.111821401092367, + "learning_rate": 4.04272799090405e-06, + "loss": 0.13040847778320314, + "step": 89515 + }, + { + "epoch": 0.7740529697105948, + "grad_norm": 29.004470672975717, + "learning_rate": 4.042536957722597e-06, + "loss": 0.2509735107421875, + "step": 89520 + }, + { + "epoch": 0.7740962032321381, + "grad_norm": 7.553139049680001, + "learning_rate": 4.042345919733055e-06, + "loss": 0.08824615478515625, + "step": 89525 + }, + { + "epoch": 0.7741394367536814, + "grad_norm": 2.4437557164924755, + "learning_rate": 4.042154876936305e-06, + "loss": 0.12661209106445312, + "step": 89530 + }, + { + "epoch": 0.7741826702752246, + "grad_norm": 4.276364237365816, + "learning_rate": 4.04196382933323e-06, + "loss": 0.03892822265625, + "step": 89535 + }, + { + "epoch": 0.7742259037967678, + "grad_norm": 5.4918430806111145, + "learning_rate": 4.04177277692471e-06, + "loss": 0.1720458984375, + "step": 89540 + }, + { + "epoch": 0.7742691373183112, + "grad_norm": 4.835717992591145, + "learning_rate": 4.041581719711625e-06, + "loss": 0.06235809326171875, + "step": 89545 + }, + { + "epoch": 0.7743123708398544, + "grad_norm": 4.0282338698285125, + "learning_rate": 4.041390657694858e-06, + "loss": 0.1719646453857422, + "step": 89550 + }, + { + "epoch": 0.7743556043613976, + "grad_norm": 96.42024325950992, + "learning_rate": 4.041199590875288e-06, + "loss": 0.43415679931640627, + "step": 89555 + }, + { + "epoch": 0.774398837882941, + "grad_norm": 0.10232055651861642, + "learning_rate": 4.041008519253798e-06, + "loss": 0.19375152587890626, + "step": 89560 + }, + { + "epoch": 0.7744420714044842, + "grad_norm": 4.851153515654956, + "learning_rate": 4.04081744283127e-06, + "loss": 0.09882049560546875, + "step": 89565 + }, + { + "epoch": 0.7744853049260274, + "grad_norm": 0.16291090682381085, + "learning_rate": 4.040626361608583e-06, + "loss": 0.17324295043945312, + "step": 89570 + }, + { + "epoch": 0.7745285384475707, + "grad_norm": 0.4907796137647234, + "learning_rate": 4.040435275586619e-06, + "loss": 0.1894287109375, + "step": 89575 + }, + { + "epoch": 0.774571771969114, + "grad_norm": 9.289085696703063, + "learning_rate": 4.04024418476626e-06, + "loss": 0.049176025390625, + "step": 89580 + }, + { + "epoch": 0.7746150054906572, + "grad_norm": 2.0800674268757633, + "learning_rate": 4.040053089148386e-06, + "loss": 0.056539154052734374, + "step": 89585 + }, + { + "epoch": 0.7746582390122005, + "grad_norm": 13.750677540375806, + "learning_rate": 4.039861988733879e-06, + "loss": 0.18097763061523436, + "step": 89590 + }, + { + "epoch": 0.7747014725337438, + "grad_norm": 0.9649233787048592, + "learning_rate": 4.039670883523621e-06, + "loss": 0.034931182861328125, + "step": 89595 + }, + { + "epoch": 0.774744706055287, + "grad_norm": 0.9198999505236554, + "learning_rate": 4.039479773518493e-06, + "loss": 0.04921417236328125, + "step": 89600 + }, + { + "epoch": 0.7747879395768303, + "grad_norm": 6.97544149934753, + "learning_rate": 4.0392886587193755e-06, + "loss": 0.07959709167480469, + "step": 89605 + }, + { + "epoch": 0.7748311730983736, + "grad_norm": 2.305595130850205, + "learning_rate": 4.039097539127151e-06, + "loss": 0.1913055419921875, + "step": 89610 + }, + { + "epoch": 0.7748744066199168, + "grad_norm": 2.3473777199725427, + "learning_rate": 4.0389064147427e-06, + "loss": 0.19421844482421874, + "step": 89615 + }, + { + "epoch": 0.77491764014146, + "grad_norm": 2.9914299152138124, + "learning_rate": 4.038715285566905e-06, + "loss": 0.170550537109375, + "step": 89620 + }, + { + "epoch": 0.7749608736630034, + "grad_norm": 2.1873509907854904, + "learning_rate": 4.038524151600647e-06, + "loss": 0.0960723876953125, + "step": 89625 + }, + { + "epoch": 0.7750041071845466, + "grad_norm": 2.312638484389046, + "learning_rate": 4.038333012844806e-06, + "loss": 0.10865478515625, + "step": 89630 + }, + { + "epoch": 0.7750473407060898, + "grad_norm": 6.635951405318223, + "learning_rate": 4.038141869300267e-06, + "loss": 0.08370513916015625, + "step": 89635 + }, + { + "epoch": 0.7750905742276332, + "grad_norm": 1.9313294348320111, + "learning_rate": 4.037950720967907e-06, + "loss": 0.04669189453125, + "step": 89640 + }, + { + "epoch": 0.7751338077491764, + "grad_norm": 18.235526194562073, + "learning_rate": 4.037759567848611e-06, + "loss": 0.122064208984375, + "step": 89645 + }, + { + "epoch": 0.7751770412707196, + "grad_norm": 3.299857530724204, + "learning_rate": 4.03756840994326e-06, + "loss": 0.077972412109375, + "step": 89650 + }, + { + "epoch": 0.775220274792263, + "grad_norm": 11.704269740523763, + "learning_rate": 4.037377247252735e-06, + "loss": 0.1395599365234375, + "step": 89655 + }, + { + "epoch": 0.7752635083138062, + "grad_norm": 7.57796360347271, + "learning_rate": 4.037186079777918e-06, + "loss": 0.21177520751953124, + "step": 89660 + }, + { + "epoch": 0.7753067418353494, + "grad_norm": 13.52189694703643, + "learning_rate": 4.03699490751969e-06, + "loss": 0.049313926696777345, + "step": 89665 + }, + { + "epoch": 0.7753499753568928, + "grad_norm": 9.82033379367376, + "learning_rate": 4.036803730478933e-06, + "loss": 0.07964324951171875, + "step": 89670 + }, + { + "epoch": 0.775393208878436, + "grad_norm": 10.085374055127339, + "learning_rate": 4.036612548656529e-06, + "loss": 0.227655029296875, + "step": 89675 + }, + { + "epoch": 0.7754364423999792, + "grad_norm": 32.267359455210084, + "learning_rate": 4.03642136205336e-06, + "loss": 0.1788543701171875, + "step": 89680 + }, + { + "epoch": 0.7754796759215226, + "grad_norm": 0.07945590093689187, + "learning_rate": 4.036230170670307e-06, + "loss": 0.07098312377929687, + "step": 89685 + }, + { + "epoch": 0.7755229094430658, + "grad_norm": 12.32444982143851, + "learning_rate": 4.036038974508252e-06, + "loss": 0.22163467407226561, + "step": 89690 + }, + { + "epoch": 0.775566142964609, + "grad_norm": 6.285243569618019, + "learning_rate": 4.035847773568078e-06, + "loss": 0.110919189453125, + "step": 89695 + }, + { + "epoch": 0.7756093764861524, + "grad_norm": 2.941902027410412, + "learning_rate": 4.035656567850663e-06, + "loss": 0.39492340087890626, + "step": 89700 + }, + { + "epoch": 0.7756526100076956, + "grad_norm": 4.197026233368696, + "learning_rate": 4.035465357356892e-06, + "loss": 0.39849853515625, + "step": 89705 + }, + { + "epoch": 0.7756958435292388, + "grad_norm": 13.64384085742075, + "learning_rate": 4.035274142087648e-06, + "loss": 0.14866294860839843, + "step": 89710 + }, + { + "epoch": 0.775739077050782, + "grad_norm": 2.0580097193644296, + "learning_rate": 4.0350829220438105e-06, + "loss": 0.0340576171875, + "step": 89715 + }, + { + "epoch": 0.7757823105723254, + "grad_norm": 2.7422936416280534, + "learning_rate": 4.034891697226261e-06, + "loss": 0.03804168701171875, + "step": 89720 + }, + { + "epoch": 0.7758255440938686, + "grad_norm": 4.362491069774828, + "learning_rate": 4.0347004676358835e-06, + "loss": 0.068426513671875, + "step": 89725 + }, + { + "epoch": 0.7758687776154118, + "grad_norm": 7.1040259299902955, + "learning_rate": 4.034509233273559e-06, + "loss": 0.09713668823242187, + "step": 89730 + }, + { + "epoch": 0.7759120111369552, + "grad_norm": 3.2997981862389336, + "learning_rate": 4.034317994140167e-06, + "loss": 0.11321563720703125, + "step": 89735 + }, + { + "epoch": 0.7759552446584984, + "grad_norm": 28.058777246111557, + "learning_rate": 4.034126750236595e-06, + "loss": 0.19422454833984376, + "step": 89740 + }, + { + "epoch": 0.7759984781800416, + "grad_norm": 0.3053930527329936, + "learning_rate": 4.03393550156372e-06, + "loss": 0.07965545654296875, + "step": 89745 + }, + { + "epoch": 0.776041711701585, + "grad_norm": 2.8735712733948144, + "learning_rate": 4.033744248122426e-06, + "loss": 0.12122917175292969, + "step": 89750 + }, + { + "epoch": 0.7760849452231282, + "grad_norm": 0.02257175252122341, + "learning_rate": 4.033552989913596e-06, + "loss": 0.06265754699707031, + "step": 89755 + }, + { + "epoch": 0.7761281787446714, + "grad_norm": 3.5423127239405807, + "learning_rate": 4.033361726938109e-06, + "loss": 0.0860198974609375, + "step": 89760 + }, + { + "epoch": 0.7761714122662148, + "grad_norm": 6.692538819608655, + "learning_rate": 4.033170459196851e-06, + "loss": 0.1729351043701172, + "step": 89765 + }, + { + "epoch": 0.776214645787758, + "grad_norm": 11.382077701264517, + "learning_rate": 4.032979186690701e-06, + "loss": 0.34414825439453123, + "step": 89770 + }, + { + "epoch": 0.7762578793093012, + "grad_norm": 3.5557628684244884, + "learning_rate": 4.032787909420542e-06, + "loss": 0.05856170654296875, + "step": 89775 + }, + { + "epoch": 0.7763011128308446, + "grad_norm": 0.2405116188152741, + "learning_rate": 4.032596627387257e-06, + "loss": 0.21330490112304687, + "step": 89780 + }, + { + "epoch": 0.7763443463523878, + "grad_norm": 7.284200102751113, + "learning_rate": 4.032405340591727e-06, + "loss": 0.24761962890625, + "step": 89785 + }, + { + "epoch": 0.776387579873931, + "grad_norm": 1.3671109185909336, + "learning_rate": 4.032214049034836e-06, + "loss": 0.16925430297851562, + "step": 89790 + }, + { + "epoch": 0.7764308133954743, + "grad_norm": 0.4023500103900879, + "learning_rate": 4.032022752717464e-06, + "loss": 0.18482666015625, + "step": 89795 + }, + { + "epoch": 0.7764740469170176, + "grad_norm": 21.973328519102697, + "learning_rate": 4.031831451640494e-06, + "loss": 0.15189208984375, + "step": 89800 + }, + { + "epoch": 0.7765172804385608, + "grad_norm": 2.791547976772107, + "learning_rate": 4.03164014580481e-06, + "loss": 0.16727294921875, + "step": 89805 + }, + { + "epoch": 0.7765605139601041, + "grad_norm": 25.46131504215371, + "learning_rate": 4.031448835211292e-06, + "loss": 0.41467819213867185, + "step": 89810 + }, + { + "epoch": 0.7766037474816474, + "grad_norm": 7.088199447851076, + "learning_rate": 4.031257519860822e-06, + "loss": 0.13566131591796876, + "step": 89815 + }, + { + "epoch": 0.7766469810031906, + "grad_norm": 20.840027042493478, + "learning_rate": 4.031066199754285e-06, + "loss": 0.0689584732055664, + "step": 89820 + }, + { + "epoch": 0.7766902145247339, + "grad_norm": 1.1632814183746572, + "learning_rate": 4.030874874892561e-06, + "loss": 0.169329833984375, + "step": 89825 + }, + { + "epoch": 0.7767334480462772, + "grad_norm": 13.199341169274588, + "learning_rate": 4.0306835452765335e-06, + "loss": 0.196685791015625, + "step": 89830 + }, + { + "epoch": 0.7767766815678204, + "grad_norm": 0.22671731893051597, + "learning_rate": 4.030492210907084e-06, + "loss": 0.12790946960449218, + "step": 89835 + }, + { + "epoch": 0.7768199150893637, + "grad_norm": 4.998529725916609, + "learning_rate": 4.030300871785097e-06, + "loss": 0.06474609375, + "step": 89840 + }, + { + "epoch": 0.776863148610907, + "grad_norm": 4.474254990784737, + "learning_rate": 4.030109527911451e-06, + "loss": 0.0341064453125, + "step": 89845 + }, + { + "epoch": 0.7769063821324502, + "grad_norm": 26.806842279468892, + "learning_rate": 4.029918179287033e-06, + "loss": 0.21387939453125, + "step": 89850 + }, + { + "epoch": 0.7769496156539935, + "grad_norm": 7.76278941017395, + "learning_rate": 4.029726825912723e-06, + "loss": 0.44197540283203124, + "step": 89855 + }, + { + "epoch": 0.7769928491755368, + "grad_norm": 11.157973196753714, + "learning_rate": 4.029535467789403e-06, + "loss": 0.0550537109375, + "step": 89860 + }, + { + "epoch": 0.77703608269708, + "grad_norm": 1.7006342499092182, + "learning_rate": 4.029344104917957e-06, + "loss": 0.32506504058837893, + "step": 89865 + }, + { + "epoch": 0.7770793162186233, + "grad_norm": 4.247491638748256, + "learning_rate": 4.029152737299267e-06, + "loss": 0.14842643737792968, + "step": 89870 + }, + { + "epoch": 0.7771225497401666, + "grad_norm": 3.656990124004697, + "learning_rate": 4.0289613649342146e-06, + "loss": 0.046844482421875, + "step": 89875 + }, + { + "epoch": 0.7771657832617098, + "grad_norm": 21.876841843604367, + "learning_rate": 4.028769987823684e-06, + "loss": 0.1397735595703125, + "step": 89880 + }, + { + "epoch": 0.777209016783253, + "grad_norm": 13.77299010892285, + "learning_rate": 4.028578605968558e-06, + "loss": 0.08543472290039063, + "step": 89885 + }, + { + "epoch": 0.7772522503047963, + "grad_norm": 2.093395650934871, + "learning_rate": 4.028387219369717e-06, + "loss": 0.15078125, + "step": 89890 + }, + { + "epoch": 0.7772954838263396, + "grad_norm": 24.91867538243898, + "learning_rate": 4.028195828028046e-06, + "loss": 0.10738677978515625, + "step": 89895 + }, + { + "epoch": 0.7773387173478828, + "grad_norm": 0.7185243699765683, + "learning_rate": 4.0280044319444255e-06, + "loss": 0.1058441162109375, + "step": 89900 + }, + { + "epoch": 0.7773819508694261, + "grad_norm": 4.73836026428108, + "learning_rate": 4.027813031119741e-06, + "loss": 0.07109527587890625, + "step": 89905 + }, + { + "epoch": 0.7774251843909694, + "grad_norm": 7.727510712701479, + "learning_rate": 4.027621625554872e-06, + "loss": 0.13565216064453126, + "step": 89910 + }, + { + "epoch": 0.7774684179125126, + "grad_norm": 7.157646891828516, + "learning_rate": 4.027430215250704e-06, + "loss": 0.15050201416015624, + "step": 89915 + }, + { + "epoch": 0.7775116514340559, + "grad_norm": 0.706939378555173, + "learning_rate": 4.027238800208119e-06, + "loss": 0.29024581909179686, + "step": 89920 + }, + { + "epoch": 0.7775548849555992, + "grad_norm": 4.4462797979109245, + "learning_rate": 4.027047380427997e-06, + "loss": 0.060396575927734376, + "step": 89925 + }, + { + "epoch": 0.7775981184771424, + "grad_norm": 14.369123911315382, + "learning_rate": 4.0268559559112244e-06, + "loss": 0.30904083251953124, + "step": 89930 + }, + { + "epoch": 0.7776413519986857, + "grad_norm": 42.17829879165709, + "learning_rate": 4.026664526658684e-06, + "loss": 0.22992095947265626, + "step": 89935 + }, + { + "epoch": 0.777684585520229, + "grad_norm": 5.194226389897368, + "learning_rate": 4.026473092671257e-06, + "loss": 0.1900604248046875, + "step": 89940 + }, + { + "epoch": 0.7777278190417722, + "grad_norm": 15.806033458781421, + "learning_rate": 4.0262816539498265e-06, + "loss": 0.1393503189086914, + "step": 89945 + }, + { + "epoch": 0.7777710525633155, + "grad_norm": 3.9833056613033273, + "learning_rate": 4.026090210495276e-06, + "loss": 0.025587749481201173, + "step": 89950 + }, + { + "epoch": 0.7778142860848588, + "grad_norm": 1.7819548307853634, + "learning_rate": 4.025898762308488e-06, + "loss": 0.29073944091796877, + "step": 89955 + }, + { + "epoch": 0.777857519606402, + "grad_norm": 5.597811232444296, + "learning_rate": 4.025707309390345e-06, + "loss": 0.3187591552734375, + "step": 89960 + }, + { + "epoch": 0.7779007531279453, + "grad_norm": 0.30766945170501164, + "learning_rate": 4.025515851741731e-06, + "loss": 0.20105972290039062, + "step": 89965 + }, + { + "epoch": 0.7779439866494885, + "grad_norm": 6.862021510349754, + "learning_rate": 4.025324389363529e-06, + "loss": 0.08834381103515625, + "step": 89970 + }, + { + "epoch": 0.7779872201710318, + "grad_norm": 17.15412021451746, + "learning_rate": 4.025132922256621e-06, + "loss": 0.062299346923828124, + "step": 89975 + }, + { + "epoch": 0.7780304536925751, + "grad_norm": 9.976677265222449, + "learning_rate": 4.024941450421891e-06, + "loss": 0.15247230529785155, + "step": 89980 + }, + { + "epoch": 0.7780736872141183, + "grad_norm": 3.083746309475198, + "learning_rate": 4.024749973860221e-06, + "loss": 0.683502197265625, + "step": 89985 + }, + { + "epoch": 0.7781169207356616, + "grad_norm": 92.18765492258115, + "learning_rate": 4.0245584925724945e-06, + "loss": 0.32356605529785154, + "step": 89990 + }, + { + "epoch": 0.7781601542572049, + "grad_norm": 3.3323777732896214, + "learning_rate": 4.024367006559596e-06, + "loss": 0.35447998046875, + "step": 89995 + }, + { + "epoch": 0.7782033877787481, + "grad_norm": 6.791090799470336, + "learning_rate": 4.024175515822406e-06, + "loss": 0.0665863037109375, + "step": 90000 + }, + { + "epoch": 0.7782466213002914, + "grad_norm": 18.214309443078943, + "learning_rate": 4.023984020361809e-06, + "loss": 0.1319305419921875, + "step": 90005 + }, + { + "epoch": 0.7782898548218347, + "grad_norm": 2.7549205634906877, + "learning_rate": 4.023792520178689e-06, + "loss": 0.0948883056640625, + "step": 90010 + }, + { + "epoch": 0.7783330883433779, + "grad_norm": 2.295870137452126, + "learning_rate": 4.023601015273928e-06, + "loss": 0.29939117431640627, + "step": 90015 + }, + { + "epoch": 0.7783763218649212, + "grad_norm": 34.35031085057971, + "learning_rate": 4.02340950564841e-06, + "loss": 0.18319549560546874, + "step": 90020 + }, + { + "epoch": 0.7784195553864645, + "grad_norm": 0.5121101092036668, + "learning_rate": 4.023217991303017e-06, + "loss": 0.04513473510742187, + "step": 90025 + }, + { + "epoch": 0.7784627889080077, + "grad_norm": 8.335028962266316, + "learning_rate": 4.0230264722386335e-06, + "loss": 0.48308792114257815, + "step": 90030 + }, + { + "epoch": 0.778506022429551, + "grad_norm": 3.5489281464741773, + "learning_rate": 4.0228349484561425e-06, + "loss": 0.111114501953125, + "step": 90035 + }, + { + "epoch": 0.7785492559510943, + "grad_norm": 21.151777401214336, + "learning_rate": 4.022643419956427e-06, + "loss": 0.18666038513183594, + "step": 90040 + }, + { + "epoch": 0.7785924894726375, + "grad_norm": 19.215406908059055, + "learning_rate": 4.0224518867403705e-06, + "loss": 0.22699451446533203, + "step": 90045 + }, + { + "epoch": 0.7786357229941808, + "grad_norm": 1.088137371267477, + "learning_rate": 4.022260348808855e-06, + "loss": 0.16427459716796874, + "step": 90050 + }, + { + "epoch": 0.778678956515724, + "grad_norm": 13.073893916513454, + "learning_rate": 4.022068806162767e-06, + "loss": 0.07429275512695313, + "step": 90055 + }, + { + "epoch": 0.7787221900372673, + "grad_norm": 0.17692081722454125, + "learning_rate": 4.021877258802987e-06, + "loss": 0.054144668579101565, + "step": 90060 + }, + { + "epoch": 0.7787654235588105, + "grad_norm": 34.724573146505705, + "learning_rate": 4.021685706730399e-06, + "loss": 0.1622823715209961, + "step": 90065 + }, + { + "epoch": 0.7788086570803538, + "grad_norm": 7.204481943179964, + "learning_rate": 4.021494149945887e-06, + "loss": 0.11497650146484376, + "step": 90070 + }, + { + "epoch": 0.7788518906018971, + "grad_norm": 6.096701042576602, + "learning_rate": 4.021302588450335e-06, + "loss": 0.12570343017578126, + "step": 90075 + }, + { + "epoch": 0.7788951241234403, + "grad_norm": 5.463103008363254, + "learning_rate": 4.0211110222446255e-06, + "loss": 0.21851348876953125, + "step": 90080 + }, + { + "epoch": 0.7789383576449836, + "grad_norm": 13.319701051108597, + "learning_rate": 4.020919451329642e-06, + "loss": 0.16288166046142577, + "step": 90085 + }, + { + "epoch": 0.7789815911665269, + "grad_norm": 1.0259252884253114, + "learning_rate": 4.020727875706268e-06, + "loss": 0.06859130859375, + "step": 90090 + }, + { + "epoch": 0.7790248246880701, + "grad_norm": 2.8457736804887794, + "learning_rate": 4.020536295375387e-06, + "loss": 0.0759796142578125, + "step": 90095 + }, + { + "epoch": 0.7790680582096134, + "grad_norm": 0.3312269114259954, + "learning_rate": 4.020344710337883e-06, + "loss": 0.13067855834960937, + "step": 90100 + }, + { + "epoch": 0.7791112917311567, + "grad_norm": 9.273648482690277, + "learning_rate": 4.020153120594639e-06, + "loss": 0.12382659912109376, + "step": 90105 + }, + { + "epoch": 0.7791545252526999, + "grad_norm": 0.11765493974312419, + "learning_rate": 4.019961526146541e-06, + "loss": 0.13220500946044922, + "step": 90110 + }, + { + "epoch": 0.7791977587742432, + "grad_norm": 0.133338982270571, + "learning_rate": 4.019769926994468e-06, + "loss": 0.04839324951171875, + "step": 90115 + }, + { + "epoch": 0.7792409922957865, + "grad_norm": 0.35093347364566724, + "learning_rate": 4.019578323139307e-06, + "loss": 0.46089839935302734, + "step": 90120 + }, + { + "epoch": 0.7792842258173297, + "grad_norm": 12.97088089583464, + "learning_rate": 4.019386714581941e-06, + "loss": 0.4031988143920898, + "step": 90125 + }, + { + "epoch": 0.779327459338873, + "grad_norm": 2.4083374592925173, + "learning_rate": 4.0191951013232535e-06, + "loss": 0.022934913635253906, + "step": 90130 + }, + { + "epoch": 0.7793706928604163, + "grad_norm": 1.604668291303044, + "learning_rate": 4.019003483364129e-06, + "loss": 0.17473983764648438, + "step": 90135 + }, + { + "epoch": 0.7794139263819595, + "grad_norm": 14.650173001921031, + "learning_rate": 4.0188118607054495e-06, + "loss": 0.4770782470703125, + "step": 90140 + }, + { + "epoch": 0.7794571599035027, + "grad_norm": 0.45268229977296004, + "learning_rate": 4.018620233348101e-06, + "loss": 0.17351760864257812, + "step": 90145 + }, + { + "epoch": 0.7795003934250461, + "grad_norm": 11.196676479589724, + "learning_rate": 4.018428601292964e-06, + "loss": 0.32128753662109377, + "step": 90150 + }, + { + "epoch": 0.7795436269465893, + "grad_norm": 1.7135352679331046, + "learning_rate": 4.018236964540925e-06, + "loss": 0.08582344055175781, + "step": 90155 + }, + { + "epoch": 0.7795868604681325, + "grad_norm": 0.7654436879181044, + "learning_rate": 4.018045323092868e-06, + "loss": 0.09039039611816406, + "step": 90160 + }, + { + "epoch": 0.7796300939896759, + "grad_norm": 1.2406265603914022, + "learning_rate": 4.017853676949675e-06, + "loss": 0.128631591796875, + "step": 90165 + }, + { + "epoch": 0.7796733275112191, + "grad_norm": 16.769806868927457, + "learning_rate": 4.017662026112232e-06, + "loss": 0.34011306762695315, + "step": 90170 + }, + { + "epoch": 0.7797165610327623, + "grad_norm": 18.377624733325064, + "learning_rate": 4.01747037058142e-06, + "loss": 0.3064472198486328, + "step": 90175 + }, + { + "epoch": 0.7797597945543057, + "grad_norm": 16.148536750777613, + "learning_rate": 4.017278710358125e-06, + "loss": 0.049483108520507815, + "step": 90180 + }, + { + "epoch": 0.7798030280758489, + "grad_norm": 0.68161383780767, + "learning_rate": 4.0170870454432315e-06, + "loss": 0.05170440673828125, + "step": 90185 + }, + { + "epoch": 0.7798462615973921, + "grad_norm": 6.232425003524604, + "learning_rate": 4.016895375837622e-06, + "loss": 0.016605377197265625, + "step": 90190 + }, + { + "epoch": 0.7798894951189355, + "grad_norm": 2.0570683265541065, + "learning_rate": 4.0167037015421805e-06, + "loss": 0.0232177734375, + "step": 90195 + }, + { + "epoch": 0.7799327286404787, + "grad_norm": 29.40483430724805, + "learning_rate": 4.016512022557792e-06, + "loss": 0.47507190704345703, + "step": 90200 + }, + { + "epoch": 0.7799759621620219, + "grad_norm": 1.5847390205094523, + "learning_rate": 4.016320338885338e-06, + "loss": 0.08984527587890626, + "step": 90205 + }, + { + "epoch": 0.7800191956835653, + "grad_norm": 11.681918752061142, + "learning_rate": 4.016128650525706e-06, + "loss": 0.09548187255859375, + "step": 90210 + }, + { + "epoch": 0.7800624292051085, + "grad_norm": 0.2924143487716702, + "learning_rate": 4.015936957479779e-06, + "loss": 0.10879478454589844, + "step": 90215 + }, + { + "epoch": 0.7801056627266517, + "grad_norm": 0.13975346169440767, + "learning_rate": 4.0157452597484395e-06, + "loss": 0.1961080551147461, + "step": 90220 + }, + { + "epoch": 0.780148896248195, + "grad_norm": 2.326218073783739, + "learning_rate": 4.015553557332573e-06, + "loss": 0.27303409576416016, + "step": 90225 + }, + { + "epoch": 0.7801921297697383, + "grad_norm": 0.08765032793757695, + "learning_rate": 4.015361850233063e-06, + "loss": 0.13812179565429689, + "step": 90230 + }, + { + "epoch": 0.7802353632912815, + "grad_norm": 47.33580823357272, + "learning_rate": 4.0151701384507935e-06, + "loss": 0.3012275695800781, + "step": 90235 + }, + { + "epoch": 0.7802785968128247, + "grad_norm": 24.328456901708826, + "learning_rate": 4.0149784219866495e-06, + "loss": 0.1028106689453125, + "step": 90240 + }, + { + "epoch": 0.7803218303343681, + "grad_norm": 2.505713561396147, + "learning_rate": 4.014786700841514e-06, + "loss": 0.0820953369140625, + "step": 90245 + }, + { + "epoch": 0.7803650638559113, + "grad_norm": 10.297071611181204, + "learning_rate": 4.014594975016273e-06, + "loss": 0.06337814331054688, + "step": 90250 + }, + { + "epoch": 0.7804082973774545, + "grad_norm": 19.364774249412243, + "learning_rate": 4.01440324451181e-06, + "loss": 0.29486846923828125, + "step": 90255 + }, + { + "epoch": 0.7804515308989979, + "grad_norm": 12.492618434770613, + "learning_rate": 4.014211509329008e-06, + "loss": 0.07815093994140625, + "step": 90260 + }, + { + "epoch": 0.7804947644205411, + "grad_norm": 6.817816026852265, + "learning_rate": 4.0140197694687515e-06, + "loss": 0.08948688507080078, + "step": 90265 + }, + { + "epoch": 0.7805379979420843, + "grad_norm": 1.4847910081700482, + "learning_rate": 4.013828024931926e-06, + "loss": 0.19560394287109376, + "step": 90270 + }, + { + "epoch": 0.7805812314636277, + "grad_norm": 27.788017805715906, + "learning_rate": 4.013636275719415e-06, + "loss": 0.22892494201660157, + "step": 90275 + }, + { + "epoch": 0.7806244649851709, + "grad_norm": 3.907529772629744, + "learning_rate": 4.0134445218321044e-06, + "loss": 0.0693511962890625, + "step": 90280 + }, + { + "epoch": 0.7806676985067141, + "grad_norm": 20.255159441188532, + "learning_rate": 4.013252763270876e-06, + "loss": 0.11596641540527344, + "step": 90285 + }, + { + "epoch": 0.7807109320282575, + "grad_norm": 6.092160950232582, + "learning_rate": 4.013061000036615e-06, + "loss": 0.04783935546875, + "step": 90290 + }, + { + "epoch": 0.7807541655498007, + "grad_norm": 12.287937222929434, + "learning_rate": 4.012869232130207e-06, + "loss": 0.10988407135009766, + "step": 90295 + }, + { + "epoch": 0.7807973990713439, + "grad_norm": 0.5743819339359952, + "learning_rate": 4.012677459552536e-06, + "loss": 0.32213478088378905, + "step": 90300 + }, + { + "epoch": 0.7808406325928873, + "grad_norm": 1.5072399598438924, + "learning_rate": 4.012485682304484e-06, + "loss": 0.15868988037109374, + "step": 90305 + }, + { + "epoch": 0.7808838661144305, + "grad_norm": 7.053062232065506, + "learning_rate": 4.01229390038694e-06, + "loss": 0.223828125, + "step": 90310 + }, + { + "epoch": 0.7809270996359737, + "grad_norm": 21.87224859407309, + "learning_rate": 4.0121021138007846e-06, + "loss": 0.409710693359375, + "step": 90315 + }, + { + "epoch": 0.780970333157517, + "grad_norm": 70.51956012646667, + "learning_rate": 4.011910322546904e-06, + "loss": 0.4106964111328125, + "step": 90320 + }, + { + "epoch": 0.7810135666790603, + "grad_norm": 16.350932956870473, + "learning_rate": 4.011718526626182e-06, + "loss": 0.10355682373046875, + "step": 90325 + }, + { + "epoch": 0.7810568002006035, + "grad_norm": 2.083282150274714, + "learning_rate": 4.011526726039505e-06, + "loss": 0.10692825317382812, + "step": 90330 + }, + { + "epoch": 0.7811000337221468, + "grad_norm": 2.2567912048373673, + "learning_rate": 4.011334920787755e-06, + "loss": 0.111090087890625, + "step": 90335 + }, + { + "epoch": 0.7811432672436901, + "grad_norm": 4.913758596859692, + "learning_rate": 4.0111431108718175e-06, + "loss": 0.05275382995605469, + "step": 90340 + }, + { + "epoch": 0.7811865007652333, + "grad_norm": 0.965191434353829, + "learning_rate": 4.010951296292578e-06, + "loss": 0.09029388427734375, + "step": 90345 + }, + { + "epoch": 0.7812297342867766, + "grad_norm": 40.25189110108268, + "learning_rate": 4.0107594770509194e-06, + "loss": 0.29041366577148436, + "step": 90350 + }, + { + "epoch": 0.7812729678083199, + "grad_norm": 13.837943012123713, + "learning_rate": 4.010567653147728e-06, + "loss": 0.12945556640625, + "step": 90355 + }, + { + "epoch": 0.7813162013298631, + "grad_norm": 1.0516026909944007, + "learning_rate": 4.010375824583889e-06, + "loss": 0.12261123657226562, + "step": 90360 + }, + { + "epoch": 0.7813594348514064, + "grad_norm": 1.06638852061575, + "learning_rate": 4.010183991360284e-06, + "loss": 0.1159027099609375, + "step": 90365 + }, + { + "epoch": 0.7814026683729497, + "grad_norm": 7.212933339160301, + "learning_rate": 4.009992153477801e-06, + "loss": 0.18177642822265624, + "step": 90370 + }, + { + "epoch": 0.7814459018944929, + "grad_norm": 0.1493173629362914, + "learning_rate": 4.009800310937323e-06, + "loss": 0.09041519165039062, + "step": 90375 + }, + { + "epoch": 0.7814891354160362, + "grad_norm": 1.1546775169461316, + "learning_rate": 4.0096084637397355e-06, + "loss": 0.05145339965820313, + "step": 90380 + }, + { + "epoch": 0.7815323689375795, + "grad_norm": 26.19554997661166, + "learning_rate": 4.009416611885921e-06, + "loss": 0.1072967529296875, + "step": 90385 + }, + { + "epoch": 0.7815756024591227, + "grad_norm": 15.254936539727511, + "learning_rate": 4.009224755376769e-06, + "loss": 0.14724884033203126, + "step": 90390 + }, + { + "epoch": 0.781618835980666, + "grad_norm": 3.859444768217611, + "learning_rate": 4.009032894213161e-06, + "loss": 0.08732528686523437, + "step": 90395 + }, + { + "epoch": 0.7816620695022092, + "grad_norm": 3.7135879314216242, + "learning_rate": 4.008841028395983e-06, + "loss": 0.06053466796875, + "step": 90400 + }, + { + "epoch": 0.7817053030237525, + "grad_norm": 11.290083326352125, + "learning_rate": 4.0086491579261166e-06, + "loss": 0.2322052001953125, + "step": 90405 + }, + { + "epoch": 0.7817485365452957, + "grad_norm": 45.43840868867651, + "learning_rate": 4.008457282804451e-06, + "loss": 0.16092681884765625, + "step": 90410 + }, + { + "epoch": 0.781791770066839, + "grad_norm": 15.92862307257568, + "learning_rate": 4.008265403031871e-06, + "loss": 0.11818695068359375, + "step": 90415 + }, + { + "epoch": 0.7818350035883823, + "grad_norm": 3.8049661071536796, + "learning_rate": 4.008073518609258e-06, + "loss": 0.032706069946289065, + "step": 90420 + }, + { + "epoch": 0.7818782371099255, + "grad_norm": 0.08070086494302207, + "learning_rate": 4.0078816295374995e-06, + "loss": 0.09896697998046874, + "step": 90425 + }, + { + "epoch": 0.7819214706314688, + "grad_norm": 8.39448170247019, + "learning_rate": 4.00768973581748e-06, + "loss": 0.07362213134765624, + "step": 90430 + }, + { + "epoch": 0.7819647041530121, + "grad_norm": 10.789224348701088, + "learning_rate": 4.007497837450085e-06, + "loss": 0.05089111328125, + "step": 90435 + }, + { + "epoch": 0.7820079376745553, + "grad_norm": 1.207256989830795, + "learning_rate": 4.0073059344361985e-06, + "loss": 0.16277999877929689, + "step": 90440 + }, + { + "epoch": 0.7820511711960986, + "grad_norm": 32.924562399923275, + "learning_rate": 4.007114026776707e-06, + "loss": 0.17047042846679689, + "step": 90445 + }, + { + "epoch": 0.7820944047176419, + "grad_norm": 3.5385070170087083, + "learning_rate": 4.006922114472494e-06, + "loss": 0.06864242553710938, + "step": 90450 + }, + { + "epoch": 0.7821376382391851, + "grad_norm": 1.5368426425112691, + "learning_rate": 4.006730197524445e-06, + "loss": 0.05593185424804688, + "step": 90455 + }, + { + "epoch": 0.7821808717607284, + "grad_norm": 2.449877329360508, + "learning_rate": 4.006538275933444e-06, + "loss": 0.0353424072265625, + "step": 90460 + }, + { + "epoch": 0.7822241052822717, + "grad_norm": 6.553539815033921, + "learning_rate": 4.00634634970038e-06, + "loss": 0.16443862915039062, + "step": 90465 + }, + { + "epoch": 0.7822673388038149, + "grad_norm": 20.035111171289, + "learning_rate": 4.0061544188261336e-06, + "loss": 0.08084602355957031, + "step": 90470 + }, + { + "epoch": 0.7823105723253582, + "grad_norm": 26.38919890119286, + "learning_rate": 4.005962483311594e-06, + "loss": 0.17704925537109376, + "step": 90475 + }, + { + "epoch": 0.7823538058469015, + "grad_norm": 0.6285107674774141, + "learning_rate": 4.0057705431576415e-06, + "loss": 0.1225830078125, + "step": 90480 + }, + { + "epoch": 0.7823970393684447, + "grad_norm": 6.688996770196111, + "learning_rate": 4.005578598365166e-06, + "loss": 0.21350555419921874, + "step": 90485 + }, + { + "epoch": 0.782440272889988, + "grad_norm": 45.115793121557935, + "learning_rate": 4.00538664893505e-06, + "loss": 0.3116889953613281, + "step": 90490 + }, + { + "epoch": 0.7824835064115312, + "grad_norm": 1.574445946999853, + "learning_rate": 4.005194694868181e-06, + "loss": 0.2849029541015625, + "step": 90495 + }, + { + "epoch": 0.7825267399330745, + "grad_norm": 0.2793539357071149, + "learning_rate": 4.005002736165441e-06, + "loss": 0.0600341796875, + "step": 90500 + }, + { + "epoch": 0.7825699734546178, + "grad_norm": 2.589554164063927, + "learning_rate": 4.004810772827719e-06, + "loss": 0.06023101806640625, + "step": 90505 + }, + { + "epoch": 0.782613206976161, + "grad_norm": 0.45298479838807826, + "learning_rate": 4.004618804855898e-06, + "loss": 0.027822113037109374, + "step": 90510 + }, + { + "epoch": 0.7826564404977043, + "grad_norm": 11.108972143670908, + "learning_rate": 4.0044268322508624e-06, + "loss": 0.07160797119140624, + "step": 90515 + }, + { + "epoch": 0.7826996740192476, + "grad_norm": 0.09882785990384974, + "learning_rate": 4.0042348550135e-06, + "loss": 0.11857681274414063, + "step": 90520 + }, + { + "epoch": 0.7827429075407908, + "grad_norm": 32.01155925181465, + "learning_rate": 4.004042873144695e-06, + "loss": 0.4250762939453125, + "step": 90525 + }, + { + "epoch": 0.7827861410623341, + "grad_norm": 3.007536323469683, + "learning_rate": 4.003850886645334e-06, + "loss": 0.23029327392578125, + "step": 90530 + }, + { + "epoch": 0.7828293745838774, + "grad_norm": 5.538066555321034, + "learning_rate": 4.0036588955163e-06, + "loss": 0.024664306640625, + "step": 90535 + }, + { + "epoch": 0.7828726081054206, + "grad_norm": 4.453638277756146, + "learning_rate": 4.003466899758481e-06, + "loss": 0.2783653259277344, + "step": 90540 + }, + { + "epoch": 0.7829158416269639, + "grad_norm": 9.801615379084668, + "learning_rate": 4.0032748993727605e-06, + "loss": 0.13019752502441406, + "step": 90545 + }, + { + "epoch": 0.7829590751485072, + "grad_norm": 5.462581120584908, + "learning_rate": 4.003082894360024e-06, + "loss": 0.1060150146484375, + "step": 90550 + }, + { + "epoch": 0.7830023086700504, + "grad_norm": 0.9194815332422278, + "learning_rate": 4.002890884721159e-06, + "loss": 0.010027694702148437, + "step": 90555 + }, + { + "epoch": 0.7830455421915937, + "grad_norm": 20.809328558551684, + "learning_rate": 4.002698870457051e-06, + "loss": 0.399591064453125, + "step": 90560 + }, + { + "epoch": 0.783088775713137, + "grad_norm": 2.5379333643124684, + "learning_rate": 4.0025068515685825e-06, + "loss": 0.15049285888671876, + "step": 90565 + }, + { + "epoch": 0.7831320092346802, + "grad_norm": 1.6340282217159818, + "learning_rate": 4.0023148280566426e-06, + "loss": 0.08217697143554688, + "step": 90570 + }, + { + "epoch": 0.7831752427562234, + "grad_norm": 5.4174724824187, + "learning_rate": 4.002122799922114e-06, + "loss": 0.27319908142089844, + "step": 90575 + }, + { + "epoch": 0.7832184762777668, + "grad_norm": 4.05178270747302, + "learning_rate": 4.001930767165884e-06, + "loss": 0.09560546875, + "step": 90580 + }, + { + "epoch": 0.78326170979931, + "grad_norm": 1.2679486378844427, + "learning_rate": 4.001738729788838e-06, + "loss": 0.031322479248046875, + "step": 90585 + }, + { + "epoch": 0.7833049433208532, + "grad_norm": 1.2726487725149414, + "learning_rate": 4.001546687791862e-06, + "loss": 0.5012344360351563, + "step": 90590 + }, + { + "epoch": 0.7833481768423965, + "grad_norm": 2.2580456834348794, + "learning_rate": 4.001354641175839e-06, + "loss": 0.09144668579101563, + "step": 90595 + }, + { + "epoch": 0.7833914103639398, + "grad_norm": 1.4959092228161637, + "learning_rate": 4.00116258994166e-06, + "loss": 0.09307441711425782, + "step": 90600 + }, + { + "epoch": 0.783434643885483, + "grad_norm": 0.8190986623078921, + "learning_rate": 4.000970534090205e-06, + "loss": 0.27287979125976564, + "step": 90605 + }, + { + "epoch": 0.7834778774070263, + "grad_norm": 2.1596730829431063, + "learning_rate": 4.0007784736223626e-06, + "loss": 0.049626922607421874, + "step": 90610 + }, + { + "epoch": 0.7835211109285696, + "grad_norm": 16.96057737104628, + "learning_rate": 4.00058640853902e-06, + "loss": 0.5697128295898437, + "step": 90615 + }, + { + "epoch": 0.7835643444501128, + "grad_norm": 4.699440591267996, + "learning_rate": 4.00039433884106e-06, + "loss": 0.3913902282714844, + "step": 90620 + }, + { + "epoch": 0.7836075779716561, + "grad_norm": 26.88204890147355, + "learning_rate": 4.00020226452937e-06, + "loss": 0.20261363983154296, + "step": 90625 + }, + { + "epoch": 0.7836508114931994, + "grad_norm": 6.245015907665309, + "learning_rate": 4.000010185604835e-06, + "loss": 0.2954986572265625, + "step": 90630 + }, + { + "epoch": 0.7836940450147426, + "grad_norm": 28.688615995224804, + "learning_rate": 3.999818102068342e-06, + "loss": 0.1638580322265625, + "step": 90635 + }, + { + "epoch": 0.7837372785362859, + "grad_norm": 36.126155271617804, + "learning_rate": 3.9996260139207765e-06, + "loss": 0.5602985382080078, + "step": 90640 + }, + { + "epoch": 0.7837805120578292, + "grad_norm": 9.671662299592953, + "learning_rate": 3.999433921163024e-06, + "loss": 0.4033515930175781, + "step": 90645 + }, + { + "epoch": 0.7838237455793724, + "grad_norm": 7.87087666346306, + "learning_rate": 3.999241823795971e-06, + "loss": 0.0513641357421875, + "step": 90650 + }, + { + "epoch": 0.7838669791009157, + "grad_norm": 15.530631209621323, + "learning_rate": 3.999049721820502e-06, + "loss": 0.07274818420410156, + "step": 90655 + }, + { + "epoch": 0.783910212622459, + "grad_norm": 2.0395737275294388, + "learning_rate": 3.998857615237504e-06, + "loss": 0.22672462463378906, + "step": 90660 + }, + { + "epoch": 0.7839534461440022, + "grad_norm": 45.99705460185188, + "learning_rate": 3.998665504047863e-06, + "loss": 0.5990219116210938, + "step": 90665 + }, + { + "epoch": 0.7839966796655454, + "grad_norm": 23.47768186724206, + "learning_rate": 3.998473388252466e-06, + "loss": 0.3524656295776367, + "step": 90670 + }, + { + "epoch": 0.7840399131870888, + "grad_norm": 0.5335639053956388, + "learning_rate": 3.998281267852197e-06, + "loss": 0.26055755615234377, + "step": 90675 + }, + { + "epoch": 0.784083146708632, + "grad_norm": 0.6556720736949628, + "learning_rate": 3.998089142847942e-06, + "loss": 0.64024658203125, + "step": 90680 + }, + { + "epoch": 0.7841263802301752, + "grad_norm": 0.5227683501200041, + "learning_rate": 3.99789701324059e-06, + "loss": 0.09536361694335938, + "step": 90685 + }, + { + "epoch": 0.7841696137517186, + "grad_norm": 3.2184920691652406, + "learning_rate": 3.997704879031024e-06, + "loss": 0.23834457397460937, + "step": 90690 + }, + { + "epoch": 0.7842128472732618, + "grad_norm": 0.050780783981035076, + "learning_rate": 3.997512740220132e-06, + "loss": 0.045251083374023435, + "step": 90695 + }, + { + "epoch": 0.784256080794805, + "grad_norm": 8.150133577571287, + "learning_rate": 3.997320596808799e-06, + "loss": 0.06240921020507813, + "step": 90700 + }, + { + "epoch": 0.7842993143163484, + "grad_norm": 59.79861218043674, + "learning_rate": 3.997128448797912e-06, + "loss": 0.1651885986328125, + "step": 90705 + }, + { + "epoch": 0.7843425478378916, + "grad_norm": 2.171272882211009, + "learning_rate": 3.996936296188357e-06, + "loss": 0.05201568603515625, + "step": 90710 + }, + { + "epoch": 0.7843857813594348, + "grad_norm": 5.25031682543348, + "learning_rate": 3.996744138981018e-06, + "loss": 0.148529052734375, + "step": 90715 + }, + { + "epoch": 0.7844290148809782, + "grad_norm": 15.836052151437904, + "learning_rate": 3.996551977176784e-06, + "loss": 0.1261322021484375, + "step": 90720 + }, + { + "epoch": 0.7844722484025214, + "grad_norm": 153.94663348954637, + "learning_rate": 3.996359810776541e-06, + "loss": 0.16779632568359376, + "step": 90725 + }, + { + "epoch": 0.7845154819240646, + "grad_norm": 0.5649021639548997, + "learning_rate": 3.9961676397811744e-06, + "loss": 0.2512969970703125, + "step": 90730 + }, + { + "epoch": 0.784558715445608, + "grad_norm": 25.767366904346076, + "learning_rate": 3.99597546419157e-06, + "loss": 0.21050338745117186, + "step": 90735 + }, + { + "epoch": 0.7846019489671512, + "grad_norm": 38.8794779881181, + "learning_rate": 3.995783284008616e-06, + "loss": 0.5002166748046875, + "step": 90740 + }, + { + "epoch": 0.7846451824886944, + "grad_norm": 3.305913399285433, + "learning_rate": 3.995591099233196e-06, + "loss": 0.042439842224121095, + "step": 90745 + }, + { + "epoch": 0.7846884160102376, + "grad_norm": 55.677654180337726, + "learning_rate": 3.995398909866199e-06, + "loss": 0.611529541015625, + "step": 90750 + }, + { + "epoch": 0.784731649531781, + "grad_norm": 1.2472567047452658, + "learning_rate": 3.99520671590851e-06, + "loss": 0.1470874786376953, + "step": 90755 + }, + { + "epoch": 0.7847748830533242, + "grad_norm": 1.0568924514381277, + "learning_rate": 3.995014517361015e-06, + "loss": 0.045467376708984375, + "step": 90760 + }, + { + "epoch": 0.7848181165748674, + "grad_norm": 34.31718457394804, + "learning_rate": 3.9948223142246015e-06, + "loss": 0.30886077880859375, + "step": 90765 + }, + { + "epoch": 0.7848613500964108, + "grad_norm": 5.805867508180952, + "learning_rate": 3.994630106500156e-06, + "loss": 0.33228759765625, + "step": 90770 + }, + { + "epoch": 0.784904583617954, + "grad_norm": 9.294184022244371, + "learning_rate": 3.994437894188562e-06, + "loss": 0.11511802673339844, + "step": 90775 + }, + { + "epoch": 0.7849478171394972, + "grad_norm": 22.576552089369383, + "learning_rate": 3.99424567729071e-06, + "loss": 0.2413818359375, + "step": 90780 + }, + { + "epoch": 0.7849910506610406, + "grad_norm": 1.071565565584803, + "learning_rate": 3.994053455807484e-06, + "loss": 0.0942718505859375, + "step": 90785 + }, + { + "epoch": 0.7850342841825838, + "grad_norm": 4.564435525110332, + "learning_rate": 3.993861229739773e-06, + "loss": 0.21370010375976561, + "step": 90790 + }, + { + "epoch": 0.785077517704127, + "grad_norm": 1.0991401772657718, + "learning_rate": 3.993668999088461e-06, + "loss": 0.08862457275390626, + "step": 90795 + }, + { + "epoch": 0.7851207512256704, + "grad_norm": 13.018373927345804, + "learning_rate": 3.993476763854434e-06, + "loss": 0.1986572265625, + "step": 90800 + }, + { + "epoch": 0.7851639847472136, + "grad_norm": 19.312708031980147, + "learning_rate": 3.993284524038581e-06, + "loss": 0.20254440307617189, + "step": 90805 + }, + { + "epoch": 0.7852072182687568, + "grad_norm": 5.570263613425525, + "learning_rate": 3.993092279641788e-06, + "loss": 0.1875223159790039, + "step": 90810 + }, + { + "epoch": 0.7852504517903002, + "grad_norm": 0.8863609683684784, + "learning_rate": 3.99290003066494e-06, + "loss": 0.1059356689453125, + "step": 90815 + }, + { + "epoch": 0.7852936853118434, + "grad_norm": 7.476213647691735, + "learning_rate": 3.992707777108927e-06, + "loss": 0.27857666015625, + "step": 90820 + }, + { + "epoch": 0.7853369188333866, + "grad_norm": 8.588108090755325, + "learning_rate": 3.992515518974632e-06, + "loss": 0.26814727783203124, + "step": 90825 + }, + { + "epoch": 0.78538015235493, + "grad_norm": 8.215113294319357, + "learning_rate": 3.992323256262942e-06, + "loss": 0.11776885986328126, + "step": 90830 + }, + { + "epoch": 0.7854233858764732, + "grad_norm": 17.100362654025204, + "learning_rate": 3.9921309889747465e-06, + "loss": 0.09985198974609374, + "step": 90835 + }, + { + "epoch": 0.7854666193980164, + "grad_norm": 4.911228453559641, + "learning_rate": 3.99193871711093e-06, + "loss": 0.035592842102050784, + "step": 90840 + }, + { + "epoch": 0.7855098529195597, + "grad_norm": 17.949996741464453, + "learning_rate": 3.991746440672381e-06, + "loss": 0.4025543212890625, + "step": 90845 + }, + { + "epoch": 0.785553086441103, + "grad_norm": 6.575891955637586, + "learning_rate": 3.991554159659983e-06, + "loss": 0.09403610229492188, + "step": 90850 + }, + { + "epoch": 0.7855963199626462, + "grad_norm": 97.06017830443135, + "learning_rate": 3.9913618740746244e-06, + "loss": 0.2137542724609375, + "step": 90855 + }, + { + "epoch": 0.7856395534841895, + "grad_norm": 17.423866078957087, + "learning_rate": 3.991169583917195e-06, + "loss": 0.22339954376220703, + "step": 90860 + }, + { + "epoch": 0.7856827870057328, + "grad_norm": 7.503516665696581, + "learning_rate": 3.990977289188577e-06, + "loss": 0.04156494140625, + "step": 90865 + }, + { + "epoch": 0.785726020527276, + "grad_norm": 47.25212327907897, + "learning_rate": 3.99078498988966e-06, + "loss": 0.195074462890625, + "step": 90870 + }, + { + "epoch": 0.7857692540488193, + "grad_norm": 47.45053482054707, + "learning_rate": 3.990592686021331e-06, + "loss": 0.21599884033203126, + "step": 90875 + }, + { + "epoch": 0.7858124875703626, + "grad_norm": 7.92079347256476, + "learning_rate": 3.990400377584475e-06, + "loss": 0.19674835205078126, + "step": 90880 + }, + { + "epoch": 0.7858557210919058, + "grad_norm": 0.6570848574911309, + "learning_rate": 3.99020806457998e-06, + "loss": 0.1071807861328125, + "step": 90885 + }, + { + "epoch": 0.785898954613449, + "grad_norm": 7.568620203870754, + "learning_rate": 3.990015747008733e-06, + "loss": 0.18911170959472656, + "step": 90890 + }, + { + "epoch": 0.7859421881349924, + "grad_norm": 1.2671571337760792, + "learning_rate": 3.98982342487162e-06, + "loss": 0.08176355361938477, + "step": 90895 + }, + { + "epoch": 0.7859854216565356, + "grad_norm": 8.07383417204101, + "learning_rate": 3.989631098169531e-06, + "loss": 0.11978759765625, + "step": 90900 + }, + { + "epoch": 0.7860286551780788, + "grad_norm": 19.416637495645958, + "learning_rate": 3.98943876690335e-06, + "loss": 0.13754348754882811, + "step": 90905 + }, + { + "epoch": 0.7860718886996222, + "grad_norm": 0.5084436574120145, + "learning_rate": 3.989246431073964e-06, + "loss": 0.0361358642578125, + "step": 90910 + }, + { + "epoch": 0.7861151222211654, + "grad_norm": 0.5770090483611221, + "learning_rate": 3.989054090682261e-06, + "loss": 0.13163604736328124, + "step": 90915 + }, + { + "epoch": 0.7861583557427086, + "grad_norm": 4.5825374588022605, + "learning_rate": 3.988861745729129e-06, + "loss": 0.3604278564453125, + "step": 90920 + }, + { + "epoch": 0.7862015892642519, + "grad_norm": 26.526206906723793, + "learning_rate": 3.988669396215453e-06, + "loss": 0.316455078125, + "step": 90925 + }, + { + "epoch": 0.7862448227857952, + "grad_norm": 4.590203780682916, + "learning_rate": 3.988477042142121e-06, + "loss": 0.1010162353515625, + "step": 90930 + }, + { + "epoch": 0.7862880563073384, + "grad_norm": 0.4522753071018298, + "learning_rate": 3.9882846835100214e-06, + "loss": 0.134967041015625, + "step": 90935 + }, + { + "epoch": 0.7863312898288817, + "grad_norm": 26.959129133660777, + "learning_rate": 3.98809232032004e-06, + "loss": 0.14442596435546876, + "step": 90940 + }, + { + "epoch": 0.786374523350425, + "grad_norm": 5.199203965353083, + "learning_rate": 3.987899952573062e-06, + "loss": 0.07018566131591797, + "step": 90945 + }, + { + "epoch": 0.7864177568719682, + "grad_norm": 56.0733950662856, + "learning_rate": 3.987707580269979e-06, + "loss": 0.5080303192138672, + "step": 90950 + }, + { + "epoch": 0.7864609903935115, + "grad_norm": 1.732125938942808, + "learning_rate": 3.987515203411675e-06, + "loss": 0.10706253051757812, + "step": 90955 + }, + { + "epoch": 0.7865042239150548, + "grad_norm": 0.641105874815421, + "learning_rate": 3.9873228219990385e-06, + "loss": 0.061519622802734375, + "step": 90960 + }, + { + "epoch": 0.786547457436598, + "grad_norm": 3.3062344068048244, + "learning_rate": 3.9871304360329566e-06, + "loss": 0.12974853515625, + "step": 90965 + }, + { + "epoch": 0.7865906909581413, + "grad_norm": 0.05676047951074147, + "learning_rate": 3.986938045514315e-06, + "loss": 0.07764263153076172, + "step": 90970 + }, + { + "epoch": 0.7866339244796846, + "grad_norm": 2.3331901877327037, + "learning_rate": 3.986745650444003e-06, + "loss": 0.1883514404296875, + "step": 90975 + }, + { + "epoch": 0.7866771580012278, + "grad_norm": 8.371870254083259, + "learning_rate": 3.986553250822908e-06, + "loss": 0.06501922607421876, + "step": 90980 + }, + { + "epoch": 0.7867203915227711, + "grad_norm": 2.1904717562832605, + "learning_rate": 3.9863608466519165e-06, + "loss": 0.304058837890625, + "step": 90985 + }, + { + "epoch": 0.7867636250443144, + "grad_norm": 25.84045603279721, + "learning_rate": 3.986168437931915e-06, + "loss": 0.4877952575683594, + "step": 90990 + }, + { + "epoch": 0.7868068585658576, + "grad_norm": 0.3341920519558477, + "learning_rate": 3.9859760246637925e-06, + "loss": 0.1499267578125, + "step": 90995 + }, + { + "epoch": 0.7868500920874009, + "grad_norm": 5.478373645616109, + "learning_rate": 3.985783606848435e-06, + "loss": 0.140142822265625, + "step": 91000 + }, + { + "epoch": 0.7868933256089442, + "grad_norm": 0.11588975443240855, + "learning_rate": 3.985591184486731e-06, + "loss": 0.02693023681640625, + "step": 91005 + }, + { + "epoch": 0.7869365591304874, + "grad_norm": 20.461214541548784, + "learning_rate": 3.985398757579568e-06, + "loss": 0.07443809509277344, + "step": 91010 + }, + { + "epoch": 0.7869797926520307, + "grad_norm": 34.10727445366172, + "learning_rate": 3.985206326127833e-06, + "loss": 0.3679088592529297, + "step": 91015 + }, + { + "epoch": 0.7870230261735739, + "grad_norm": 15.997035860691824, + "learning_rate": 3.9850138901324134e-06, + "loss": 0.10821304321289063, + "step": 91020 + }, + { + "epoch": 0.7870662596951172, + "grad_norm": 16.20439687394996, + "learning_rate": 3.984821449594197e-06, + "loss": 0.1208953857421875, + "step": 91025 + }, + { + "epoch": 0.7871094932166605, + "grad_norm": 1.8324625517874724, + "learning_rate": 3.98462900451407e-06, + "loss": 0.08988761901855469, + "step": 91030 + }, + { + "epoch": 0.7871527267382037, + "grad_norm": 0.1950311218731155, + "learning_rate": 3.9844365548929215e-06, + "loss": 0.09451141357421874, + "step": 91035 + }, + { + "epoch": 0.787195960259747, + "grad_norm": 3.1962239623052864, + "learning_rate": 3.9842441007316385e-06, + "loss": 0.2783172607421875, + "step": 91040 + }, + { + "epoch": 0.7872391937812903, + "grad_norm": 4.937374140657688, + "learning_rate": 3.98405164203111e-06, + "loss": 0.2820220947265625, + "step": 91045 + }, + { + "epoch": 0.7872824273028335, + "grad_norm": 1.4631224475213007, + "learning_rate": 3.983859178792221e-06, + "loss": 0.053985595703125, + "step": 91050 + }, + { + "epoch": 0.7873256608243768, + "grad_norm": 5.318758006839767, + "learning_rate": 3.9836667110158596e-06, + "loss": 0.340869140625, + "step": 91055 + }, + { + "epoch": 0.7873688943459201, + "grad_norm": 6.14149427654213, + "learning_rate": 3.983474238702915e-06, + "loss": 0.08966712951660157, + "step": 91060 + }, + { + "epoch": 0.7874121278674633, + "grad_norm": 6.3862238334274375, + "learning_rate": 3.983281761854276e-06, + "loss": 0.14412384033203124, + "step": 91065 + }, + { + "epoch": 0.7874553613890066, + "grad_norm": 43.57837729186897, + "learning_rate": 3.983089280470826e-06, + "loss": 0.39306640625, + "step": 91070 + }, + { + "epoch": 0.7874985949105499, + "grad_norm": 33.51270726838796, + "learning_rate": 3.982896794553456e-06, + "loss": 0.2915199279785156, + "step": 91075 + }, + { + "epoch": 0.7875418284320931, + "grad_norm": 19.354080817744112, + "learning_rate": 3.982704304103052e-06, + "loss": 0.090576171875, + "step": 91080 + }, + { + "epoch": 0.7875850619536364, + "grad_norm": 1.0834540817826304, + "learning_rate": 3.9825118091205035e-06, + "loss": 0.05582275390625, + "step": 91085 + }, + { + "epoch": 0.7876282954751797, + "grad_norm": 19.28997563136317, + "learning_rate": 3.982319309606697e-06, + "loss": 0.2516204833984375, + "step": 91090 + }, + { + "epoch": 0.7876715289967229, + "grad_norm": 4.956290489433566, + "learning_rate": 3.9821268055625215e-06, + "loss": 0.057323455810546875, + "step": 91095 + }, + { + "epoch": 0.7877147625182661, + "grad_norm": 3.73209389210799, + "learning_rate": 3.981934296988863e-06, + "loss": 0.41878280639648435, + "step": 91100 + }, + { + "epoch": 0.7877579960398094, + "grad_norm": 3.2483289857260473, + "learning_rate": 3.9817417838866105e-06, + "loss": 0.39466552734375, + "step": 91105 + }, + { + "epoch": 0.7878012295613527, + "grad_norm": 11.987110165395682, + "learning_rate": 3.9815492662566515e-06, + "loss": 0.1601862907409668, + "step": 91110 + }, + { + "epoch": 0.7878444630828959, + "grad_norm": 14.372858576898546, + "learning_rate": 3.981356744099875e-06, + "loss": 0.07247581481933593, + "step": 91115 + }, + { + "epoch": 0.7878876966044392, + "grad_norm": 0.4144956986256364, + "learning_rate": 3.9811642174171665e-06, + "loss": 0.42449951171875, + "step": 91120 + }, + { + "epoch": 0.7879309301259825, + "grad_norm": 1.89030584233363, + "learning_rate": 3.980971686209416e-06, + "loss": 0.10518970489501953, + "step": 91125 + }, + { + "epoch": 0.7879741636475257, + "grad_norm": 23.929176394843186, + "learning_rate": 3.9807791504775115e-06, + "loss": 0.2270751953125, + "step": 91130 + }, + { + "epoch": 0.788017397169069, + "grad_norm": 3.592175451752823, + "learning_rate": 3.9805866102223385e-06, + "loss": 0.18453407287597656, + "step": 91135 + }, + { + "epoch": 0.7880606306906123, + "grad_norm": 12.470642112715167, + "learning_rate": 3.980394065444787e-06, + "loss": 0.30116729736328124, + "step": 91140 + }, + { + "epoch": 0.7881038642121555, + "grad_norm": 7.052243315295674, + "learning_rate": 3.980201516145746e-06, + "loss": 0.03837509155273437, + "step": 91145 + }, + { + "epoch": 0.7881470977336988, + "grad_norm": 23.625308028857518, + "learning_rate": 3.980008962326101e-06, + "loss": 0.21044158935546875, + "step": 91150 + }, + { + "epoch": 0.7881903312552421, + "grad_norm": 12.577102629398322, + "learning_rate": 3.9798164039867424e-06, + "loss": 0.08471832275390626, + "step": 91155 + }, + { + "epoch": 0.7882335647767853, + "grad_norm": 0.9584411335551059, + "learning_rate": 3.979623841128557e-06, + "loss": 0.028966522216796874, + "step": 91160 + }, + { + "epoch": 0.7882767982983286, + "grad_norm": 26.916420308194635, + "learning_rate": 3.979431273752432e-06, + "loss": 0.24544830322265626, + "step": 91165 + }, + { + "epoch": 0.7883200318198719, + "grad_norm": 17.68146108114808, + "learning_rate": 3.979238701859257e-06, + "loss": 0.22431488037109376, + "step": 91170 + }, + { + "epoch": 0.7883632653414151, + "grad_norm": 35.97159189911296, + "learning_rate": 3.97904612544992e-06, + "loss": 0.3497016906738281, + "step": 91175 + }, + { + "epoch": 0.7884064988629584, + "grad_norm": 0.22719955228506938, + "learning_rate": 3.978853544525308e-06, + "loss": 0.39442901611328124, + "step": 91180 + }, + { + "epoch": 0.7884497323845017, + "grad_norm": 9.34155198570733, + "learning_rate": 3.978660959086311e-06, + "loss": 0.09355010986328124, + "step": 91185 + }, + { + "epoch": 0.7884929659060449, + "grad_norm": 5.699179066347239, + "learning_rate": 3.978468369133815e-06, + "loss": 0.059438133239746095, + "step": 91190 + }, + { + "epoch": 0.7885361994275881, + "grad_norm": 0.645165833227507, + "learning_rate": 3.97827577466871e-06, + "loss": 0.010140419006347656, + "step": 91195 + }, + { + "epoch": 0.7885794329491315, + "grad_norm": 31.207584276878595, + "learning_rate": 3.978083175691883e-06, + "loss": 0.489776611328125, + "step": 91200 + }, + { + "epoch": 0.7886226664706747, + "grad_norm": 2.70948804795337, + "learning_rate": 3.977890572204224e-06, + "loss": 0.17509765625, + "step": 91205 + }, + { + "epoch": 0.7886658999922179, + "grad_norm": 5.209584584591741, + "learning_rate": 3.977697964206619e-06, + "loss": 0.26005401611328127, + "step": 91210 + }, + { + "epoch": 0.7887091335137613, + "grad_norm": 7.896374612731596, + "learning_rate": 3.977505351699958e-06, + "loss": 0.08586044311523437, + "step": 91215 + }, + { + "epoch": 0.7887523670353045, + "grad_norm": 4.786199357391181, + "learning_rate": 3.977312734685129e-06, + "loss": 0.17370338439941407, + "step": 91220 + }, + { + "epoch": 0.7887956005568477, + "grad_norm": 0.7398303969401675, + "learning_rate": 3.977120113163019e-06, + "loss": 0.04054107666015625, + "step": 91225 + }, + { + "epoch": 0.7888388340783911, + "grad_norm": 8.415748410442516, + "learning_rate": 3.976927487134517e-06, + "loss": 0.474713134765625, + "step": 91230 + }, + { + "epoch": 0.7888820675999343, + "grad_norm": 0.36645733233273886, + "learning_rate": 3.976734856600513e-06, + "loss": 0.028873443603515625, + "step": 91235 + }, + { + "epoch": 0.7889253011214775, + "grad_norm": 8.142539194430245, + "learning_rate": 3.976542221561894e-06, + "loss": 0.10324325561523437, + "step": 91240 + }, + { + "epoch": 0.7889685346430209, + "grad_norm": 3.492920138252785, + "learning_rate": 3.976349582019548e-06, + "loss": 0.09098739624023437, + "step": 91245 + }, + { + "epoch": 0.7890117681645641, + "grad_norm": 3.5033132927955752, + "learning_rate": 3.976156937974364e-06, + "loss": 0.23230667114257814, + "step": 91250 + }, + { + "epoch": 0.7890550016861073, + "grad_norm": 2.3253227545754447, + "learning_rate": 3.975964289427231e-06, + "loss": 0.11231689453125, + "step": 91255 + }, + { + "epoch": 0.7890982352076507, + "grad_norm": 8.916734410435133, + "learning_rate": 3.975771636379036e-06, + "loss": 0.3384185791015625, + "step": 91260 + }, + { + "epoch": 0.7891414687291939, + "grad_norm": 4.22299713448698, + "learning_rate": 3.97557897883067e-06, + "loss": 0.16018447875976563, + "step": 91265 + }, + { + "epoch": 0.7891847022507371, + "grad_norm": 4.329326833254968, + "learning_rate": 3.975386316783019e-06, + "loss": 0.13240280151367187, + "step": 91270 + }, + { + "epoch": 0.7892279357722803, + "grad_norm": 1.331920500931994, + "learning_rate": 3.975193650236972e-06, + "loss": 0.1160888671875, + "step": 91275 + }, + { + "epoch": 0.7892711692938237, + "grad_norm": 29.307664835994004, + "learning_rate": 3.975000979193419e-06, + "loss": 0.22346649169921876, + "step": 91280 + }, + { + "epoch": 0.7893144028153669, + "grad_norm": 3.7934197165969095, + "learning_rate": 3.974808303653247e-06, + "loss": 0.37958450317382814, + "step": 91285 + }, + { + "epoch": 0.7893576363369101, + "grad_norm": 10.196221294727764, + "learning_rate": 3.974615623617346e-06, + "loss": 0.1200164794921875, + "step": 91290 + }, + { + "epoch": 0.7894008698584535, + "grad_norm": 0.5136184034852781, + "learning_rate": 3.974422939086603e-06, + "loss": 0.26840362548828123, + "step": 91295 + }, + { + "epoch": 0.7894441033799967, + "grad_norm": 7.387877845805678, + "learning_rate": 3.974230250061908e-06, + "loss": 0.27532958984375, + "step": 91300 + }, + { + "epoch": 0.7894873369015399, + "grad_norm": 4.545234459928419, + "learning_rate": 3.974037556544149e-06, + "loss": 0.15801849365234374, + "step": 91305 + }, + { + "epoch": 0.7895305704230833, + "grad_norm": 0.7571662607215093, + "learning_rate": 3.973844858534215e-06, + "loss": 0.07035751342773437, + "step": 91310 + }, + { + "epoch": 0.7895738039446265, + "grad_norm": 12.165697976641079, + "learning_rate": 3.973652156032994e-06, + "loss": 0.07068252563476562, + "step": 91315 + }, + { + "epoch": 0.7896170374661697, + "grad_norm": 5.414592621943014, + "learning_rate": 3.973459449041376e-06, + "loss": 0.05556640625, + "step": 91320 + }, + { + "epoch": 0.7896602709877131, + "grad_norm": 49.951580055760616, + "learning_rate": 3.973266737560248e-06, + "loss": 0.31055450439453125, + "step": 91325 + }, + { + "epoch": 0.7897035045092563, + "grad_norm": 0.35464474803516144, + "learning_rate": 3.973074021590501e-06, + "loss": 0.0471893310546875, + "step": 91330 + }, + { + "epoch": 0.7897467380307995, + "grad_norm": 43.06516548825522, + "learning_rate": 3.972881301133022e-06, + "loss": 0.293780517578125, + "step": 91335 + }, + { + "epoch": 0.7897899715523429, + "grad_norm": 2.5047755150813433, + "learning_rate": 3.9726885761887005e-06, + "loss": 0.036553955078125, + "step": 91340 + }, + { + "epoch": 0.7898332050738861, + "grad_norm": 5.4427918462054565, + "learning_rate": 3.972495846758426e-06, + "loss": 0.124493408203125, + "step": 91345 + }, + { + "epoch": 0.7898764385954293, + "grad_norm": 1.902391859473415, + "learning_rate": 3.972303112843086e-06, + "loss": 0.25262908935546874, + "step": 91350 + }, + { + "epoch": 0.7899196721169727, + "grad_norm": 15.801058731369862, + "learning_rate": 3.972110374443569e-06, + "loss": 0.1827178955078125, + "step": 91355 + }, + { + "epoch": 0.7899629056385159, + "grad_norm": 17.975866091831097, + "learning_rate": 3.9719176315607665e-06, + "loss": 0.177227783203125, + "step": 91360 + }, + { + "epoch": 0.7900061391600591, + "grad_norm": 12.618618081008206, + "learning_rate": 3.971724884195564e-06, + "loss": 0.12458877563476563, + "step": 91365 + }, + { + "epoch": 0.7900493726816024, + "grad_norm": 0.41277032605727576, + "learning_rate": 3.971532132348854e-06, + "loss": 0.1937164306640625, + "step": 91370 + }, + { + "epoch": 0.7900926062031457, + "grad_norm": 0.9721220843725449, + "learning_rate": 3.971339376021522e-06, + "loss": 0.051250457763671875, + "step": 91375 + }, + { + "epoch": 0.7901358397246889, + "grad_norm": 2.662187150411551, + "learning_rate": 3.9711466152144605e-06, + "loss": 0.0671630859375, + "step": 91380 + }, + { + "epoch": 0.7901790732462322, + "grad_norm": 0.8558042592558373, + "learning_rate": 3.970953849928555e-06, + "loss": 0.06662559509277344, + "step": 91385 + }, + { + "epoch": 0.7902223067677755, + "grad_norm": 3.5476311772600884, + "learning_rate": 3.9707610801646975e-06, + "loss": 0.0917327880859375, + "step": 91390 + }, + { + "epoch": 0.7902655402893187, + "grad_norm": 6.453640485026824, + "learning_rate": 3.9705683059237736e-06, + "loss": 0.3445220947265625, + "step": 91395 + }, + { + "epoch": 0.790308773810862, + "grad_norm": 6.729755761059664, + "learning_rate": 3.970375527206677e-06, + "loss": 0.11634807586669922, + "step": 91400 + }, + { + "epoch": 0.7903520073324053, + "grad_norm": 0.30046426753298, + "learning_rate": 3.970182744014292e-06, + "loss": 0.07432708740234376, + "step": 91405 + }, + { + "epoch": 0.7903952408539485, + "grad_norm": 26.161255068578512, + "learning_rate": 3.969989956347512e-06, + "loss": 0.535943603515625, + "step": 91410 + }, + { + "epoch": 0.7904384743754918, + "grad_norm": 15.27295071145674, + "learning_rate": 3.969797164207222e-06, + "loss": 0.07388381958007813, + "step": 91415 + }, + { + "epoch": 0.7904817078970351, + "grad_norm": 1.575032470263727, + "learning_rate": 3.969604367594314e-06, + "loss": 0.3687347412109375, + "step": 91420 + }, + { + "epoch": 0.7905249414185783, + "grad_norm": 10.189104361709338, + "learning_rate": 3.969411566509676e-06, + "loss": 0.1290863037109375, + "step": 91425 + }, + { + "epoch": 0.7905681749401215, + "grad_norm": 5.15781839761641, + "learning_rate": 3.969218760954198e-06, + "loss": 0.03765602111816406, + "step": 91430 + }, + { + "epoch": 0.7906114084616649, + "grad_norm": 10.828065979500735, + "learning_rate": 3.969025950928768e-06, + "loss": 0.16377487182617187, + "step": 91435 + }, + { + "epoch": 0.7906546419832081, + "grad_norm": 9.232460514344323, + "learning_rate": 3.968833136434277e-06, + "loss": 0.24477825164794922, + "step": 91440 + }, + { + "epoch": 0.7906978755047513, + "grad_norm": 3.5981986480970063, + "learning_rate": 3.968640317471614e-06, + "loss": 0.257177734375, + "step": 91445 + }, + { + "epoch": 0.7907411090262946, + "grad_norm": 3.4318646818560725, + "learning_rate": 3.968447494041665e-06, + "loss": 0.24447021484375, + "step": 91450 + }, + { + "epoch": 0.7907843425478379, + "grad_norm": 11.881783690538475, + "learning_rate": 3.968254666145322e-06, + "loss": 0.05813159942626953, + "step": 91455 + }, + { + "epoch": 0.7908275760693811, + "grad_norm": 1.1996591586763445, + "learning_rate": 3.968061833783476e-06, + "loss": 0.15767364501953124, + "step": 91460 + }, + { + "epoch": 0.7908708095909244, + "grad_norm": 10.430799052562659, + "learning_rate": 3.967868996957013e-06, + "loss": 0.12139968872070313, + "step": 91465 + }, + { + "epoch": 0.7909140431124677, + "grad_norm": 44.24948137837492, + "learning_rate": 3.967676155666824e-06, + "loss": 0.5793479919433594, + "step": 91470 + }, + { + "epoch": 0.7909572766340109, + "grad_norm": 0.9614479259631442, + "learning_rate": 3.967483309913799e-06, + "loss": 0.01674652099609375, + "step": 91475 + }, + { + "epoch": 0.7910005101555542, + "grad_norm": 3.3563849618159054, + "learning_rate": 3.967290459698825e-06, + "loss": 0.1881744384765625, + "step": 91480 + }, + { + "epoch": 0.7910437436770975, + "grad_norm": 6.351080782828436, + "learning_rate": 3.967097605022793e-06, + "loss": 0.020298004150390625, + "step": 91485 + }, + { + "epoch": 0.7910869771986407, + "grad_norm": 31.576782886873833, + "learning_rate": 3.9669047458865934e-06, + "loss": 0.1718017578125, + "step": 91490 + }, + { + "epoch": 0.791130210720184, + "grad_norm": 6.044284109439682, + "learning_rate": 3.9667118822911136e-06, + "loss": 0.091192626953125, + "step": 91495 + }, + { + "epoch": 0.7911734442417273, + "grad_norm": 4.510715738446663, + "learning_rate": 3.966519014237244e-06, + "loss": 0.08173294067382812, + "step": 91500 + }, + { + "epoch": 0.7912166777632705, + "grad_norm": 9.645836413082494, + "learning_rate": 3.9663261417258745e-06, + "loss": 0.167645263671875, + "step": 91505 + }, + { + "epoch": 0.7912599112848138, + "grad_norm": 36.19774885315131, + "learning_rate": 3.966133264757894e-06, + "loss": 0.16673583984375, + "step": 91510 + }, + { + "epoch": 0.7913031448063571, + "grad_norm": 2.0128870040952234, + "learning_rate": 3.965940383334193e-06, + "loss": 0.029930496215820314, + "step": 91515 + }, + { + "epoch": 0.7913463783279003, + "grad_norm": 8.720024523313631, + "learning_rate": 3.965747497455659e-06, + "loss": 0.107269287109375, + "step": 91520 + }, + { + "epoch": 0.7913896118494436, + "grad_norm": 9.931960421758962, + "learning_rate": 3.965554607123184e-06, + "loss": 0.16300029754638673, + "step": 91525 + }, + { + "epoch": 0.7914328453709869, + "grad_norm": 1.2852451833007512, + "learning_rate": 3.9653617123376555e-06, + "loss": 0.03531646728515625, + "step": 91530 + }, + { + "epoch": 0.7914760788925301, + "grad_norm": 1.985082364791297, + "learning_rate": 3.965168813099964e-06, + "loss": 0.040679931640625, + "step": 91535 + }, + { + "epoch": 0.7915193124140734, + "grad_norm": 28.42294578274519, + "learning_rate": 3.964975909411001e-06, + "loss": 0.18140754699707032, + "step": 91540 + }, + { + "epoch": 0.7915625459356166, + "grad_norm": 11.7555259822462, + "learning_rate": 3.964783001271653e-06, + "loss": 0.14832763671875, + "step": 91545 + }, + { + "epoch": 0.7916057794571599, + "grad_norm": 3.279745764183928, + "learning_rate": 3.964590088682811e-06, + "loss": 0.062404632568359375, + "step": 91550 + }, + { + "epoch": 0.7916490129787032, + "grad_norm": 0.43926732034674587, + "learning_rate": 3.964397171645365e-06, + "loss": 0.1508758544921875, + "step": 91555 + }, + { + "epoch": 0.7916922465002464, + "grad_norm": 16.34566795592865, + "learning_rate": 3.9642042501602045e-06, + "loss": 0.14966392517089844, + "step": 91560 + }, + { + "epoch": 0.7917354800217897, + "grad_norm": 2.531247377751099, + "learning_rate": 3.964011324228218e-06, + "loss": 0.2173431396484375, + "step": 91565 + }, + { + "epoch": 0.791778713543333, + "grad_norm": 16.262413130504356, + "learning_rate": 3.9638183938502984e-06, + "loss": 0.41796340942382815, + "step": 91570 + }, + { + "epoch": 0.7918219470648762, + "grad_norm": 0.07668845568066333, + "learning_rate": 3.9636254590273335e-06, + "loss": 0.07015457153320312, + "step": 91575 + }, + { + "epoch": 0.7918651805864195, + "grad_norm": 4.810082405512035, + "learning_rate": 3.963432519760212e-06, + "loss": 0.20470924377441407, + "step": 91580 + }, + { + "epoch": 0.7919084141079628, + "grad_norm": 27.003125319042585, + "learning_rate": 3.963239576049825e-06, + "loss": 0.5227092742919922, + "step": 91585 + }, + { + "epoch": 0.791951647629506, + "grad_norm": 5.910762474523012, + "learning_rate": 3.963046627897062e-06, + "loss": 0.04796905517578125, + "step": 91590 + }, + { + "epoch": 0.7919948811510493, + "grad_norm": 1.65284433490429, + "learning_rate": 3.962853675302813e-06, + "loss": 0.1126678466796875, + "step": 91595 + }, + { + "epoch": 0.7920381146725926, + "grad_norm": 0.6471330717152527, + "learning_rate": 3.962660718267969e-06, + "loss": 0.07863349914550781, + "step": 91600 + }, + { + "epoch": 0.7920813481941358, + "grad_norm": 4.263734566089053, + "learning_rate": 3.962467756793418e-06, + "loss": 0.203857421875, + "step": 91605 + }, + { + "epoch": 0.7921245817156791, + "grad_norm": 0.09012535414071107, + "learning_rate": 3.9622747908800515e-06, + "loss": 0.055757331848144534, + "step": 91610 + }, + { + "epoch": 0.7921678152372224, + "grad_norm": 0.08153489891406154, + "learning_rate": 3.962081820528758e-06, + "loss": 0.2725028991699219, + "step": 91615 + }, + { + "epoch": 0.7922110487587656, + "grad_norm": 21.353460141148407, + "learning_rate": 3.961888845740428e-06, + "loss": 0.1030548095703125, + "step": 91620 + }, + { + "epoch": 0.7922542822803088, + "grad_norm": 7.49504599102257, + "learning_rate": 3.961695866515952e-06, + "loss": 0.2548797607421875, + "step": 91625 + }, + { + "epoch": 0.7922975158018521, + "grad_norm": 23.65321205801235, + "learning_rate": 3.961502882856219e-06, + "loss": 0.1477386474609375, + "step": 91630 + }, + { + "epoch": 0.7923407493233954, + "grad_norm": 16.861256081009913, + "learning_rate": 3.9613098947621215e-06, + "loss": 0.1966114044189453, + "step": 91635 + }, + { + "epoch": 0.7923839828449386, + "grad_norm": 25.666470086723525, + "learning_rate": 3.961116902234546e-06, + "loss": 0.439202880859375, + "step": 91640 + }, + { + "epoch": 0.792427216366482, + "grad_norm": 4.660077768635317, + "learning_rate": 3.960923905274385e-06, + "loss": 0.13492965698242188, + "step": 91645 + }, + { + "epoch": 0.7924704498880252, + "grad_norm": 6.216152276113638, + "learning_rate": 3.960730903882526e-06, + "loss": 0.2209320068359375, + "step": 91650 + }, + { + "epoch": 0.7925136834095684, + "grad_norm": 37.50441842635564, + "learning_rate": 3.960537898059864e-06, + "loss": 0.5791618347167968, + "step": 91655 + }, + { + "epoch": 0.7925569169311117, + "grad_norm": 0.8558071064010078, + "learning_rate": 3.960344887807284e-06, + "loss": 0.08660354614257812, + "step": 91660 + }, + { + "epoch": 0.792600150452655, + "grad_norm": 34.75558791810738, + "learning_rate": 3.960151873125679e-06, + "loss": 0.2762184143066406, + "step": 91665 + }, + { + "epoch": 0.7926433839741982, + "grad_norm": 7.263016339307066, + "learning_rate": 3.959958854015938e-06, + "loss": 0.11201591491699218, + "step": 91670 + }, + { + "epoch": 0.7926866174957415, + "grad_norm": 1.1121219520099226, + "learning_rate": 3.959765830478952e-06, + "loss": 0.112213134765625, + "step": 91675 + }, + { + "epoch": 0.7927298510172848, + "grad_norm": 7.127657564920615, + "learning_rate": 3.9595728025156105e-06, + "loss": 0.32028350830078123, + "step": 91680 + }, + { + "epoch": 0.792773084538828, + "grad_norm": 6.451571165231276, + "learning_rate": 3.959379770126804e-06, + "loss": 0.1359720230102539, + "step": 91685 + }, + { + "epoch": 0.7928163180603713, + "grad_norm": 0.632365783599009, + "learning_rate": 3.959186733313423e-06, + "loss": 0.293511962890625, + "step": 91690 + }, + { + "epoch": 0.7928595515819146, + "grad_norm": 14.7083311546929, + "learning_rate": 3.958993692076358e-06, + "loss": 0.2610599517822266, + "step": 91695 + }, + { + "epoch": 0.7929027851034578, + "grad_norm": 3.0329547679519617, + "learning_rate": 3.958800646416498e-06, + "loss": 0.06611480712890624, + "step": 91700 + }, + { + "epoch": 0.792946018625001, + "grad_norm": 4.133734033636611, + "learning_rate": 3.958607596334734e-06, + "loss": 0.08902816772460938, + "step": 91705 + }, + { + "epoch": 0.7929892521465444, + "grad_norm": 12.947444026735376, + "learning_rate": 3.958414541831957e-06, + "loss": 0.222918701171875, + "step": 91710 + }, + { + "epoch": 0.7930324856680876, + "grad_norm": 11.53967964023931, + "learning_rate": 3.958221482909057e-06, + "loss": 0.1233734130859375, + "step": 91715 + }, + { + "epoch": 0.7930757191896308, + "grad_norm": 12.322378685439531, + "learning_rate": 3.958028419566924e-06, + "loss": 0.10623779296875, + "step": 91720 + }, + { + "epoch": 0.7931189527111742, + "grad_norm": 0.315279857995572, + "learning_rate": 3.957835351806449e-06, + "loss": 0.12517852783203126, + "step": 91725 + }, + { + "epoch": 0.7931621862327174, + "grad_norm": 4.964172661836047, + "learning_rate": 3.957642279628521e-06, + "loss": 0.316375732421875, + "step": 91730 + }, + { + "epoch": 0.7932054197542606, + "grad_norm": 5.945264331835072, + "learning_rate": 3.957449203034032e-06, + "loss": 0.07961044311523438, + "step": 91735 + }, + { + "epoch": 0.793248653275804, + "grad_norm": 17.144032362363042, + "learning_rate": 3.957256122023871e-06, + "loss": 0.165447998046875, + "step": 91740 + }, + { + "epoch": 0.7932918867973472, + "grad_norm": 5.376519900059838, + "learning_rate": 3.957063036598931e-06, + "loss": 0.059635353088378903, + "step": 91745 + }, + { + "epoch": 0.7933351203188904, + "grad_norm": 5.032148711869542, + "learning_rate": 3.9568699467601005e-06, + "loss": 0.084503173828125, + "step": 91750 + }, + { + "epoch": 0.7933783538404338, + "grad_norm": 3.767143114606515, + "learning_rate": 3.956676852508269e-06, + "loss": 0.15135650634765624, + "step": 91755 + }, + { + "epoch": 0.793421587361977, + "grad_norm": 7.635294140602445, + "learning_rate": 3.956483753844329e-06, + "loss": 0.11248779296875, + "step": 91760 + }, + { + "epoch": 0.7934648208835202, + "grad_norm": 12.038453477835342, + "learning_rate": 3.9562906507691714e-06, + "loss": 0.08369789123535157, + "step": 91765 + }, + { + "epoch": 0.7935080544050636, + "grad_norm": 6.89794085689632, + "learning_rate": 3.956097543283685e-06, + "loss": 0.05982131958007812, + "step": 91770 + }, + { + "epoch": 0.7935512879266068, + "grad_norm": 4.113408722269233, + "learning_rate": 3.955904431388761e-06, + "loss": 0.045970916748046875, + "step": 91775 + }, + { + "epoch": 0.79359452144815, + "grad_norm": 4.942107068738371, + "learning_rate": 3.955711315085291e-06, + "loss": 0.15320243835449218, + "step": 91780 + }, + { + "epoch": 0.7936377549696934, + "grad_norm": 2.4192297264583633, + "learning_rate": 3.955518194374164e-06, + "loss": 0.286065673828125, + "step": 91785 + }, + { + "epoch": 0.7936809884912366, + "grad_norm": 37.377024939046606, + "learning_rate": 3.955325069256272e-06, + "loss": 0.36750946044921873, + "step": 91790 + }, + { + "epoch": 0.7937242220127798, + "grad_norm": 23.15582959594257, + "learning_rate": 3.955131939732505e-06, + "loss": 0.24381141662597655, + "step": 91795 + }, + { + "epoch": 0.793767455534323, + "grad_norm": 12.389876868586741, + "learning_rate": 3.954938805803755e-06, + "loss": 0.07929763793945313, + "step": 91800 + }, + { + "epoch": 0.7938106890558664, + "grad_norm": 1.4508810827976637, + "learning_rate": 3.95474566747091e-06, + "loss": 0.10367774963378906, + "step": 91805 + }, + { + "epoch": 0.7938539225774096, + "grad_norm": 1.7595199816906013, + "learning_rate": 3.954552524734863e-06, + "loss": 0.2486663818359375, + "step": 91810 + }, + { + "epoch": 0.7938971560989528, + "grad_norm": 5.043576982771422, + "learning_rate": 3.954359377596504e-06, + "loss": 0.03490505218505859, + "step": 91815 + }, + { + "epoch": 0.7939403896204962, + "grad_norm": 0.8109945670979237, + "learning_rate": 3.9541662260567235e-06, + "loss": 0.15265045166015626, + "step": 91820 + }, + { + "epoch": 0.7939836231420394, + "grad_norm": 3.5080848088576713, + "learning_rate": 3.953973070116413e-06, + "loss": 0.09666271209716797, + "step": 91825 + }, + { + "epoch": 0.7940268566635826, + "grad_norm": 1.7401947607023165, + "learning_rate": 3.953779909776463e-06, + "loss": 0.04992218017578125, + "step": 91830 + }, + { + "epoch": 0.794070090185126, + "grad_norm": 0.32914944262528534, + "learning_rate": 3.953586745037765e-06, + "loss": 0.06738739013671875, + "step": 91835 + }, + { + "epoch": 0.7941133237066692, + "grad_norm": 3.1551206792793502, + "learning_rate": 3.9533935759012085e-06, + "loss": 0.06061477661132812, + "step": 91840 + }, + { + "epoch": 0.7941565572282124, + "grad_norm": 1.1313387175605298, + "learning_rate": 3.953200402367685e-06, + "loss": 0.13895950317382813, + "step": 91845 + }, + { + "epoch": 0.7941997907497558, + "grad_norm": 3.971824282479991, + "learning_rate": 3.9530072244380855e-06, + "loss": 0.32372360229492186, + "step": 91850 + }, + { + "epoch": 0.794243024271299, + "grad_norm": 24.472783894475246, + "learning_rate": 3.952814042113302e-06, + "loss": 0.10040283203125, + "step": 91855 + }, + { + "epoch": 0.7942862577928422, + "grad_norm": 0.8301797305419089, + "learning_rate": 3.9526208553942236e-06, + "loss": 0.2682370185852051, + "step": 91860 + }, + { + "epoch": 0.7943294913143856, + "grad_norm": 6.423574244515549, + "learning_rate": 3.952427664281741e-06, + "loss": 0.032201385498046874, + "step": 91865 + }, + { + "epoch": 0.7943727248359288, + "grad_norm": 6.554964514215882, + "learning_rate": 3.952234468776747e-06, + "loss": 0.0794952392578125, + "step": 91870 + }, + { + "epoch": 0.794415958357472, + "grad_norm": 10.940407285190375, + "learning_rate": 3.952041268880131e-06, + "loss": 0.0885009765625, + "step": 91875 + }, + { + "epoch": 0.7944591918790153, + "grad_norm": 1.283455570543051, + "learning_rate": 3.9518480645927866e-06, + "loss": 0.06786079406738281, + "step": 91880 + }, + { + "epoch": 0.7945024254005586, + "grad_norm": 18.872282708230465, + "learning_rate": 3.951654855915602e-06, + "loss": 0.246142578125, + "step": 91885 + }, + { + "epoch": 0.7945456589221018, + "grad_norm": 11.911498726337436, + "learning_rate": 3.95146164284947e-06, + "loss": 0.22452926635742188, + "step": 91890 + }, + { + "epoch": 0.7945888924436451, + "grad_norm": 0.3034218469934945, + "learning_rate": 3.95126842539528e-06, + "loss": 0.12020339965820312, + "step": 91895 + }, + { + "epoch": 0.7946321259651884, + "grad_norm": 3.044585585010917, + "learning_rate": 3.951075203553923e-06, + "loss": 0.4515777587890625, + "step": 91900 + }, + { + "epoch": 0.7946753594867316, + "grad_norm": 1.1197196775384302, + "learning_rate": 3.950881977326293e-06, + "loss": 0.14112415313720703, + "step": 91905 + }, + { + "epoch": 0.7947185930082749, + "grad_norm": 2.165591379280854, + "learning_rate": 3.950688746713279e-06, + "loss": 0.20786552429199218, + "step": 91910 + }, + { + "epoch": 0.7947618265298182, + "grad_norm": 7.090345322075544, + "learning_rate": 3.950495511715772e-06, + "loss": 0.1498199462890625, + "step": 91915 + }, + { + "epoch": 0.7948050600513614, + "grad_norm": 2.044184555330944, + "learning_rate": 3.950302272334664e-06, + "loss": 0.05496406555175781, + "step": 91920 + }, + { + "epoch": 0.7948482935729047, + "grad_norm": 11.68663273441717, + "learning_rate": 3.950109028570846e-06, + "loss": 0.3538238525390625, + "step": 91925 + }, + { + "epoch": 0.794891527094448, + "grad_norm": 13.05128583101453, + "learning_rate": 3.949915780425209e-06, + "loss": 0.04172534942626953, + "step": 91930 + }, + { + "epoch": 0.7949347606159912, + "grad_norm": 17.940368344239577, + "learning_rate": 3.949722527898645e-06, + "loss": 0.061132621765136716, + "step": 91935 + }, + { + "epoch": 0.7949779941375344, + "grad_norm": 26.267137811968777, + "learning_rate": 3.949529270992044e-06, + "loss": 0.1794647216796875, + "step": 91940 + }, + { + "epoch": 0.7950212276590778, + "grad_norm": 24.634151188149296, + "learning_rate": 3.949336009706297e-06, + "loss": 0.19557228088378906, + "step": 91945 + }, + { + "epoch": 0.795064461180621, + "grad_norm": 0.47195786863129396, + "learning_rate": 3.9491427440422975e-06, + "loss": 0.14387435913085939, + "step": 91950 + }, + { + "epoch": 0.7951076947021642, + "grad_norm": 8.42704713514622, + "learning_rate": 3.948949474000935e-06, + "loss": 0.2842529296875, + "step": 91955 + }, + { + "epoch": 0.7951509282237076, + "grad_norm": 8.61961547616732, + "learning_rate": 3.948756199583101e-06, + "loss": 0.0787811279296875, + "step": 91960 + }, + { + "epoch": 0.7951941617452508, + "grad_norm": 1.4395801582192107, + "learning_rate": 3.948562920789688e-06, + "loss": 0.0744293212890625, + "step": 91965 + }, + { + "epoch": 0.795237395266794, + "grad_norm": 3.1822455403646184, + "learning_rate": 3.948369637621587e-06, + "loss": 0.04863128662109375, + "step": 91970 + }, + { + "epoch": 0.7952806287883373, + "grad_norm": 3.519630229205879, + "learning_rate": 3.9481763500796886e-06, + "loss": 0.1580352783203125, + "step": 91975 + }, + { + "epoch": 0.7953238623098806, + "grad_norm": 1.7531322885944813, + "learning_rate": 3.947983058164885e-06, + "loss": 0.07666053771972656, + "step": 91980 + }, + { + "epoch": 0.7953670958314238, + "grad_norm": 37.42820523534794, + "learning_rate": 3.947789761878066e-06, + "loss": 0.5689834594726563, + "step": 91985 + }, + { + "epoch": 0.7954103293529671, + "grad_norm": 39.5406662924775, + "learning_rate": 3.947596461220125e-06, + "loss": 0.7550537109375, + "step": 91990 + }, + { + "epoch": 0.7954535628745104, + "grad_norm": 24.836022053805536, + "learning_rate": 3.947403156191953e-06, + "loss": 0.25864219665527344, + "step": 91995 + }, + { + "epoch": 0.7954967963960536, + "grad_norm": 17.04352538971263, + "learning_rate": 3.947209846794441e-06, + "loss": 0.12861404418945313, + "step": 92000 + }, + { + "epoch": 0.7955400299175969, + "grad_norm": 18.8951076120696, + "learning_rate": 3.947016533028482e-06, + "loss": 0.29153594970703123, + "step": 92005 + }, + { + "epoch": 0.7955832634391402, + "grad_norm": 0.5710015320576659, + "learning_rate": 3.9468232148949655e-06, + "loss": 0.05738525390625, + "step": 92010 + }, + { + "epoch": 0.7956264969606834, + "grad_norm": 0.8650558893219394, + "learning_rate": 3.9466298923947835e-06, + "loss": 0.010145187377929688, + "step": 92015 + }, + { + "epoch": 0.7956697304822267, + "grad_norm": 0.7967576353056932, + "learning_rate": 3.946436565528829e-06, + "loss": 0.15005836486816407, + "step": 92020 + }, + { + "epoch": 0.79571296400377, + "grad_norm": 6.342254080345691, + "learning_rate": 3.9462432342979925e-06, + "loss": 0.20083217620849608, + "step": 92025 + }, + { + "epoch": 0.7957561975253132, + "grad_norm": 3.923468673740947, + "learning_rate": 3.9460498987031665e-06, + "loss": 0.03329925537109375, + "step": 92030 + }, + { + "epoch": 0.7957994310468565, + "grad_norm": 7.496010591659415, + "learning_rate": 3.945856558745241e-06, + "loss": 0.2939056396484375, + "step": 92035 + }, + { + "epoch": 0.7958426645683998, + "grad_norm": 2.5491697033919722, + "learning_rate": 3.945663214425109e-06, + "loss": 0.0554351806640625, + "step": 92040 + }, + { + "epoch": 0.795885898089943, + "grad_norm": 1.4423084093871463, + "learning_rate": 3.945469865743661e-06, + "loss": 0.1406890869140625, + "step": 92045 + }, + { + "epoch": 0.7959291316114863, + "grad_norm": 47.43040219759182, + "learning_rate": 3.94527651270179e-06, + "loss": 0.1950042724609375, + "step": 92050 + }, + { + "epoch": 0.7959723651330295, + "grad_norm": 30.586692505118396, + "learning_rate": 3.945083155300388e-06, + "loss": 0.45824127197265624, + "step": 92055 + }, + { + "epoch": 0.7960155986545728, + "grad_norm": 7.775913038333262, + "learning_rate": 3.944889793540345e-06, + "loss": 0.44871826171875, + "step": 92060 + }, + { + "epoch": 0.7960588321761161, + "grad_norm": 24.51594280023347, + "learning_rate": 3.944696427422555e-06, + "loss": 0.04997711181640625, + "step": 92065 + }, + { + "epoch": 0.7961020656976593, + "grad_norm": 1.4399764640941761, + "learning_rate": 3.944503056947907e-06, + "loss": 0.033948516845703124, + "step": 92070 + }, + { + "epoch": 0.7961452992192026, + "grad_norm": 1.8543949279237057, + "learning_rate": 3.944309682117295e-06, + "loss": 0.3985687255859375, + "step": 92075 + }, + { + "epoch": 0.7961885327407459, + "grad_norm": 5.5473956488403, + "learning_rate": 3.944116302931611e-06, + "loss": 0.3039398193359375, + "step": 92080 + }, + { + "epoch": 0.7962317662622891, + "grad_norm": 0.8693657690614326, + "learning_rate": 3.943922919391745e-06, + "loss": 0.04728145599365234, + "step": 92085 + }, + { + "epoch": 0.7962749997838324, + "grad_norm": 30.541051527524075, + "learning_rate": 3.9437295314985895e-06, + "loss": 0.5672515869140625, + "step": 92090 + }, + { + "epoch": 0.7963182333053757, + "grad_norm": 13.062838714646, + "learning_rate": 3.943536139253039e-06, + "loss": 0.09567489624023437, + "step": 92095 + }, + { + "epoch": 0.7963614668269189, + "grad_norm": 0.25169210698401573, + "learning_rate": 3.943342742655981e-06, + "loss": 0.02458019256591797, + "step": 92100 + }, + { + "epoch": 0.7964047003484622, + "grad_norm": 18.82371327375405, + "learning_rate": 3.94314934170831e-06, + "loss": 0.174176025390625, + "step": 92105 + }, + { + "epoch": 0.7964479338700055, + "grad_norm": 0.1700514891090162, + "learning_rate": 3.942955936410918e-06, + "loss": 0.06942634582519532, + "step": 92110 + }, + { + "epoch": 0.7964911673915487, + "grad_norm": 2.7465491757968166, + "learning_rate": 3.942762526764697e-06, + "loss": 0.05448455810546875, + "step": 92115 + }, + { + "epoch": 0.796534400913092, + "grad_norm": 16.64438205391051, + "learning_rate": 3.942569112770537e-06, + "loss": 0.2946758270263672, + "step": 92120 + }, + { + "epoch": 0.7965776344346353, + "grad_norm": 15.39654738296953, + "learning_rate": 3.942375694429333e-06, + "loss": 0.13652496337890624, + "step": 92125 + }, + { + "epoch": 0.7966208679561785, + "grad_norm": 6.130436450424739, + "learning_rate": 3.942182271741975e-06, + "loss": 0.08203926086425781, + "step": 92130 + }, + { + "epoch": 0.7966641014777218, + "grad_norm": 5.022092499586856, + "learning_rate": 3.941988844709356e-06, + "loss": 0.09210205078125, + "step": 92135 + }, + { + "epoch": 0.796707334999265, + "grad_norm": 12.84425479599877, + "learning_rate": 3.9417954133323676e-06, + "loss": 0.15334625244140626, + "step": 92140 + }, + { + "epoch": 0.7967505685208083, + "grad_norm": 1.5310726696603565, + "learning_rate": 3.941601977611902e-06, + "loss": 0.07553749084472657, + "step": 92145 + }, + { + "epoch": 0.7967938020423515, + "grad_norm": 16.673427945982706, + "learning_rate": 3.941408537548851e-06, + "loss": 0.18210830688476562, + "step": 92150 + }, + { + "epoch": 0.7968370355638948, + "grad_norm": 8.158875397536875, + "learning_rate": 3.941215093144107e-06, + "loss": 0.1012664794921875, + "step": 92155 + }, + { + "epoch": 0.7968802690854381, + "grad_norm": 1.219051730373007, + "learning_rate": 3.941021644398562e-06, + "loss": 0.0462738037109375, + "step": 92160 + }, + { + "epoch": 0.7969235026069813, + "grad_norm": 0.23665954420388458, + "learning_rate": 3.940828191313108e-06, + "loss": 0.07619895935058593, + "step": 92165 + }, + { + "epoch": 0.7969667361285246, + "grad_norm": 7.427965147923779, + "learning_rate": 3.940634733888638e-06, + "loss": 0.157464599609375, + "step": 92170 + }, + { + "epoch": 0.7970099696500679, + "grad_norm": 13.324048622453013, + "learning_rate": 3.940441272126045e-06, + "loss": 0.302557373046875, + "step": 92175 + }, + { + "epoch": 0.7970532031716111, + "grad_norm": 1.3748705998659572, + "learning_rate": 3.940247806026218e-06, + "loss": 0.14338531494140624, + "step": 92180 + }, + { + "epoch": 0.7970964366931544, + "grad_norm": 5.112514247645268, + "learning_rate": 3.9400543355900514e-06, + "loss": 0.20716552734375, + "step": 92185 + }, + { + "epoch": 0.7971396702146977, + "grad_norm": 1.2349288084941377, + "learning_rate": 3.939860860818438e-06, + "loss": 0.34558868408203125, + "step": 92190 + }, + { + "epoch": 0.7971829037362409, + "grad_norm": 25.625072812437786, + "learning_rate": 3.93966738171227e-06, + "loss": 0.07572021484375, + "step": 92195 + }, + { + "epoch": 0.7972261372577842, + "grad_norm": 5.902177583681141, + "learning_rate": 3.939473898272438e-06, + "loss": 0.07974281311035156, + "step": 92200 + }, + { + "epoch": 0.7972693707793275, + "grad_norm": 9.13450594310885, + "learning_rate": 3.939280410499835e-06, + "loss": 0.1359100341796875, + "step": 92205 + }, + { + "epoch": 0.7973126043008707, + "grad_norm": 12.038466131093267, + "learning_rate": 3.939086918395355e-06, + "loss": 0.08989677429199219, + "step": 92210 + }, + { + "epoch": 0.797355837822414, + "grad_norm": 4.064042132945264, + "learning_rate": 3.938893421959888e-06, + "loss": 0.0321075439453125, + "step": 92215 + }, + { + "epoch": 0.7973990713439573, + "grad_norm": 1.7636973061850418, + "learning_rate": 3.938699921194328e-06, + "loss": 0.04251279830932617, + "step": 92220 + }, + { + "epoch": 0.7974423048655005, + "grad_norm": 15.28357289525006, + "learning_rate": 3.938506416099567e-06, + "loss": 0.4042167663574219, + "step": 92225 + }, + { + "epoch": 0.7974855383870437, + "grad_norm": 1.737987920966991, + "learning_rate": 3.938312906676497e-06, + "loss": 0.19705657958984374, + "step": 92230 + }, + { + "epoch": 0.7975287719085871, + "grad_norm": 9.421652184774175, + "learning_rate": 3.938119392926011e-06, + "loss": 0.543115234375, + "step": 92235 + }, + { + "epoch": 0.7975720054301303, + "grad_norm": 59.53229223359922, + "learning_rate": 3.937925874849001e-06, + "loss": 0.1609893798828125, + "step": 92240 + }, + { + "epoch": 0.7976152389516735, + "grad_norm": 0.8498541414410632, + "learning_rate": 3.937732352446361e-06, + "loss": 0.1240509033203125, + "step": 92245 + }, + { + "epoch": 0.7976584724732169, + "grad_norm": 7.54731782674105, + "learning_rate": 3.93753882571898e-06, + "loss": 0.09811439514160156, + "step": 92250 + }, + { + "epoch": 0.7977017059947601, + "grad_norm": 1.9647907853895734, + "learning_rate": 3.937345294667755e-06, + "loss": 0.41124420166015624, + "step": 92255 + }, + { + "epoch": 0.7977449395163033, + "grad_norm": 0.7779764536624654, + "learning_rate": 3.937151759293575e-06, + "loss": 0.074932861328125, + "step": 92260 + }, + { + "epoch": 0.7977881730378467, + "grad_norm": 2.841955035384271, + "learning_rate": 3.936958219597334e-06, + "loss": 0.015451812744140625, + "step": 92265 + }, + { + "epoch": 0.7978314065593899, + "grad_norm": 0.6585726145656724, + "learning_rate": 3.9367646755799245e-06, + "loss": 0.1155670166015625, + "step": 92270 + }, + { + "epoch": 0.7978746400809331, + "grad_norm": 39.6316682704821, + "learning_rate": 3.936571127242239e-06, + "loss": 0.2736297607421875, + "step": 92275 + }, + { + "epoch": 0.7979178736024765, + "grad_norm": 14.346871857217266, + "learning_rate": 3.936377574585171e-06, + "loss": 0.12747802734375, + "step": 92280 + }, + { + "epoch": 0.7979611071240197, + "grad_norm": 4.119737317126879, + "learning_rate": 3.936184017609613e-06, + "loss": 0.1461883544921875, + "step": 92285 + }, + { + "epoch": 0.7980043406455629, + "grad_norm": 31.352936409842115, + "learning_rate": 3.935990456316455e-06, + "loss": 0.068408203125, + "step": 92290 + }, + { + "epoch": 0.7980475741671063, + "grad_norm": 0.3209628181752577, + "learning_rate": 3.935796890706593e-06, + "loss": 0.05291423797607422, + "step": 92295 + }, + { + "epoch": 0.7980908076886495, + "grad_norm": 1.1642971053874591, + "learning_rate": 3.935603320780918e-06, + "loss": 0.37959136962890627, + "step": 92300 + }, + { + "epoch": 0.7981340412101927, + "grad_norm": 1.7028876996945617, + "learning_rate": 3.935409746540323e-06, + "loss": 0.30817489624023436, + "step": 92305 + }, + { + "epoch": 0.798177274731736, + "grad_norm": 1.439165308437613, + "learning_rate": 3.935216167985701e-06, + "loss": 0.08478965759277343, + "step": 92310 + }, + { + "epoch": 0.7982205082532793, + "grad_norm": 8.710289990907825, + "learning_rate": 3.935022585117945e-06, + "loss": 0.2690277099609375, + "step": 92315 + }, + { + "epoch": 0.7982637417748225, + "grad_norm": 12.064028148901597, + "learning_rate": 3.934828997937947e-06, + "loss": 0.15971832275390624, + "step": 92320 + }, + { + "epoch": 0.7983069752963657, + "grad_norm": 0.05334734068703679, + "learning_rate": 3.934635406446601e-06, + "loss": 0.07519111633300782, + "step": 92325 + }, + { + "epoch": 0.7983502088179091, + "grad_norm": 30.79284464606657, + "learning_rate": 3.934441810644798e-06, + "loss": 0.4175384521484375, + "step": 92330 + }, + { + "epoch": 0.7983934423394523, + "grad_norm": 3.801779116537227, + "learning_rate": 3.934248210533433e-06, + "loss": 0.2319915771484375, + "step": 92335 + }, + { + "epoch": 0.7984366758609955, + "grad_norm": 24.13800469736774, + "learning_rate": 3.934054606113397e-06, + "loss": 0.307916259765625, + "step": 92340 + }, + { + "epoch": 0.7984799093825389, + "grad_norm": 4.506257468537123, + "learning_rate": 3.933860997385584e-06, + "loss": 0.06680755615234375, + "step": 92345 + }, + { + "epoch": 0.7985231429040821, + "grad_norm": 0.3259549442746635, + "learning_rate": 3.933667384350887e-06, + "loss": 0.18569107055664064, + "step": 92350 + }, + { + "epoch": 0.7985663764256253, + "grad_norm": 5.2124605664563814, + "learning_rate": 3.933473767010198e-06, + "loss": 0.04595947265625, + "step": 92355 + }, + { + "epoch": 0.7986096099471687, + "grad_norm": 1.6002999591779297, + "learning_rate": 3.93328014536441e-06, + "loss": 0.11344451904296875, + "step": 92360 + }, + { + "epoch": 0.7986528434687119, + "grad_norm": 46.01426342036743, + "learning_rate": 3.933086519414418e-06, + "loss": 0.2126678466796875, + "step": 92365 + }, + { + "epoch": 0.7986960769902551, + "grad_norm": 25.361574096558282, + "learning_rate": 3.9328928891611124e-06, + "loss": 0.19990081787109376, + "step": 92370 + }, + { + "epoch": 0.7987393105117985, + "grad_norm": 3.3250240829963027, + "learning_rate": 3.932699254605388e-06, + "loss": 0.20287094116210938, + "step": 92375 + }, + { + "epoch": 0.7987825440333417, + "grad_norm": 11.58288187379728, + "learning_rate": 3.932505615748135e-06, + "loss": 0.079949951171875, + "step": 92380 + }, + { + "epoch": 0.7988257775548849, + "grad_norm": 5.8722131317466735, + "learning_rate": 3.932311972590251e-06, + "loss": 0.2655181884765625, + "step": 92385 + }, + { + "epoch": 0.7988690110764283, + "grad_norm": 0.7803866800265802, + "learning_rate": 3.9321183251326245e-06, + "loss": 0.0449066162109375, + "step": 92390 + }, + { + "epoch": 0.7989122445979715, + "grad_norm": 39.292922849451735, + "learning_rate": 3.9319246733761525e-06, + "loss": 0.42749481201171874, + "step": 92395 + }, + { + "epoch": 0.7989554781195147, + "grad_norm": 13.755140509987424, + "learning_rate": 3.931731017321725e-06, + "loss": 0.13624267578125, + "step": 92400 + }, + { + "epoch": 0.798998711641058, + "grad_norm": 1.273970299604474, + "learning_rate": 3.931537356970236e-06, + "loss": 0.023383331298828126, + "step": 92405 + }, + { + "epoch": 0.7990419451626013, + "grad_norm": 14.637090427867467, + "learning_rate": 3.93134369232258e-06, + "loss": 0.24021949768066406, + "step": 92410 + }, + { + "epoch": 0.7990851786841445, + "grad_norm": 20.05656434451133, + "learning_rate": 3.93115002337965e-06, + "loss": 0.3741485595703125, + "step": 92415 + }, + { + "epoch": 0.7991284122056878, + "grad_norm": 0.4467921421900951, + "learning_rate": 3.9309563501423375e-06, + "loss": 0.05616607666015625, + "step": 92420 + }, + { + "epoch": 0.7991716457272311, + "grad_norm": 7.65103398587203, + "learning_rate": 3.930762672611536e-06, + "loss": 0.15691490173339845, + "step": 92425 + }, + { + "epoch": 0.7992148792487743, + "grad_norm": 18.42180739764779, + "learning_rate": 3.930568990788139e-06, + "loss": 0.501165771484375, + "step": 92430 + }, + { + "epoch": 0.7992581127703176, + "grad_norm": 29.405156055602273, + "learning_rate": 3.930375304673041e-06, + "loss": 0.22984352111816406, + "step": 92435 + }, + { + "epoch": 0.7993013462918609, + "grad_norm": 0.8857785828366646, + "learning_rate": 3.9301816142671335e-06, + "loss": 0.09855842590332031, + "step": 92440 + }, + { + "epoch": 0.7993445798134041, + "grad_norm": 0.17305186977375742, + "learning_rate": 3.9299879195713106e-06, + "loss": 0.09964351654052735, + "step": 92445 + }, + { + "epoch": 0.7993878133349474, + "grad_norm": 4.595243731022256, + "learning_rate": 3.929794220586467e-06, + "loss": 0.1456390380859375, + "step": 92450 + }, + { + "epoch": 0.7994310468564907, + "grad_norm": 5.942740649580259, + "learning_rate": 3.929600517313493e-06, + "loss": 0.178472900390625, + "step": 92455 + }, + { + "epoch": 0.7994742803780339, + "grad_norm": 15.430187798581143, + "learning_rate": 3.929406809753285e-06, + "loss": 0.15454673767089844, + "step": 92460 + }, + { + "epoch": 0.7995175138995771, + "grad_norm": 0.22783441132572155, + "learning_rate": 3.929213097906734e-06, + "loss": 0.14294204711914063, + "step": 92465 + }, + { + "epoch": 0.7995607474211205, + "grad_norm": 6.0271175497669445, + "learning_rate": 3.929019381774734e-06, + "loss": 0.08320159912109375, + "step": 92470 + }, + { + "epoch": 0.7996039809426637, + "grad_norm": 5.437678914011208, + "learning_rate": 3.928825661358179e-06, + "loss": 0.15954132080078126, + "step": 92475 + }, + { + "epoch": 0.799647214464207, + "grad_norm": 0.9253612641925588, + "learning_rate": 3.928631936657963e-06, + "loss": 0.47479896545410155, + "step": 92480 + }, + { + "epoch": 0.7996904479857503, + "grad_norm": 10.426944056722705, + "learning_rate": 3.928438207674977e-06, + "loss": 0.1568511962890625, + "step": 92485 + }, + { + "epoch": 0.7997336815072935, + "grad_norm": 0.909210644618662, + "learning_rate": 3.928244474410118e-06, + "loss": 0.264678955078125, + "step": 92490 + }, + { + "epoch": 0.7997769150288367, + "grad_norm": 3.4302851152836875, + "learning_rate": 3.928050736864276e-06, + "loss": 0.210693359375, + "step": 92495 + }, + { + "epoch": 0.79982014855038, + "grad_norm": 60.50034220352452, + "learning_rate": 3.927856995038347e-06, + "loss": 0.26302375793457033, + "step": 92500 + }, + { + "epoch": 0.7998633820719233, + "grad_norm": 3.7777724742941565, + "learning_rate": 3.927663248933222e-06, + "loss": 0.034039306640625, + "step": 92505 + }, + { + "epoch": 0.7999066155934665, + "grad_norm": 23.33922724250579, + "learning_rate": 3.927469498549797e-06, + "loss": 0.14366607666015624, + "step": 92510 + }, + { + "epoch": 0.7999498491150098, + "grad_norm": 0.07210953608005104, + "learning_rate": 3.927275743888966e-06, + "loss": 0.29071731567382814, + "step": 92515 + }, + { + "epoch": 0.7999930826365531, + "grad_norm": 0.5822423155476799, + "learning_rate": 3.9270819849516186e-06, + "loss": 0.02685127258300781, + "step": 92520 + }, + { + "epoch": 0.8000363161580963, + "grad_norm": 1.6916218608749354, + "learning_rate": 3.9268882217386525e-06, + "loss": 0.16524658203125, + "step": 92525 + }, + { + "epoch": 0.8000795496796396, + "grad_norm": 22.142822138799215, + "learning_rate": 3.926694454250959e-06, + "loss": 0.4348182678222656, + "step": 92530 + }, + { + "epoch": 0.8001227832011829, + "grad_norm": 36.600823601669276, + "learning_rate": 3.926500682489433e-06, + "loss": 0.6305038452148437, + "step": 92535 + }, + { + "epoch": 0.8001660167227261, + "grad_norm": 3.078114642542992, + "learning_rate": 3.926306906454969e-06, + "loss": 0.079742431640625, + "step": 92540 + }, + { + "epoch": 0.8002092502442694, + "grad_norm": 7.268501895306266, + "learning_rate": 3.926113126148458e-06, + "loss": 0.0688934326171875, + "step": 92545 + }, + { + "epoch": 0.8002524837658127, + "grad_norm": 0.6426060544512229, + "learning_rate": 3.925919341570794e-06, + "loss": 0.040313720703125, + "step": 92550 + }, + { + "epoch": 0.8002957172873559, + "grad_norm": 5.409806456682416, + "learning_rate": 3.925725552722874e-06, + "loss": 0.1094573974609375, + "step": 92555 + }, + { + "epoch": 0.8003389508088992, + "grad_norm": 7.351447214148301, + "learning_rate": 3.925531759605589e-06, + "loss": 0.1980926513671875, + "step": 92560 + }, + { + "epoch": 0.8003821843304425, + "grad_norm": 25.42163328835812, + "learning_rate": 3.925337962219833e-06, + "loss": 0.3294792175292969, + "step": 92565 + }, + { + "epoch": 0.8004254178519857, + "grad_norm": 14.99158472966388, + "learning_rate": 3.9251441605665e-06, + "loss": 0.09705772399902343, + "step": 92570 + }, + { + "epoch": 0.800468651373529, + "grad_norm": 1.372209049818296, + "learning_rate": 3.924950354646483e-06, + "loss": 0.0439849853515625, + "step": 92575 + }, + { + "epoch": 0.8005118848950722, + "grad_norm": 0.8953540462281582, + "learning_rate": 3.9247565444606775e-06, + "loss": 0.11642074584960938, + "step": 92580 + }, + { + "epoch": 0.8005551184166155, + "grad_norm": 6.757350826701074, + "learning_rate": 3.924562730009977e-06, + "loss": 0.18437156677246094, + "step": 92585 + }, + { + "epoch": 0.8005983519381588, + "grad_norm": 33.70799256992889, + "learning_rate": 3.924368911295275e-06, + "loss": 0.52410888671875, + "step": 92590 + }, + { + "epoch": 0.800641585459702, + "grad_norm": 1.2823332416116464, + "learning_rate": 3.924175088317465e-06, + "loss": 0.06194610595703125, + "step": 92595 + }, + { + "epoch": 0.8006848189812453, + "grad_norm": 0.5635062924624193, + "learning_rate": 3.9239812610774406e-06, + "loss": 0.06687240600585938, + "step": 92600 + }, + { + "epoch": 0.8007280525027886, + "grad_norm": 1.80541559381667, + "learning_rate": 3.923787429576096e-06, + "loss": 0.14883956909179688, + "step": 92605 + }, + { + "epoch": 0.8007712860243318, + "grad_norm": 16.315970296215383, + "learning_rate": 3.923593593814327e-06, + "loss": 0.4135009765625, + "step": 92610 + }, + { + "epoch": 0.8008145195458751, + "grad_norm": 0.6787537504924882, + "learning_rate": 3.923399753793025e-06, + "loss": 0.06412429809570312, + "step": 92615 + }, + { + "epoch": 0.8008577530674184, + "grad_norm": 13.689856851874847, + "learning_rate": 3.923205909513084e-06, + "loss": 0.1080108642578125, + "step": 92620 + }, + { + "epoch": 0.8009009865889616, + "grad_norm": 4.084245462450997, + "learning_rate": 3.9230120609754005e-06, + "loss": 0.563348388671875, + "step": 92625 + }, + { + "epoch": 0.8009442201105049, + "grad_norm": 1.9960631017762054, + "learning_rate": 3.922818208180866e-06, + "loss": 0.12706737518310546, + "step": 92630 + }, + { + "epoch": 0.8009874536320482, + "grad_norm": 15.426846102434077, + "learning_rate": 3.922624351130376e-06, + "loss": 0.4164558410644531, + "step": 92635 + }, + { + "epoch": 0.8010306871535914, + "grad_norm": 0.5127395067227509, + "learning_rate": 3.922430489824824e-06, + "loss": 0.2246551513671875, + "step": 92640 + }, + { + "epoch": 0.8010739206751347, + "grad_norm": 4.522033395134737, + "learning_rate": 3.922236624265103e-06, + "loss": 0.35846519470214844, + "step": 92645 + }, + { + "epoch": 0.801117154196678, + "grad_norm": 1.321412291506661, + "learning_rate": 3.9220427544521095e-06, + "loss": 0.18853759765625, + "step": 92650 + }, + { + "epoch": 0.8011603877182212, + "grad_norm": 5.791173170118194, + "learning_rate": 3.921848880386736e-06, + "loss": 0.221112060546875, + "step": 92655 + }, + { + "epoch": 0.8012036212397645, + "grad_norm": 53.904198461436685, + "learning_rate": 3.921655002069876e-06, + "loss": 0.23097152709960939, + "step": 92660 + }, + { + "epoch": 0.8012468547613077, + "grad_norm": 3.2602550902590273, + "learning_rate": 3.921461119502426e-06, + "loss": 0.18607120513916015, + "step": 92665 + }, + { + "epoch": 0.801290088282851, + "grad_norm": 17.869585479487768, + "learning_rate": 3.921267232685277e-06, + "loss": 0.1027252197265625, + "step": 92670 + }, + { + "epoch": 0.8013333218043942, + "grad_norm": 8.34773747301781, + "learning_rate": 3.921073341619327e-06, + "loss": 0.2110107421875, + "step": 92675 + }, + { + "epoch": 0.8013765553259375, + "grad_norm": 4.487732916900986, + "learning_rate": 3.9208794463054665e-06, + "loss": 0.21533699035644532, + "step": 92680 + }, + { + "epoch": 0.8014197888474808, + "grad_norm": 1.2614342346238956, + "learning_rate": 3.920685546744593e-06, + "loss": 0.20234947204589843, + "step": 92685 + }, + { + "epoch": 0.801463022369024, + "grad_norm": 5.788851131836947, + "learning_rate": 3.920491642937597e-06, + "loss": 0.13173980712890626, + "step": 92690 + }, + { + "epoch": 0.8015062558905673, + "grad_norm": 0.7226855643848199, + "learning_rate": 3.9202977348853764e-06, + "loss": 0.07695770263671875, + "step": 92695 + }, + { + "epoch": 0.8015494894121106, + "grad_norm": 2.4628015612757816, + "learning_rate": 3.920103822588824e-06, + "loss": 0.0336212158203125, + "step": 92700 + }, + { + "epoch": 0.8015927229336538, + "grad_norm": 35.125354116211064, + "learning_rate": 3.919909906048834e-06, + "loss": 0.445184326171875, + "step": 92705 + }, + { + "epoch": 0.8016359564551971, + "grad_norm": 1.6227807922175927, + "learning_rate": 3.919715985266299e-06, + "loss": 0.1874847412109375, + "step": 92710 + }, + { + "epoch": 0.8016791899767404, + "grad_norm": 1.9577214048566864, + "learning_rate": 3.919522060242117e-06, + "loss": 0.14758739471435547, + "step": 92715 + }, + { + "epoch": 0.8017224234982836, + "grad_norm": 6.429515908187052, + "learning_rate": 3.9193281309771805e-06, + "loss": 0.10803375244140626, + "step": 92720 + }, + { + "epoch": 0.8017656570198269, + "grad_norm": 0.47656764611757696, + "learning_rate": 3.919134197472382e-06, + "loss": 0.4406341552734375, + "step": 92725 + }, + { + "epoch": 0.8018088905413702, + "grad_norm": 0.3647651882349916, + "learning_rate": 3.918940259728621e-06, + "loss": 0.08083877563476563, + "step": 92730 + }, + { + "epoch": 0.8018521240629134, + "grad_norm": 3.702912713693849, + "learning_rate": 3.918746317746787e-06, + "loss": 0.11965103149414062, + "step": 92735 + }, + { + "epoch": 0.8018953575844567, + "grad_norm": 7.213242240606571, + "learning_rate": 3.918552371527775e-06, + "loss": 0.24363765716552735, + "step": 92740 + }, + { + "epoch": 0.801938591106, + "grad_norm": 4.895785217804324, + "learning_rate": 3.918358421072482e-06, + "loss": 0.0635406494140625, + "step": 92745 + }, + { + "epoch": 0.8019818246275432, + "grad_norm": 22.167265027651695, + "learning_rate": 3.9181644663818015e-06, + "loss": 0.23180389404296875, + "step": 92750 + }, + { + "epoch": 0.8020250581490864, + "grad_norm": 5.683842763535096, + "learning_rate": 3.9179705074566264e-06, + "loss": 0.3337574005126953, + "step": 92755 + }, + { + "epoch": 0.8020682916706298, + "grad_norm": 0.8738657003139461, + "learning_rate": 3.917776544297853e-06, + "loss": 0.030842399597167967, + "step": 92760 + }, + { + "epoch": 0.802111525192173, + "grad_norm": 3.88843653926837, + "learning_rate": 3.917582576906375e-06, + "loss": 0.16099853515625, + "step": 92765 + }, + { + "epoch": 0.8021547587137162, + "grad_norm": 6.946679860468267, + "learning_rate": 3.917388605283088e-06, + "loss": 0.0734405517578125, + "step": 92770 + }, + { + "epoch": 0.8021979922352596, + "grad_norm": 34.74940288524373, + "learning_rate": 3.917194629428884e-06, + "loss": 0.4301136016845703, + "step": 92775 + }, + { + "epoch": 0.8022412257568028, + "grad_norm": 0.1315037054201113, + "learning_rate": 3.917000649344662e-06, + "loss": 0.18590593338012695, + "step": 92780 + }, + { + "epoch": 0.802284459278346, + "grad_norm": 1.4163504165973009, + "learning_rate": 3.9168066650313125e-06, + "loss": 0.3841072082519531, + "step": 92785 + }, + { + "epoch": 0.8023276927998894, + "grad_norm": 12.793210915102724, + "learning_rate": 3.916612676489731e-06, + "loss": 0.32764434814453125, + "step": 92790 + }, + { + "epoch": 0.8023709263214326, + "grad_norm": 28.79345202113418, + "learning_rate": 3.916418683720815e-06, + "loss": 0.2957763671875, + "step": 92795 + }, + { + "epoch": 0.8024141598429758, + "grad_norm": 2.680157793209006, + "learning_rate": 3.916224686725456e-06, + "loss": 0.07757110595703125, + "step": 92800 + }, + { + "epoch": 0.8024573933645192, + "grad_norm": 2.7791853119848855, + "learning_rate": 3.916030685504548e-06, + "loss": 0.19139328002929687, + "step": 92805 + }, + { + "epoch": 0.8025006268860624, + "grad_norm": 13.134814735097018, + "learning_rate": 3.91583668005899e-06, + "loss": 0.0387786865234375, + "step": 92810 + }, + { + "epoch": 0.8025438604076056, + "grad_norm": 7.093799941396712, + "learning_rate": 3.915642670389673e-06, + "loss": 0.1705230712890625, + "step": 92815 + }, + { + "epoch": 0.802587093929149, + "grad_norm": 8.057979900457296, + "learning_rate": 3.915448656497493e-06, + "loss": 0.041335105895996094, + "step": 92820 + }, + { + "epoch": 0.8026303274506922, + "grad_norm": 29.9459623875706, + "learning_rate": 3.915254638383345e-06, + "loss": 0.0924224853515625, + "step": 92825 + }, + { + "epoch": 0.8026735609722354, + "grad_norm": 6.600344770095196, + "learning_rate": 3.915060616048123e-06, + "loss": 0.14716072082519532, + "step": 92830 + }, + { + "epoch": 0.8027167944937788, + "grad_norm": 2.4381892409202526, + "learning_rate": 3.914866589492722e-06, + "loss": 0.023974227905273437, + "step": 92835 + }, + { + "epoch": 0.802760028015322, + "grad_norm": 10.36787087844707, + "learning_rate": 3.914672558718037e-06, + "loss": 0.11572265625, + "step": 92840 + }, + { + "epoch": 0.8028032615368652, + "grad_norm": 18.568983762081935, + "learning_rate": 3.914478523724964e-06, + "loss": 0.2902618408203125, + "step": 92845 + }, + { + "epoch": 0.8028464950584084, + "grad_norm": 73.82242988507319, + "learning_rate": 3.914284484514397e-06, + "loss": 0.158636474609375, + "step": 92850 + }, + { + "epoch": 0.8028897285799518, + "grad_norm": 15.48611989951293, + "learning_rate": 3.91409044108723e-06, + "loss": 0.0916168212890625, + "step": 92855 + }, + { + "epoch": 0.802932962101495, + "grad_norm": 10.130314768553765, + "learning_rate": 3.9138963934443576e-06, + "loss": 0.0784820556640625, + "step": 92860 + }, + { + "epoch": 0.8029761956230382, + "grad_norm": 4.348495180295246, + "learning_rate": 3.913702341586678e-06, + "loss": 0.084429931640625, + "step": 92865 + }, + { + "epoch": 0.8030194291445816, + "grad_norm": 0.4819830231097809, + "learning_rate": 3.913508285515083e-06, + "loss": 0.2215362548828125, + "step": 92870 + }, + { + "epoch": 0.8030626626661248, + "grad_norm": 5.346595201857679, + "learning_rate": 3.913314225230468e-06, + "loss": 0.06258926391601563, + "step": 92875 + }, + { + "epoch": 0.803105896187668, + "grad_norm": 21.0906583375151, + "learning_rate": 3.913120160733729e-06, + "loss": 0.0712249755859375, + "step": 92880 + }, + { + "epoch": 0.8031491297092114, + "grad_norm": 6.851277178817505, + "learning_rate": 3.9129260920257604e-06, + "loss": 0.24749755859375, + "step": 92885 + }, + { + "epoch": 0.8031923632307546, + "grad_norm": 15.47923235503727, + "learning_rate": 3.912732019107457e-06, + "loss": 0.3617225646972656, + "step": 92890 + }, + { + "epoch": 0.8032355967522978, + "grad_norm": 7.033865309605283, + "learning_rate": 3.912537941979715e-06, + "loss": 0.2458881378173828, + "step": 92895 + }, + { + "epoch": 0.8032788302738412, + "grad_norm": 5.9751787565918875, + "learning_rate": 3.912343860643428e-06, + "loss": 0.115472412109375, + "step": 92900 + }, + { + "epoch": 0.8033220637953844, + "grad_norm": 5.928507434830138, + "learning_rate": 3.9121497750994925e-06, + "loss": 0.1904205322265625, + "step": 92905 + }, + { + "epoch": 0.8033652973169276, + "grad_norm": 4.160149660705991, + "learning_rate": 3.911955685348802e-06, + "loss": 0.06718330383300782, + "step": 92910 + }, + { + "epoch": 0.803408530838471, + "grad_norm": 7.307107645367684, + "learning_rate": 3.911761591392253e-06, + "loss": 0.1364654541015625, + "step": 92915 + }, + { + "epoch": 0.8034517643600142, + "grad_norm": 5.6568974950677315, + "learning_rate": 3.911567493230739e-06, + "loss": 0.121600341796875, + "step": 92920 + }, + { + "epoch": 0.8034949978815574, + "grad_norm": 0.7841384345480191, + "learning_rate": 3.9113733908651575e-06, + "loss": 0.0443389892578125, + "step": 92925 + }, + { + "epoch": 0.8035382314031007, + "grad_norm": 0.4849895747383709, + "learning_rate": 3.911179284296403e-06, + "loss": 0.16480789184570313, + "step": 92930 + }, + { + "epoch": 0.803581464924644, + "grad_norm": 12.263102971402436, + "learning_rate": 3.910985173525369e-06, + "loss": 0.4254547119140625, + "step": 92935 + }, + { + "epoch": 0.8036246984461872, + "grad_norm": 1.834183742382833, + "learning_rate": 3.910791058552952e-06, + "loss": 0.1211273193359375, + "step": 92940 + }, + { + "epoch": 0.8036679319677305, + "grad_norm": 6.659265811320751, + "learning_rate": 3.910596939380046e-06, + "loss": 0.1618194580078125, + "step": 92945 + }, + { + "epoch": 0.8037111654892738, + "grad_norm": 5.1845743993271, + "learning_rate": 3.910402816007548e-06, + "loss": 0.10523529052734375, + "step": 92950 + }, + { + "epoch": 0.803754399010817, + "grad_norm": 1.9465755781162704, + "learning_rate": 3.9102086884363546e-06, + "loss": 0.08956985473632813, + "step": 92955 + }, + { + "epoch": 0.8037976325323603, + "grad_norm": 11.50332816862376, + "learning_rate": 3.9100145566673575e-06, + "loss": 0.17951202392578125, + "step": 92960 + }, + { + "epoch": 0.8038408660539036, + "grad_norm": 2.7685899403237397, + "learning_rate": 3.909820420701454e-06, + "loss": 0.1736133575439453, + "step": 92965 + }, + { + "epoch": 0.8038840995754468, + "grad_norm": 32.46819877396866, + "learning_rate": 3.909626280539538e-06, + "loss": 0.1958221435546875, + "step": 92970 + }, + { + "epoch": 0.80392733309699, + "grad_norm": 72.58669241609543, + "learning_rate": 3.9094321361825075e-06, + "loss": 0.40289745330810545, + "step": 92975 + }, + { + "epoch": 0.8039705666185334, + "grad_norm": 2.3594755507153655, + "learning_rate": 3.909237987631256e-06, + "loss": 0.086614990234375, + "step": 92980 + }, + { + "epoch": 0.8040138001400766, + "grad_norm": 4.9462867645935304, + "learning_rate": 3.909043834886679e-06, + "loss": 0.11899185180664062, + "step": 92985 + }, + { + "epoch": 0.8040570336616198, + "grad_norm": 3.4840662683701176, + "learning_rate": 3.9088496779496724e-06, + "loss": 0.0849538803100586, + "step": 92990 + }, + { + "epoch": 0.8041002671831632, + "grad_norm": 0.7976939488115742, + "learning_rate": 3.908655516821131e-06, + "loss": 0.20948944091796876, + "step": 92995 + }, + { + "epoch": 0.8041435007047064, + "grad_norm": 1.3270547374338206, + "learning_rate": 3.908461351501951e-06, + "loss": 0.12018470764160157, + "step": 93000 + }, + { + "epoch": 0.8041867342262496, + "grad_norm": 8.188181044851857, + "learning_rate": 3.908267181993027e-06, + "loss": 0.07029190063476562, + "step": 93005 + }, + { + "epoch": 0.804229967747793, + "grad_norm": 1.4222338893668303, + "learning_rate": 3.9080730082952564e-06, + "loss": 0.08461761474609375, + "step": 93010 + }, + { + "epoch": 0.8042732012693362, + "grad_norm": 0.5609570816241793, + "learning_rate": 3.907878830409532e-06, + "loss": 0.17384490966796876, + "step": 93015 + }, + { + "epoch": 0.8043164347908794, + "grad_norm": 9.172318802734543, + "learning_rate": 3.907684648336752e-06, + "loss": 0.0900665283203125, + "step": 93020 + }, + { + "epoch": 0.8043596683124227, + "grad_norm": 15.025119897125803, + "learning_rate": 3.907490462077809e-06, + "loss": 0.150115966796875, + "step": 93025 + }, + { + "epoch": 0.804402901833966, + "grad_norm": 38.590476356526224, + "learning_rate": 3.9072962716336e-06, + "loss": 0.11543350219726563, + "step": 93030 + }, + { + "epoch": 0.8044461353555092, + "grad_norm": 1.8107973047197756, + "learning_rate": 3.907102077005022e-06, + "loss": 0.165899658203125, + "step": 93035 + }, + { + "epoch": 0.8044893688770525, + "grad_norm": 17.663889775891285, + "learning_rate": 3.906907878192969e-06, + "loss": 0.08592376708984376, + "step": 93040 + }, + { + "epoch": 0.8045326023985958, + "grad_norm": 0.6892254635994675, + "learning_rate": 3.906713675198337e-06, + "loss": 0.12420997619628907, + "step": 93045 + }, + { + "epoch": 0.804575835920139, + "grad_norm": 12.101268795723719, + "learning_rate": 3.906519468022022e-06, + "loss": 0.45218048095703123, + "step": 93050 + }, + { + "epoch": 0.8046190694416823, + "grad_norm": 16.426997915485305, + "learning_rate": 3.906325256664919e-06, + "loss": 0.219390869140625, + "step": 93055 + }, + { + "epoch": 0.8046623029632256, + "grad_norm": 49.985006318939234, + "learning_rate": 3.906131041127923e-06, + "loss": 0.17869110107421876, + "step": 93060 + }, + { + "epoch": 0.8047055364847688, + "grad_norm": 11.497065016488836, + "learning_rate": 3.905936821411931e-06, + "loss": 0.15754852294921876, + "step": 93065 + }, + { + "epoch": 0.8047487700063121, + "grad_norm": 14.313935183101233, + "learning_rate": 3.90574259751784e-06, + "loss": 0.11242561340332032, + "step": 93070 + }, + { + "epoch": 0.8047920035278554, + "grad_norm": 24.59239338528484, + "learning_rate": 3.905548369446543e-06, + "loss": 0.178216552734375, + "step": 93075 + }, + { + "epoch": 0.8048352370493986, + "grad_norm": 20.189430848548156, + "learning_rate": 3.9053541371989374e-06, + "loss": 0.06888885498046875, + "step": 93080 + }, + { + "epoch": 0.8048784705709419, + "grad_norm": 15.854818639137621, + "learning_rate": 3.905159900775918e-06, + "loss": 0.17513504028320312, + "step": 93085 + }, + { + "epoch": 0.8049217040924852, + "grad_norm": 1.8664223790526573, + "learning_rate": 3.904965660178381e-06, + "loss": 0.09412841796875, + "step": 93090 + }, + { + "epoch": 0.8049649376140284, + "grad_norm": 0.46994916573473977, + "learning_rate": 3.9047714154072226e-06, + "loss": 0.14540634155273438, + "step": 93095 + }, + { + "epoch": 0.8050081711355717, + "grad_norm": 18.201502333316448, + "learning_rate": 3.904577166463339e-06, + "loss": 0.11110916137695312, + "step": 93100 + }, + { + "epoch": 0.8050514046571149, + "grad_norm": 18.19138875819581, + "learning_rate": 3.904382913347625e-06, + "loss": 0.13332595825195312, + "step": 93105 + }, + { + "epoch": 0.8050946381786582, + "grad_norm": 3.657558558770764, + "learning_rate": 3.904188656060977e-06, + "loss": 0.06890907287597656, + "step": 93110 + }, + { + "epoch": 0.8051378717002015, + "grad_norm": 34.09556876415769, + "learning_rate": 3.90399439460429e-06, + "loss": 0.12557945251464844, + "step": 93115 + }, + { + "epoch": 0.8051811052217447, + "grad_norm": 5.245789843544906, + "learning_rate": 3.903800128978461e-06, + "loss": 0.1195526123046875, + "step": 93120 + }, + { + "epoch": 0.805224338743288, + "grad_norm": 44.80565123500588, + "learning_rate": 3.903605859184387e-06, + "loss": 0.14048004150390625, + "step": 93125 + }, + { + "epoch": 0.8052675722648313, + "grad_norm": 15.352622717724978, + "learning_rate": 3.903411585222962e-06, + "loss": 0.07525787353515626, + "step": 93130 + }, + { + "epoch": 0.8053108057863745, + "grad_norm": 6.003721627816234, + "learning_rate": 3.903217307095082e-06, + "loss": 0.11541824340820313, + "step": 93135 + }, + { + "epoch": 0.8053540393079178, + "grad_norm": 4.216642446839631, + "learning_rate": 3.903023024801644e-06, + "loss": 0.07777481079101563, + "step": 93140 + }, + { + "epoch": 0.805397272829461, + "grad_norm": 4.4834341757931035, + "learning_rate": 3.902828738343543e-06, + "loss": 0.11447906494140625, + "step": 93145 + }, + { + "epoch": 0.8054405063510043, + "grad_norm": 8.614819760387377, + "learning_rate": 3.902634447721676e-06, + "loss": 0.22160720825195312, + "step": 93150 + }, + { + "epoch": 0.8054837398725476, + "grad_norm": 33.957181942118865, + "learning_rate": 3.902440152936939e-06, + "loss": 0.1746673583984375, + "step": 93155 + }, + { + "epoch": 0.8055269733940909, + "grad_norm": 3.493435037711347, + "learning_rate": 3.902245853990228e-06, + "loss": 0.11608047485351562, + "step": 93160 + }, + { + "epoch": 0.8055702069156341, + "grad_norm": 0.37693850428006664, + "learning_rate": 3.902051550882438e-06, + "loss": 0.037200927734375, + "step": 93165 + }, + { + "epoch": 0.8056134404371774, + "grad_norm": 0.3563771521949583, + "learning_rate": 3.9018572436144655e-06, + "loss": 0.0898895263671875, + "step": 93170 + }, + { + "epoch": 0.8056566739587206, + "grad_norm": 27.96937082056711, + "learning_rate": 3.901662932187209e-06, + "loss": 0.2642059326171875, + "step": 93175 + }, + { + "epoch": 0.8056999074802639, + "grad_norm": 0.0424821665256241, + "learning_rate": 3.9014686166015614e-06, + "loss": 0.07700481414794921, + "step": 93180 + }, + { + "epoch": 0.8057431410018071, + "grad_norm": 11.666090780322907, + "learning_rate": 3.901274296858421e-06, + "loss": 0.204150390625, + "step": 93185 + }, + { + "epoch": 0.8057863745233504, + "grad_norm": 5.012100778103978, + "learning_rate": 3.9010799729586825e-06, + "loss": 0.037646484375, + "step": 93190 + }, + { + "epoch": 0.8058296080448937, + "grad_norm": 17.278451496925655, + "learning_rate": 3.900885644903242e-06, + "loss": 0.251873779296875, + "step": 93195 + }, + { + "epoch": 0.8058728415664369, + "grad_norm": 0.973228268423112, + "learning_rate": 3.900691312692999e-06, + "loss": 0.30599250793457033, + "step": 93200 + }, + { + "epoch": 0.8059160750879802, + "grad_norm": 6.487051384838878, + "learning_rate": 3.900496976328844e-06, + "loss": 0.17117462158203126, + "step": 93205 + }, + { + "epoch": 0.8059593086095235, + "grad_norm": 16.62304706560556, + "learning_rate": 3.900302635811679e-06, + "loss": 0.0844329833984375, + "step": 93210 + }, + { + "epoch": 0.8060025421310667, + "grad_norm": 20.700645116078235, + "learning_rate": 3.900108291142397e-06, + "loss": 0.4677764892578125, + "step": 93215 + }, + { + "epoch": 0.80604577565261, + "grad_norm": 68.91262538488043, + "learning_rate": 3.899913942321894e-06, + "loss": 0.22470703125, + "step": 93220 + }, + { + "epoch": 0.8060890091741533, + "grad_norm": 4.896651517944027, + "learning_rate": 3.89971958935107e-06, + "loss": 0.09070968627929688, + "step": 93225 + }, + { + "epoch": 0.8061322426956965, + "grad_norm": 0.9715588712840201, + "learning_rate": 3.899525232230817e-06, + "loss": 0.06591644287109374, + "step": 93230 + }, + { + "epoch": 0.8061754762172398, + "grad_norm": 3.0097794046780213, + "learning_rate": 3.899330870962033e-06, + "loss": 0.0855133056640625, + "step": 93235 + }, + { + "epoch": 0.8062187097387831, + "grad_norm": 2.0759381219710997, + "learning_rate": 3.899136505545615e-06, + "loss": 0.1917083740234375, + "step": 93240 + }, + { + "epoch": 0.8062619432603263, + "grad_norm": 31.404515183346692, + "learning_rate": 3.898942135982459e-06, + "loss": 0.15052108764648436, + "step": 93245 + }, + { + "epoch": 0.8063051767818696, + "grad_norm": 0.2829210708920107, + "learning_rate": 3.89874776227346e-06, + "loss": 0.14804534912109374, + "step": 93250 + }, + { + "epoch": 0.8063484103034129, + "grad_norm": 2.0048835176753674, + "learning_rate": 3.898553384419517e-06, + "loss": 0.03131256103515625, + "step": 93255 + }, + { + "epoch": 0.8063916438249561, + "grad_norm": 0.26016927968616166, + "learning_rate": 3.898359002421526e-06, + "loss": 0.057303237915039065, + "step": 93260 + }, + { + "epoch": 0.8064348773464994, + "grad_norm": 1.2711658144262832, + "learning_rate": 3.89816461628038e-06, + "loss": 0.07011260986328124, + "step": 93265 + }, + { + "epoch": 0.8064781108680427, + "grad_norm": 8.212551889656334, + "learning_rate": 3.89797022599698e-06, + "loss": 0.18810653686523438, + "step": 93270 + }, + { + "epoch": 0.8065213443895859, + "grad_norm": 6.643960064958697, + "learning_rate": 3.897775831572221e-06, + "loss": 0.062786865234375, + "step": 93275 + }, + { + "epoch": 0.8065645779111291, + "grad_norm": 48.171001326407975, + "learning_rate": 3.897581433006999e-06, + "loss": 0.16525192260742189, + "step": 93280 + }, + { + "epoch": 0.8066078114326725, + "grad_norm": 0.34135919010669435, + "learning_rate": 3.89738703030221e-06, + "loss": 0.14232330322265624, + "step": 93285 + }, + { + "epoch": 0.8066510449542157, + "grad_norm": 2.2136577180326107, + "learning_rate": 3.897192623458752e-06, + "loss": 0.0343841552734375, + "step": 93290 + }, + { + "epoch": 0.8066942784757589, + "grad_norm": 2.690720041694937, + "learning_rate": 3.896998212477521e-06, + "loss": 0.088702392578125, + "step": 93295 + }, + { + "epoch": 0.8067375119973023, + "grad_norm": 4.817302869282315, + "learning_rate": 3.896803797359413e-06, + "loss": 0.04210357666015625, + "step": 93300 + }, + { + "epoch": 0.8067807455188455, + "grad_norm": 2.478848391662359, + "learning_rate": 3.896609378105325e-06, + "loss": 0.21518783569335936, + "step": 93305 + }, + { + "epoch": 0.8068239790403887, + "grad_norm": 5.05308709152808, + "learning_rate": 3.896414954716154e-06, + "loss": 0.2027923583984375, + "step": 93310 + }, + { + "epoch": 0.8068672125619321, + "grad_norm": 30.592088716573414, + "learning_rate": 3.896220527192796e-06, + "loss": 0.22081375122070312, + "step": 93315 + }, + { + "epoch": 0.8069104460834753, + "grad_norm": 16.60763852998459, + "learning_rate": 3.896026095536149e-06, + "loss": 0.2583015441894531, + "step": 93320 + }, + { + "epoch": 0.8069536796050185, + "grad_norm": 23.083779354004953, + "learning_rate": 3.895831659747108e-06, + "loss": 0.3263336181640625, + "step": 93325 + }, + { + "epoch": 0.8069969131265619, + "grad_norm": 15.478062010200604, + "learning_rate": 3.895637219826571e-06, + "loss": 0.1894378662109375, + "step": 93330 + }, + { + "epoch": 0.8070401466481051, + "grad_norm": 31.210802382605138, + "learning_rate": 3.895442775775435e-06, + "loss": 0.4121490478515625, + "step": 93335 + }, + { + "epoch": 0.8070833801696483, + "grad_norm": 47.47092550670514, + "learning_rate": 3.895248327594594e-06, + "loss": 0.1677001953125, + "step": 93340 + }, + { + "epoch": 0.8071266136911917, + "grad_norm": 11.574487002670413, + "learning_rate": 3.895053875284948e-06, + "loss": 0.281011962890625, + "step": 93345 + }, + { + "epoch": 0.8071698472127349, + "grad_norm": 0.7320712499364199, + "learning_rate": 3.8948594188473935e-06, + "loss": 0.10705909729003907, + "step": 93350 + }, + { + "epoch": 0.8072130807342781, + "grad_norm": 14.410205571824557, + "learning_rate": 3.8946649582828255e-06, + "loss": 0.2079345703125, + "step": 93355 + }, + { + "epoch": 0.8072563142558213, + "grad_norm": 28.73742991253295, + "learning_rate": 3.894470493592142e-06, + "loss": 0.2060821533203125, + "step": 93360 + }, + { + "epoch": 0.8072995477773647, + "grad_norm": 21.456018418496182, + "learning_rate": 3.894276024776238e-06, + "loss": 0.36858596801757815, + "step": 93365 + }, + { + "epoch": 0.8073427812989079, + "grad_norm": 1.8110821391270333, + "learning_rate": 3.894081551836014e-06, + "loss": 0.15630950927734374, + "step": 93370 + }, + { + "epoch": 0.8073860148204511, + "grad_norm": 12.560553792022706, + "learning_rate": 3.893887074772366e-06, + "loss": 0.05021820068359375, + "step": 93375 + }, + { + "epoch": 0.8074292483419945, + "grad_norm": 15.905913924484667, + "learning_rate": 3.893692593586188e-06, + "loss": 0.08767623901367187, + "step": 93380 + }, + { + "epoch": 0.8074724818635377, + "grad_norm": 11.842681660085725, + "learning_rate": 3.893498108278379e-06, + "loss": 0.3031951904296875, + "step": 93385 + }, + { + "epoch": 0.8075157153850809, + "grad_norm": 2.566694287750582, + "learning_rate": 3.893303618849835e-06, + "loss": 0.0243133544921875, + "step": 93390 + }, + { + "epoch": 0.8075589489066243, + "grad_norm": 1.2410912523180408, + "learning_rate": 3.893109125301453e-06, + "loss": 0.5829540252685547, + "step": 93395 + }, + { + "epoch": 0.8076021824281675, + "grad_norm": 0.529256969542748, + "learning_rate": 3.892914627634133e-06, + "loss": 0.02657318115234375, + "step": 93400 + }, + { + "epoch": 0.8076454159497107, + "grad_norm": 2.303757532122008, + "learning_rate": 3.892720125848769e-06, + "loss": 0.0281219482421875, + "step": 93405 + }, + { + "epoch": 0.8076886494712541, + "grad_norm": 8.577748315724474, + "learning_rate": 3.892525619946257e-06, + "loss": 0.04290618896484375, + "step": 93410 + }, + { + "epoch": 0.8077318829927973, + "grad_norm": 12.024407024403429, + "learning_rate": 3.892331109927497e-06, + "loss": 0.132427978515625, + "step": 93415 + }, + { + "epoch": 0.8077751165143405, + "grad_norm": 12.134906839504316, + "learning_rate": 3.892136595793385e-06, + "loss": 0.1050872802734375, + "step": 93420 + }, + { + "epoch": 0.8078183500358839, + "grad_norm": 24.028162244430664, + "learning_rate": 3.891942077544817e-06, + "loss": 0.242425537109375, + "step": 93425 + }, + { + "epoch": 0.8078615835574271, + "grad_norm": 9.456108633559916, + "learning_rate": 3.891747555182692e-06, + "loss": 0.06472091674804688, + "step": 93430 + }, + { + "epoch": 0.8079048170789703, + "grad_norm": 7.500955108441148, + "learning_rate": 3.891553028707906e-06, + "loss": 0.180828857421875, + "step": 93435 + }, + { + "epoch": 0.8079480506005137, + "grad_norm": 0.38433561553908363, + "learning_rate": 3.891358498121355e-06, + "loss": 0.055941009521484376, + "step": 93440 + }, + { + "epoch": 0.8079912841220569, + "grad_norm": 5.721727621937552, + "learning_rate": 3.891163963423938e-06, + "loss": 0.05187530517578125, + "step": 93445 + }, + { + "epoch": 0.8080345176436001, + "grad_norm": 0.11782998536125992, + "learning_rate": 3.890969424616552e-06, + "loss": 0.0331298828125, + "step": 93450 + }, + { + "epoch": 0.8080777511651434, + "grad_norm": 16.961800312322428, + "learning_rate": 3.890774881700093e-06, + "loss": 0.23558006286621094, + "step": 93455 + }, + { + "epoch": 0.8081209846866867, + "grad_norm": 2.0245982943309517, + "learning_rate": 3.890580334675459e-06, + "loss": 0.02208099365234375, + "step": 93460 + }, + { + "epoch": 0.8081642182082299, + "grad_norm": 4.577207031260676, + "learning_rate": 3.890385783543548e-06, + "loss": 0.2767822265625, + "step": 93465 + }, + { + "epoch": 0.8082074517297732, + "grad_norm": 3.7233705636310805, + "learning_rate": 3.890191228305256e-06, + "loss": 0.08669319152832031, + "step": 93470 + }, + { + "epoch": 0.8082506852513165, + "grad_norm": 8.80448362658953, + "learning_rate": 3.88999666896148e-06, + "loss": 0.057137680053710935, + "step": 93475 + }, + { + "epoch": 0.8082939187728597, + "grad_norm": 2.2160838321531364, + "learning_rate": 3.889802105513119e-06, + "loss": 0.2926490783691406, + "step": 93480 + }, + { + "epoch": 0.808337152294403, + "grad_norm": 11.683577192156843, + "learning_rate": 3.8896075379610695e-06, + "loss": 0.721697998046875, + "step": 93485 + }, + { + "epoch": 0.8083803858159463, + "grad_norm": 1.161938078486382, + "learning_rate": 3.889412966306227e-06, + "loss": 0.4952735900878906, + "step": 93490 + }, + { + "epoch": 0.8084236193374895, + "grad_norm": 8.076876524538699, + "learning_rate": 3.889218390549492e-06, + "loss": 0.08031768798828125, + "step": 93495 + }, + { + "epoch": 0.8084668528590327, + "grad_norm": 7.0708898960664195, + "learning_rate": 3.88902381069176e-06, + "loss": 0.2353302001953125, + "step": 93500 + }, + { + "epoch": 0.8085100863805761, + "grad_norm": 0.7201092830597877, + "learning_rate": 3.8888292267339285e-06, + "loss": 0.16317138671875, + "step": 93505 + }, + { + "epoch": 0.8085533199021193, + "grad_norm": 13.953028414535071, + "learning_rate": 3.888634638676895e-06, + "loss": 0.115899658203125, + "step": 93510 + }, + { + "epoch": 0.8085965534236625, + "grad_norm": 2.5211395184545586, + "learning_rate": 3.8884400465215585e-06, + "loss": 0.228009033203125, + "step": 93515 + }, + { + "epoch": 0.8086397869452059, + "grad_norm": 12.054083160940598, + "learning_rate": 3.888245450268813e-06, + "loss": 0.168463134765625, + "step": 93520 + }, + { + "epoch": 0.8086830204667491, + "grad_norm": 3.6669909733631045, + "learning_rate": 3.888050849919559e-06, + "loss": 0.11962814331054687, + "step": 93525 + }, + { + "epoch": 0.8087262539882923, + "grad_norm": 4.5477237656576195, + "learning_rate": 3.8878562454746934e-06, + "loss": 0.5094406127929687, + "step": 93530 + }, + { + "epoch": 0.8087694875098356, + "grad_norm": 30.219138011514815, + "learning_rate": 3.887661636935113e-06, + "loss": 0.19092864990234376, + "step": 93535 + }, + { + "epoch": 0.8088127210313789, + "grad_norm": 0.19607819739322557, + "learning_rate": 3.887467024301714e-06, + "loss": 0.12241363525390625, + "step": 93540 + }, + { + "epoch": 0.8088559545529221, + "grad_norm": 35.48428474956718, + "learning_rate": 3.8872724075753975e-06, + "loss": 0.222015380859375, + "step": 93545 + }, + { + "epoch": 0.8088991880744654, + "grad_norm": 26.204473696190572, + "learning_rate": 3.887077786757059e-06, + "loss": 0.2583320617675781, + "step": 93550 + }, + { + "epoch": 0.8089424215960087, + "grad_norm": 1.1799766449146576, + "learning_rate": 3.886883161847595e-06, + "loss": 0.20364303588867189, + "step": 93555 + }, + { + "epoch": 0.8089856551175519, + "grad_norm": 0.016832456451574163, + "learning_rate": 3.8866885328479044e-06, + "loss": 0.060635709762573244, + "step": 93560 + }, + { + "epoch": 0.8090288886390952, + "grad_norm": 0.3730598661877132, + "learning_rate": 3.886493899758885e-06, + "loss": 0.4459541320800781, + "step": 93565 + }, + { + "epoch": 0.8090721221606385, + "grad_norm": 1.449622593150568, + "learning_rate": 3.8862992625814335e-06, + "loss": 0.03241043090820313, + "step": 93570 + }, + { + "epoch": 0.8091153556821817, + "grad_norm": 12.284406691118443, + "learning_rate": 3.886104621316449e-06, + "loss": 0.2079132080078125, + "step": 93575 + }, + { + "epoch": 0.809158589203725, + "grad_norm": 3.257302707639821, + "learning_rate": 3.885909975964828e-06, + "loss": 0.18437118530273439, + "step": 93580 + }, + { + "epoch": 0.8092018227252683, + "grad_norm": 16.339901296646936, + "learning_rate": 3.8857153265274676e-06, + "loss": 0.0803009033203125, + "step": 93585 + }, + { + "epoch": 0.8092450562468115, + "grad_norm": 1.731924358093083, + "learning_rate": 3.885520673005267e-06, + "loss": 0.14964447021484376, + "step": 93590 + }, + { + "epoch": 0.8092882897683548, + "grad_norm": 6.0749567157320445, + "learning_rate": 3.885326015399124e-06, + "loss": 0.123468017578125, + "step": 93595 + }, + { + "epoch": 0.8093315232898981, + "grad_norm": 4.163846534336624, + "learning_rate": 3.885131353709935e-06, + "loss": 0.027765655517578126, + "step": 93600 + }, + { + "epoch": 0.8093747568114413, + "grad_norm": 0.8803709542036211, + "learning_rate": 3.8849366879385985e-06, + "loss": 0.1566650390625, + "step": 93605 + }, + { + "epoch": 0.8094179903329846, + "grad_norm": 3.9336732504851635, + "learning_rate": 3.884742018086013e-06, + "loss": 0.04686279296875, + "step": 93610 + }, + { + "epoch": 0.8094612238545279, + "grad_norm": 35.486180199394745, + "learning_rate": 3.884547344153074e-06, + "loss": 0.204888916015625, + "step": 93615 + }, + { + "epoch": 0.8095044573760711, + "grad_norm": 2.66888068343998, + "learning_rate": 3.884352666140681e-06, + "loss": 0.38133773803710935, + "step": 93620 + }, + { + "epoch": 0.8095476908976144, + "grad_norm": 21.367725389185814, + "learning_rate": 3.884157984049732e-06, + "loss": 0.14497337341308594, + "step": 93625 + }, + { + "epoch": 0.8095909244191576, + "grad_norm": 6.933009973814963, + "learning_rate": 3.883963297881125e-06, + "loss": 0.06207275390625, + "step": 93630 + }, + { + "epoch": 0.8096341579407009, + "grad_norm": 16.280378044504676, + "learning_rate": 3.883768607635758e-06, + "loss": 0.052740478515625, + "step": 93635 + }, + { + "epoch": 0.8096773914622442, + "grad_norm": 20.269309213937255, + "learning_rate": 3.883573913314526e-06, + "loss": 0.100860595703125, + "step": 93640 + }, + { + "epoch": 0.8097206249837874, + "grad_norm": 6.562914674859246, + "learning_rate": 3.883379214918331e-06, + "loss": 0.07450103759765625, + "step": 93645 + }, + { + "epoch": 0.8097638585053307, + "grad_norm": 2.7200062428688607, + "learning_rate": 3.883184512448068e-06, + "loss": 0.2732364654541016, + "step": 93650 + }, + { + "epoch": 0.809807092026874, + "grad_norm": 0.7547294323902547, + "learning_rate": 3.882989805904637e-06, + "loss": 0.06562423706054688, + "step": 93655 + }, + { + "epoch": 0.8098503255484172, + "grad_norm": 2.2519916850969612, + "learning_rate": 3.882795095288935e-06, + "loss": 0.28110504150390625, + "step": 93660 + }, + { + "epoch": 0.8098935590699605, + "grad_norm": 1.3726655986935292, + "learning_rate": 3.882600380601859e-06, + "loss": 0.14806365966796875, + "step": 93665 + }, + { + "epoch": 0.8099367925915038, + "grad_norm": 12.455918295389825, + "learning_rate": 3.882405661844309e-06, + "loss": 0.12697296142578124, + "step": 93670 + }, + { + "epoch": 0.809980026113047, + "grad_norm": 29.82951960469572, + "learning_rate": 3.882210939017182e-06, + "loss": 0.203143310546875, + "step": 93675 + }, + { + "epoch": 0.8100232596345903, + "grad_norm": 10.022571051098462, + "learning_rate": 3.882016212121375e-06, + "loss": 0.0531280517578125, + "step": 93680 + }, + { + "epoch": 0.8100664931561335, + "grad_norm": 17.845137789290767, + "learning_rate": 3.881821481157789e-06, + "loss": 0.215899658203125, + "step": 93685 + }, + { + "epoch": 0.8101097266776768, + "grad_norm": 14.096844636684242, + "learning_rate": 3.881626746127319e-06, + "loss": 0.11537017822265624, + "step": 93690 + }, + { + "epoch": 0.8101529601992201, + "grad_norm": 1.6504507357543654, + "learning_rate": 3.881432007030865e-06, + "loss": 0.0470947265625, + "step": 93695 + }, + { + "epoch": 0.8101961937207633, + "grad_norm": 1.0290505146979965, + "learning_rate": 3.881237263869324e-06, + "loss": 0.111956787109375, + "step": 93700 + }, + { + "epoch": 0.8102394272423066, + "grad_norm": 3.17761019862778, + "learning_rate": 3.8810425166435945e-06, + "loss": 0.02568397521972656, + "step": 93705 + }, + { + "epoch": 0.8102826607638498, + "grad_norm": 1.3405917546493356, + "learning_rate": 3.8808477653545745e-06, + "loss": 0.0591156005859375, + "step": 93710 + }, + { + "epoch": 0.8103258942853931, + "grad_norm": 26.261161890007095, + "learning_rate": 3.880653010003163e-06, + "loss": 0.19029541015625, + "step": 93715 + }, + { + "epoch": 0.8103691278069364, + "grad_norm": 0.3024739340501992, + "learning_rate": 3.880458250590259e-06, + "loss": 0.2144073486328125, + "step": 93720 + }, + { + "epoch": 0.8104123613284796, + "grad_norm": 0.22207628995848336, + "learning_rate": 3.880263487116758e-06, + "loss": 0.07282829284667969, + "step": 93725 + }, + { + "epoch": 0.8104555948500229, + "grad_norm": 6.051297383797549, + "learning_rate": 3.880068719583558e-06, + "loss": 0.04142608642578125, + "step": 93730 + }, + { + "epoch": 0.8104988283715662, + "grad_norm": 1.7483445666879909, + "learning_rate": 3.87987394799156e-06, + "loss": 0.3048973083496094, + "step": 93735 + }, + { + "epoch": 0.8105420618931094, + "grad_norm": 9.645805806175318, + "learning_rate": 3.8796791723416615e-06, + "loss": 0.06258544921875, + "step": 93740 + }, + { + "epoch": 0.8105852954146527, + "grad_norm": 0.35190946418333, + "learning_rate": 3.8794843926347606e-06, + "loss": 0.101898193359375, + "step": 93745 + }, + { + "epoch": 0.810628528936196, + "grad_norm": 0.8034243603160742, + "learning_rate": 3.879289608871755e-06, + "loss": 0.4437652587890625, + "step": 93750 + }, + { + "epoch": 0.8106717624577392, + "grad_norm": 26.88993634117472, + "learning_rate": 3.879094821053544e-06, + "loss": 0.12059478759765625, + "step": 93755 + }, + { + "epoch": 0.8107149959792825, + "grad_norm": 8.758918604026176, + "learning_rate": 3.878900029181025e-06, + "loss": 0.3297698974609375, + "step": 93760 + }, + { + "epoch": 0.8107582295008258, + "grad_norm": 2.9104459079320035, + "learning_rate": 3.8787052332550955e-06, + "loss": 0.11863327026367188, + "step": 93765 + }, + { + "epoch": 0.810801463022369, + "grad_norm": 1.122386523215488, + "learning_rate": 3.878510433276657e-06, + "loss": 0.12377166748046875, + "step": 93770 + }, + { + "epoch": 0.8108446965439123, + "grad_norm": 14.73353797997312, + "learning_rate": 3.878315629246605e-06, + "loss": 0.108013916015625, + "step": 93775 + }, + { + "epoch": 0.8108879300654556, + "grad_norm": 20.618432623969735, + "learning_rate": 3.878120821165839e-06, + "loss": 0.1802276611328125, + "step": 93780 + }, + { + "epoch": 0.8109311635869988, + "grad_norm": 0.9432852320023812, + "learning_rate": 3.877926009035258e-06, + "loss": 0.07520179748535157, + "step": 93785 + }, + { + "epoch": 0.8109743971085421, + "grad_norm": 2.793921034305909, + "learning_rate": 3.877731192855758e-06, + "loss": 0.1716644287109375, + "step": 93790 + }, + { + "epoch": 0.8110176306300854, + "grad_norm": 0.6245271856402625, + "learning_rate": 3.877536372628242e-06, + "loss": 0.01890106201171875, + "step": 93795 + }, + { + "epoch": 0.8110608641516286, + "grad_norm": 15.546105430372435, + "learning_rate": 3.8773415483536046e-06, + "loss": 0.12765045166015626, + "step": 93800 + }, + { + "epoch": 0.8111040976731718, + "grad_norm": 0.1841215216439278, + "learning_rate": 3.877146720032745e-06, + "loss": 0.14998779296875, + "step": 93805 + }, + { + "epoch": 0.8111473311947152, + "grad_norm": 5.47310548935682, + "learning_rate": 3.876951887666562e-06, + "loss": 0.08054046630859375, + "step": 93810 + }, + { + "epoch": 0.8111905647162584, + "grad_norm": 0.03990807785573604, + "learning_rate": 3.876757051255955e-06, + "loss": 0.04614524841308594, + "step": 93815 + }, + { + "epoch": 0.8112337982378016, + "grad_norm": 12.65646774553507, + "learning_rate": 3.876562210801822e-06, + "loss": 0.074468994140625, + "step": 93820 + }, + { + "epoch": 0.811277031759345, + "grad_norm": 7.1642065234680015, + "learning_rate": 3.876367366305061e-06, + "loss": 0.03985099792480469, + "step": 93825 + }, + { + "epoch": 0.8113202652808882, + "grad_norm": 1.301436065975622, + "learning_rate": 3.876172517766572e-06, + "loss": 0.11289291381835938, + "step": 93830 + }, + { + "epoch": 0.8113634988024314, + "grad_norm": 8.670769615285504, + "learning_rate": 3.875977665187253e-06, + "loss": 0.08414764404296875, + "step": 93835 + }, + { + "epoch": 0.8114067323239748, + "grad_norm": 1.8495029532072995, + "learning_rate": 3.875782808568001e-06, + "loss": 0.029541015625, + "step": 93840 + }, + { + "epoch": 0.811449965845518, + "grad_norm": 3.459788329068792, + "learning_rate": 3.875587947909716e-06, + "loss": 0.0415740966796875, + "step": 93845 + }, + { + "epoch": 0.8114931993670612, + "grad_norm": 5.701880114598224, + "learning_rate": 3.875393083213299e-06, + "loss": 0.3124504089355469, + "step": 93850 + }, + { + "epoch": 0.8115364328886046, + "grad_norm": 1.4960285225992198, + "learning_rate": 3.875198214479644e-06, + "loss": 0.07677001953125, + "step": 93855 + }, + { + "epoch": 0.8115796664101478, + "grad_norm": 1.024446969909071, + "learning_rate": 3.875003341709654e-06, + "loss": 0.11501836776733398, + "step": 93860 + }, + { + "epoch": 0.811622899931691, + "grad_norm": 15.253310575155568, + "learning_rate": 3.874808464904225e-06, + "loss": 0.0884552001953125, + "step": 93865 + }, + { + "epoch": 0.8116661334532344, + "grad_norm": 17.187984445054077, + "learning_rate": 3.874613584064256e-06, + "loss": 0.1818084716796875, + "step": 93870 + }, + { + "epoch": 0.8117093669747776, + "grad_norm": 1.4064432917101033, + "learning_rate": 3.8744186991906475e-06, + "loss": 0.3840545654296875, + "step": 93875 + }, + { + "epoch": 0.8117526004963208, + "grad_norm": 6.463410424476734, + "learning_rate": 3.874223810284297e-06, + "loss": 0.06781005859375, + "step": 93880 + }, + { + "epoch": 0.811795834017864, + "grad_norm": 27.285129008103777, + "learning_rate": 3.874028917346104e-06, + "loss": 0.21669235229492187, + "step": 93885 + }, + { + "epoch": 0.8118390675394074, + "grad_norm": 0.17598960232940222, + "learning_rate": 3.873834020376966e-06, + "loss": 0.06852607727050782, + "step": 93890 + }, + { + "epoch": 0.8118823010609506, + "grad_norm": 0.8603717827610539, + "learning_rate": 3.873639119377783e-06, + "loss": 0.03620128631591797, + "step": 93895 + }, + { + "epoch": 0.8119255345824938, + "grad_norm": 44.141125331865254, + "learning_rate": 3.873444214349453e-06, + "loss": 0.1391448974609375, + "step": 93900 + }, + { + "epoch": 0.8119687681040372, + "grad_norm": 0.8871566343227499, + "learning_rate": 3.873249305292876e-06, + "loss": 0.025551795959472656, + "step": 93905 + }, + { + "epoch": 0.8120120016255804, + "grad_norm": 30.809674537057393, + "learning_rate": 3.873054392208951e-06, + "loss": 0.224029541015625, + "step": 93910 + }, + { + "epoch": 0.8120552351471236, + "grad_norm": 4.39652009697019, + "learning_rate": 3.872859475098576e-06, + "loss": 0.3383758544921875, + "step": 93915 + }, + { + "epoch": 0.812098468668667, + "grad_norm": 5.649505892622098, + "learning_rate": 3.87266455396265e-06, + "loss": 0.03273162841796875, + "step": 93920 + }, + { + "epoch": 0.8121417021902102, + "grad_norm": 1.6106154345414663, + "learning_rate": 3.872469628802072e-06, + "loss": 0.03134422302246094, + "step": 93925 + }, + { + "epoch": 0.8121849357117534, + "grad_norm": 6.117267202894456, + "learning_rate": 3.872274699617741e-06, + "loss": 0.15774993896484374, + "step": 93930 + }, + { + "epoch": 0.8122281692332968, + "grad_norm": 2.9542475052164807, + "learning_rate": 3.872079766410556e-06, + "loss": 0.13395233154296876, + "step": 93935 + }, + { + "epoch": 0.81227140275484, + "grad_norm": 0.9208480627237198, + "learning_rate": 3.871884829181418e-06, + "loss": 0.23708343505859375, + "step": 93940 + }, + { + "epoch": 0.8123146362763832, + "grad_norm": 5.395772075372919, + "learning_rate": 3.871689887931222e-06, + "loss": 0.39666900634765623, + "step": 93945 + }, + { + "epoch": 0.8123578697979266, + "grad_norm": 1.252584561250449, + "learning_rate": 3.87149494266087e-06, + "loss": 0.11717386245727539, + "step": 93950 + }, + { + "epoch": 0.8124011033194698, + "grad_norm": 2.4243898963637833, + "learning_rate": 3.8712999933712604e-06, + "loss": 0.2141864776611328, + "step": 93955 + }, + { + "epoch": 0.812444336841013, + "grad_norm": 19.05664669262794, + "learning_rate": 3.871105040063292e-06, + "loss": 0.18477783203125, + "step": 93960 + }, + { + "epoch": 0.8124875703625564, + "grad_norm": 18.387458592618568, + "learning_rate": 3.870910082737864e-06, + "loss": 0.3110393524169922, + "step": 93965 + }, + { + "epoch": 0.8125308038840996, + "grad_norm": 0.46277676451301447, + "learning_rate": 3.870715121395877e-06, + "loss": 0.07308807373046874, + "step": 93970 + }, + { + "epoch": 0.8125740374056428, + "grad_norm": 0.6592298523752431, + "learning_rate": 3.870520156038228e-06, + "loss": 0.067376708984375, + "step": 93975 + }, + { + "epoch": 0.812617270927186, + "grad_norm": 0.8799447176407419, + "learning_rate": 3.870325186665817e-06, + "loss": 0.1262359619140625, + "step": 93980 + }, + { + "epoch": 0.8126605044487294, + "grad_norm": 0.6838357381084214, + "learning_rate": 3.870130213279543e-06, + "loss": 0.04998493194580078, + "step": 93985 + }, + { + "epoch": 0.8127037379702726, + "grad_norm": 1.6612806002445704, + "learning_rate": 3.869935235880304e-06, + "loss": 0.09072189331054688, + "step": 93990 + }, + { + "epoch": 0.8127469714918159, + "grad_norm": 4.86738620866758, + "learning_rate": 3.869740254469002e-06, + "loss": 0.18068008422851561, + "step": 93995 + }, + { + "epoch": 0.8127902050133592, + "grad_norm": 0.9662491610140252, + "learning_rate": 3.869545269046535e-06, + "loss": 0.10449790954589844, + "step": 94000 + }, + { + "epoch": 0.8128334385349024, + "grad_norm": 0.4643604450481565, + "learning_rate": 3.869350279613802e-06, + "loss": 0.07886848449707032, + "step": 94005 + }, + { + "epoch": 0.8128766720564456, + "grad_norm": 4.172410286581336, + "learning_rate": 3.869155286171702e-06, + "loss": 0.021105384826660155, + "step": 94010 + }, + { + "epoch": 0.812919905577989, + "grad_norm": 12.375303903789856, + "learning_rate": 3.868960288721134e-06, + "loss": 0.6864295959472656, + "step": 94015 + }, + { + "epoch": 0.8129631390995322, + "grad_norm": 19.318946938317893, + "learning_rate": 3.868765287262999e-06, + "loss": 0.078271484375, + "step": 94020 + }, + { + "epoch": 0.8130063726210754, + "grad_norm": 17.95106726722064, + "learning_rate": 3.868570281798195e-06, + "loss": 0.07729949951171874, + "step": 94025 + }, + { + "epoch": 0.8130496061426188, + "grad_norm": 13.856531988715693, + "learning_rate": 3.8683752723276215e-06, + "loss": 0.2038118362426758, + "step": 94030 + }, + { + "epoch": 0.813092839664162, + "grad_norm": 5.86566093606606, + "learning_rate": 3.868180258852178e-06, + "loss": 0.48894081115722654, + "step": 94035 + }, + { + "epoch": 0.8131360731857052, + "grad_norm": 15.787866508376089, + "learning_rate": 3.8679852413727625e-06, + "loss": 0.1642791748046875, + "step": 94040 + }, + { + "epoch": 0.8131793067072486, + "grad_norm": 29.38761078462366, + "learning_rate": 3.867790219890277e-06, + "loss": 0.2343423843383789, + "step": 94045 + }, + { + "epoch": 0.8132225402287918, + "grad_norm": 7.245486340714033, + "learning_rate": 3.8675951944056205e-06, + "loss": 0.223089599609375, + "step": 94050 + }, + { + "epoch": 0.813265773750335, + "grad_norm": 8.684540466587608, + "learning_rate": 3.867400164919691e-06, + "loss": 0.1509246826171875, + "step": 94055 + }, + { + "epoch": 0.8133090072718783, + "grad_norm": 21.532690807874207, + "learning_rate": 3.867205131433389e-06, + "loss": 0.09509506225585937, + "step": 94060 + }, + { + "epoch": 0.8133522407934216, + "grad_norm": 12.199089770438095, + "learning_rate": 3.867010093947613e-06, + "loss": 0.0537353515625, + "step": 94065 + }, + { + "epoch": 0.8133954743149648, + "grad_norm": 1.5695970445451424, + "learning_rate": 3.866815052463262e-06, + "loss": 0.0778900146484375, + "step": 94070 + }, + { + "epoch": 0.8134387078365081, + "grad_norm": 2.4823743269125345, + "learning_rate": 3.866620006981238e-06, + "loss": 0.09178466796875, + "step": 94075 + }, + { + "epoch": 0.8134819413580514, + "grad_norm": 5.13250174827728, + "learning_rate": 3.8664249575024385e-06, + "loss": 0.02501983642578125, + "step": 94080 + }, + { + "epoch": 0.8135251748795946, + "grad_norm": 16.034557458394918, + "learning_rate": 3.866229904027764e-06, + "loss": 0.11288299560546874, + "step": 94085 + }, + { + "epoch": 0.8135684084011379, + "grad_norm": 13.913777990053266, + "learning_rate": 3.8660348465581146e-06, + "loss": 0.0500218391418457, + "step": 94090 + }, + { + "epoch": 0.8136116419226812, + "grad_norm": 6.348969536835029, + "learning_rate": 3.865839785094387e-06, + "loss": 0.08526077270507812, + "step": 94095 + }, + { + "epoch": 0.8136548754442244, + "grad_norm": 2.171376618303781, + "learning_rate": 3.865644719637484e-06, + "loss": 0.25809783935546876, + "step": 94100 + }, + { + "epoch": 0.8136981089657677, + "grad_norm": 6.582160129835488, + "learning_rate": 3.8654496501883045e-06, + "loss": 0.10898590087890625, + "step": 94105 + }, + { + "epoch": 0.813741342487311, + "grad_norm": 10.285948338410243, + "learning_rate": 3.865254576747746e-06, + "loss": 0.16649169921875, + "step": 94110 + }, + { + "epoch": 0.8137845760088542, + "grad_norm": 1.1845019708433804, + "learning_rate": 3.865059499316711e-06, + "loss": 0.069171142578125, + "step": 94115 + }, + { + "epoch": 0.8138278095303975, + "grad_norm": 49.037631731528315, + "learning_rate": 3.864864417896098e-06, + "loss": 0.2736473083496094, + "step": 94120 + }, + { + "epoch": 0.8138710430519408, + "grad_norm": 0.6722131039407073, + "learning_rate": 3.8646693324868065e-06, + "loss": 0.111663818359375, + "step": 94125 + }, + { + "epoch": 0.813914276573484, + "grad_norm": 3.354375732077232, + "learning_rate": 3.864474243089737e-06, + "loss": 0.39515151977539065, + "step": 94130 + }, + { + "epoch": 0.8139575100950273, + "grad_norm": 4.2943926864720146, + "learning_rate": 3.8642791497057886e-06, + "loss": 0.11646804809570313, + "step": 94135 + }, + { + "epoch": 0.8140007436165706, + "grad_norm": 2.3815129545361646, + "learning_rate": 3.864084052335861e-06, + "loss": 0.032806396484375, + "step": 94140 + }, + { + "epoch": 0.8140439771381138, + "grad_norm": 1.7189581608382847, + "learning_rate": 3.8638889509808535e-06, + "loss": 0.15565261840820313, + "step": 94145 + }, + { + "epoch": 0.8140872106596571, + "grad_norm": 3.1137643514640954, + "learning_rate": 3.863693845641667e-06, + "loss": 0.4048095703125, + "step": 94150 + }, + { + "epoch": 0.8141304441812003, + "grad_norm": 5.908833977561076, + "learning_rate": 3.8634987363192e-06, + "loss": 0.1987213134765625, + "step": 94155 + }, + { + "epoch": 0.8141736777027436, + "grad_norm": 1.184578333772483, + "learning_rate": 3.863303623014354e-06, + "loss": 0.18705291748046876, + "step": 94160 + }, + { + "epoch": 0.8142169112242869, + "grad_norm": 37.84836898728532, + "learning_rate": 3.8631085057280275e-06, + "loss": 0.461627197265625, + "step": 94165 + }, + { + "epoch": 0.8142601447458301, + "grad_norm": 42.04997679262916, + "learning_rate": 3.8629133844611215e-06, + "loss": 0.17068557739257811, + "step": 94170 + }, + { + "epoch": 0.8143033782673734, + "grad_norm": 0.5789721563690172, + "learning_rate": 3.8627182592145345e-06, + "loss": 0.07298240661621094, + "step": 94175 + }, + { + "epoch": 0.8143466117889167, + "grad_norm": 13.027083636849762, + "learning_rate": 3.862523129989169e-06, + "loss": 0.184893798828125, + "step": 94180 + }, + { + "epoch": 0.8143898453104599, + "grad_norm": 2.177045790931408, + "learning_rate": 3.862327996785921e-06, + "loss": 0.24341583251953125, + "step": 94185 + }, + { + "epoch": 0.8144330788320032, + "grad_norm": 9.128366311019542, + "learning_rate": 3.862132859605692e-06, + "loss": 0.14388427734375, + "step": 94190 + }, + { + "epoch": 0.8144763123535464, + "grad_norm": 11.540432292226903, + "learning_rate": 3.861937718449383e-06, + "loss": 0.2201751708984375, + "step": 94195 + }, + { + "epoch": 0.8145195458750897, + "grad_norm": 2.1667688337576227, + "learning_rate": 3.861742573317895e-06, + "loss": 0.12853126525878905, + "step": 94200 + }, + { + "epoch": 0.814562779396633, + "grad_norm": 7.446855416254287, + "learning_rate": 3.8615474242121245e-06, + "loss": 0.17338790893554687, + "step": 94205 + }, + { + "epoch": 0.8146060129181762, + "grad_norm": 1.1490731422945424, + "learning_rate": 3.861352271132974e-06, + "loss": 0.299139404296875, + "step": 94210 + }, + { + "epoch": 0.8146492464397195, + "grad_norm": 2.9280002366693068, + "learning_rate": 3.861157114081344e-06, + "loss": 0.100872802734375, + "step": 94215 + }, + { + "epoch": 0.8146924799612628, + "grad_norm": 6.461190889432323, + "learning_rate": 3.860961953058131e-06, + "loss": 0.1771575927734375, + "step": 94220 + }, + { + "epoch": 0.814735713482806, + "grad_norm": 20.75902321857254, + "learning_rate": 3.86076678806424e-06, + "loss": 0.191180419921875, + "step": 94225 + }, + { + "epoch": 0.8147789470043493, + "grad_norm": 2.0872075815266795, + "learning_rate": 3.860571619100568e-06, + "loss": 0.03055877685546875, + "step": 94230 + }, + { + "epoch": 0.8148221805258925, + "grad_norm": 1.1296933106275187, + "learning_rate": 3.860376446168015e-06, + "loss": 0.056072235107421875, + "step": 94235 + }, + { + "epoch": 0.8148654140474358, + "grad_norm": 0.18664799754314668, + "learning_rate": 3.8601812692674825e-06, + "loss": 0.26851348876953124, + "step": 94240 + }, + { + "epoch": 0.8149086475689791, + "grad_norm": 4.676443102591236, + "learning_rate": 3.85998608839987e-06, + "loss": 0.6602569580078125, + "step": 94245 + }, + { + "epoch": 0.8149518810905223, + "grad_norm": 4.326493466505153, + "learning_rate": 3.859790903566078e-06, + "loss": 0.45040283203125, + "step": 94250 + }, + { + "epoch": 0.8149951146120656, + "grad_norm": 0.7972817849501138, + "learning_rate": 3.859595714767005e-06, + "loss": 0.0523193359375, + "step": 94255 + }, + { + "epoch": 0.8150383481336089, + "grad_norm": 0.5156507462742845, + "learning_rate": 3.859400522003554e-06, + "loss": 0.140032958984375, + "step": 94260 + }, + { + "epoch": 0.8150815816551521, + "grad_norm": 20.823216437511213, + "learning_rate": 3.859205325276622e-06, + "loss": 0.1397258758544922, + "step": 94265 + }, + { + "epoch": 0.8151248151766954, + "grad_norm": 12.893023305031388, + "learning_rate": 3.859010124587112e-06, + "loss": 0.135797119140625, + "step": 94270 + }, + { + "epoch": 0.8151680486982387, + "grad_norm": 8.87559334985189, + "learning_rate": 3.858814919935924e-06, + "loss": 0.1519989013671875, + "step": 94275 + }, + { + "epoch": 0.8152112822197819, + "grad_norm": 2.7515208397832, + "learning_rate": 3.858619711323957e-06, + "loss": 0.04403533935546875, + "step": 94280 + }, + { + "epoch": 0.8152545157413252, + "grad_norm": 6.880652546409733, + "learning_rate": 3.85842449875211e-06, + "loss": 0.032830810546875, + "step": 94285 + }, + { + "epoch": 0.8152977492628685, + "grad_norm": 36.9533448362575, + "learning_rate": 3.858229282221287e-06, + "loss": 0.40888214111328125, + "step": 94290 + }, + { + "epoch": 0.8153409827844117, + "grad_norm": 5.991468777415245, + "learning_rate": 3.858034061732386e-06, + "loss": 0.2158935546875, + "step": 94295 + }, + { + "epoch": 0.815384216305955, + "grad_norm": 2.539323658637793, + "learning_rate": 3.857838837286307e-06, + "loss": 0.0744110107421875, + "step": 94300 + }, + { + "epoch": 0.8154274498274983, + "grad_norm": 0.03177924594321318, + "learning_rate": 3.8576436088839525e-06, + "loss": 0.01766548156738281, + "step": 94305 + }, + { + "epoch": 0.8154706833490415, + "grad_norm": 0.49647750831462184, + "learning_rate": 3.857448376526221e-06, + "loss": 0.04771099090576172, + "step": 94310 + }, + { + "epoch": 0.8155139168705848, + "grad_norm": 14.33393396314969, + "learning_rate": 3.857253140214012e-06, + "loss": 0.23469886779785157, + "step": 94315 + }, + { + "epoch": 0.8155571503921281, + "grad_norm": 8.761871442413872, + "learning_rate": 3.8570578999482286e-06, + "loss": 0.07777099609375, + "step": 94320 + }, + { + "epoch": 0.8156003839136713, + "grad_norm": 20.480934522049047, + "learning_rate": 3.85686265572977e-06, + "loss": 0.23277587890625, + "step": 94325 + }, + { + "epoch": 0.8156436174352145, + "grad_norm": 67.95401182861714, + "learning_rate": 3.8566674075595355e-06, + "loss": 0.451055908203125, + "step": 94330 + }, + { + "epoch": 0.8156868509567579, + "grad_norm": 0.5878447989468614, + "learning_rate": 3.856472155438427e-06, + "loss": 0.1385284423828125, + "step": 94335 + }, + { + "epoch": 0.8157300844783011, + "grad_norm": 11.614987136036822, + "learning_rate": 3.8562768993673455e-06, + "loss": 0.12323570251464844, + "step": 94340 + }, + { + "epoch": 0.8157733179998443, + "grad_norm": 8.7302622741701, + "learning_rate": 3.8560816393471906e-06, + "loss": 0.13268623352050782, + "step": 94345 + }, + { + "epoch": 0.8158165515213877, + "grad_norm": 0.32696789160506556, + "learning_rate": 3.855886375378862e-06, + "loss": 0.28660430908203127, + "step": 94350 + }, + { + "epoch": 0.8158597850429309, + "grad_norm": 0.2915573446617875, + "learning_rate": 3.85569110746326e-06, + "loss": 0.08351593017578125, + "step": 94355 + }, + { + "epoch": 0.8159030185644741, + "grad_norm": 0.16278154056011865, + "learning_rate": 3.8554958356012875e-06, + "loss": 0.046849822998046874, + "step": 94360 + }, + { + "epoch": 0.8159462520860175, + "grad_norm": 0.8386001325817809, + "learning_rate": 3.855300559793844e-06, + "loss": 0.17837181091308593, + "step": 94365 + }, + { + "epoch": 0.8159894856075607, + "grad_norm": 8.407298718249054, + "learning_rate": 3.85510528004183e-06, + "loss": 0.1079345703125, + "step": 94370 + }, + { + "epoch": 0.8160327191291039, + "grad_norm": 3.847801176585759, + "learning_rate": 3.854909996346146e-06, + "loss": 0.10640411376953125, + "step": 94375 + }, + { + "epoch": 0.8160759526506473, + "grad_norm": 1.5468829455293478, + "learning_rate": 3.854714708707692e-06, + "loss": 0.06726875305175781, + "step": 94380 + }, + { + "epoch": 0.8161191861721905, + "grad_norm": 30.484487852050826, + "learning_rate": 3.854519417127369e-06, + "loss": 0.2226898193359375, + "step": 94385 + }, + { + "epoch": 0.8161624196937337, + "grad_norm": 8.43253074767105, + "learning_rate": 3.854324121606079e-06, + "loss": 0.37735748291015625, + "step": 94390 + }, + { + "epoch": 0.816205653215277, + "grad_norm": 2.525919681308368, + "learning_rate": 3.8541288221447205e-06, + "loss": 0.09471435546875, + "step": 94395 + }, + { + "epoch": 0.8162488867368203, + "grad_norm": 0.8908289393919061, + "learning_rate": 3.853933518744196e-06, + "loss": 0.06529464721679687, + "step": 94400 + }, + { + "epoch": 0.8162921202583635, + "grad_norm": 4.754890883353468, + "learning_rate": 3.853738211405407e-06, + "loss": 0.189312744140625, + "step": 94405 + }, + { + "epoch": 0.8163353537799067, + "grad_norm": 0.3227221375187238, + "learning_rate": 3.85354290012925e-06, + "loss": 0.0494171142578125, + "step": 94410 + }, + { + "epoch": 0.8163785873014501, + "grad_norm": 24.118014687054153, + "learning_rate": 3.85334758491663e-06, + "loss": 0.23403072357177734, + "step": 94415 + }, + { + "epoch": 0.8164218208229933, + "grad_norm": 5.125158411248579, + "learning_rate": 3.853152265768447e-06, + "loss": 0.22043209075927733, + "step": 94420 + }, + { + "epoch": 0.8164650543445365, + "grad_norm": 15.693405279124908, + "learning_rate": 3.8529569426856e-06, + "loss": 0.11289749145507813, + "step": 94425 + }, + { + "epoch": 0.8165082878660799, + "grad_norm": 2.6089944202284143, + "learning_rate": 3.852761615668991e-06, + "loss": 0.04691314697265625, + "step": 94430 + }, + { + "epoch": 0.8165515213876231, + "grad_norm": 2.6485323189618777, + "learning_rate": 3.852566284719522e-06, + "loss": 0.203485107421875, + "step": 94435 + }, + { + "epoch": 0.8165947549091663, + "grad_norm": 3.5636068020658525, + "learning_rate": 3.852370949838092e-06, + "loss": 0.17581787109375, + "step": 94440 + }, + { + "epoch": 0.8166379884307097, + "grad_norm": 0.644330403810582, + "learning_rate": 3.852175611025601e-06, + "loss": 0.0243927001953125, + "step": 94445 + }, + { + "epoch": 0.8166812219522529, + "grad_norm": 5.298988139194479, + "learning_rate": 3.851980268282954e-06, + "loss": 0.11943893432617188, + "step": 94450 + }, + { + "epoch": 0.8167244554737961, + "grad_norm": 6.4146113817972115, + "learning_rate": 3.851784921611048e-06, + "loss": 0.0855621337890625, + "step": 94455 + }, + { + "epoch": 0.8167676889953395, + "grad_norm": 9.048760027529594, + "learning_rate": 3.851589571010784e-06, + "loss": 0.20043621063232422, + "step": 94460 + }, + { + "epoch": 0.8168109225168827, + "grad_norm": 1.3633051954290183, + "learning_rate": 3.851394216483065e-06, + "loss": 0.04610366821289062, + "step": 94465 + }, + { + "epoch": 0.8168541560384259, + "grad_norm": 2.6756951131134263, + "learning_rate": 3.851198858028791e-06, + "loss": 0.020395660400390626, + "step": 94470 + }, + { + "epoch": 0.8168973895599693, + "grad_norm": 5.844689616755688, + "learning_rate": 3.851003495648864e-06, + "loss": 0.135247802734375, + "step": 94475 + }, + { + "epoch": 0.8169406230815125, + "grad_norm": 16.265614079790844, + "learning_rate": 3.850808129344184e-06, + "loss": 0.179345703125, + "step": 94480 + }, + { + "epoch": 0.8169838566030557, + "grad_norm": 0.7572616839159512, + "learning_rate": 3.850612759115652e-06, + "loss": 0.10984230041503906, + "step": 94485 + }, + { + "epoch": 0.817027090124599, + "grad_norm": 2.4624406518678317, + "learning_rate": 3.850417384964168e-06, + "loss": 0.1566925048828125, + "step": 94490 + }, + { + "epoch": 0.8170703236461423, + "grad_norm": 5.949772448800551, + "learning_rate": 3.850222006890635e-06, + "loss": 0.09634552001953126, + "step": 94495 + }, + { + "epoch": 0.8171135571676855, + "grad_norm": 6.380318481111533, + "learning_rate": 3.850026624895953e-06, + "loss": 0.06415824890136719, + "step": 94500 + }, + { + "epoch": 0.8171567906892288, + "grad_norm": 0.36022434577227913, + "learning_rate": 3.849831238981023e-06, + "loss": 0.2568378448486328, + "step": 94505 + }, + { + "epoch": 0.8172000242107721, + "grad_norm": 3.4509017965549336, + "learning_rate": 3.849635849146748e-06, + "loss": 0.0628082275390625, + "step": 94510 + }, + { + "epoch": 0.8172432577323153, + "grad_norm": 0.2748804879482237, + "learning_rate": 3.849440455394026e-06, + "loss": 0.06894989013671875, + "step": 94515 + }, + { + "epoch": 0.8172864912538585, + "grad_norm": 19.151265018031655, + "learning_rate": 3.84924505772376e-06, + "loss": 0.14533233642578125, + "step": 94520 + }, + { + "epoch": 0.8173297247754019, + "grad_norm": 8.42660562310949, + "learning_rate": 3.8490496561368505e-06, + "loss": 0.39815940856933596, + "step": 94525 + }, + { + "epoch": 0.8173729582969451, + "grad_norm": 3.5959210715080903, + "learning_rate": 3.848854250634199e-06, + "loss": 0.13021354675292968, + "step": 94530 + }, + { + "epoch": 0.8174161918184883, + "grad_norm": 12.019168681815332, + "learning_rate": 3.8486588412167085e-06, + "loss": 0.4241649627685547, + "step": 94535 + }, + { + "epoch": 0.8174594253400317, + "grad_norm": 4.6571004901322715, + "learning_rate": 3.8484634278852766e-06, + "loss": 0.19379501342773436, + "step": 94540 + }, + { + "epoch": 0.8175026588615749, + "grad_norm": 32.159648646600594, + "learning_rate": 3.848268010640807e-06, + "loss": 0.11796340942382813, + "step": 94545 + }, + { + "epoch": 0.8175458923831181, + "grad_norm": 13.117920936336313, + "learning_rate": 3.8480725894842e-06, + "loss": 0.14808349609375, + "step": 94550 + }, + { + "epoch": 0.8175891259046615, + "grad_norm": 17.825806457401224, + "learning_rate": 3.847877164416356e-06, + "loss": 0.176788330078125, + "step": 94555 + }, + { + "epoch": 0.8176323594262047, + "grad_norm": 8.720578491668668, + "learning_rate": 3.847681735438179e-06, + "loss": 0.129632568359375, + "step": 94560 + }, + { + "epoch": 0.8176755929477479, + "grad_norm": 15.10372026082654, + "learning_rate": 3.847486302550569e-06, + "loss": 0.15294647216796875, + "step": 94565 + }, + { + "epoch": 0.8177188264692913, + "grad_norm": 19.288818763966766, + "learning_rate": 3.847290865754426e-06, + "loss": 0.107257080078125, + "step": 94570 + }, + { + "epoch": 0.8177620599908345, + "grad_norm": 7.532914475558679, + "learning_rate": 3.847095425050653e-06, + "loss": 0.08253765106201172, + "step": 94575 + }, + { + "epoch": 0.8178052935123777, + "grad_norm": 29.316210330651447, + "learning_rate": 3.846899980440151e-06, + "loss": 0.5143798828125, + "step": 94580 + }, + { + "epoch": 0.817848527033921, + "grad_norm": 0.764853919039327, + "learning_rate": 3.846704531923821e-06, + "loss": 0.31188812255859377, + "step": 94585 + }, + { + "epoch": 0.8178917605554643, + "grad_norm": 0.6929573392261951, + "learning_rate": 3.846509079502564e-06, + "loss": 0.03174037933349609, + "step": 94590 + }, + { + "epoch": 0.8179349940770075, + "grad_norm": 7.284778819234454, + "learning_rate": 3.846313623177282e-06, + "loss": 0.16825408935546876, + "step": 94595 + }, + { + "epoch": 0.8179782275985508, + "grad_norm": 19.020954243023265, + "learning_rate": 3.846118162948877e-06, + "loss": 0.11466598510742188, + "step": 94600 + }, + { + "epoch": 0.8180214611200941, + "grad_norm": 0.48720211202559666, + "learning_rate": 3.845922698818249e-06, + "loss": 0.11654529571533204, + "step": 94605 + }, + { + "epoch": 0.8180646946416373, + "grad_norm": 34.964174726330654, + "learning_rate": 3.8457272307863e-06, + "loss": 0.11779937744140626, + "step": 94610 + }, + { + "epoch": 0.8181079281631806, + "grad_norm": 0.12392992500088258, + "learning_rate": 3.845531758853933e-06, + "loss": 0.062066650390625, + "step": 94615 + }, + { + "epoch": 0.8181511616847239, + "grad_norm": 12.120119998881615, + "learning_rate": 3.845336283022047e-06, + "loss": 0.13890609741210938, + "step": 94620 + }, + { + "epoch": 0.8181943952062671, + "grad_norm": 5.890364233232756, + "learning_rate": 3.845140803291546e-06, + "loss": 0.1919769287109375, + "step": 94625 + }, + { + "epoch": 0.8182376287278104, + "grad_norm": 1.026764900285431, + "learning_rate": 3.84494531966333e-06, + "loss": 0.06283111572265625, + "step": 94630 + }, + { + "epoch": 0.8182808622493537, + "grad_norm": 3.181860592309467, + "learning_rate": 3.8447498321382994e-06, + "loss": 0.13341064453125, + "step": 94635 + }, + { + "epoch": 0.8183240957708969, + "grad_norm": 3.854041882322157, + "learning_rate": 3.8445543407173585e-06, + "loss": 0.1704315185546875, + "step": 94640 + }, + { + "epoch": 0.8183673292924402, + "grad_norm": 5.956205879528277, + "learning_rate": 3.844358845401408e-06, + "loss": 0.057622528076171874, + "step": 94645 + }, + { + "epoch": 0.8184105628139835, + "grad_norm": 5.526619405298506, + "learning_rate": 3.844163346191348e-06, + "loss": 0.24660797119140626, + "step": 94650 + }, + { + "epoch": 0.8184537963355267, + "grad_norm": 1.5761653786006258, + "learning_rate": 3.8439678430880816e-06, + "loss": 0.3546745300292969, + "step": 94655 + }, + { + "epoch": 0.81849702985707, + "grad_norm": 2.2667228818617096, + "learning_rate": 3.84377233609251e-06, + "loss": 0.09561538696289062, + "step": 94660 + }, + { + "epoch": 0.8185402633786132, + "grad_norm": 7.0418596319924385, + "learning_rate": 3.843576825205536e-06, + "loss": 0.3370628356933594, + "step": 94665 + }, + { + "epoch": 0.8185834969001565, + "grad_norm": 4.438226244973461, + "learning_rate": 3.843381310428059e-06, + "loss": 0.21098175048828124, + "step": 94670 + }, + { + "epoch": 0.8186267304216998, + "grad_norm": 1.9318882174496275, + "learning_rate": 3.8431857917609826e-06, + "loss": 0.1274688720703125, + "step": 94675 + }, + { + "epoch": 0.818669963943243, + "grad_norm": 9.70714496066757, + "learning_rate": 3.842990269205208e-06, + "loss": 0.43773345947265624, + "step": 94680 + }, + { + "epoch": 0.8187131974647863, + "grad_norm": 1.598111983471511, + "learning_rate": 3.8427947427616356e-06, + "loss": 0.08509178161621093, + "step": 94685 + }, + { + "epoch": 0.8187564309863296, + "grad_norm": 16.029204606435066, + "learning_rate": 3.842599212431168e-06, + "loss": 0.1871429443359375, + "step": 94690 + }, + { + "epoch": 0.8187996645078728, + "grad_norm": 15.695926628095961, + "learning_rate": 3.842403678214709e-06, + "loss": 0.10737457275390624, + "step": 94695 + }, + { + "epoch": 0.8188428980294161, + "grad_norm": 3.657075979267019, + "learning_rate": 3.842208140113157e-06, + "loss": 0.131341552734375, + "step": 94700 + }, + { + "epoch": 0.8188861315509594, + "grad_norm": 16.580789937478308, + "learning_rate": 3.842012598127418e-06, + "loss": 0.13037872314453125, + "step": 94705 + }, + { + "epoch": 0.8189293650725026, + "grad_norm": 9.77270614080086, + "learning_rate": 3.84181705225839e-06, + "loss": 0.1568756103515625, + "step": 94710 + }, + { + "epoch": 0.8189725985940459, + "grad_norm": 1.6179272556687347, + "learning_rate": 3.841621502506975e-06, + "loss": 0.14828453063964844, + "step": 94715 + }, + { + "epoch": 0.8190158321155891, + "grad_norm": 4.835385342860099, + "learning_rate": 3.841425948874077e-06, + "loss": 0.03218994140625, + "step": 94720 + }, + { + "epoch": 0.8190590656371324, + "grad_norm": 6.166234645128959, + "learning_rate": 3.841230391360597e-06, + "loss": 0.06194038391113281, + "step": 94725 + }, + { + "epoch": 0.8191022991586757, + "grad_norm": 64.51974773525747, + "learning_rate": 3.841034829967436e-06, + "loss": 0.88756103515625, + "step": 94730 + }, + { + "epoch": 0.819145532680219, + "grad_norm": 1.7358240425531333, + "learning_rate": 3.840839264695498e-06, + "loss": 0.10775146484375, + "step": 94735 + }, + { + "epoch": 0.8191887662017622, + "grad_norm": 5.064194818339016, + "learning_rate": 3.840643695545682e-06, + "loss": 0.09622955322265625, + "step": 94740 + }, + { + "epoch": 0.8192319997233055, + "grad_norm": 0.5473399084548717, + "learning_rate": 3.840448122518893e-06, + "loss": 0.035140609741210936, + "step": 94745 + }, + { + "epoch": 0.8192752332448487, + "grad_norm": 1.0555991211271287, + "learning_rate": 3.8402525456160306e-06, + "loss": 0.4543243408203125, + "step": 94750 + }, + { + "epoch": 0.819318466766392, + "grad_norm": 7.817084635574312, + "learning_rate": 3.8400569648379985e-06, + "loss": 0.10887603759765625, + "step": 94755 + }, + { + "epoch": 0.8193617002879352, + "grad_norm": 16.27015116674615, + "learning_rate": 3.839861380185697e-06, + "loss": 0.15148468017578126, + "step": 94760 + }, + { + "epoch": 0.8194049338094785, + "grad_norm": 0.9432668436416326, + "learning_rate": 3.839665791660029e-06, + "loss": 0.1097137451171875, + "step": 94765 + }, + { + "epoch": 0.8194481673310218, + "grad_norm": 1.576123280758993, + "learning_rate": 3.839470199261898e-06, + "loss": 0.1216552734375, + "step": 94770 + }, + { + "epoch": 0.819491400852565, + "grad_norm": 22.88570619644498, + "learning_rate": 3.839274602992203e-06, + "loss": 0.08867225646972657, + "step": 94775 + }, + { + "epoch": 0.8195346343741083, + "grad_norm": 0.22144259703982666, + "learning_rate": 3.839079002851848e-06, + "loss": 0.03059864044189453, + "step": 94780 + }, + { + "epoch": 0.8195778678956516, + "grad_norm": 4.019713820593294, + "learning_rate": 3.838883398841735e-06, + "loss": 0.09571533203125, + "step": 94785 + }, + { + "epoch": 0.8196211014171948, + "grad_norm": 1.3534945004248589, + "learning_rate": 3.838687790962767e-06, + "loss": 0.24111557006835938, + "step": 94790 + }, + { + "epoch": 0.8196643349387381, + "grad_norm": 0.27518058826203323, + "learning_rate": 3.838492179215844e-06, + "loss": 0.07002487182617187, + "step": 94795 + }, + { + "epoch": 0.8197075684602814, + "grad_norm": 0.7157664972221993, + "learning_rate": 3.838296563601869e-06, + "loss": 0.12247123718261718, + "step": 94800 + }, + { + "epoch": 0.8197508019818246, + "grad_norm": 16.879196899508738, + "learning_rate": 3.838100944121744e-06, + "loss": 0.11033096313476562, + "step": 94805 + }, + { + "epoch": 0.8197940355033679, + "grad_norm": 16.53199128890788, + "learning_rate": 3.837905320776371e-06, + "loss": 0.16173095703125, + "step": 94810 + }, + { + "epoch": 0.8198372690249112, + "grad_norm": 2.3288642484303628, + "learning_rate": 3.837709693566654e-06, + "loss": 0.05231456756591797, + "step": 94815 + }, + { + "epoch": 0.8198805025464544, + "grad_norm": 14.871251274685457, + "learning_rate": 3.837514062493494e-06, + "loss": 0.07106361389160157, + "step": 94820 + }, + { + "epoch": 0.8199237360679977, + "grad_norm": 3.38097274191181, + "learning_rate": 3.837318427557793e-06, + "loss": 0.0285308837890625, + "step": 94825 + }, + { + "epoch": 0.819966969589541, + "grad_norm": 10.645483350147492, + "learning_rate": 3.837122788760453e-06, + "loss": 0.0488311767578125, + "step": 94830 + }, + { + "epoch": 0.8200102031110842, + "grad_norm": 1.0464627859485143, + "learning_rate": 3.836927146102376e-06, + "loss": 0.031085968017578125, + "step": 94835 + }, + { + "epoch": 0.8200534366326274, + "grad_norm": 12.951953337529824, + "learning_rate": 3.836731499584466e-06, + "loss": 0.23108081817626952, + "step": 94840 + }, + { + "epoch": 0.8200966701541708, + "grad_norm": 5.3254376117069375, + "learning_rate": 3.836535849207624e-06, + "loss": 0.2549896240234375, + "step": 94845 + }, + { + "epoch": 0.820139903675714, + "grad_norm": 18.4237812033251, + "learning_rate": 3.836340194972754e-06, + "loss": 0.1886322021484375, + "step": 94850 + }, + { + "epoch": 0.8201831371972572, + "grad_norm": 0.8530162233963465, + "learning_rate": 3.836144536880755e-06, + "loss": 0.0230010986328125, + "step": 94855 + }, + { + "epoch": 0.8202263707188006, + "grad_norm": 7.242904089249109, + "learning_rate": 3.835948874932531e-06, + "loss": 0.35846939086914065, + "step": 94860 + }, + { + "epoch": 0.8202696042403438, + "grad_norm": 3.930323777338049, + "learning_rate": 3.835753209128986e-06, + "loss": 0.052448272705078125, + "step": 94865 + }, + { + "epoch": 0.820312837761887, + "grad_norm": 24.20251093664334, + "learning_rate": 3.83555753947102e-06, + "loss": 0.0618682861328125, + "step": 94870 + }, + { + "epoch": 0.8203560712834304, + "grad_norm": 29.12068922281167, + "learning_rate": 3.835361865959537e-06, + "loss": 0.622283935546875, + "step": 94875 + }, + { + "epoch": 0.8203993048049736, + "grad_norm": 16.45615243066968, + "learning_rate": 3.8351661885954395e-06, + "loss": 0.06438064575195312, + "step": 94880 + }, + { + "epoch": 0.8204425383265168, + "grad_norm": 17.026115711693407, + "learning_rate": 3.834970507379628e-06, + "loss": 0.1357391357421875, + "step": 94885 + }, + { + "epoch": 0.8204857718480602, + "grad_norm": 4.351475600659797, + "learning_rate": 3.834774822313007e-06, + "loss": 0.28538665771484373, + "step": 94890 + }, + { + "epoch": 0.8205290053696034, + "grad_norm": 0.09337288049197173, + "learning_rate": 3.8345791333964784e-06, + "loss": 0.1719745635986328, + "step": 94895 + }, + { + "epoch": 0.8205722388911466, + "grad_norm": 18.514435715029325, + "learning_rate": 3.834383440630945e-06, + "loss": 0.49574737548828124, + "step": 94900 + }, + { + "epoch": 0.82061547241269, + "grad_norm": 15.297753923128282, + "learning_rate": 3.8341877440173075e-06, + "loss": 0.05566558837890625, + "step": 94905 + }, + { + "epoch": 0.8206587059342332, + "grad_norm": 8.146905548494681, + "learning_rate": 3.833992043556471e-06, + "loss": 0.09000282287597657, + "step": 94910 + }, + { + "epoch": 0.8207019394557764, + "grad_norm": 18.56419698129264, + "learning_rate": 3.833796339249336e-06, + "loss": 0.32525787353515623, + "step": 94915 + }, + { + "epoch": 0.8207451729773197, + "grad_norm": 2.157149718369498, + "learning_rate": 3.833600631096807e-06, + "loss": 0.0479766845703125, + "step": 94920 + }, + { + "epoch": 0.820788406498863, + "grad_norm": 3.2270358829961494, + "learning_rate": 3.833404919099785e-06, + "loss": 0.2580322265625, + "step": 94925 + }, + { + "epoch": 0.8208316400204062, + "grad_norm": 2.783659415968796, + "learning_rate": 3.833209203259174e-06, + "loss": 0.17872676849365235, + "step": 94930 + }, + { + "epoch": 0.8208748735419494, + "grad_norm": 1.1038811295366395, + "learning_rate": 3.833013483575875e-06, + "loss": 0.190582275390625, + "step": 94935 + }, + { + "epoch": 0.8209181070634928, + "grad_norm": 1.1078477712699084, + "learning_rate": 3.832817760050791e-06, + "loss": 0.10729827880859374, + "step": 94940 + }, + { + "epoch": 0.820961340585036, + "grad_norm": 3.963350683227324, + "learning_rate": 3.832622032684825e-06, + "loss": 0.2170257568359375, + "step": 94945 + }, + { + "epoch": 0.8210045741065792, + "grad_norm": 6.551876432986725, + "learning_rate": 3.832426301478881e-06, + "loss": 0.08255538940429688, + "step": 94950 + }, + { + "epoch": 0.8210478076281226, + "grad_norm": 5.67649569981643, + "learning_rate": 3.832230566433859e-06, + "loss": 0.1855712890625, + "step": 94955 + }, + { + "epoch": 0.8210910411496658, + "grad_norm": 3.6944579031448077, + "learning_rate": 3.832034827550664e-06, + "loss": 0.1682098388671875, + "step": 94960 + }, + { + "epoch": 0.821134274671209, + "grad_norm": 0.027088940500607762, + "learning_rate": 3.831839084830198e-06, + "loss": 0.2252887725830078, + "step": 94965 + }, + { + "epoch": 0.8211775081927524, + "grad_norm": 3.64626896722944, + "learning_rate": 3.831643338273364e-06, + "loss": 0.04377937316894531, + "step": 94970 + }, + { + "epoch": 0.8212207417142956, + "grad_norm": 0.2683669023068381, + "learning_rate": 3.831447587881063e-06, + "loss": 0.0930206298828125, + "step": 94975 + }, + { + "epoch": 0.8212639752358388, + "grad_norm": 13.836582238296984, + "learning_rate": 3.8312518336542e-06, + "loss": 0.16650543212890626, + "step": 94980 + }, + { + "epoch": 0.8213072087573822, + "grad_norm": 26.37834726352445, + "learning_rate": 3.831056075593677e-06, + "loss": 0.19282493591308594, + "step": 94985 + }, + { + "epoch": 0.8213504422789254, + "grad_norm": 4.38168381700854, + "learning_rate": 3.8308603137003975e-06, + "loss": 0.26653900146484377, + "step": 94990 + }, + { + "epoch": 0.8213936758004686, + "grad_norm": 0.936745909861327, + "learning_rate": 3.8306645479752626e-06, + "loss": 0.1570098876953125, + "step": 94995 + }, + { + "epoch": 0.821436909322012, + "grad_norm": 0.1947856219215376, + "learning_rate": 3.830468778419176e-06, + "loss": 0.27626609802246094, + "step": 95000 + }, + { + "epoch": 0.8214801428435552, + "grad_norm": 49.366241485767915, + "learning_rate": 3.830273005033042e-06, + "loss": 0.32960824966430663, + "step": 95005 + }, + { + "epoch": 0.8215233763650984, + "grad_norm": 9.689106894094804, + "learning_rate": 3.830077227817762e-06, + "loss": 0.0814727783203125, + "step": 95010 + }, + { + "epoch": 0.8215666098866417, + "grad_norm": 5.536342063757986, + "learning_rate": 3.829881446774238e-06, + "loss": 0.6329315185546875, + "step": 95015 + }, + { + "epoch": 0.821609843408185, + "grad_norm": 1.744165541378544, + "learning_rate": 3.829685661903375e-06, + "loss": 0.01712799072265625, + "step": 95020 + }, + { + "epoch": 0.8216530769297282, + "grad_norm": 3.985895484826707, + "learning_rate": 3.829489873206075e-06, + "loss": 0.1742919921875, + "step": 95025 + }, + { + "epoch": 0.8216963104512714, + "grad_norm": 0.48287630187139247, + "learning_rate": 3.829294080683241e-06, + "loss": 0.041112327575683595, + "step": 95030 + }, + { + "epoch": 0.8217395439728148, + "grad_norm": 2.100533146292941, + "learning_rate": 3.829098284335775e-06, + "loss": 0.1320526123046875, + "step": 95035 + }, + { + "epoch": 0.821782777494358, + "grad_norm": 16.884644153328942, + "learning_rate": 3.828902484164583e-06, + "loss": 0.2076751708984375, + "step": 95040 + }, + { + "epoch": 0.8218260110159012, + "grad_norm": 6.738317694938837, + "learning_rate": 3.8287066801705644e-06, + "loss": 0.15875244140625, + "step": 95045 + }, + { + "epoch": 0.8218692445374446, + "grad_norm": 4.049711496607912, + "learning_rate": 3.828510872354624e-06, + "loss": 0.08461837768554688, + "step": 95050 + }, + { + "epoch": 0.8219124780589878, + "grad_norm": 0.22038734837068355, + "learning_rate": 3.828315060717665e-06, + "loss": 0.04700260162353516, + "step": 95055 + }, + { + "epoch": 0.821955711580531, + "grad_norm": 25.915307877122487, + "learning_rate": 3.828119245260591e-06, + "loss": 0.08772163391113282, + "step": 95060 + }, + { + "epoch": 0.8219989451020744, + "grad_norm": 10.937074888090965, + "learning_rate": 3.827923425984302e-06, + "loss": 0.09819164276123046, + "step": 95065 + }, + { + "epoch": 0.8220421786236176, + "grad_norm": 2.0310357186244152, + "learning_rate": 3.8277276028897045e-06, + "loss": 0.07076263427734375, + "step": 95070 + }, + { + "epoch": 0.8220854121451608, + "grad_norm": 4.792187290994239, + "learning_rate": 3.827531775977701e-06, + "loss": 0.0904388427734375, + "step": 95075 + }, + { + "epoch": 0.8221286456667042, + "grad_norm": 6.327831893604041, + "learning_rate": 3.827335945249194e-06, + "loss": 0.193963623046875, + "step": 95080 + }, + { + "epoch": 0.8221718791882474, + "grad_norm": 16.665259894943294, + "learning_rate": 3.827140110705086e-06, + "loss": 0.45212135314941404, + "step": 95085 + }, + { + "epoch": 0.8222151127097906, + "grad_norm": 4.138399957643718, + "learning_rate": 3.826944272346282e-06, + "loss": 0.042352294921875, + "step": 95090 + }, + { + "epoch": 0.822258346231334, + "grad_norm": 0.8256624560450322, + "learning_rate": 3.826748430173682e-06, + "loss": 0.10793952941894532, + "step": 95095 + }, + { + "epoch": 0.8223015797528772, + "grad_norm": 1.3931428357766122, + "learning_rate": 3.826552584188194e-06, + "loss": 0.12424774169921875, + "step": 95100 + }, + { + "epoch": 0.8223448132744204, + "grad_norm": 3.270321030619808, + "learning_rate": 3.826356734390716e-06, + "loss": 0.17683868408203124, + "step": 95105 + }, + { + "epoch": 0.8223880467959637, + "grad_norm": 29.902435261374343, + "learning_rate": 3.826160880782156e-06, + "loss": 0.23847198486328125, + "step": 95110 + }, + { + "epoch": 0.822431280317507, + "grad_norm": 18.82994756410598, + "learning_rate": 3.825965023363413e-06, + "loss": 0.221185302734375, + "step": 95115 + }, + { + "epoch": 0.8224745138390502, + "grad_norm": 0.9861397017846055, + "learning_rate": 3.8257691621353936e-06, + "loss": 0.24033012390136718, + "step": 95120 + }, + { + "epoch": 0.8225177473605935, + "grad_norm": 29.267116071974993, + "learning_rate": 3.825573297099e-06, + "loss": 0.25720748901367185, + "step": 95125 + }, + { + "epoch": 0.8225609808821368, + "grad_norm": 6.617544283989875, + "learning_rate": 3.825377428255134e-06, + "loss": 0.04462223052978516, + "step": 95130 + }, + { + "epoch": 0.82260421440368, + "grad_norm": 25.245359431487653, + "learning_rate": 3.8251815556047006e-06, + "loss": 0.13690948486328125, + "step": 95135 + }, + { + "epoch": 0.8226474479252233, + "grad_norm": 4.378917667099138, + "learning_rate": 3.8249856791486035e-06, + "loss": 0.0695404052734375, + "step": 95140 + }, + { + "epoch": 0.8226906814467666, + "grad_norm": 1.3430007373822406, + "learning_rate": 3.8247897988877446e-06, + "loss": 0.0328155517578125, + "step": 95145 + }, + { + "epoch": 0.8227339149683098, + "grad_norm": 2.6295616028012425, + "learning_rate": 3.824593914823028e-06, + "loss": 0.049755859375, + "step": 95150 + }, + { + "epoch": 0.8227771484898531, + "grad_norm": 1.2604643477884285, + "learning_rate": 3.824398026955358e-06, + "loss": 0.06432838439941406, + "step": 95155 + }, + { + "epoch": 0.8228203820113964, + "grad_norm": 0.4407445463021618, + "learning_rate": 3.824202135285636e-06, + "loss": 0.03033294677734375, + "step": 95160 + }, + { + "epoch": 0.8228636155329396, + "grad_norm": 1.8421841950072066, + "learning_rate": 3.824006239814768e-06, + "loss": 0.3302490234375, + "step": 95165 + }, + { + "epoch": 0.8229068490544829, + "grad_norm": 1.1881473179038178, + "learning_rate": 3.823810340543655e-06, + "loss": 0.07079696655273438, + "step": 95170 + }, + { + "epoch": 0.8229500825760262, + "grad_norm": 0.9122178655269442, + "learning_rate": 3.823614437473201e-06, + "loss": 0.18274993896484376, + "step": 95175 + }, + { + "epoch": 0.8229933160975694, + "grad_norm": 4.501972397578914, + "learning_rate": 3.823418530604311e-06, + "loss": 0.0713104248046875, + "step": 95180 + }, + { + "epoch": 0.8230365496191127, + "grad_norm": 33.326521603644906, + "learning_rate": 3.823222619937888e-06, + "loss": 0.282757568359375, + "step": 95185 + }, + { + "epoch": 0.8230797831406559, + "grad_norm": 5.607604355428405, + "learning_rate": 3.823026705474834e-06, + "loss": 0.14115505218505858, + "step": 95190 + }, + { + "epoch": 0.8231230166621992, + "grad_norm": 3.5056885557881032, + "learning_rate": 3.822830787216054e-06, + "loss": 0.069976806640625, + "step": 95195 + }, + { + "epoch": 0.8231662501837425, + "grad_norm": 27.927456380466428, + "learning_rate": 3.82263486516245e-06, + "loss": 0.20120391845703126, + "step": 95200 + }, + { + "epoch": 0.8232094837052857, + "grad_norm": 15.485491859729386, + "learning_rate": 3.8224389393149285e-06, + "loss": 0.26236667633056643, + "step": 95205 + }, + { + "epoch": 0.823252717226829, + "grad_norm": 9.869575186064722, + "learning_rate": 3.82224300967439e-06, + "loss": 0.2013916015625, + "step": 95210 + }, + { + "epoch": 0.8232959507483723, + "grad_norm": 2.64656485773859, + "learning_rate": 3.82204707624174e-06, + "loss": 0.20106964111328124, + "step": 95215 + }, + { + "epoch": 0.8233391842699155, + "grad_norm": 4.8887266295345, + "learning_rate": 3.821851139017882e-06, + "loss": 0.2317596435546875, + "step": 95220 + }, + { + "epoch": 0.8233824177914588, + "grad_norm": 43.74329695869185, + "learning_rate": 3.821655198003718e-06, + "loss": 0.3524620056152344, + "step": 95225 + }, + { + "epoch": 0.823425651313002, + "grad_norm": 9.933656526783547, + "learning_rate": 3.821459253200153e-06, + "loss": 0.32635498046875, + "step": 95230 + }, + { + "epoch": 0.8234688848345453, + "grad_norm": 12.023737549120167, + "learning_rate": 3.821263304608091e-06, + "loss": 0.1364105224609375, + "step": 95235 + }, + { + "epoch": 0.8235121183560886, + "grad_norm": 16.67753651638166, + "learning_rate": 3.8210673522284345e-06, + "loss": 0.160968017578125, + "step": 95240 + }, + { + "epoch": 0.8235553518776318, + "grad_norm": 15.653066581029938, + "learning_rate": 3.820871396062089e-06, + "loss": 0.114068603515625, + "step": 95245 + }, + { + "epoch": 0.8235985853991751, + "grad_norm": 19.72408867520505, + "learning_rate": 3.820675436109957e-06, + "loss": 0.5147216796875, + "step": 95250 + }, + { + "epoch": 0.8236418189207184, + "grad_norm": 12.971948147340356, + "learning_rate": 3.820479472372941e-06, + "loss": 0.09380722045898438, + "step": 95255 + }, + { + "epoch": 0.8236850524422616, + "grad_norm": 13.22452483918622, + "learning_rate": 3.820283504851947e-06, + "loss": 0.132562255859375, + "step": 95260 + }, + { + "epoch": 0.8237282859638049, + "grad_norm": 2.555080094782011, + "learning_rate": 3.820087533547879e-06, + "loss": 0.12931480407714843, + "step": 95265 + }, + { + "epoch": 0.8237715194853482, + "grad_norm": 6.667231685521783, + "learning_rate": 3.819891558461639e-06, + "loss": 0.1395801544189453, + "step": 95270 + }, + { + "epoch": 0.8238147530068914, + "grad_norm": 30.070034166694555, + "learning_rate": 3.819695579594132e-06, + "loss": 0.3595607280731201, + "step": 95275 + }, + { + "epoch": 0.8238579865284347, + "grad_norm": 5.945003784075263, + "learning_rate": 3.8194995969462606e-06, + "loss": 0.17623481750488282, + "step": 95280 + }, + { + "epoch": 0.8239012200499779, + "grad_norm": 0.088018683403436, + "learning_rate": 3.81930361051893e-06, + "loss": 0.089825439453125, + "step": 95285 + }, + { + "epoch": 0.8239444535715212, + "grad_norm": 2.396106685511471, + "learning_rate": 3.819107620313043e-06, + "loss": 0.19098663330078125, + "step": 95290 + }, + { + "epoch": 0.8239876870930645, + "grad_norm": 7.425529073102912, + "learning_rate": 3.818911626329505e-06, + "loss": 0.16041412353515624, + "step": 95295 + }, + { + "epoch": 0.8240309206146077, + "grad_norm": 13.365983542666676, + "learning_rate": 3.818715628569218e-06, + "loss": 0.10790061950683594, + "step": 95300 + }, + { + "epoch": 0.824074154136151, + "grad_norm": 15.862177380031287, + "learning_rate": 3.818519627033087e-06, + "loss": 0.10102767944335937, + "step": 95305 + }, + { + "epoch": 0.8241173876576943, + "grad_norm": 10.06924190579867, + "learning_rate": 3.818323621722015e-06, + "loss": 0.1662872314453125, + "step": 95310 + }, + { + "epoch": 0.8241606211792375, + "grad_norm": 1.2073895805598855, + "learning_rate": 3.818127612636908e-06, + "loss": 0.10269927978515625, + "step": 95315 + }, + { + "epoch": 0.8242038547007808, + "grad_norm": 16.06222596695128, + "learning_rate": 3.817931599778668e-06, + "loss": 0.10072250366210937, + "step": 95320 + }, + { + "epoch": 0.8242470882223241, + "grad_norm": 0.7777304299124038, + "learning_rate": 3.817735583148201e-06, + "loss": 0.32217044830322267, + "step": 95325 + }, + { + "epoch": 0.8242903217438673, + "grad_norm": 17.89706251798702, + "learning_rate": 3.817539562746409e-06, + "loss": 0.107757568359375, + "step": 95330 + }, + { + "epoch": 0.8243335552654106, + "grad_norm": 2.6892716644193833, + "learning_rate": 3.817343538574197e-06, + "loss": 0.05309562683105469, + "step": 95335 + }, + { + "epoch": 0.8243767887869539, + "grad_norm": 41.9399128545525, + "learning_rate": 3.817147510632468e-06, + "loss": 0.38062591552734376, + "step": 95340 + }, + { + "epoch": 0.8244200223084971, + "grad_norm": 13.050072904200134, + "learning_rate": 3.816951478922128e-06, + "loss": 0.18708343505859376, + "step": 95345 + }, + { + "epoch": 0.8244632558300404, + "grad_norm": 1.3618900984452338, + "learning_rate": 3.816755443444079e-06, + "loss": 0.07184600830078125, + "step": 95350 + }, + { + "epoch": 0.8245064893515837, + "grad_norm": 0.40678916066260523, + "learning_rate": 3.816559404199226e-06, + "loss": 0.07082881927490234, + "step": 95355 + }, + { + "epoch": 0.8245497228731269, + "grad_norm": 7.670512824536827, + "learning_rate": 3.816363361188474e-06, + "loss": 0.404949951171875, + "step": 95360 + }, + { + "epoch": 0.8245929563946701, + "grad_norm": 0.7265629444647184, + "learning_rate": 3.816167314412726e-06, + "loss": 0.09910659790039063, + "step": 95365 + }, + { + "epoch": 0.8246361899162135, + "grad_norm": 29.401379030165597, + "learning_rate": 3.815971263872885e-06, + "loss": 0.18910980224609375, + "step": 95370 + }, + { + "epoch": 0.8246794234377567, + "grad_norm": 3.782342690057675, + "learning_rate": 3.815775209569859e-06, + "loss": 0.0654510498046875, + "step": 95375 + }, + { + "epoch": 0.8247226569592999, + "grad_norm": 0.5775031020557257, + "learning_rate": 3.815579151504549e-06, + "loss": 0.0401519775390625, + "step": 95380 + }, + { + "epoch": 0.8247658904808433, + "grad_norm": 16.135319332555696, + "learning_rate": 3.81538308967786e-06, + "loss": 0.224932861328125, + "step": 95385 + }, + { + "epoch": 0.8248091240023865, + "grad_norm": 5.6641832239052015, + "learning_rate": 3.815187024090697e-06, + "loss": 0.23427848815917968, + "step": 95390 + }, + { + "epoch": 0.8248523575239297, + "grad_norm": 7.566455017217345, + "learning_rate": 3.8149909547439625e-06, + "loss": 0.3728193283081055, + "step": 95395 + }, + { + "epoch": 0.824895591045473, + "grad_norm": 8.303496845262897, + "learning_rate": 3.814794881638561e-06, + "loss": 0.05112953186035156, + "step": 95400 + }, + { + "epoch": 0.8249388245670163, + "grad_norm": 0.31572991245062526, + "learning_rate": 3.8145988047753983e-06, + "loss": 0.34985198974609377, + "step": 95405 + }, + { + "epoch": 0.8249820580885595, + "grad_norm": 12.775776640704102, + "learning_rate": 3.8144027241553784e-06, + "loss": 0.13977127075195311, + "step": 95410 + }, + { + "epoch": 0.8250252916101029, + "grad_norm": 6.569639210762601, + "learning_rate": 3.8142066397794045e-06, + "loss": 0.22540435791015626, + "step": 95415 + }, + { + "epoch": 0.8250685251316461, + "grad_norm": 12.365693793760219, + "learning_rate": 3.8140105516483814e-06, + "loss": 0.09448604583740235, + "step": 95420 + }, + { + "epoch": 0.8251117586531893, + "grad_norm": 0.7479171481174728, + "learning_rate": 3.813814459763214e-06, + "loss": 0.0891693115234375, + "step": 95425 + }, + { + "epoch": 0.8251549921747326, + "grad_norm": 0.9654986796142895, + "learning_rate": 3.813618364124805e-06, + "loss": 0.03987464904785156, + "step": 95430 + }, + { + "epoch": 0.8251982256962759, + "grad_norm": 3.0383597146135237, + "learning_rate": 3.8134222647340617e-06, + "loss": 0.10267982482910157, + "step": 95435 + }, + { + "epoch": 0.8252414592178191, + "grad_norm": 70.8279424394032, + "learning_rate": 3.813226161591887e-06, + "loss": 0.17004928588867188, + "step": 95440 + }, + { + "epoch": 0.8252846927393624, + "grad_norm": 2.419214631978474, + "learning_rate": 3.813030054699183e-06, + "loss": 0.211627197265625, + "step": 95445 + }, + { + "epoch": 0.8253279262609057, + "grad_norm": 1.7501486238583108, + "learning_rate": 3.8128339440568584e-06, + "loss": 0.1104583740234375, + "step": 95450 + }, + { + "epoch": 0.8253711597824489, + "grad_norm": 3.6384430354463233, + "learning_rate": 3.8126378296658137e-06, + "loss": 0.25132598876953127, + "step": 95455 + }, + { + "epoch": 0.8254143933039921, + "grad_norm": 45.51646932951335, + "learning_rate": 3.8124417115269565e-06, + "loss": 0.40915069580078123, + "step": 95460 + }, + { + "epoch": 0.8254576268255355, + "grad_norm": 74.32865354628903, + "learning_rate": 3.812245589641189e-06, + "loss": 0.267156982421875, + "step": 95465 + }, + { + "epoch": 0.8255008603470787, + "grad_norm": 19.90979806664767, + "learning_rate": 3.8120494640094177e-06, + "loss": 0.09115662574768066, + "step": 95470 + }, + { + "epoch": 0.8255440938686219, + "grad_norm": 5.536901234221899, + "learning_rate": 3.811853334632545e-06, + "loss": 0.05745086669921875, + "step": 95475 + }, + { + "epoch": 0.8255873273901653, + "grad_norm": 0.4600219959017181, + "learning_rate": 3.811657201511477e-06, + "loss": 0.019066238403320314, + "step": 95480 + }, + { + "epoch": 0.8256305609117085, + "grad_norm": 7.3245265322882, + "learning_rate": 3.8114610646471167e-06, + "loss": 0.04866485595703125, + "step": 95485 + }, + { + "epoch": 0.8256737944332517, + "grad_norm": 15.879154163968273, + "learning_rate": 3.8112649240403705e-06, + "loss": 0.09278182983398438, + "step": 95490 + }, + { + "epoch": 0.8257170279547951, + "grad_norm": 11.586893923729681, + "learning_rate": 3.8110687796921417e-06, + "loss": 0.10515365600585938, + "step": 95495 + }, + { + "epoch": 0.8257602614763383, + "grad_norm": 9.937104292902202, + "learning_rate": 3.8108726316033366e-06, + "loss": 0.17253570556640624, + "step": 95500 + }, + { + "epoch": 0.8258034949978815, + "grad_norm": 0.5443320800321443, + "learning_rate": 3.8106764797748574e-06, + "loss": 0.035245513916015624, + "step": 95505 + }, + { + "epoch": 0.8258467285194249, + "grad_norm": 8.278484358713513, + "learning_rate": 3.8104803242076094e-06, + "loss": 0.14273681640625, + "step": 95510 + }, + { + "epoch": 0.8258899620409681, + "grad_norm": 3.0343548351564165, + "learning_rate": 3.8102841649024995e-06, + "loss": 0.2198089599609375, + "step": 95515 + }, + { + "epoch": 0.8259331955625113, + "grad_norm": 40.432543461310004, + "learning_rate": 3.8100880018604298e-06, + "loss": 0.1420623779296875, + "step": 95520 + }, + { + "epoch": 0.8259764290840547, + "grad_norm": 4.890040735901828, + "learning_rate": 3.8098918350823056e-06, + "loss": 0.1552032470703125, + "step": 95525 + }, + { + "epoch": 0.8260196626055979, + "grad_norm": 5.022043932371652, + "learning_rate": 3.809695664569032e-06, + "loss": 0.05247421264648437, + "step": 95530 + }, + { + "epoch": 0.8260628961271411, + "grad_norm": 0.9064352890371885, + "learning_rate": 3.8094994903215136e-06, + "loss": 0.32057647705078124, + "step": 95535 + }, + { + "epoch": 0.8261061296486844, + "grad_norm": 14.609305967946272, + "learning_rate": 3.8093033123406545e-06, + "loss": 0.3564933776855469, + "step": 95540 + }, + { + "epoch": 0.8261493631702277, + "grad_norm": 2.862455258375446, + "learning_rate": 3.8091071306273608e-06, + "loss": 0.19741992950439452, + "step": 95545 + }, + { + "epoch": 0.8261925966917709, + "grad_norm": 12.753111337851555, + "learning_rate": 3.8089109451825367e-06, + "loss": 0.16610946655273437, + "step": 95550 + }, + { + "epoch": 0.8262358302133141, + "grad_norm": 2.326051482069865, + "learning_rate": 3.8087147560070865e-06, + "loss": 0.11141357421875, + "step": 95555 + }, + { + "epoch": 0.8262790637348575, + "grad_norm": 5.266272342275659, + "learning_rate": 3.8085185631019147e-06, + "loss": 0.096319580078125, + "step": 95560 + }, + { + "epoch": 0.8263222972564007, + "grad_norm": 3.070041918899321, + "learning_rate": 3.808322366467927e-06, + "loss": 0.4031105041503906, + "step": 95565 + }, + { + "epoch": 0.826365530777944, + "grad_norm": 37.909268203008715, + "learning_rate": 3.8081261661060285e-06, + "loss": 0.8208938598632812, + "step": 95570 + }, + { + "epoch": 0.8264087642994873, + "grad_norm": 12.786241911270762, + "learning_rate": 3.8079299620171224e-06, + "loss": 0.22810592651367187, + "step": 95575 + }, + { + "epoch": 0.8264519978210305, + "grad_norm": 0.9720267362226994, + "learning_rate": 3.807733754202116e-06, + "loss": 0.09109811782836914, + "step": 95580 + }, + { + "epoch": 0.8264952313425737, + "grad_norm": 8.726509149800952, + "learning_rate": 3.807537542661912e-06, + "loss": 0.19645252227783203, + "step": 95585 + }, + { + "epoch": 0.8265384648641171, + "grad_norm": 0.4393879003971361, + "learning_rate": 3.807341327397417e-06, + "loss": 0.2332530975341797, + "step": 95590 + }, + { + "epoch": 0.8265816983856603, + "grad_norm": 0.6627865830649676, + "learning_rate": 3.807145108409534e-06, + "loss": 0.25090560913085935, + "step": 95595 + }, + { + "epoch": 0.8266249319072035, + "grad_norm": 36.43397763425057, + "learning_rate": 3.80694888569917e-06, + "loss": 0.11245651245117187, + "step": 95600 + }, + { + "epoch": 0.8266681654287469, + "grad_norm": 0.442597057383638, + "learning_rate": 3.8067526592672288e-06, + "loss": 0.04542732238769531, + "step": 95605 + }, + { + "epoch": 0.8267113989502901, + "grad_norm": 1.4873139669129212, + "learning_rate": 3.8065564291146153e-06, + "loss": 0.3209362030029297, + "step": 95610 + }, + { + "epoch": 0.8267546324718333, + "grad_norm": 1.824430387425033, + "learning_rate": 3.806360195242236e-06, + "loss": 0.1264892578125, + "step": 95615 + }, + { + "epoch": 0.8267978659933767, + "grad_norm": 2.8807321372520684, + "learning_rate": 3.806163957650994e-06, + "loss": 0.08635406494140625, + "step": 95620 + }, + { + "epoch": 0.8268410995149199, + "grad_norm": 6.197424230441856, + "learning_rate": 3.8059677163417937e-06, + "loss": 0.14070510864257812, + "step": 95625 + }, + { + "epoch": 0.8268843330364631, + "grad_norm": 20.26600686500017, + "learning_rate": 3.805771471315543e-06, + "loss": 0.17217330932617186, + "step": 95630 + }, + { + "epoch": 0.8269275665580064, + "grad_norm": 2.648908376413645, + "learning_rate": 3.8055752225731463e-06, + "loss": 0.18927001953125, + "step": 95635 + }, + { + "epoch": 0.8269708000795497, + "grad_norm": 6.6316320298092615, + "learning_rate": 3.8053789701155063e-06, + "loss": 0.28469047546386717, + "step": 95640 + }, + { + "epoch": 0.8270140336010929, + "grad_norm": 31.42183909469889, + "learning_rate": 3.8051827139435306e-06, + "loss": 0.10569534301757813, + "step": 95645 + }, + { + "epoch": 0.8270572671226362, + "grad_norm": 1.6443816237921027, + "learning_rate": 3.8049864540581223e-06, + "loss": 0.14394969940185548, + "step": 95650 + }, + { + "epoch": 0.8271005006441795, + "grad_norm": 0.6668723080610456, + "learning_rate": 3.804790190460188e-06, + "loss": 0.18204498291015625, + "step": 95655 + }, + { + "epoch": 0.8271437341657227, + "grad_norm": 3.214704430979094, + "learning_rate": 3.804593923150633e-06, + "loss": 0.34579925537109374, + "step": 95660 + }, + { + "epoch": 0.827186967687266, + "grad_norm": 32.31574033766616, + "learning_rate": 3.804397652130362e-06, + "loss": 0.3575653076171875, + "step": 95665 + }, + { + "epoch": 0.8272302012088093, + "grad_norm": 5.736805256808461, + "learning_rate": 3.804201377400279e-06, + "loss": 0.042804718017578125, + "step": 95670 + }, + { + "epoch": 0.8272734347303525, + "grad_norm": 1.6406854559365114, + "learning_rate": 3.8040050989612913e-06, + "loss": 0.0773956298828125, + "step": 95675 + }, + { + "epoch": 0.8273166682518958, + "grad_norm": 6.4414601022446, + "learning_rate": 3.803808816814303e-06, + "loss": 0.16007232666015625, + "step": 95680 + }, + { + "epoch": 0.8273599017734391, + "grad_norm": 3.0597292498681727, + "learning_rate": 3.8036125309602184e-06, + "loss": 0.11777362823486329, + "step": 95685 + }, + { + "epoch": 0.8274031352949823, + "grad_norm": 4.1348737004923395, + "learning_rate": 3.803416241399945e-06, + "loss": 0.1287841796875, + "step": 95690 + }, + { + "epoch": 0.8274463688165256, + "grad_norm": 11.577110691165268, + "learning_rate": 3.803219948134387e-06, + "loss": 0.08471298217773438, + "step": 95695 + }, + { + "epoch": 0.8274896023380689, + "grad_norm": 0.17370876396541798, + "learning_rate": 3.803023651164449e-06, + "loss": 0.060467529296875, + "step": 95700 + }, + { + "epoch": 0.8275328358596121, + "grad_norm": 0.9387501797168367, + "learning_rate": 3.802827350491037e-06, + "loss": 0.09315719604492187, + "step": 95705 + }, + { + "epoch": 0.8275760693811554, + "grad_norm": 23.319858728522167, + "learning_rate": 3.8026310461150563e-06, + "loss": 0.191485595703125, + "step": 95710 + }, + { + "epoch": 0.8276193029026986, + "grad_norm": 8.17465945111339, + "learning_rate": 3.8024347380374124e-06, + "loss": 0.05931587219238281, + "step": 95715 + }, + { + "epoch": 0.8276625364242419, + "grad_norm": 1.7609410893400452, + "learning_rate": 3.8022384262590096e-06, + "loss": 0.16466827392578126, + "step": 95720 + }, + { + "epoch": 0.8277057699457852, + "grad_norm": 6.416945093887552, + "learning_rate": 3.802042110780755e-06, + "loss": 0.12315177917480469, + "step": 95725 + }, + { + "epoch": 0.8277490034673284, + "grad_norm": 1.9389098729574497, + "learning_rate": 3.801845791603552e-06, + "loss": 0.13719635009765624, + "step": 95730 + }, + { + "epoch": 0.8277922369888717, + "grad_norm": 1.8863295183192101, + "learning_rate": 3.8016494687283073e-06, + "loss": 0.10592269897460938, + "step": 95735 + }, + { + "epoch": 0.827835470510415, + "grad_norm": 3.2461664579095406, + "learning_rate": 3.801453142155927e-06, + "loss": 0.28241729736328125, + "step": 95740 + }, + { + "epoch": 0.8278787040319582, + "grad_norm": 2.491929639833242, + "learning_rate": 3.8012568118873158e-06, + "loss": 0.07102317810058593, + "step": 95745 + }, + { + "epoch": 0.8279219375535015, + "grad_norm": 13.792403809394916, + "learning_rate": 3.801060477923377e-06, + "loss": 0.10716533660888672, + "step": 95750 + }, + { + "epoch": 0.8279651710750447, + "grad_norm": 0.18501003608168265, + "learning_rate": 3.8008641402650203e-06, + "loss": 0.20724067687988282, + "step": 95755 + }, + { + "epoch": 0.828008404596588, + "grad_norm": 38.78648884936868, + "learning_rate": 3.8006677989131474e-06, + "loss": 0.24662322998046876, + "step": 95760 + }, + { + "epoch": 0.8280516381181313, + "grad_norm": 4.355212046455161, + "learning_rate": 3.8004714538686657e-06, + "loss": 0.44315185546875, + "step": 95765 + }, + { + "epoch": 0.8280948716396745, + "grad_norm": 24.939219340700774, + "learning_rate": 3.800275105132481e-06, + "loss": 0.27096710205078123, + "step": 95770 + }, + { + "epoch": 0.8281381051612178, + "grad_norm": 26.182711449550077, + "learning_rate": 3.8000787527054982e-06, + "loss": 0.5129981994628906, + "step": 95775 + }, + { + "epoch": 0.8281813386827611, + "grad_norm": 8.19408986710705, + "learning_rate": 3.799882396588622e-06, + "loss": 0.19535560607910157, + "step": 95780 + }, + { + "epoch": 0.8282245722043043, + "grad_norm": 14.979791567745332, + "learning_rate": 3.7996860367827596e-06, + "loss": 0.2756053924560547, + "step": 95785 + }, + { + "epoch": 0.8282678057258476, + "grad_norm": 16.752970763802654, + "learning_rate": 3.7994896732888153e-06, + "loss": 0.452166748046875, + "step": 95790 + }, + { + "epoch": 0.8283110392473909, + "grad_norm": 21.2067864983389, + "learning_rate": 3.799293306107695e-06, + "loss": 0.39158172607421876, + "step": 95795 + }, + { + "epoch": 0.8283542727689341, + "grad_norm": 0.3583464929644535, + "learning_rate": 3.799096935240306e-06, + "loss": 0.09263153076171875, + "step": 95800 + }, + { + "epoch": 0.8283975062904774, + "grad_norm": 2.7017841408985896, + "learning_rate": 3.7989005606875515e-06, + "loss": 0.1225341796875, + "step": 95805 + }, + { + "epoch": 0.8284407398120206, + "grad_norm": 2.5838028935031967, + "learning_rate": 3.798704182450339e-06, + "loss": 0.14229736328125, + "step": 95810 + }, + { + "epoch": 0.8284839733335639, + "grad_norm": 8.059198024805148, + "learning_rate": 3.7985078005295723e-06, + "loss": 0.07872695922851562, + "step": 95815 + }, + { + "epoch": 0.8285272068551072, + "grad_norm": 2.341713371583922, + "learning_rate": 3.7983114149261583e-06, + "loss": 0.30028076171875, + "step": 95820 + }, + { + "epoch": 0.8285704403766504, + "grad_norm": 5.676512651334515, + "learning_rate": 3.7981150256410035e-06, + "loss": 0.05584793090820313, + "step": 95825 + }, + { + "epoch": 0.8286136738981937, + "grad_norm": 0.10875422780043757, + "learning_rate": 3.7979186326750123e-06, + "loss": 0.04439697265625, + "step": 95830 + }, + { + "epoch": 0.828656907419737, + "grad_norm": 50.29880275958587, + "learning_rate": 3.7977222360290908e-06, + "loss": 0.21462631225585938, + "step": 95835 + }, + { + "epoch": 0.8287001409412802, + "grad_norm": 1.1758638085705042, + "learning_rate": 3.7975258357041456e-06, + "loss": 0.15111923217773438, + "step": 95840 + }, + { + "epoch": 0.8287433744628235, + "grad_norm": 46.959476267383245, + "learning_rate": 3.7973294317010805e-06, + "loss": 0.253387451171875, + "step": 95845 + }, + { + "epoch": 0.8287866079843668, + "grad_norm": 7.480824118320203, + "learning_rate": 3.7971330240208027e-06, + "loss": 0.038469696044921876, + "step": 95850 + }, + { + "epoch": 0.82882984150591, + "grad_norm": 20.331596238252583, + "learning_rate": 3.7969366126642185e-06, + "loss": 0.06359710693359374, + "step": 95855 + }, + { + "epoch": 0.8288730750274533, + "grad_norm": 31.753623065472066, + "learning_rate": 3.7967401976322318e-06, + "loss": 0.2332489013671875, + "step": 95860 + }, + { + "epoch": 0.8289163085489966, + "grad_norm": 0.6339120687453512, + "learning_rate": 3.796543778925751e-06, + "loss": 0.05875396728515625, + "step": 95865 + }, + { + "epoch": 0.8289595420705398, + "grad_norm": 3.322713307562633, + "learning_rate": 3.7963473565456805e-06, + "loss": 0.08140411376953124, + "step": 95870 + }, + { + "epoch": 0.8290027755920831, + "grad_norm": 14.012644133244423, + "learning_rate": 3.796150930492926e-06, + "loss": 0.13845329284667968, + "step": 95875 + }, + { + "epoch": 0.8290460091136264, + "grad_norm": 3.4924342757576294, + "learning_rate": 3.795954500768394e-06, + "loss": 0.13249435424804687, + "step": 95880 + }, + { + "epoch": 0.8290892426351696, + "grad_norm": 4.584249673636428, + "learning_rate": 3.79575806737299e-06, + "loss": 0.09507827758789063, + "step": 95885 + }, + { + "epoch": 0.8291324761567128, + "grad_norm": 8.060292932614834, + "learning_rate": 3.7955616303076204e-06, + "loss": 0.1957775115966797, + "step": 95890 + }, + { + "epoch": 0.8291757096782562, + "grad_norm": 33.05344634375383, + "learning_rate": 3.79536518957319e-06, + "loss": 0.1350006103515625, + "step": 95895 + }, + { + "epoch": 0.8292189431997994, + "grad_norm": 1.8809482707415957, + "learning_rate": 3.7951687451706065e-06, + "loss": 0.077752685546875, + "step": 95900 + }, + { + "epoch": 0.8292621767213426, + "grad_norm": 21.501829093483778, + "learning_rate": 3.794972297100775e-06, + "loss": 0.1554872512817383, + "step": 95905 + }, + { + "epoch": 0.829305410242886, + "grad_norm": 4.720771183326066, + "learning_rate": 3.7947758453646007e-06, + "loss": 0.04782257080078125, + "step": 95910 + }, + { + "epoch": 0.8293486437644292, + "grad_norm": 0.8740606239863339, + "learning_rate": 3.7945793899629915e-06, + "loss": 0.06777801513671874, + "step": 95915 + }, + { + "epoch": 0.8293918772859724, + "grad_norm": 8.313401672236116, + "learning_rate": 3.794382930896852e-06, + "loss": 0.19737701416015624, + "step": 95920 + }, + { + "epoch": 0.8294351108075158, + "grad_norm": 1.7458887724611722, + "learning_rate": 3.7941864681670883e-06, + "loss": 0.07686729431152343, + "step": 95925 + }, + { + "epoch": 0.829478344329059, + "grad_norm": 24.942083585193274, + "learning_rate": 3.793990001774606e-06, + "loss": 0.2157928466796875, + "step": 95930 + }, + { + "epoch": 0.8295215778506022, + "grad_norm": 36.11602373287351, + "learning_rate": 3.793793531720314e-06, + "loss": 0.14629974365234374, + "step": 95935 + }, + { + "epoch": 0.8295648113721455, + "grad_norm": 0.6689101144684331, + "learning_rate": 3.793597058005114e-06, + "loss": 0.031353759765625, + "step": 95940 + }, + { + "epoch": 0.8296080448936888, + "grad_norm": 4.055977329758264, + "learning_rate": 3.7934005806299168e-06, + "loss": 0.13619537353515626, + "step": 95945 + }, + { + "epoch": 0.829651278415232, + "grad_norm": 0.12469999229765354, + "learning_rate": 3.7932040995956256e-06, + "loss": 0.015305709838867188, + "step": 95950 + }, + { + "epoch": 0.8296945119367753, + "grad_norm": 0.6522590274819472, + "learning_rate": 3.793007614903146e-06, + "loss": 0.17346725463867188, + "step": 95955 + }, + { + "epoch": 0.8297377454583186, + "grad_norm": 0.9829163793386321, + "learning_rate": 3.792811126553386e-06, + "loss": 0.2924949645996094, + "step": 95960 + }, + { + "epoch": 0.8297809789798618, + "grad_norm": 54.910898218917254, + "learning_rate": 3.792614634547252e-06, + "loss": 0.24766845703125, + "step": 95965 + }, + { + "epoch": 0.829824212501405, + "grad_norm": 7.065856713237965, + "learning_rate": 3.792418138885649e-06, + "loss": 0.1480224609375, + "step": 95970 + }, + { + "epoch": 0.8298674460229484, + "grad_norm": 10.002986401380838, + "learning_rate": 3.7922216395694824e-06, + "loss": 0.12006149291992188, + "step": 95975 + }, + { + "epoch": 0.8299106795444916, + "grad_norm": 20.98897998424471, + "learning_rate": 3.792025136599661e-06, + "loss": 0.20168075561523438, + "step": 95980 + }, + { + "epoch": 0.8299539130660348, + "grad_norm": 4.737082001764916, + "learning_rate": 3.7918286299770888e-06, + "loss": 0.40008392333984377, + "step": 95985 + }, + { + "epoch": 0.8299971465875782, + "grad_norm": 2.3533211079211176, + "learning_rate": 3.7916321197026723e-06, + "loss": 0.0811767578125, + "step": 95990 + }, + { + "epoch": 0.8300403801091214, + "grad_norm": 18.85254342837717, + "learning_rate": 3.79143560577732e-06, + "loss": 0.21679420471191407, + "step": 95995 + }, + { + "epoch": 0.8300836136306646, + "grad_norm": 6.71897600082717, + "learning_rate": 3.791239088201936e-06, + "loss": 0.25599365234375, + "step": 96000 + }, + { + "epoch": 0.830126847152208, + "grad_norm": 17.394801376565066, + "learning_rate": 3.791042566977427e-06, + "loss": 0.344879150390625, + "step": 96005 + }, + { + "epoch": 0.8301700806737512, + "grad_norm": 14.311765670246684, + "learning_rate": 3.7908460421046998e-06, + "loss": 0.189190673828125, + "step": 96010 + }, + { + "epoch": 0.8302133141952944, + "grad_norm": 6.186646022678049, + "learning_rate": 3.79064951358466e-06, + "loss": 0.17374496459960936, + "step": 96015 + }, + { + "epoch": 0.8302565477168378, + "grad_norm": 17.36800234570328, + "learning_rate": 3.790452981418215e-06, + "loss": 0.14225997924804687, + "step": 96020 + }, + { + "epoch": 0.830299781238381, + "grad_norm": 1.769661375318304, + "learning_rate": 3.790256445606271e-06, + "loss": 0.13004608154296876, + "step": 96025 + }, + { + "epoch": 0.8303430147599242, + "grad_norm": 4.408337558668798, + "learning_rate": 3.790059906149734e-06, + "loss": 0.434930419921875, + "step": 96030 + }, + { + "epoch": 0.8303862482814676, + "grad_norm": 3.411722900059765, + "learning_rate": 3.7898633630495096e-06, + "loss": 0.04676074981689453, + "step": 96035 + }, + { + "epoch": 0.8304294818030108, + "grad_norm": 1.0091848741682727, + "learning_rate": 3.789666816306506e-06, + "loss": 0.085382080078125, + "step": 96040 + }, + { + "epoch": 0.830472715324554, + "grad_norm": 7.002205424621634, + "learning_rate": 3.789470265921629e-06, + "loss": 0.0523223876953125, + "step": 96045 + }, + { + "epoch": 0.8305159488460974, + "grad_norm": 0.5506691564690136, + "learning_rate": 3.7892737118957843e-06, + "loss": 0.268585205078125, + "step": 96050 + }, + { + "epoch": 0.8305591823676406, + "grad_norm": 9.225581548912709, + "learning_rate": 3.7890771542298794e-06, + "loss": 0.3017322540283203, + "step": 96055 + }, + { + "epoch": 0.8306024158891838, + "grad_norm": 30.669031930977134, + "learning_rate": 3.7888805929248204e-06, + "loss": 0.252178955078125, + "step": 96060 + }, + { + "epoch": 0.830645649410727, + "grad_norm": 5.146220681934366, + "learning_rate": 3.788684027981514e-06, + "loss": 0.12392082214355468, + "step": 96065 + }, + { + "epoch": 0.8306888829322704, + "grad_norm": 9.093596128377772, + "learning_rate": 3.7884874594008653e-06, + "loss": 0.118011474609375, + "step": 96070 + }, + { + "epoch": 0.8307321164538136, + "grad_norm": 10.89610757307477, + "learning_rate": 3.7882908871837827e-06, + "loss": 0.24433746337890624, + "step": 96075 + }, + { + "epoch": 0.8307753499753568, + "grad_norm": 15.310925075879057, + "learning_rate": 3.7880943113311734e-06, + "loss": 0.119659423828125, + "step": 96080 + }, + { + "epoch": 0.8308185834969002, + "grad_norm": 1.463517678195856, + "learning_rate": 3.787897731843942e-06, + "loss": 0.07136650085449218, + "step": 96085 + }, + { + "epoch": 0.8308618170184434, + "grad_norm": 22.141553751869488, + "learning_rate": 3.7877011487229952e-06, + "loss": 0.4105194091796875, + "step": 96090 + }, + { + "epoch": 0.8309050505399866, + "grad_norm": 1.731879702255007, + "learning_rate": 3.7875045619692417e-06, + "loss": 0.07343864440917969, + "step": 96095 + }, + { + "epoch": 0.83094828406153, + "grad_norm": 0.6369167114372745, + "learning_rate": 3.7873079715835853e-06, + "loss": 0.06705818176269532, + "step": 96100 + }, + { + "epoch": 0.8309915175830732, + "grad_norm": 31.11690484125819, + "learning_rate": 3.7871113775669346e-06, + "loss": 0.12911567687988282, + "step": 96105 + }, + { + "epoch": 0.8310347511046164, + "grad_norm": 0.9265997974281043, + "learning_rate": 3.786914779920196e-06, + "loss": 0.10887680053710938, + "step": 96110 + }, + { + "epoch": 0.8310779846261598, + "grad_norm": 2.492354978894703, + "learning_rate": 3.7867181786442755e-06, + "loss": 0.02281646728515625, + "step": 96115 + }, + { + "epoch": 0.831121218147703, + "grad_norm": 0.7206825350690201, + "learning_rate": 3.7865215737400805e-06, + "loss": 0.2692840576171875, + "step": 96120 + }, + { + "epoch": 0.8311644516692462, + "grad_norm": 3.5738850225252623, + "learning_rate": 3.7863249652085187e-06, + "loss": 0.08875503540039062, + "step": 96125 + }, + { + "epoch": 0.8312076851907896, + "grad_norm": 0.08618169861087935, + "learning_rate": 3.786128353050493e-06, + "loss": 0.3274364471435547, + "step": 96130 + }, + { + "epoch": 0.8312509187123328, + "grad_norm": 1.0207522693353497, + "learning_rate": 3.7859317372669148e-06, + "loss": 0.14720993041992186, + "step": 96135 + }, + { + "epoch": 0.831294152233876, + "grad_norm": 0.39667421853266893, + "learning_rate": 3.7857351178586884e-06, + "loss": 0.1299774169921875, + "step": 96140 + }, + { + "epoch": 0.8313373857554193, + "grad_norm": 0.7017283987336739, + "learning_rate": 3.7855384948267216e-06, + "loss": 0.0710601806640625, + "step": 96145 + }, + { + "epoch": 0.8313806192769626, + "grad_norm": 0.8273723972302289, + "learning_rate": 3.7853418681719196e-06, + "loss": 0.03449935913085937, + "step": 96150 + }, + { + "epoch": 0.8314238527985058, + "grad_norm": 64.49954643009299, + "learning_rate": 3.7851452378951907e-06, + "loss": 0.15400238037109376, + "step": 96155 + }, + { + "epoch": 0.8314670863200491, + "grad_norm": 0.3744248176956144, + "learning_rate": 3.7849486039974416e-06, + "loss": 0.09820709228515626, + "step": 96160 + }, + { + "epoch": 0.8315103198415924, + "grad_norm": 7.289430355657631, + "learning_rate": 3.7847519664795792e-06, + "loss": 0.04309158325195313, + "step": 96165 + }, + { + "epoch": 0.8315535533631356, + "grad_norm": 26.350276314421883, + "learning_rate": 3.7845553253425094e-06, + "loss": 0.276690673828125, + "step": 96170 + }, + { + "epoch": 0.8315967868846789, + "grad_norm": 0.7935960818674688, + "learning_rate": 3.7843586805871405e-06, + "loss": 0.10256195068359375, + "step": 96175 + }, + { + "epoch": 0.8316400204062222, + "grad_norm": 1.5411795746191583, + "learning_rate": 3.784162032214378e-06, + "loss": 0.031039047241210937, + "step": 96180 + }, + { + "epoch": 0.8316832539277654, + "grad_norm": 2.640493222636719, + "learning_rate": 3.7839653802251294e-06, + "loss": 0.42412490844726564, + "step": 96185 + }, + { + "epoch": 0.8317264874493087, + "grad_norm": 4.907609243644374, + "learning_rate": 3.7837687246203023e-06, + "loss": 0.19598159790039063, + "step": 96190 + }, + { + "epoch": 0.831769720970852, + "grad_norm": 0.6072035148938351, + "learning_rate": 3.7835720654008027e-06, + "loss": 0.0474945068359375, + "step": 96195 + }, + { + "epoch": 0.8318129544923952, + "grad_norm": 18.14670239963452, + "learning_rate": 3.7833754025675383e-06, + "loss": 0.3537689208984375, + "step": 96200 + }, + { + "epoch": 0.8318561880139385, + "grad_norm": 0.6611847761683082, + "learning_rate": 3.783178736121416e-06, + "loss": 0.1934814453125, + "step": 96205 + }, + { + "epoch": 0.8318994215354818, + "grad_norm": 5.103645047313432, + "learning_rate": 3.7829820660633415e-06, + "loss": 0.16406688690185547, + "step": 96210 + }, + { + "epoch": 0.831942655057025, + "grad_norm": 2.1113274987865682, + "learning_rate": 3.7827853923942235e-06, + "loss": 0.082568359375, + "step": 96215 + }, + { + "epoch": 0.8319858885785683, + "grad_norm": 0.9042990787388023, + "learning_rate": 3.782588715114969e-06, + "loss": 0.23360671997070312, + "step": 96220 + }, + { + "epoch": 0.8320291221001116, + "grad_norm": 24.287174303849984, + "learning_rate": 3.782392034226483e-06, + "loss": 0.37569198608398435, + "step": 96225 + }, + { + "epoch": 0.8320723556216548, + "grad_norm": 0.34383237944385076, + "learning_rate": 3.7821953497296756e-06, + "loss": 0.15826797485351562, + "step": 96230 + }, + { + "epoch": 0.832115589143198, + "grad_norm": 1.2489083351451153, + "learning_rate": 3.781998661625452e-06, + "loss": 0.12046318054199219, + "step": 96235 + }, + { + "epoch": 0.8321588226647413, + "grad_norm": 3.341478909689922, + "learning_rate": 3.781801969914719e-06, + "loss": 0.1612762451171875, + "step": 96240 + }, + { + "epoch": 0.8322020561862846, + "grad_norm": 14.937519219549857, + "learning_rate": 3.7816052745983845e-06, + "loss": 0.155120849609375, + "step": 96245 + }, + { + "epoch": 0.8322452897078279, + "grad_norm": 0.8589340148697633, + "learning_rate": 3.7814085756773564e-06, + "loss": 0.112255859375, + "step": 96250 + }, + { + "epoch": 0.8322885232293711, + "grad_norm": 6.508806580514579, + "learning_rate": 3.7812118731525406e-06, + "loss": 0.13814773559570312, + "step": 96255 + }, + { + "epoch": 0.8323317567509144, + "grad_norm": 2.029215038979377, + "learning_rate": 3.7810151670248447e-06, + "loss": 0.1558765411376953, + "step": 96260 + }, + { + "epoch": 0.8323749902724576, + "grad_norm": 3.9398065885242506, + "learning_rate": 3.7808184572951755e-06, + "loss": 0.05290431976318359, + "step": 96265 + }, + { + "epoch": 0.8324182237940009, + "grad_norm": 0.4483069384782662, + "learning_rate": 3.780621743964441e-06, + "loss": 0.04987659454345703, + "step": 96270 + }, + { + "epoch": 0.8324614573155442, + "grad_norm": 2.9891230326330933, + "learning_rate": 3.7804250270335477e-06, + "loss": 0.15023193359375, + "step": 96275 + }, + { + "epoch": 0.8325046908370874, + "grad_norm": 1.188968733954632, + "learning_rate": 3.7802283065034033e-06, + "loss": 0.015967559814453126, + "step": 96280 + }, + { + "epoch": 0.8325479243586307, + "grad_norm": 24.16069470690506, + "learning_rate": 3.780031582374915e-06, + "loss": 0.11925201416015625, + "step": 96285 + }, + { + "epoch": 0.832591157880174, + "grad_norm": 4.711835995620194, + "learning_rate": 3.7798348546489893e-06, + "loss": 0.105902099609375, + "step": 96290 + }, + { + "epoch": 0.8326343914017172, + "grad_norm": 1.92435376115014, + "learning_rate": 3.779638123326535e-06, + "loss": 0.05945587158203125, + "step": 96295 + }, + { + "epoch": 0.8326776249232605, + "grad_norm": 0.09780378902001959, + "learning_rate": 3.779441388408458e-06, + "loss": 0.2457141876220703, + "step": 96300 + }, + { + "epoch": 0.8327208584448038, + "grad_norm": 5.1398596994223, + "learning_rate": 3.7792446498956664e-06, + "loss": 0.0967864990234375, + "step": 96305 + }, + { + "epoch": 0.832764091966347, + "grad_norm": 4.5001784894998025, + "learning_rate": 3.7790479077890675e-06, + "loss": 0.06324310302734375, + "step": 96310 + }, + { + "epoch": 0.8328073254878903, + "grad_norm": 22.418877872584925, + "learning_rate": 3.7788511620895687e-06, + "loss": 0.309564208984375, + "step": 96315 + }, + { + "epoch": 0.8328505590094335, + "grad_norm": 5.435888817360604, + "learning_rate": 3.7786544127980767e-06, + "loss": 0.2827056884765625, + "step": 96320 + }, + { + "epoch": 0.8328937925309768, + "grad_norm": 7.114688857418344, + "learning_rate": 3.778457659915499e-06, + "loss": 0.090045166015625, + "step": 96325 + }, + { + "epoch": 0.8329370260525201, + "grad_norm": 4.367512195471608, + "learning_rate": 3.778260903442744e-06, + "loss": 0.186822509765625, + "step": 96330 + }, + { + "epoch": 0.8329802595740633, + "grad_norm": 9.934758785650052, + "learning_rate": 3.778064143380719e-06, + "loss": 0.08712005615234375, + "step": 96335 + }, + { + "epoch": 0.8330234930956066, + "grad_norm": 13.324781390404791, + "learning_rate": 3.7778673797303294e-06, + "loss": 0.07202663421630859, + "step": 96340 + }, + { + "epoch": 0.8330667266171499, + "grad_norm": 14.753412168628019, + "learning_rate": 3.777670612492485e-06, + "loss": 0.17009353637695312, + "step": 96345 + }, + { + "epoch": 0.8331099601386931, + "grad_norm": 22.374371379638085, + "learning_rate": 3.777473841668093e-06, + "loss": 0.41983642578125, + "step": 96350 + }, + { + "epoch": 0.8331531936602364, + "grad_norm": 16.052340803458517, + "learning_rate": 3.777277067258059e-06, + "loss": 0.16586036682128907, + "step": 96355 + }, + { + "epoch": 0.8331964271817797, + "grad_norm": 0.07840267804080747, + "learning_rate": 3.7770802892632923e-06, + "loss": 0.11456642150878907, + "step": 96360 + }, + { + "epoch": 0.8332396607033229, + "grad_norm": 5.25089767902959, + "learning_rate": 3.776883507684701e-06, + "loss": 0.0684967041015625, + "step": 96365 + }, + { + "epoch": 0.8332828942248662, + "grad_norm": 5.291145383317298, + "learning_rate": 3.7766867225231903e-06, + "loss": 0.0930511474609375, + "step": 96370 + }, + { + "epoch": 0.8333261277464095, + "grad_norm": 6.746620757103072, + "learning_rate": 3.77648993377967e-06, + "loss": 0.14512100219726562, + "step": 96375 + }, + { + "epoch": 0.8333693612679527, + "grad_norm": 19.743790429272558, + "learning_rate": 3.7762931414550457e-06, + "loss": 0.1988882064819336, + "step": 96380 + }, + { + "epoch": 0.833412594789496, + "grad_norm": 16.85726835350797, + "learning_rate": 3.776096345550227e-06, + "loss": 0.33489990234375, + "step": 96385 + }, + { + "epoch": 0.8334558283110393, + "grad_norm": 1.2153221131355503, + "learning_rate": 3.7758995460661206e-06, + "loss": 0.062349700927734376, + "step": 96390 + }, + { + "epoch": 0.8334990618325825, + "grad_norm": 15.505502818058133, + "learning_rate": 3.775702743003634e-06, + "loss": 0.12258453369140625, + "step": 96395 + }, + { + "epoch": 0.8335422953541258, + "grad_norm": 12.502560451032704, + "learning_rate": 3.775505936363674e-06, + "loss": 0.22769527435302733, + "step": 96400 + }, + { + "epoch": 0.8335855288756691, + "grad_norm": 34.6096234408278, + "learning_rate": 3.77530912614715e-06, + "loss": 0.1627593994140625, + "step": 96405 + }, + { + "epoch": 0.8336287623972123, + "grad_norm": 0.6344200219918071, + "learning_rate": 3.775112312354968e-06, + "loss": 0.016579246520996092, + "step": 96410 + }, + { + "epoch": 0.8336719959187555, + "grad_norm": 14.355816932664865, + "learning_rate": 3.774915494988038e-06, + "loss": 0.2018157958984375, + "step": 96415 + }, + { + "epoch": 0.8337152294402989, + "grad_norm": 5.600967463024027, + "learning_rate": 3.7747186740472647e-06, + "loss": 0.03474712371826172, + "step": 96420 + }, + { + "epoch": 0.8337584629618421, + "grad_norm": 11.46847112890929, + "learning_rate": 3.774521849533558e-06, + "loss": 0.21033134460449218, + "step": 96425 + }, + { + "epoch": 0.8338016964833853, + "grad_norm": 10.566190432880962, + "learning_rate": 3.774325021447825e-06, + "loss": 0.11905059814453126, + "step": 96430 + }, + { + "epoch": 0.8338449300049287, + "grad_norm": 23.61415062480093, + "learning_rate": 3.7741281897909734e-06, + "loss": 0.22647705078125, + "step": 96435 + }, + { + "epoch": 0.8338881635264719, + "grad_norm": 1.1059354059087416, + "learning_rate": 3.7739313545639105e-06, + "loss": 0.0773956298828125, + "step": 96440 + }, + { + "epoch": 0.8339313970480151, + "grad_norm": 16.192585150689094, + "learning_rate": 3.7737345157675448e-06, + "loss": 0.16335830688476563, + "step": 96445 + }, + { + "epoch": 0.8339746305695585, + "grad_norm": 8.200912282919838, + "learning_rate": 3.773537673402784e-06, + "loss": 0.203143310546875, + "step": 96450 + }, + { + "epoch": 0.8340178640911017, + "grad_norm": 7.3396712394385, + "learning_rate": 3.7733408274705365e-06, + "loss": 0.20358772277832032, + "step": 96455 + }, + { + "epoch": 0.8340610976126449, + "grad_norm": 1.2139952658339215, + "learning_rate": 3.773143977971709e-06, + "loss": 0.2485626220703125, + "step": 96460 + }, + { + "epoch": 0.8341043311341882, + "grad_norm": 48.587727626321914, + "learning_rate": 3.7729471249072093e-06, + "loss": 0.29858856201171874, + "step": 96465 + }, + { + "epoch": 0.8341475646557315, + "grad_norm": 16.19739753553969, + "learning_rate": 3.772750268277946e-06, + "loss": 0.08494338989257813, + "step": 96470 + }, + { + "epoch": 0.8341907981772747, + "grad_norm": 0.631336662015237, + "learning_rate": 3.772553408084827e-06, + "loss": 0.1323974609375, + "step": 96475 + }, + { + "epoch": 0.834234031698818, + "grad_norm": 0.530388328853129, + "learning_rate": 3.772356544328759e-06, + "loss": 0.198504638671875, + "step": 96480 + }, + { + "epoch": 0.8342772652203613, + "grad_norm": 2.974008775076336, + "learning_rate": 3.772159677010652e-06, + "loss": 0.19759864807128907, + "step": 96485 + }, + { + "epoch": 0.8343204987419045, + "grad_norm": 0.5166576635026691, + "learning_rate": 3.7719628061314125e-06, + "loss": 0.1967041015625, + "step": 96490 + }, + { + "epoch": 0.8343637322634477, + "grad_norm": 43.201955668351154, + "learning_rate": 3.7717659316919482e-06, + "loss": 0.28394775390625, + "step": 96495 + }, + { + "epoch": 0.8344069657849911, + "grad_norm": 2.985207192733869, + "learning_rate": 3.771569053693168e-06, + "loss": 0.04622955322265625, + "step": 96500 + }, + { + "epoch": 0.8344501993065343, + "grad_norm": 1.6788762392866465, + "learning_rate": 3.77137217213598e-06, + "loss": 0.1456085205078125, + "step": 96505 + }, + { + "epoch": 0.8344934328280775, + "grad_norm": 12.321532921454354, + "learning_rate": 3.7711752870212917e-06, + "loss": 0.07692184448242187, + "step": 96510 + }, + { + "epoch": 0.8345366663496209, + "grad_norm": 0.969878587423087, + "learning_rate": 3.7709783983500103e-06, + "loss": 0.02071533203125, + "step": 96515 + }, + { + "epoch": 0.8345798998711641, + "grad_norm": 14.567172374411133, + "learning_rate": 3.770781506123045e-06, + "loss": 0.0535614013671875, + "step": 96520 + }, + { + "epoch": 0.8346231333927073, + "grad_norm": 0.1329765071213829, + "learning_rate": 3.7705846103413037e-06, + "loss": 0.08035449981689453, + "step": 96525 + }, + { + "epoch": 0.8346663669142507, + "grad_norm": 7.845059667022285, + "learning_rate": 3.7703877110056937e-06, + "loss": 0.085321044921875, + "step": 96530 + }, + { + "epoch": 0.8347096004357939, + "grad_norm": 29.91435216428069, + "learning_rate": 3.7701908081171244e-06, + "loss": 0.2470458984375, + "step": 96535 + }, + { + "epoch": 0.8347528339573371, + "grad_norm": 16.77354438369668, + "learning_rate": 3.769993901676503e-06, + "loss": 0.1400177001953125, + "step": 96540 + }, + { + "epoch": 0.8347960674788805, + "grad_norm": 7.728812514058776, + "learning_rate": 3.769796991684737e-06, + "loss": 0.0447174072265625, + "step": 96545 + }, + { + "epoch": 0.8348393010004237, + "grad_norm": 1.7781785126052738, + "learning_rate": 3.769600078142736e-06, + "loss": 0.0223052978515625, + "step": 96550 + }, + { + "epoch": 0.8348825345219669, + "grad_norm": 5.960585109152105, + "learning_rate": 3.769403161051407e-06, + "loss": 0.04146499633789062, + "step": 96555 + }, + { + "epoch": 0.8349257680435103, + "grad_norm": 11.055590106696174, + "learning_rate": 3.769206240411659e-06, + "loss": 0.3443931579589844, + "step": 96560 + }, + { + "epoch": 0.8349690015650535, + "grad_norm": 8.88521319012635, + "learning_rate": 3.7690093162244e-06, + "loss": 0.0682952880859375, + "step": 96565 + }, + { + "epoch": 0.8350122350865967, + "grad_norm": 32.703224991980186, + "learning_rate": 3.7688123884905382e-06, + "loss": 0.5269630432128907, + "step": 96570 + }, + { + "epoch": 0.8350554686081401, + "grad_norm": 8.135248682032332, + "learning_rate": 3.76861545721098e-06, + "loss": 0.0822540283203125, + "step": 96575 + }, + { + "epoch": 0.8350987021296833, + "grad_norm": 0.7302916969094099, + "learning_rate": 3.7684185223866357e-06, + "loss": 0.16744842529296874, + "step": 96580 + }, + { + "epoch": 0.8351419356512265, + "grad_norm": 7.388387873455563, + "learning_rate": 3.7682215840184146e-06, + "loss": 0.28488502502441404, + "step": 96585 + }, + { + "epoch": 0.8351851691727697, + "grad_norm": 6.875567052551374, + "learning_rate": 3.768024642107223e-06, + "loss": 0.09401626586914062, + "step": 96590 + }, + { + "epoch": 0.8352284026943131, + "grad_norm": 3.8274212061702504, + "learning_rate": 3.7678276966539686e-06, + "loss": 0.088140869140625, + "step": 96595 + }, + { + "epoch": 0.8352716362158563, + "grad_norm": 2.5627269821588055, + "learning_rate": 3.7676307476595612e-06, + "loss": 0.18732452392578125, + "step": 96600 + }, + { + "epoch": 0.8353148697373995, + "grad_norm": 7.005477458467124, + "learning_rate": 3.7674337951249086e-06, + "loss": 0.05909423828125, + "step": 96605 + }, + { + "epoch": 0.8353581032589429, + "grad_norm": 10.764476129696222, + "learning_rate": 3.7672368390509186e-06, + "loss": 0.112701416015625, + "step": 96610 + }, + { + "epoch": 0.8354013367804861, + "grad_norm": 24.71465113670981, + "learning_rate": 3.767039879438501e-06, + "loss": 0.22211723327636718, + "step": 96615 + }, + { + "epoch": 0.8354445703020293, + "grad_norm": 1.4371465833411572, + "learning_rate": 3.7668429162885637e-06, + "loss": 0.28282470703125, + "step": 96620 + }, + { + "epoch": 0.8354878038235727, + "grad_norm": 40.30444372255531, + "learning_rate": 3.7666459496020136e-06, + "loss": 0.33711090087890627, + "step": 96625 + }, + { + "epoch": 0.8355310373451159, + "grad_norm": 18.50524114322925, + "learning_rate": 3.7664489793797604e-06, + "loss": 0.3557456970214844, + "step": 96630 + }, + { + "epoch": 0.8355742708666591, + "grad_norm": 26.848840720389916, + "learning_rate": 3.766252005622712e-06, + "loss": 0.5610252380371094, + "step": 96635 + }, + { + "epoch": 0.8356175043882025, + "grad_norm": 1.8843714908949614, + "learning_rate": 3.766055028331777e-06, + "loss": 0.07780647277832031, + "step": 96640 + }, + { + "epoch": 0.8356607379097457, + "grad_norm": 0.4253193632559754, + "learning_rate": 3.765858047507864e-06, + "loss": 0.08204193115234375, + "step": 96645 + }, + { + "epoch": 0.8357039714312889, + "grad_norm": 3.4623397182733457, + "learning_rate": 3.7656610631518824e-06, + "loss": 0.17771835327148439, + "step": 96650 + }, + { + "epoch": 0.8357472049528323, + "grad_norm": 11.244218600187821, + "learning_rate": 3.7654640752647385e-06, + "loss": 0.0807861328125, + "step": 96655 + }, + { + "epoch": 0.8357904384743755, + "grad_norm": 0.3429392300101963, + "learning_rate": 3.765267083847342e-06, + "loss": 0.09940643310546875, + "step": 96660 + }, + { + "epoch": 0.8358336719959187, + "grad_norm": 1.278174276021209, + "learning_rate": 3.765070088900602e-06, + "loss": 0.05287017822265625, + "step": 96665 + }, + { + "epoch": 0.835876905517462, + "grad_norm": 7.925569702958283, + "learning_rate": 3.764873090425426e-06, + "loss": 0.10093116760253906, + "step": 96670 + }, + { + "epoch": 0.8359201390390053, + "grad_norm": 4.625623310637544, + "learning_rate": 3.764676088422722e-06, + "loss": 0.15621490478515626, + "step": 96675 + }, + { + "epoch": 0.8359633725605485, + "grad_norm": 7.042751604245016, + "learning_rate": 3.7644790828934002e-06, + "loss": 0.07993202209472657, + "step": 96680 + }, + { + "epoch": 0.8360066060820918, + "grad_norm": 0.22302958416475507, + "learning_rate": 3.7642820738383693e-06, + "loss": 0.1370086669921875, + "step": 96685 + }, + { + "epoch": 0.8360498396036351, + "grad_norm": 9.074541879323192, + "learning_rate": 3.7640850612585352e-06, + "loss": 0.1230560302734375, + "step": 96690 + }, + { + "epoch": 0.8360930731251783, + "grad_norm": 6.6606128150504205, + "learning_rate": 3.7638880451548095e-06, + "loss": 0.0641754150390625, + "step": 96695 + }, + { + "epoch": 0.8361363066467216, + "grad_norm": 21.284862934503472, + "learning_rate": 3.7636910255280996e-06, + "loss": 0.16217422485351562, + "step": 96700 + }, + { + "epoch": 0.8361795401682649, + "grad_norm": 33.475268943437, + "learning_rate": 3.7634940023793137e-06, + "loss": 0.1026336669921875, + "step": 96705 + }, + { + "epoch": 0.8362227736898081, + "grad_norm": 2.1737876766824225, + "learning_rate": 3.7632969757093615e-06, + "loss": 0.1335845947265625, + "step": 96710 + }, + { + "epoch": 0.8362660072113514, + "grad_norm": 24.355542876517028, + "learning_rate": 3.7630999455191513e-06, + "loss": 0.18673934936523437, + "step": 96715 + }, + { + "epoch": 0.8363092407328947, + "grad_norm": 1.280131845275192, + "learning_rate": 3.76290291180959e-06, + "loss": 0.334613037109375, + "step": 96720 + }, + { + "epoch": 0.8363524742544379, + "grad_norm": 0.49907998933529946, + "learning_rate": 3.7627058745815892e-06, + "loss": 0.15419769287109375, + "step": 96725 + }, + { + "epoch": 0.8363957077759812, + "grad_norm": 5.1831349005657605, + "learning_rate": 3.762508833836056e-06, + "loss": 0.04071502685546875, + "step": 96730 + }, + { + "epoch": 0.8364389412975245, + "grad_norm": 18.136601400776993, + "learning_rate": 3.7623117895738994e-06, + "loss": 0.2180694580078125, + "step": 96735 + }, + { + "epoch": 0.8364821748190677, + "grad_norm": 0.7132071368767198, + "learning_rate": 3.762114741796029e-06, + "loss": 0.10902862548828125, + "step": 96740 + }, + { + "epoch": 0.836525408340611, + "grad_norm": 4.506569378104345, + "learning_rate": 3.7619176905033522e-06, + "loss": 0.0679901123046875, + "step": 96745 + }, + { + "epoch": 0.8365686418621543, + "grad_norm": 3.6250408374927483, + "learning_rate": 3.7617206356967777e-06, + "loss": 0.07197914123535157, + "step": 96750 + }, + { + "epoch": 0.8366118753836975, + "grad_norm": 27.53057007052387, + "learning_rate": 3.761523577377215e-06, + "loss": 0.3568023681640625, + "step": 96755 + }, + { + "epoch": 0.8366551089052408, + "grad_norm": 6.107564609230118, + "learning_rate": 3.7613265155455735e-06, + "loss": 0.092327880859375, + "step": 96760 + }, + { + "epoch": 0.836698342426784, + "grad_norm": 5.145738195492549, + "learning_rate": 3.7611294502027612e-06, + "loss": 0.03828353881835937, + "step": 96765 + }, + { + "epoch": 0.8367415759483273, + "grad_norm": 0.9828319600860292, + "learning_rate": 3.7609323813496864e-06, + "loss": 0.05058631896972656, + "step": 96770 + }, + { + "epoch": 0.8367848094698705, + "grad_norm": 4.365291558109639, + "learning_rate": 3.7607353089872597e-06, + "loss": 0.3606842041015625, + "step": 96775 + }, + { + "epoch": 0.8368280429914138, + "grad_norm": 10.123545131857174, + "learning_rate": 3.7605382331163886e-06, + "loss": 0.10597381591796876, + "step": 96780 + }, + { + "epoch": 0.8368712765129571, + "grad_norm": 0.13551469191204682, + "learning_rate": 3.7603411537379824e-06, + "loss": 0.07076416015625, + "step": 96785 + }, + { + "epoch": 0.8369145100345003, + "grad_norm": 15.566302481019386, + "learning_rate": 3.76014407085295e-06, + "loss": 0.04577789306640625, + "step": 96790 + }, + { + "epoch": 0.8369577435560436, + "grad_norm": 1.9973017947921654, + "learning_rate": 3.7599469844622004e-06, + "loss": 0.10069503784179687, + "step": 96795 + }, + { + "epoch": 0.8370009770775869, + "grad_norm": 21.376581519112662, + "learning_rate": 3.759749894566642e-06, + "loss": 0.1458404541015625, + "step": 96800 + }, + { + "epoch": 0.8370442105991301, + "grad_norm": 4.374426852306407, + "learning_rate": 3.759552801167185e-06, + "loss": 0.058624267578125, + "step": 96805 + }, + { + "epoch": 0.8370874441206734, + "grad_norm": 19.117575265571688, + "learning_rate": 3.7593557042647365e-06, + "loss": 0.109307861328125, + "step": 96810 + }, + { + "epoch": 0.8371306776422167, + "grad_norm": 1.3016489570138965, + "learning_rate": 3.759158603860207e-06, + "loss": 0.0800018310546875, + "step": 96815 + }, + { + "epoch": 0.8371739111637599, + "grad_norm": 6.700816185545996, + "learning_rate": 3.7589614999545063e-06, + "loss": 0.03684654235839844, + "step": 96820 + }, + { + "epoch": 0.8372171446853032, + "grad_norm": 2.1771557367847096, + "learning_rate": 3.758764392548541e-06, + "loss": 0.327447509765625, + "step": 96825 + }, + { + "epoch": 0.8372603782068465, + "grad_norm": 0.13171302557266631, + "learning_rate": 3.758567281643222e-06, + "loss": 0.08410873413085937, + "step": 96830 + }, + { + "epoch": 0.8373036117283897, + "grad_norm": 4.350422309157536, + "learning_rate": 3.7583701672394564e-06, + "loss": 0.26356201171875, + "step": 96835 + }, + { + "epoch": 0.837346845249933, + "grad_norm": 14.890589400057129, + "learning_rate": 3.758173049338156e-06, + "loss": 0.10239639282226562, + "step": 96840 + }, + { + "epoch": 0.8373900787714762, + "grad_norm": 1.2077706241515698, + "learning_rate": 3.7579759279402285e-06, + "loss": 0.08441085815429687, + "step": 96845 + }, + { + "epoch": 0.8374333122930195, + "grad_norm": 13.770829715156891, + "learning_rate": 3.757778803046582e-06, + "loss": 0.13451385498046875, + "step": 96850 + }, + { + "epoch": 0.8374765458145628, + "grad_norm": 12.975838427820852, + "learning_rate": 3.757581674658128e-06, + "loss": 0.09188385009765625, + "step": 96855 + }, + { + "epoch": 0.837519779336106, + "grad_norm": 4.059561000074339, + "learning_rate": 3.7573845427757728e-06, + "loss": 0.227777099609375, + "step": 96860 + }, + { + "epoch": 0.8375630128576493, + "grad_norm": 9.270539976275375, + "learning_rate": 3.7571874074004276e-06, + "loss": 0.2239992141723633, + "step": 96865 + }, + { + "epoch": 0.8376062463791926, + "grad_norm": 99.43027265055908, + "learning_rate": 3.756990268533001e-06, + "loss": 0.31853790283203126, + "step": 96870 + }, + { + "epoch": 0.8376494799007358, + "grad_norm": 15.199229310516184, + "learning_rate": 3.756793126174403e-06, + "loss": 0.2715118408203125, + "step": 96875 + }, + { + "epoch": 0.8376927134222791, + "grad_norm": 5.875690719434381, + "learning_rate": 3.7565959803255406e-06, + "loss": 0.1002288818359375, + "step": 96880 + }, + { + "epoch": 0.8377359469438224, + "grad_norm": 4.409915989915002, + "learning_rate": 3.756398830987325e-06, + "loss": 0.048181915283203126, + "step": 96885 + }, + { + "epoch": 0.8377791804653656, + "grad_norm": 34.57124348372097, + "learning_rate": 3.7562016781606644e-06, + "loss": 0.32738304138183594, + "step": 96890 + }, + { + "epoch": 0.8378224139869089, + "grad_norm": 1.4212322222329745, + "learning_rate": 3.7560045218464687e-06, + "loss": 0.0506256103515625, + "step": 96895 + }, + { + "epoch": 0.8378656475084522, + "grad_norm": 2.7199389692170324, + "learning_rate": 3.7558073620456474e-06, + "loss": 0.1325897216796875, + "step": 96900 + }, + { + "epoch": 0.8379088810299954, + "grad_norm": 55.2226258316418, + "learning_rate": 3.7556101987591093e-06, + "loss": 0.36460418701171876, + "step": 96905 + }, + { + "epoch": 0.8379521145515387, + "grad_norm": 1.7803735784297545, + "learning_rate": 3.7554130319877633e-06, + "loss": 0.18291015625, + "step": 96910 + }, + { + "epoch": 0.837995348073082, + "grad_norm": 3.254725027011468, + "learning_rate": 3.7552158617325195e-06, + "loss": 0.04987030029296875, + "step": 96915 + }, + { + "epoch": 0.8380385815946252, + "grad_norm": 0.7746524348943964, + "learning_rate": 3.7550186879942864e-06, + "loss": 0.1118011474609375, + "step": 96920 + }, + { + "epoch": 0.8380818151161685, + "grad_norm": 6.510313953516108, + "learning_rate": 3.7548215107739737e-06, + "loss": 0.037506103515625, + "step": 96925 + }, + { + "epoch": 0.8381250486377118, + "grad_norm": 0.8683714490482307, + "learning_rate": 3.7546243300724916e-06, + "loss": 0.07079620361328125, + "step": 96930 + }, + { + "epoch": 0.838168282159255, + "grad_norm": 6.334286184724993, + "learning_rate": 3.754427145890749e-06, + "loss": 0.19136199951171876, + "step": 96935 + }, + { + "epoch": 0.8382115156807982, + "grad_norm": 38.144929932127496, + "learning_rate": 3.754229958229655e-06, + "loss": 0.5348594665527344, + "step": 96940 + }, + { + "epoch": 0.8382547492023416, + "grad_norm": 18.151002517945415, + "learning_rate": 3.7540327670901183e-06, + "loss": 0.13996124267578125, + "step": 96945 + }, + { + "epoch": 0.8382979827238848, + "grad_norm": 0.3486914244051513, + "learning_rate": 3.7538355724730487e-06, + "loss": 0.16181869506835939, + "step": 96950 + }, + { + "epoch": 0.838341216245428, + "grad_norm": 5.340127078596424, + "learning_rate": 3.7536383743793573e-06, + "loss": 0.08820343017578125, + "step": 96955 + }, + { + "epoch": 0.8383844497669714, + "grad_norm": 16.400621281295233, + "learning_rate": 3.7534411728099513e-06, + "loss": 0.0894989013671875, + "step": 96960 + }, + { + "epoch": 0.8384276832885146, + "grad_norm": 0.5315613274192642, + "learning_rate": 3.7532439677657418e-06, + "loss": 0.14397964477539063, + "step": 96965 + }, + { + "epoch": 0.8384709168100578, + "grad_norm": 1.09540220745323, + "learning_rate": 3.7530467592476383e-06, + "loss": 0.06859512329101562, + "step": 96970 + }, + { + "epoch": 0.8385141503316011, + "grad_norm": 20.786411815614304, + "learning_rate": 3.7528495472565482e-06, + "loss": 0.12938995361328126, + "step": 96975 + }, + { + "epoch": 0.8385573838531444, + "grad_norm": 0.37799665352434175, + "learning_rate": 3.7526523317933832e-06, + "loss": 0.037133979797363284, + "step": 96980 + }, + { + "epoch": 0.8386006173746876, + "grad_norm": 2.9734250495771812, + "learning_rate": 3.7524551128590522e-06, + "loss": 0.024297332763671874, + "step": 96985 + }, + { + "epoch": 0.838643850896231, + "grad_norm": 10.748084111644232, + "learning_rate": 3.7522578904544645e-06, + "loss": 0.21159286499023439, + "step": 96990 + }, + { + "epoch": 0.8386870844177742, + "grad_norm": 3.71231670953113, + "learning_rate": 3.752060664580531e-06, + "loss": 0.2312957763671875, + "step": 96995 + }, + { + "epoch": 0.8387303179393174, + "grad_norm": 26.64742443631219, + "learning_rate": 3.751863435238158e-06, + "loss": 0.15435333251953126, + "step": 97000 + }, + { + "epoch": 0.8387735514608607, + "grad_norm": 3.4988251930481695, + "learning_rate": 3.7516662024282586e-06, + "loss": 0.053905487060546875, + "step": 97005 + }, + { + "epoch": 0.838816784982404, + "grad_norm": 13.686128856112061, + "learning_rate": 3.7514689661517416e-06, + "loss": 0.104058837890625, + "step": 97010 + }, + { + "epoch": 0.8388600185039472, + "grad_norm": 2.2181608917154265, + "learning_rate": 3.7512717264095156e-06, + "loss": 0.192828369140625, + "step": 97015 + }, + { + "epoch": 0.8389032520254904, + "grad_norm": 0.775623494634863, + "learning_rate": 3.751074483202491e-06, + "loss": 0.015455245971679688, + "step": 97020 + }, + { + "epoch": 0.8389464855470338, + "grad_norm": 26.03131898087769, + "learning_rate": 3.750877236531577e-06, + "loss": 0.0942230224609375, + "step": 97025 + }, + { + "epoch": 0.838989719068577, + "grad_norm": 5.79178646624585, + "learning_rate": 3.7506799863976828e-06, + "loss": 0.056406402587890626, + "step": 97030 + }, + { + "epoch": 0.8390329525901202, + "grad_norm": 20.79554201121651, + "learning_rate": 3.75048273280172e-06, + "loss": 0.14336585998535156, + "step": 97035 + }, + { + "epoch": 0.8390761861116636, + "grad_norm": 0.3605205760875404, + "learning_rate": 3.7502854757445964e-06, + "loss": 0.1055633544921875, + "step": 97040 + }, + { + "epoch": 0.8391194196332068, + "grad_norm": 6.603695876065844, + "learning_rate": 3.750088215227223e-06, + "loss": 0.034351348876953125, + "step": 97045 + }, + { + "epoch": 0.83916265315475, + "grad_norm": 8.060193317357013, + "learning_rate": 3.749890951250509e-06, + "loss": 0.06771926879882813, + "step": 97050 + }, + { + "epoch": 0.8392058866762934, + "grad_norm": 1.052389464241339, + "learning_rate": 3.749693683815364e-06, + "loss": 0.11895103454589843, + "step": 97055 + }, + { + "epoch": 0.8392491201978366, + "grad_norm": 4.51403741617157, + "learning_rate": 3.7494964129226975e-06, + "loss": 0.0807586669921875, + "step": 97060 + }, + { + "epoch": 0.8392923537193798, + "grad_norm": 0.34338087408274437, + "learning_rate": 3.749299138573421e-06, + "loss": 0.2620212554931641, + "step": 97065 + }, + { + "epoch": 0.8393355872409232, + "grad_norm": 9.01051944190351, + "learning_rate": 3.7491018607684416e-06, + "loss": 0.25645599365234373, + "step": 97070 + }, + { + "epoch": 0.8393788207624664, + "grad_norm": 2.130163542846221, + "learning_rate": 3.7489045795086717e-06, + "loss": 0.178033447265625, + "step": 97075 + }, + { + "epoch": 0.8394220542840096, + "grad_norm": 4.317652739915416, + "learning_rate": 3.74870729479502e-06, + "loss": 0.11461181640625, + "step": 97080 + }, + { + "epoch": 0.839465287805553, + "grad_norm": 27.176094081326454, + "learning_rate": 3.7485100066283954e-06, + "loss": 0.1689910888671875, + "step": 97085 + }, + { + "epoch": 0.8395085213270962, + "grad_norm": 9.625667120764971, + "learning_rate": 3.7483127150097094e-06, + "loss": 0.13816604614257813, + "step": 97090 + }, + { + "epoch": 0.8395517548486394, + "grad_norm": 0.6082527990034695, + "learning_rate": 3.748115419939872e-06, + "loss": 0.15821094512939454, + "step": 97095 + }, + { + "epoch": 0.8395949883701828, + "grad_norm": 3.758135212776918, + "learning_rate": 3.747918121419791e-06, + "loss": 0.17370758056640626, + "step": 97100 + }, + { + "epoch": 0.839638221891726, + "grad_norm": 2.3748405651894373, + "learning_rate": 3.747720819450379e-06, + "loss": 0.060260009765625, + "step": 97105 + }, + { + "epoch": 0.8396814554132692, + "grad_norm": 13.123558673258922, + "learning_rate": 3.747523514032544e-06, + "loss": 0.3103485107421875, + "step": 97110 + }, + { + "epoch": 0.8397246889348124, + "grad_norm": 3.6524310099750292, + "learning_rate": 3.7473262051671964e-06, + "loss": 0.04724464416503906, + "step": 97115 + }, + { + "epoch": 0.8397679224563558, + "grad_norm": 35.229027961253244, + "learning_rate": 3.747128892855247e-06, + "loss": 0.42387542724609373, + "step": 97120 + }, + { + "epoch": 0.839811155977899, + "grad_norm": 41.3155841664801, + "learning_rate": 3.746931577097605e-06, + "loss": 0.15755157470703124, + "step": 97125 + }, + { + "epoch": 0.8398543894994422, + "grad_norm": 0.06557302399955801, + "learning_rate": 3.7467342578951805e-06, + "loss": 0.06402626037597656, + "step": 97130 + }, + { + "epoch": 0.8398976230209856, + "grad_norm": 6.668309459863062, + "learning_rate": 3.746536935248883e-06, + "loss": 0.2833160400390625, + "step": 97135 + }, + { + "epoch": 0.8399408565425288, + "grad_norm": 1.7233241566228679, + "learning_rate": 3.7463396091596242e-06, + "loss": 0.01637420654296875, + "step": 97140 + }, + { + "epoch": 0.839984090064072, + "grad_norm": 7.167561436350774, + "learning_rate": 3.7461422796283123e-06, + "loss": 0.190899658203125, + "step": 97145 + }, + { + "epoch": 0.8400273235856154, + "grad_norm": 24.155930867598233, + "learning_rate": 3.7459449466558582e-06, + "loss": 0.12534236907958984, + "step": 97150 + }, + { + "epoch": 0.8400705571071586, + "grad_norm": 0.8803218181812112, + "learning_rate": 3.7457476102431727e-06, + "loss": 0.012072944641113281, + "step": 97155 + }, + { + "epoch": 0.8401137906287018, + "grad_norm": 4.163396221625789, + "learning_rate": 3.7455502703911645e-06, + "loss": 0.10041275024414062, + "step": 97160 + }, + { + "epoch": 0.8401570241502452, + "grad_norm": 3.5721284502495205, + "learning_rate": 3.745352927100744e-06, + "loss": 0.2088836669921875, + "step": 97165 + }, + { + "epoch": 0.8402002576717884, + "grad_norm": 10.759820083947783, + "learning_rate": 3.745155580372822e-06, + "loss": 0.0655364990234375, + "step": 97170 + }, + { + "epoch": 0.8402434911933316, + "grad_norm": 36.47427285204347, + "learning_rate": 3.744958230208308e-06, + "loss": 0.31470565795898436, + "step": 97175 + }, + { + "epoch": 0.840286724714875, + "grad_norm": 7.1544348557322435, + "learning_rate": 3.7447608766081126e-06, + "loss": 0.12376174926757813, + "step": 97180 + }, + { + "epoch": 0.8403299582364182, + "grad_norm": 0.6708144866159536, + "learning_rate": 3.744563519573146e-06, + "loss": 0.012837982177734375, + "step": 97185 + }, + { + "epoch": 0.8403731917579614, + "grad_norm": 8.573258937206667, + "learning_rate": 3.744366159104319e-06, + "loss": 0.10506401062011719, + "step": 97190 + }, + { + "epoch": 0.8404164252795047, + "grad_norm": 1.2352579215024888, + "learning_rate": 3.74416879520254e-06, + "loss": 0.12931060791015625, + "step": 97195 + }, + { + "epoch": 0.840459658801048, + "grad_norm": 5.015312483499957, + "learning_rate": 3.74397142786872e-06, + "loss": 0.2724964141845703, + "step": 97200 + }, + { + "epoch": 0.8405028923225912, + "grad_norm": 0.5039503351852878, + "learning_rate": 3.7437740571037703e-06, + "loss": 0.09146270751953126, + "step": 97205 + }, + { + "epoch": 0.8405461258441345, + "grad_norm": 6.147009408071136, + "learning_rate": 3.7435766829086003e-06, + "loss": 0.2109668731689453, + "step": 97210 + }, + { + "epoch": 0.8405893593656778, + "grad_norm": 0.8004853277660025, + "learning_rate": 3.74337930528412e-06, + "loss": 0.14573593139648439, + "step": 97215 + }, + { + "epoch": 0.840632592887221, + "grad_norm": 0.046173124924990025, + "learning_rate": 3.7431819242312405e-06, + "loss": 0.15448837280273436, + "step": 97220 + }, + { + "epoch": 0.8406758264087643, + "grad_norm": 45.880325132017646, + "learning_rate": 3.7429845397508713e-06, + "loss": 0.14917144775390626, + "step": 97225 + }, + { + "epoch": 0.8407190599303076, + "grad_norm": 0.38382411645910314, + "learning_rate": 3.7427871518439227e-06, + "loss": 0.20296783447265626, + "step": 97230 + }, + { + "epoch": 0.8407622934518508, + "grad_norm": 4.004877633829783, + "learning_rate": 3.742589760511306e-06, + "loss": 0.035962677001953124, + "step": 97235 + }, + { + "epoch": 0.8408055269733941, + "grad_norm": 0.09424087054696959, + "learning_rate": 3.742392365753931e-06, + "loss": 0.08429794311523438, + "step": 97240 + }, + { + "epoch": 0.8408487604949374, + "grad_norm": 19.90456159212885, + "learning_rate": 3.7421949675727073e-06, + "loss": 0.1387847900390625, + "step": 97245 + }, + { + "epoch": 0.8408919940164806, + "grad_norm": 19.019074306175813, + "learning_rate": 3.7419975659685465e-06, + "loss": 0.47075881958007815, + "step": 97250 + }, + { + "epoch": 0.8409352275380239, + "grad_norm": 13.998433902639174, + "learning_rate": 3.7418001609423585e-06, + "loss": 0.06872406005859374, + "step": 97255 + }, + { + "epoch": 0.8409784610595672, + "grad_norm": 1.582061624204541, + "learning_rate": 3.7416027524950528e-06, + "loss": 0.21611251831054687, + "step": 97260 + }, + { + "epoch": 0.8410216945811104, + "grad_norm": 3.1735201697415794, + "learning_rate": 3.741405340627542e-06, + "loss": 0.017287826538085936, + "step": 97265 + }, + { + "epoch": 0.8410649281026537, + "grad_norm": 2.9941297135743423, + "learning_rate": 3.7412079253407352e-06, + "loss": 0.04940643310546875, + "step": 97270 + }, + { + "epoch": 0.841108161624197, + "grad_norm": 5.934920604912023, + "learning_rate": 3.741010506635543e-06, + "loss": 0.11319503784179688, + "step": 97275 + }, + { + "epoch": 0.8411513951457402, + "grad_norm": 19.634648489520835, + "learning_rate": 3.740813084512875e-06, + "loss": 0.13873443603515626, + "step": 97280 + }, + { + "epoch": 0.8411946286672835, + "grad_norm": 12.83282140012862, + "learning_rate": 3.740615658973643e-06, + "loss": 0.06062507629394531, + "step": 97285 + }, + { + "epoch": 0.8412378621888267, + "grad_norm": 24.4045852596502, + "learning_rate": 3.740418230018757e-06, + "loss": 0.07973279953002929, + "step": 97290 + }, + { + "epoch": 0.84128109571037, + "grad_norm": 18.944007294856643, + "learning_rate": 3.740220797649127e-06, + "loss": 0.1437713623046875, + "step": 97295 + }, + { + "epoch": 0.8413243292319132, + "grad_norm": 13.190267486713923, + "learning_rate": 3.7400233618656653e-06, + "loss": 0.25455780029296876, + "step": 97300 + }, + { + "epoch": 0.8413675627534565, + "grad_norm": 0.7022295903588476, + "learning_rate": 3.7398259226692805e-06, + "loss": 0.14893112182617188, + "step": 97305 + }, + { + "epoch": 0.8414107962749998, + "grad_norm": 0.13310059916661454, + "learning_rate": 3.7396284800608836e-06, + "loss": 0.17362213134765625, + "step": 97310 + }, + { + "epoch": 0.841454029796543, + "grad_norm": 1.7445660552634137, + "learning_rate": 3.7394310340413855e-06, + "loss": 0.022690200805664064, + "step": 97315 + }, + { + "epoch": 0.8414972633180863, + "grad_norm": 4.15504475448005, + "learning_rate": 3.7392335846116975e-06, + "loss": 0.1955476760864258, + "step": 97320 + }, + { + "epoch": 0.8415404968396296, + "grad_norm": 2.2739613093216766, + "learning_rate": 3.7390361317727282e-06, + "loss": 0.13377456665039061, + "step": 97325 + }, + { + "epoch": 0.8415837303611728, + "grad_norm": 0.8427658905636792, + "learning_rate": 3.7388386755253916e-06, + "loss": 0.17927474975585939, + "step": 97330 + }, + { + "epoch": 0.8416269638827161, + "grad_norm": 0.102384906352703, + "learning_rate": 3.738641215870595e-06, + "loss": 0.05683259963989258, + "step": 97335 + }, + { + "epoch": 0.8416701974042594, + "grad_norm": 1.6187095490666614, + "learning_rate": 3.73844375280925e-06, + "loss": 0.017405128479003905, + "step": 97340 + }, + { + "epoch": 0.8417134309258026, + "grad_norm": 1.8795458415347532, + "learning_rate": 3.7382462863422683e-06, + "loss": 0.16083297729492188, + "step": 97345 + }, + { + "epoch": 0.8417566644473459, + "grad_norm": 12.628427995662456, + "learning_rate": 3.7380488164705608e-06, + "loss": 0.07771148681640624, + "step": 97350 + }, + { + "epoch": 0.8417998979688892, + "grad_norm": 2.3770327265093028, + "learning_rate": 3.7378513431950357e-06, + "loss": 0.05682029724121094, + "step": 97355 + }, + { + "epoch": 0.8418431314904324, + "grad_norm": 13.582960032111487, + "learning_rate": 3.7376538665166066e-06, + "loss": 0.1960540771484375, + "step": 97360 + }, + { + "epoch": 0.8418863650119757, + "grad_norm": 0.013035629433942705, + "learning_rate": 3.737456386436183e-06, + "loss": 0.061035537719726564, + "step": 97365 + }, + { + "epoch": 0.8419295985335189, + "grad_norm": 24.930852355627817, + "learning_rate": 3.737258902954675e-06, + "loss": 0.12327880859375, + "step": 97370 + }, + { + "epoch": 0.8419728320550622, + "grad_norm": 37.0632211912528, + "learning_rate": 3.7370614160729943e-06, + "loss": 0.1089019775390625, + "step": 97375 + }, + { + "epoch": 0.8420160655766055, + "grad_norm": 26.79310851910728, + "learning_rate": 3.736863925792052e-06, + "loss": 0.214129638671875, + "step": 97380 + }, + { + "epoch": 0.8420592990981487, + "grad_norm": 3.321993098898483, + "learning_rate": 3.7366664321127582e-06, + "loss": 0.16436386108398438, + "step": 97385 + }, + { + "epoch": 0.842102532619692, + "grad_norm": 60.14149895568481, + "learning_rate": 3.7364689350360237e-06, + "loss": 0.3298248291015625, + "step": 97390 + }, + { + "epoch": 0.8421457661412353, + "grad_norm": 2.6438951872415144, + "learning_rate": 3.73627143456276e-06, + "loss": 0.0707061767578125, + "step": 97395 + }, + { + "epoch": 0.8421889996627785, + "grad_norm": 4.280875835129718, + "learning_rate": 3.7360739306938765e-06, + "loss": 0.40164947509765625, + "step": 97400 + }, + { + "epoch": 0.8422322331843218, + "grad_norm": 4.153390855820856, + "learning_rate": 3.735876423430285e-06, + "loss": 0.41605377197265625, + "step": 97405 + }, + { + "epoch": 0.8422754667058651, + "grad_norm": 0.07126794284213235, + "learning_rate": 3.735678912772898e-06, + "loss": 0.051981353759765626, + "step": 97410 + }, + { + "epoch": 0.8423187002274083, + "grad_norm": 25.116710283072763, + "learning_rate": 3.735481398722624e-06, + "loss": 0.297772216796875, + "step": 97415 + }, + { + "epoch": 0.8423619337489516, + "grad_norm": 0.6515949474302748, + "learning_rate": 3.7352838812803746e-06, + "loss": 0.188543701171875, + "step": 97420 + }, + { + "epoch": 0.8424051672704949, + "grad_norm": 14.183649558864987, + "learning_rate": 3.7350863604470607e-06, + "loss": 0.1421142578125, + "step": 97425 + }, + { + "epoch": 0.8424484007920381, + "grad_norm": 5.405461832964358, + "learning_rate": 3.7348888362235936e-06, + "loss": 0.04456415176391602, + "step": 97430 + }, + { + "epoch": 0.8424916343135814, + "grad_norm": 6.4648603927949635, + "learning_rate": 3.7346913086108846e-06, + "loss": 0.11682052612304687, + "step": 97435 + }, + { + "epoch": 0.8425348678351247, + "grad_norm": 0.33849856913762594, + "learning_rate": 3.734493777609844e-06, + "loss": 0.006485748291015625, + "step": 97440 + }, + { + "epoch": 0.8425781013566679, + "grad_norm": 1.9076695670554045, + "learning_rate": 3.7342962432213835e-06, + "loss": 0.14552459716796876, + "step": 97445 + }, + { + "epoch": 0.8426213348782111, + "grad_norm": 1.7845554600218714, + "learning_rate": 3.734098705446413e-06, + "loss": 0.0766510009765625, + "step": 97450 + }, + { + "epoch": 0.8426645683997545, + "grad_norm": 0.17485139479180878, + "learning_rate": 3.7339011642858435e-06, + "loss": 0.08243045806884766, + "step": 97455 + }, + { + "epoch": 0.8427078019212977, + "grad_norm": 23.81562409477535, + "learning_rate": 3.7337036197405876e-06, + "loss": 0.0920196533203125, + "step": 97460 + }, + { + "epoch": 0.8427510354428409, + "grad_norm": 0.7764419670458819, + "learning_rate": 3.7335060718115554e-06, + "loss": 0.27086219787597654, + "step": 97465 + }, + { + "epoch": 0.8427942689643843, + "grad_norm": 0.20760122425180655, + "learning_rate": 3.733308520499658e-06, + "loss": 0.235455322265625, + "step": 97470 + }, + { + "epoch": 0.8428375024859275, + "grad_norm": 1.4397389039311588, + "learning_rate": 3.733110965805806e-06, + "loss": 0.11587448120117187, + "step": 97475 + }, + { + "epoch": 0.8428807360074707, + "grad_norm": 0.49324940961409225, + "learning_rate": 3.7329134077309114e-06, + "loss": 0.05825309753417969, + "step": 97480 + }, + { + "epoch": 0.842923969529014, + "grad_norm": 18.588504964047182, + "learning_rate": 3.732715846275884e-06, + "loss": 0.4588409423828125, + "step": 97485 + }, + { + "epoch": 0.8429672030505573, + "grad_norm": 20.818899980381698, + "learning_rate": 3.732518281441637e-06, + "loss": 0.15258331298828126, + "step": 97490 + }, + { + "epoch": 0.8430104365721005, + "grad_norm": 24.776350416119424, + "learning_rate": 3.732320713229081e-06, + "loss": 0.160150146484375, + "step": 97495 + }, + { + "epoch": 0.8430536700936438, + "grad_norm": 6.560581162687481, + "learning_rate": 3.732123141639125e-06, + "loss": 0.18776702880859375, + "step": 97500 + }, + { + "epoch": 0.8430969036151871, + "grad_norm": 0.49543462404665284, + "learning_rate": 3.731925566672683e-06, + "loss": 0.22693023681640626, + "step": 97505 + }, + { + "epoch": 0.8431401371367303, + "grad_norm": 26.8319669569567, + "learning_rate": 3.731727988330664e-06, + "loss": 0.24910202026367187, + "step": 97510 + }, + { + "epoch": 0.8431833706582736, + "grad_norm": 0.2967175611351099, + "learning_rate": 3.73153040661398e-06, + "loss": 0.041196441650390624, + "step": 97515 + }, + { + "epoch": 0.8432266041798169, + "grad_norm": 16.69449582472111, + "learning_rate": 3.731332821523544e-06, + "loss": 0.29920501708984376, + "step": 97520 + }, + { + "epoch": 0.8432698377013601, + "grad_norm": 10.786692429228395, + "learning_rate": 3.7311352330602647e-06, + "loss": 0.08470993041992188, + "step": 97525 + }, + { + "epoch": 0.8433130712229034, + "grad_norm": 11.646490732200718, + "learning_rate": 3.730937641225054e-06, + "loss": 0.28072357177734375, + "step": 97530 + }, + { + "epoch": 0.8433563047444467, + "grad_norm": 2.1118418226919626, + "learning_rate": 3.730740046018824e-06, + "loss": 0.037319183349609375, + "step": 97535 + }, + { + "epoch": 0.8433995382659899, + "grad_norm": 0.8605975129409361, + "learning_rate": 3.7305424474424845e-06, + "loss": 0.10795745849609376, + "step": 97540 + }, + { + "epoch": 0.8434427717875331, + "grad_norm": 2.4863022903513734, + "learning_rate": 3.7303448454969488e-06, + "loss": 0.06533308029174804, + "step": 97545 + }, + { + "epoch": 0.8434860053090765, + "grad_norm": 22.7620244127002, + "learning_rate": 3.7301472401831268e-06, + "loss": 0.15396957397460936, + "step": 97550 + }, + { + "epoch": 0.8435292388306197, + "grad_norm": 10.69592323943143, + "learning_rate": 3.7299496315019308e-06, + "loss": 0.363800048828125, + "step": 97555 + }, + { + "epoch": 0.8435724723521629, + "grad_norm": 2.9840010417387393, + "learning_rate": 3.729752019454271e-06, + "loss": 0.060359954833984375, + "step": 97560 + }, + { + "epoch": 0.8436157058737063, + "grad_norm": 13.796166599548348, + "learning_rate": 3.729554404041059e-06, + "loss": 0.08178939819335937, + "step": 97565 + }, + { + "epoch": 0.8436589393952495, + "grad_norm": 33.56851208238359, + "learning_rate": 3.7293567852632075e-06, + "loss": 0.2648956298828125, + "step": 97570 + }, + { + "epoch": 0.8437021729167927, + "grad_norm": 38.2603594944419, + "learning_rate": 3.7291591631216267e-06, + "loss": 0.28199005126953125, + "step": 97575 + }, + { + "epoch": 0.8437454064383361, + "grad_norm": 0.03599529943033191, + "learning_rate": 3.7289615376172277e-06, + "loss": 0.07584114074707031, + "step": 97580 + }, + { + "epoch": 0.8437886399598793, + "grad_norm": 5.621976383971339, + "learning_rate": 3.7287639087509232e-06, + "loss": 0.0916717529296875, + "step": 97585 + }, + { + "epoch": 0.8438318734814225, + "grad_norm": 7.35375669369457, + "learning_rate": 3.7285662765236237e-06, + "loss": 0.181695556640625, + "step": 97590 + }, + { + "epoch": 0.8438751070029659, + "grad_norm": 8.358806719168976, + "learning_rate": 3.7283686409362407e-06, + "loss": 0.3379608154296875, + "step": 97595 + }, + { + "epoch": 0.8439183405245091, + "grad_norm": 0.6423384677265761, + "learning_rate": 3.7281710019896855e-06, + "loss": 0.13013916015625, + "step": 97600 + }, + { + "epoch": 0.8439615740460523, + "grad_norm": 1.2841428015559884, + "learning_rate": 3.727973359684871e-06, + "loss": 0.018903350830078124, + "step": 97605 + }, + { + "epoch": 0.8440048075675957, + "grad_norm": 1.4210320254472009, + "learning_rate": 3.7277757140227067e-06, + "loss": 0.1399505615234375, + "step": 97610 + }, + { + "epoch": 0.8440480410891389, + "grad_norm": 7.425714013934437, + "learning_rate": 3.727578065004106e-06, + "loss": 0.1323455810546875, + "step": 97615 + }, + { + "epoch": 0.8440912746106821, + "grad_norm": 1.5339820276208977, + "learning_rate": 3.7273804126299792e-06, + "loss": 0.23861427307128907, + "step": 97620 + }, + { + "epoch": 0.8441345081322253, + "grad_norm": 22.23324457771674, + "learning_rate": 3.7271827569012386e-06, + "loss": 0.0421783447265625, + "step": 97625 + }, + { + "epoch": 0.8441777416537687, + "grad_norm": 12.29312646490276, + "learning_rate": 3.7269850978187942e-06, + "loss": 0.21585235595703126, + "step": 97630 + }, + { + "epoch": 0.8442209751753119, + "grad_norm": 13.399430134770528, + "learning_rate": 3.72678743538356e-06, + "loss": 0.22566986083984375, + "step": 97635 + }, + { + "epoch": 0.8442642086968551, + "grad_norm": 1.9043248007276143, + "learning_rate": 3.7265897695964464e-06, + "loss": 0.2844211578369141, + "step": 97640 + }, + { + "epoch": 0.8443074422183985, + "grad_norm": 16.06888845152413, + "learning_rate": 3.726392100458364e-06, + "loss": 0.1675262451171875, + "step": 97645 + }, + { + "epoch": 0.8443506757399417, + "grad_norm": 11.041871910651457, + "learning_rate": 3.7261944279702256e-06, + "loss": 0.24973602294921876, + "step": 97650 + }, + { + "epoch": 0.8443939092614849, + "grad_norm": 25.608249638709953, + "learning_rate": 3.7259967521329436e-06, + "loss": 0.10070953369140626, + "step": 97655 + }, + { + "epoch": 0.8444371427830283, + "grad_norm": 16.41874192027305, + "learning_rate": 3.7257990729474275e-06, + "loss": 0.170867919921875, + "step": 97660 + }, + { + "epoch": 0.8444803763045715, + "grad_norm": 1.645821082064582, + "learning_rate": 3.725601390414591e-06, + "loss": 0.0812255859375, + "step": 97665 + }, + { + "epoch": 0.8445236098261147, + "grad_norm": 1.044957134631827, + "learning_rate": 3.7254037045353454e-06, + "loss": 0.134991455078125, + "step": 97670 + }, + { + "epoch": 0.8445668433476581, + "grad_norm": 6.033916669972115, + "learning_rate": 3.7252060153106013e-06, + "loss": 0.0273651123046875, + "step": 97675 + }, + { + "epoch": 0.8446100768692013, + "grad_norm": 0.10671078710436314, + "learning_rate": 3.7250083227412705e-06, + "loss": 0.08536996841430664, + "step": 97680 + }, + { + "epoch": 0.8446533103907445, + "grad_norm": 11.059425662336189, + "learning_rate": 3.724810626828267e-06, + "loss": 0.12511444091796875, + "step": 97685 + }, + { + "epoch": 0.8446965439122879, + "grad_norm": 11.755346554553185, + "learning_rate": 3.7246129275724993e-06, + "loss": 0.3920135498046875, + "step": 97690 + }, + { + "epoch": 0.8447397774338311, + "grad_norm": 0.1472851791128166, + "learning_rate": 3.7244152249748816e-06, + "loss": 0.30131874084472654, + "step": 97695 + }, + { + "epoch": 0.8447830109553743, + "grad_norm": 5.683331373625128, + "learning_rate": 3.7242175190363254e-06, + "loss": 0.074493408203125, + "step": 97700 + }, + { + "epoch": 0.8448262444769177, + "grad_norm": 19.602669769226395, + "learning_rate": 3.7240198097577413e-06, + "loss": 0.1790008544921875, + "step": 97705 + }, + { + "epoch": 0.8448694779984609, + "grad_norm": 13.75114032191499, + "learning_rate": 3.7238220971400415e-06, + "loss": 0.23846282958984374, + "step": 97710 + }, + { + "epoch": 0.8449127115200041, + "grad_norm": 25.395381744630498, + "learning_rate": 3.723624381184139e-06, + "loss": 0.14691009521484374, + "step": 97715 + }, + { + "epoch": 0.8449559450415474, + "grad_norm": 2.072023871818939, + "learning_rate": 3.7234266618909447e-06, + "loss": 0.11310653686523438, + "step": 97720 + }, + { + "epoch": 0.8449991785630907, + "grad_norm": 0.27435876650052005, + "learning_rate": 3.7232289392613705e-06, + "loss": 0.20061264038085938, + "step": 97725 + }, + { + "epoch": 0.8450424120846339, + "grad_norm": 3.7885060280650302, + "learning_rate": 3.723031213296328e-06, + "loss": 0.0437591552734375, + "step": 97730 + }, + { + "epoch": 0.8450856456061772, + "grad_norm": 12.521768346253873, + "learning_rate": 3.7228334839967293e-06, + "loss": 0.142626953125, + "step": 97735 + }, + { + "epoch": 0.8451288791277205, + "grad_norm": 10.005859882013041, + "learning_rate": 3.722635751363486e-06, + "loss": 0.15809783935546876, + "step": 97740 + }, + { + "epoch": 0.8451721126492637, + "grad_norm": 42.12465083136559, + "learning_rate": 3.722438015397512e-06, + "loss": 0.31432571411132815, + "step": 97745 + }, + { + "epoch": 0.845215346170807, + "grad_norm": 0.3095198074502339, + "learning_rate": 3.722240276099717e-06, + "loss": 0.18184356689453124, + "step": 97750 + }, + { + "epoch": 0.8452585796923503, + "grad_norm": 8.76775725720226, + "learning_rate": 3.722042533471013e-06, + "loss": 0.539227294921875, + "step": 97755 + }, + { + "epoch": 0.8453018132138935, + "grad_norm": 1.6773317094477962, + "learning_rate": 3.7218447875123135e-06, + "loss": 0.47025260925292967, + "step": 97760 + }, + { + "epoch": 0.8453450467354368, + "grad_norm": 26.130798472627514, + "learning_rate": 3.721647038224529e-06, + "loss": 0.11072158813476562, + "step": 97765 + }, + { + "epoch": 0.8453882802569801, + "grad_norm": 5.985267878661393, + "learning_rate": 3.7214492856085724e-06, + "loss": 0.31370849609375, + "step": 97770 + }, + { + "epoch": 0.8454315137785233, + "grad_norm": 17.93438007911326, + "learning_rate": 3.7212515296653555e-06, + "loss": 0.12188262939453125, + "step": 97775 + }, + { + "epoch": 0.8454747473000666, + "grad_norm": 27.36439358005828, + "learning_rate": 3.7210537703957906e-06, + "loss": 0.244476318359375, + "step": 97780 + }, + { + "epoch": 0.8455179808216099, + "grad_norm": 0.5222573594623725, + "learning_rate": 3.7208560078007883e-06, + "loss": 0.05076522827148437, + "step": 97785 + }, + { + "epoch": 0.8455612143431531, + "grad_norm": 2.7749268228367305, + "learning_rate": 3.720658241881263e-06, + "loss": 0.12605438232421876, + "step": 97790 + }, + { + "epoch": 0.8456044478646964, + "grad_norm": 0.9280031324839909, + "learning_rate": 3.720460472638125e-06, + "loss": 0.1600738525390625, + "step": 97795 + }, + { + "epoch": 0.8456476813862396, + "grad_norm": 5.041333519090864, + "learning_rate": 3.7202627000722875e-06, + "loss": 0.20944442749023437, + "step": 97800 + }, + { + "epoch": 0.8456909149077829, + "grad_norm": 1.6198273041923297, + "learning_rate": 3.7200649241846612e-06, + "loss": 0.23948564529418945, + "step": 97805 + }, + { + "epoch": 0.8457341484293261, + "grad_norm": 4.204839506057941, + "learning_rate": 3.7198671449761597e-06, + "loss": 0.07463178634643555, + "step": 97810 + }, + { + "epoch": 0.8457773819508694, + "grad_norm": 7.001700295152136, + "learning_rate": 3.7196693624476948e-06, + "loss": 0.04987335205078125, + "step": 97815 + }, + { + "epoch": 0.8458206154724127, + "grad_norm": 18.021635562544752, + "learning_rate": 3.719471576600177e-06, + "loss": 0.14364299774169922, + "step": 97820 + }, + { + "epoch": 0.845863848993956, + "grad_norm": 72.1244515542571, + "learning_rate": 3.719273787434521e-06, + "loss": 0.2975128173828125, + "step": 97825 + }, + { + "epoch": 0.8459070825154992, + "grad_norm": 1.0623672510928541, + "learning_rate": 3.7190759949516386e-06, + "loss": 0.152349853515625, + "step": 97830 + }, + { + "epoch": 0.8459503160370425, + "grad_norm": 58.31474252008576, + "learning_rate": 3.7188781991524396e-06, + "loss": 0.18405914306640625, + "step": 97835 + }, + { + "epoch": 0.8459935495585857, + "grad_norm": 9.768408836363713, + "learning_rate": 3.7186804000378388e-06, + "loss": 0.1024871826171875, + "step": 97840 + }, + { + "epoch": 0.846036783080129, + "grad_norm": 26.226241647782768, + "learning_rate": 3.718482597608747e-06, + "loss": 0.13772201538085938, + "step": 97845 + }, + { + "epoch": 0.8460800166016723, + "grad_norm": 2.0769473256126676, + "learning_rate": 3.718284791866077e-06, + "loss": 0.04190406799316406, + "step": 97850 + }, + { + "epoch": 0.8461232501232155, + "grad_norm": 3.369581320786887, + "learning_rate": 3.718086982810742e-06, + "loss": 0.15228805541992188, + "step": 97855 + }, + { + "epoch": 0.8461664836447588, + "grad_norm": 20.043976698984714, + "learning_rate": 3.7178891704436525e-06, + "loss": 0.2888206481933594, + "step": 97860 + }, + { + "epoch": 0.8462097171663021, + "grad_norm": 1.6651014613716075, + "learning_rate": 3.7176913547657214e-06, + "loss": 0.6082656860351563, + "step": 97865 + }, + { + "epoch": 0.8462529506878453, + "grad_norm": 0.07822058920423783, + "learning_rate": 3.7174935357778616e-06, + "loss": 0.06753997802734375, + "step": 97870 + }, + { + "epoch": 0.8462961842093886, + "grad_norm": 26.53241642282686, + "learning_rate": 3.7172957134809847e-06, + "loss": 0.11647491455078125, + "step": 97875 + }, + { + "epoch": 0.8463394177309319, + "grad_norm": 4.8087360927157095, + "learning_rate": 3.7170978878760028e-06, + "loss": 0.09264717102050782, + "step": 97880 + }, + { + "epoch": 0.8463826512524751, + "grad_norm": 0.28747929044199694, + "learning_rate": 3.7169000589638294e-06, + "loss": 0.10922832489013672, + "step": 97885 + }, + { + "epoch": 0.8464258847740184, + "grad_norm": 44.36371602440757, + "learning_rate": 3.7167022267453773e-06, + "loss": 0.44474077224731445, + "step": 97890 + }, + { + "epoch": 0.8464691182955616, + "grad_norm": 3.780816682190929, + "learning_rate": 3.716504391221557e-06, + "loss": 0.0860809326171875, + "step": 97895 + }, + { + "epoch": 0.8465123518171049, + "grad_norm": 0.030064595615172197, + "learning_rate": 3.716306552393281e-06, + "loss": 0.30935020446777345, + "step": 97900 + }, + { + "epoch": 0.8465555853386482, + "grad_norm": 6.6840716847860575, + "learning_rate": 3.7161087102614633e-06, + "loss": 0.0804840087890625, + "step": 97905 + }, + { + "epoch": 0.8465988188601914, + "grad_norm": 23.120049115555183, + "learning_rate": 3.7159108648270153e-06, + "loss": 0.31763916015625, + "step": 97910 + }, + { + "epoch": 0.8466420523817347, + "grad_norm": 1.8285524439681764, + "learning_rate": 3.71571301609085e-06, + "loss": 0.22017822265625, + "step": 97915 + }, + { + "epoch": 0.846685285903278, + "grad_norm": 0.8155981831579571, + "learning_rate": 3.715515164053879e-06, + "loss": 0.10326347351074219, + "step": 97920 + }, + { + "epoch": 0.8467285194248212, + "grad_norm": 0.2591998429524822, + "learning_rate": 3.7153173087170163e-06, + "loss": 0.0400238037109375, + "step": 97925 + }, + { + "epoch": 0.8467717529463645, + "grad_norm": 5.567711853919852, + "learning_rate": 3.7151194500811714e-06, + "loss": 0.28381500244140623, + "step": 97930 + }, + { + "epoch": 0.8468149864679078, + "grad_norm": 2.0374962445592093, + "learning_rate": 3.71492158814726e-06, + "loss": 0.04454193115234375, + "step": 97935 + }, + { + "epoch": 0.846858219989451, + "grad_norm": 1.5889131029248882, + "learning_rate": 3.7147237229161932e-06, + "loss": 0.0426849365234375, + "step": 97940 + }, + { + "epoch": 0.8469014535109943, + "grad_norm": 0.9847236121804681, + "learning_rate": 3.714525854388884e-06, + "loss": 0.024555206298828125, + "step": 97945 + }, + { + "epoch": 0.8469446870325376, + "grad_norm": 4.7371438400120205, + "learning_rate": 3.7143279825662447e-06, + "loss": 0.03328857421875, + "step": 97950 + }, + { + "epoch": 0.8469879205540808, + "grad_norm": 0.7760385869876328, + "learning_rate": 3.7141301074491886e-06, + "loss": 0.10066184997558594, + "step": 97955 + }, + { + "epoch": 0.8470311540756241, + "grad_norm": 1.6418097058288643, + "learning_rate": 3.713932229038626e-06, + "loss": 0.12262115478515626, + "step": 97960 + }, + { + "epoch": 0.8470743875971674, + "grad_norm": 13.443160931696324, + "learning_rate": 3.713734347335472e-06, + "loss": 0.09667434692382812, + "step": 97965 + }, + { + "epoch": 0.8471176211187106, + "grad_norm": 18.4211025743306, + "learning_rate": 3.713536462340638e-06, + "loss": 0.16056404113769532, + "step": 97970 + }, + { + "epoch": 0.8471608546402538, + "grad_norm": 3.1543765636908065, + "learning_rate": 3.713338574055037e-06, + "loss": 0.0163665771484375, + "step": 97975 + }, + { + "epoch": 0.8472040881617972, + "grad_norm": 4.236629371174547, + "learning_rate": 3.7131406824795812e-06, + "loss": 0.349322509765625, + "step": 97980 + }, + { + "epoch": 0.8472473216833404, + "grad_norm": 3.320978119999257, + "learning_rate": 3.7129427876151843e-06, + "loss": 0.09667625427246093, + "step": 97985 + }, + { + "epoch": 0.8472905552048836, + "grad_norm": 19.280116774237072, + "learning_rate": 3.712744889462757e-06, + "loss": 0.14312896728515626, + "step": 97990 + }, + { + "epoch": 0.847333788726427, + "grad_norm": 1.135528156672056, + "learning_rate": 3.712546988023214e-06, + "loss": 0.1552886962890625, + "step": 97995 + }, + { + "epoch": 0.8473770222479702, + "grad_norm": 0.3618315181385488, + "learning_rate": 3.7123490832974673e-06, + "loss": 0.14481983184814454, + "step": 98000 + }, + { + "epoch": 0.8474202557695134, + "grad_norm": 2.1248463019601673, + "learning_rate": 3.71215117528643e-06, + "loss": 0.22133331298828124, + "step": 98005 + }, + { + "epoch": 0.8474634892910567, + "grad_norm": 2.911554732028446, + "learning_rate": 3.7119532639910135e-06, + "loss": 0.052387237548828125, + "step": 98010 + }, + { + "epoch": 0.8475067228126, + "grad_norm": 1.1169216744952974, + "learning_rate": 3.7117553494121327e-06, + "loss": 0.6775653839111329, + "step": 98015 + }, + { + "epoch": 0.8475499563341432, + "grad_norm": 2.1451955753440206, + "learning_rate": 3.7115574315506976e-06, + "loss": 0.08124198913574218, + "step": 98020 + }, + { + "epoch": 0.8475931898556865, + "grad_norm": 0.09925022008362468, + "learning_rate": 3.7113595104076228e-06, + "loss": 0.15153961181640624, + "step": 98025 + }, + { + "epoch": 0.8476364233772298, + "grad_norm": 10.472056438622028, + "learning_rate": 3.7111615859838216e-06, + "loss": 0.15045318603515626, + "step": 98030 + }, + { + "epoch": 0.847679656898773, + "grad_norm": 2.9362762260693698, + "learning_rate": 3.7109636582802063e-06, + "loss": 0.07654037475585937, + "step": 98035 + }, + { + "epoch": 0.8477228904203163, + "grad_norm": 24.651745689480364, + "learning_rate": 3.7107657272976884e-06, + "loss": 0.3072166442871094, + "step": 98040 + }, + { + "epoch": 0.8477661239418596, + "grad_norm": 0.28833401926600793, + "learning_rate": 3.710567793037182e-06, + "loss": 0.058740234375, + "step": 98045 + }, + { + "epoch": 0.8478093574634028, + "grad_norm": 0.2037877268928163, + "learning_rate": 3.7103698554996e-06, + "loss": 0.015177726745605469, + "step": 98050 + }, + { + "epoch": 0.8478525909849461, + "grad_norm": 8.587303606107573, + "learning_rate": 3.710171914685854e-06, + "loss": 0.17502822875976562, + "step": 98055 + }, + { + "epoch": 0.8478958245064894, + "grad_norm": 5.335372246263375, + "learning_rate": 3.7099739705968595e-06, + "loss": 0.03783721923828125, + "step": 98060 + }, + { + "epoch": 0.8479390580280326, + "grad_norm": 33.893080233196784, + "learning_rate": 3.709776023233527e-06, + "loss": 0.2424591064453125, + "step": 98065 + }, + { + "epoch": 0.8479822915495758, + "grad_norm": 0.6006641663430186, + "learning_rate": 3.70957807259677e-06, + "loss": 0.19162139892578126, + "step": 98070 + }, + { + "epoch": 0.8480255250711192, + "grad_norm": 5.8973787535571525, + "learning_rate": 3.7093801186875016e-06, + "loss": 0.16309814453125, + "step": 98075 + }, + { + "epoch": 0.8480687585926624, + "grad_norm": 1.855252951091658, + "learning_rate": 3.7091821615066354e-06, + "loss": 0.04568939208984375, + "step": 98080 + }, + { + "epoch": 0.8481119921142056, + "grad_norm": 6.309169370101892, + "learning_rate": 3.7089842010550836e-06, + "loss": 0.12360305786132812, + "step": 98085 + }, + { + "epoch": 0.848155225635749, + "grad_norm": 1.4464372992062005, + "learning_rate": 3.708786237333759e-06, + "loss": 0.14173049926757814, + "step": 98090 + }, + { + "epoch": 0.8481984591572922, + "grad_norm": 2.0712396581991195, + "learning_rate": 3.708588270343575e-06, + "loss": 0.24589157104492188, + "step": 98095 + }, + { + "epoch": 0.8482416926788354, + "grad_norm": 15.02705446565136, + "learning_rate": 3.7083903000854443e-06, + "loss": 0.145501708984375, + "step": 98100 + }, + { + "epoch": 0.8482849262003788, + "grad_norm": 66.94408199435509, + "learning_rate": 3.70819232656028e-06, + "loss": 0.15828857421875, + "step": 98105 + }, + { + "epoch": 0.848328159721922, + "grad_norm": 35.8918457074658, + "learning_rate": 3.707994349768996e-06, + "loss": 0.49725341796875, + "step": 98110 + }, + { + "epoch": 0.8483713932434652, + "grad_norm": 23.1327489854984, + "learning_rate": 3.707796369712504e-06, + "loss": 0.20842399597167968, + "step": 98115 + }, + { + "epoch": 0.8484146267650086, + "grad_norm": 0.22778074913014057, + "learning_rate": 3.7075983863917177e-06, + "loss": 0.06273193359375, + "step": 98120 + }, + { + "epoch": 0.8484578602865518, + "grad_norm": 39.464335074167366, + "learning_rate": 3.7074003998075503e-06, + "loss": 0.3427703857421875, + "step": 98125 + }, + { + "epoch": 0.848501093808095, + "grad_norm": 3.8218169423882213, + "learning_rate": 3.707202409960915e-06, + "loss": 0.21245193481445312, + "step": 98130 + }, + { + "epoch": 0.8485443273296384, + "grad_norm": 5.019374286962974, + "learning_rate": 3.7070044168527243e-06, + "loss": 0.41410980224609373, + "step": 98135 + }, + { + "epoch": 0.8485875608511816, + "grad_norm": 37.16615038186973, + "learning_rate": 3.706806420483892e-06, + "loss": 0.37767868041992186, + "step": 98140 + }, + { + "epoch": 0.8486307943727248, + "grad_norm": 9.543784493105882, + "learning_rate": 3.706608420855331e-06, + "loss": 0.03538837432861328, + "step": 98145 + }, + { + "epoch": 0.848674027894268, + "grad_norm": 14.25522609961982, + "learning_rate": 3.7064104179679547e-06, + "loss": 0.06331253051757812, + "step": 98150 + }, + { + "epoch": 0.8487172614158114, + "grad_norm": 2.1690306797408363, + "learning_rate": 3.7062124118226752e-06, + "loss": 0.1930328369140625, + "step": 98155 + }, + { + "epoch": 0.8487604949373546, + "grad_norm": 6.874017816653156, + "learning_rate": 3.7060144024204067e-06, + "loss": 0.3171966552734375, + "step": 98160 + }, + { + "epoch": 0.8488037284588978, + "grad_norm": 43.60328588418634, + "learning_rate": 3.7058163897620618e-06, + "loss": 0.20572319030761718, + "step": 98165 + }, + { + "epoch": 0.8488469619804412, + "grad_norm": 9.258129959766261, + "learning_rate": 3.7056183738485544e-06, + "loss": 0.04112167358398437, + "step": 98170 + }, + { + "epoch": 0.8488901955019844, + "grad_norm": 5.907770265874856, + "learning_rate": 3.7054203546807977e-06, + "loss": 0.04526519775390625, + "step": 98175 + }, + { + "epoch": 0.8489334290235276, + "grad_norm": 10.666359420677834, + "learning_rate": 3.705222332259705e-06, + "loss": 0.08195381164550782, + "step": 98180 + }, + { + "epoch": 0.848976662545071, + "grad_norm": 28.751527727601243, + "learning_rate": 3.7050243065861883e-06, + "loss": 0.24672698974609375, + "step": 98185 + }, + { + "epoch": 0.8490198960666142, + "grad_norm": 0.2495015027521453, + "learning_rate": 3.7048262776611613e-06, + "loss": 0.010280609130859375, + "step": 98190 + }, + { + "epoch": 0.8490631295881574, + "grad_norm": 42.57734719225909, + "learning_rate": 3.7046282454855395e-06, + "loss": 0.107318115234375, + "step": 98195 + }, + { + "epoch": 0.8491063631097008, + "grad_norm": 0.6416452835872996, + "learning_rate": 3.704430210060233e-06, + "loss": 0.04405136108398437, + "step": 98200 + }, + { + "epoch": 0.849149596631244, + "grad_norm": 24.335926332281925, + "learning_rate": 3.7042321713861576e-06, + "loss": 0.073907470703125, + "step": 98205 + }, + { + "epoch": 0.8491928301527872, + "grad_norm": 15.521586949820637, + "learning_rate": 3.7040341294642258e-06, + "loss": 0.06755294799804687, + "step": 98210 + }, + { + "epoch": 0.8492360636743306, + "grad_norm": 2.9408334708737653, + "learning_rate": 3.7038360842953503e-06, + "loss": 0.04749298095703125, + "step": 98215 + }, + { + "epoch": 0.8492792971958738, + "grad_norm": 3.852366395385093, + "learning_rate": 3.7036380358804446e-06, + "loss": 0.10582695007324219, + "step": 98220 + }, + { + "epoch": 0.849322530717417, + "grad_norm": 12.507041045155106, + "learning_rate": 3.703439984220423e-06, + "loss": 0.141156005859375, + "step": 98225 + }, + { + "epoch": 0.8493657642389604, + "grad_norm": 1.536810587454739, + "learning_rate": 3.7032419293161977e-06, + "loss": 0.1184844970703125, + "step": 98230 + }, + { + "epoch": 0.8494089977605036, + "grad_norm": 2.2911631101484993, + "learning_rate": 3.7030438711686836e-06, + "loss": 0.06766510009765625, + "step": 98235 + }, + { + "epoch": 0.8494522312820468, + "grad_norm": 2.4659916499780627, + "learning_rate": 3.7028458097787934e-06, + "loss": 0.07421875, + "step": 98240 + }, + { + "epoch": 0.8494954648035901, + "grad_norm": 0.3339325393771338, + "learning_rate": 3.70264774514744e-06, + "loss": 0.3047969818115234, + "step": 98245 + }, + { + "epoch": 0.8495386983251334, + "grad_norm": 4.368704607832222, + "learning_rate": 3.7024496772755364e-06, + "loss": 0.1869110107421875, + "step": 98250 + }, + { + "epoch": 0.8495819318466766, + "grad_norm": 52.642963287374236, + "learning_rate": 3.702251606163998e-06, + "loss": 0.20531988143920898, + "step": 98255 + }, + { + "epoch": 0.8496251653682199, + "grad_norm": 6.260014301806778, + "learning_rate": 3.702053531813738e-06, + "loss": 0.070013427734375, + "step": 98260 + }, + { + "epoch": 0.8496683988897632, + "grad_norm": 0.34754528218505676, + "learning_rate": 3.7018554542256674e-06, + "loss": 0.3860038757324219, + "step": 98265 + }, + { + "epoch": 0.8497116324113064, + "grad_norm": 0.18792438176122772, + "learning_rate": 3.7016573734007024e-06, + "loss": 0.2030975341796875, + "step": 98270 + }, + { + "epoch": 0.8497548659328497, + "grad_norm": 19.81189705384257, + "learning_rate": 3.701459289339755e-06, + "loss": 0.09867324829101562, + "step": 98275 + }, + { + "epoch": 0.849798099454393, + "grad_norm": 20.30376432759144, + "learning_rate": 3.7012612020437396e-06, + "loss": 0.33013763427734377, + "step": 98280 + }, + { + "epoch": 0.8498413329759362, + "grad_norm": 0.08382920813496307, + "learning_rate": 3.70106311151357e-06, + "loss": 0.150958251953125, + "step": 98285 + }, + { + "epoch": 0.8498845664974795, + "grad_norm": 14.790310067611495, + "learning_rate": 3.70086501775016e-06, + "loss": 0.07649765014648438, + "step": 98290 + }, + { + "epoch": 0.8499278000190228, + "grad_norm": 49.9603131007387, + "learning_rate": 3.7006669207544208e-06, + "loss": 0.237469482421875, + "step": 98295 + }, + { + "epoch": 0.849971033540566, + "grad_norm": 1.2101864916527854, + "learning_rate": 3.700468820527268e-06, + "loss": 0.2802391052246094, + "step": 98300 + }, + { + "epoch": 0.8500142670621093, + "grad_norm": 0.7165328675653801, + "learning_rate": 3.700270717069616e-06, + "loss": 0.039579010009765624, + "step": 98305 + }, + { + "epoch": 0.8500575005836526, + "grad_norm": 6.795459632183635, + "learning_rate": 3.7000726103823756e-06, + "loss": 0.13256607055664063, + "step": 98310 + }, + { + "epoch": 0.8501007341051958, + "grad_norm": 150.93638033214103, + "learning_rate": 3.6998745004664638e-06, + "loss": 0.4904930114746094, + "step": 98315 + }, + { + "epoch": 0.850143967626739, + "grad_norm": 4.118454189857479, + "learning_rate": 3.6996763873227923e-06, + "loss": 0.06773529052734376, + "step": 98320 + }, + { + "epoch": 0.8501872011482823, + "grad_norm": 0.6309994428088844, + "learning_rate": 3.6994782709522747e-06, + "loss": 0.1255950927734375, + "step": 98325 + }, + { + "epoch": 0.8502304346698256, + "grad_norm": 31.359406958912974, + "learning_rate": 3.699280151355824e-06, + "loss": 0.1710174560546875, + "step": 98330 + }, + { + "epoch": 0.8502736681913688, + "grad_norm": 4.088783804679001, + "learning_rate": 3.6990820285343566e-06, + "loss": 0.5569915771484375, + "step": 98335 + }, + { + "epoch": 0.8503169017129121, + "grad_norm": 4.892791606362863, + "learning_rate": 3.698883902488785e-06, + "loss": 0.06581878662109375, + "step": 98340 + }, + { + "epoch": 0.8503601352344554, + "grad_norm": 0.11222028908266635, + "learning_rate": 3.6986857732200217e-06, + "loss": 0.05364151000976562, + "step": 98345 + }, + { + "epoch": 0.8504033687559986, + "grad_norm": 1.1276600682582083, + "learning_rate": 3.698487640728982e-06, + "loss": 0.16385650634765625, + "step": 98350 + }, + { + "epoch": 0.8504466022775419, + "grad_norm": 0.8058537821985986, + "learning_rate": 3.6982895050165783e-06, + "loss": 0.09798583984375, + "step": 98355 + }, + { + "epoch": 0.8504898357990852, + "grad_norm": 7.318526712288243, + "learning_rate": 3.6980913660837246e-06, + "loss": 0.04978179931640625, + "step": 98360 + }, + { + "epoch": 0.8505330693206284, + "grad_norm": 3.9715201214511593, + "learning_rate": 3.6978932239313363e-06, + "loss": 0.2677486419677734, + "step": 98365 + }, + { + "epoch": 0.8505763028421717, + "grad_norm": 29.337297301998817, + "learning_rate": 3.697695078560326e-06, + "loss": 0.13492431640625, + "step": 98370 + }, + { + "epoch": 0.850619536363715, + "grad_norm": 22.39845765141021, + "learning_rate": 3.697496929971607e-06, + "loss": 0.331298828125, + "step": 98375 + }, + { + "epoch": 0.8506627698852582, + "grad_norm": 21.33360542614224, + "learning_rate": 3.6972987781660953e-06, + "loss": 0.098223876953125, + "step": 98380 + }, + { + "epoch": 0.8507060034068015, + "grad_norm": 15.948081294429125, + "learning_rate": 3.697100623144701e-06, + "loss": 0.07290205955505372, + "step": 98385 + }, + { + "epoch": 0.8507492369283448, + "grad_norm": 29.847067643929048, + "learning_rate": 3.6969024649083414e-06, + "loss": 0.22267093658447265, + "step": 98390 + }, + { + "epoch": 0.850792470449888, + "grad_norm": 0.7900087098286053, + "learning_rate": 3.6967043034579295e-06, + "loss": 0.3133026123046875, + "step": 98395 + }, + { + "epoch": 0.8508357039714313, + "grad_norm": 289.875818599431, + "learning_rate": 3.696506138794379e-06, + "loss": 0.34030303955078123, + "step": 98400 + }, + { + "epoch": 0.8508789374929746, + "grad_norm": 11.825166358301688, + "learning_rate": 3.696307970918603e-06, + "loss": 0.071807861328125, + "step": 98405 + }, + { + "epoch": 0.8509221710145178, + "grad_norm": 1.5427908903538246, + "learning_rate": 3.696109799831517e-06, + "loss": 0.35645599365234376, + "step": 98410 + }, + { + "epoch": 0.8509654045360611, + "grad_norm": 8.550634420457099, + "learning_rate": 3.6959116255340337e-06, + "loss": 0.26237640380859373, + "step": 98415 + }, + { + "epoch": 0.8510086380576043, + "grad_norm": 0.29067799008292594, + "learning_rate": 3.6957134480270675e-06, + "loss": 0.38879547119140623, + "step": 98420 + }, + { + "epoch": 0.8510518715791476, + "grad_norm": 5.81225058789922, + "learning_rate": 3.695515267311532e-06, + "loss": 0.08768310546875, + "step": 98425 + }, + { + "epoch": 0.8510951051006909, + "grad_norm": 0.3192512138247596, + "learning_rate": 3.695317083388342e-06, + "loss": 0.1226318359375, + "step": 98430 + }, + { + "epoch": 0.8511383386222341, + "grad_norm": 14.957171455047362, + "learning_rate": 3.6951188962584106e-06, + "loss": 0.1533203125, + "step": 98435 + }, + { + "epoch": 0.8511815721437774, + "grad_norm": 8.428918835925025, + "learning_rate": 3.6949207059226527e-06, + "loss": 0.0625885009765625, + "step": 98440 + }, + { + "epoch": 0.8512248056653207, + "grad_norm": 25.13960696998142, + "learning_rate": 3.6947225123819813e-06, + "loss": 0.14381561279296876, + "step": 98445 + }, + { + "epoch": 0.8512680391868639, + "grad_norm": 10.689465232412115, + "learning_rate": 3.694524315637312e-06, + "loss": 0.02590179443359375, + "step": 98450 + }, + { + "epoch": 0.8513112727084072, + "grad_norm": 0.16783770274359178, + "learning_rate": 3.694326115689557e-06, + "loss": 0.1537017822265625, + "step": 98455 + }, + { + "epoch": 0.8513545062299505, + "grad_norm": 11.138694307817905, + "learning_rate": 3.694127912539632e-06, + "loss": 0.4480998992919922, + "step": 98460 + }, + { + "epoch": 0.8513977397514937, + "grad_norm": 6.01878655786148, + "learning_rate": 3.6939297061884507e-06, + "loss": 0.028458404541015624, + "step": 98465 + }, + { + "epoch": 0.851440973273037, + "grad_norm": 0.31791838979125664, + "learning_rate": 3.693731496636925e-06, + "loss": 0.36507415771484375, + "step": 98470 + }, + { + "epoch": 0.8514842067945803, + "grad_norm": 1.9457736225148696, + "learning_rate": 3.693533283885973e-06, + "loss": 0.07357406616210938, + "step": 98475 + }, + { + "epoch": 0.8515274403161235, + "grad_norm": 8.935511232320936, + "learning_rate": 3.6933350679365064e-06, + "loss": 0.1298297882080078, + "step": 98480 + }, + { + "epoch": 0.8515706738376668, + "grad_norm": 22.224373360643824, + "learning_rate": 3.6931368487894386e-06, + "loss": 0.13030242919921875, + "step": 98485 + }, + { + "epoch": 0.85161390735921, + "grad_norm": 9.057580371625887, + "learning_rate": 3.6929386264456863e-06, + "loss": 0.208740234375, + "step": 98490 + }, + { + "epoch": 0.8516571408807533, + "grad_norm": 5.980475033678996, + "learning_rate": 3.6927404009061608e-06, + "loss": 0.23694305419921874, + "step": 98495 + }, + { + "epoch": 0.8517003744022965, + "grad_norm": 2.523000474599271, + "learning_rate": 3.692542172171779e-06, + "loss": 0.15069046020507812, + "step": 98500 + }, + { + "epoch": 0.8517436079238399, + "grad_norm": 2.6839354169623832, + "learning_rate": 3.6923439402434537e-06, + "loss": 0.05101966857910156, + "step": 98505 + }, + { + "epoch": 0.8517868414453831, + "grad_norm": 3.626698556730894, + "learning_rate": 3.6921457051220997e-06, + "loss": 0.04661483764648437, + "step": 98510 + }, + { + "epoch": 0.8518300749669263, + "grad_norm": 8.627841388824862, + "learning_rate": 3.6919474668086304e-06, + "loss": 0.17290802001953126, + "step": 98515 + }, + { + "epoch": 0.8518733084884696, + "grad_norm": 1.4402340327241876, + "learning_rate": 3.69174922530396e-06, + "loss": 0.12243194580078125, + "step": 98520 + }, + { + "epoch": 0.8519165420100129, + "grad_norm": 55.749552011723075, + "learning_rate": 3.6915509806090036e-06, + "loss": 0.2795360565185547, + "step": 98525 + }, + { + "epoch": 0.8519597755315561, + "grad_norm": 34.35683262142587, + "learning_rate": 3.6913527327246756e-06, + "loss": 0.335882568359375, + "step": 98530 + }, + { + "epoch": 0.8520030090530994, + "grad_norm": 29.026959738171588, + "learning_rate": 3.691154481651889e-06, + "loss": 0.22971878051757813, + "step": 98535 + }, + { + "epoch": 0.8520462425746427, + "grad_norm": 1.5615815314556116, + "learning_rate": 3.69095622739156e-06, + "loss": 0.05148162841796875, + "step": 98540 + }, + { + "epoch": 0.8520894760961859, + "grad_norm": 4.788251421370459, + "learning_rate": 3.690757969944602e-06, + "loss": 0.0918212890625, + "step": 98545 + }, + { + "epoch": 0.8521327096177292, + "grad_norm": 18.203419577690152, + "learning_rate": 3.690559709311929e-06, + "loss": 0.3576194763183594, + "step": 98550 + }, + { + "epoch": 0.8521759431392725, + "grad_norm": 31.115678090541035, + "learning_rate": 3.6903614454944547e-06, + "loss": 0.41876564025878904, + "step": 98555 + }, + { + "epoch": 0.8522191766608157, + "grad_norm": 0.7505980013815255, + "learning_rate": 3.690163178493096e-06, + "loss": 0.13621139526367188, + "step": 98560 + }, + { + "epoch": 0.852262410182359, + "grad_norm": 9.229848082754454, + "learning_rate": 3.6899649083087647e-06, + "loss": 0.17049026489257812, + "step": 98565 + }, + { + "epoch": 0.8523056437039023, + "grad_norm": 0.08815284588288054, + "learning_rate": 3.689766634942376e-06, + "loss": 0.0338836669921875, + "step": 98570 + }, + { + "epoch": 0.8523488772254455, + "grad_norm": 6.25535812928381, + "learning_rate": 3.6895683583948453e-06, + "loss": 0.05374755859375, + "step": 98575 + }, + { + "epoch": 0.8523921107469888, + "grad_norm": 9.556368838561689, + "learning_rate": 3.689370078667086e-06, + "loss": 0.08656044006347656, + "step": 98580 + }, + { + "epoch": 0.8524353442685321, + "grad_norm": 5.380898210969775, + "learning_rate": 3.689171795760012e-06, + "loss": 0.06418476104736329, + "step": 98585 + }, + { + "epoch": 0.8524785777900753, + "grad_norm": 6.6328402176322365, + "learning_rate": 3.6889735096745395e-06, + "loss": 0.1583740234375, + "step": 98590 + }, + { + "epoch": 0.8525218113116185, + "grad_norm": 3.434485206438253, + "learning_rate": 3.6887752204115823e-06, + "loss": 0.1222900390625, + "step": 98595 + }, + { + "epoch": 0.8525650448331619, + "grad_norm": 6.5158125770247395, + "learning_rate": 3.688576927972054e-06, + "loss": 0.17619781494140624, + "step": 98600 + }, + { + "epoch": 0.8526082783547051, + "grad_norm": 21.144287637147674, + "learning_rate": 3.68837863235687e-06, + "loss": 0.16438560485839843, + "step": 98605 + }, + { + "epoch": 0.8526515118762483, + "grad_norm": 21.085473042001627, + "learning_rate": 3.6881803335669444e-06, + "loss": 0.24367408752441405, + "step": 98610 + }, + { + "epoch": 0.8526947453977917, + "grad_norm": 18.0797308338346, + "learning_rate": 3.687982031603192e-06, + "loss": 0.329779052734375, + "step": 98615 + }, + { + "epoch": 0.8527379789193349, + "grad_norm": 0.22424741225711534, + "learning_rate": 3.6877837264665277e-06, + "loss": 0.3094902038574219, + "step": 98620 + }, + { + "epoch": 0.8527812124408781, + "grad_norm": 14.86398764809246, + "learning_rate": 3.6875854181578652e-06, + "loss": 0.16880111694335936, + "step": 98625 + }, + { + "epoch": 0.8528244459624215, + "grad_norm": 13.85164178706647, + "learning_rate": 3.687387106678119e-06, + "loss": 0.193408203125, + "step": 98630 + }, + { + "epoch": 0.8528676794839647, + "grad_norm": 0.31174518269516754, + "learning_rate": 3.687188792028205e-06, + "loss": 0.212347412109375, + "step": 98635 + }, + { + "epoch": 0.8529109130055079, + "grad_norm": 18.141762723449354, + "learning_rate": 3.6869904742090363e-06, + "loss": 0.1145751953125, + "step": 98640 + }, + { + "epoch": 0.8529541465270513, + "grad_norm": 27.559287152620566, + "learning_rate": 3.6867921532215286e-06, + "loss": 0.1735687255859375, + "step": 98645 + }, + { + "epoch": 0.8529973800485945, + "grad_norm": 7.421777463748799, + "learning_rate": 3.6865938290665966e-06, + "loss": 0.1080657958984375, + "step": 98650 + }, + { + "epoch": 0.8530406135701377, + "grad_norm": 3.96183962853183, + "learning_rate": 3.6863955017451544e-06, + "loss": 0.047747802734375, + "step": 98655 + }, + { + "epoch": 0.8530838470916811, + "grad_norm": 26.57519486769007, + "learning_rate": 3.6861971712581163e-06, + "loss": 0.42315521240234377, + "step": 98660 + }, + { + "epoch": 0.8531270806132243, + "grad_norm": 3.6758607905018112, + "learning_rate": 3.685998837606398e-06, + "loss": 0.1651123046875, + "step": 98665 + }, + { + "epoch": 0.8531703141347675, + "grad_norm": 7.949925723183979, + "learning_rate": 3.6858005007909133e-06, + "loss": 0.04991912841796875, + "step": 98670 + }, + { + "epoch": 0.8532135476563107, + "grad_norm": 25.89363796778883, + "learning_rate": 3.6856021608125775e-06, + "loss": 0.148614501953125, + "step": 98675 + }, + { + "epoch": 0.8532567811778541, + "grad_norm": 2.586647121999476, + "learning_rate": 3.685403817672305e-06, + "loss": 0.15043144226074218, + "step": 98680 + }, + { + "epoch": 0.8533000146993973, + "grad_norm": 4.315331818566206, + "learning_rate": 3.685205471371011e-06, + "loss": 0.11605987548828126, + "step": 98685 + }, + { + "epoch": 0.8533432482209405, + "grad_norm": 0.9617398169927169, + "learning_rate": 3.6850071219096094e-06, + "loss": 0.250079345703125, + "step": 98690 + }, + { + "epoch": 0.8533864817424839, + "grad_norm": 2.7240963710000936, + "learning_rate": 3.684808769289015e-06, + "loss": 0.07609710693359376, + "step": 98695 + }, + { + "epoch": 0.8534297152640271, + "grad_norm": 0.07958511334653678, + "learning_rate": 3.684610413510144e-06, + "loss": 0.014470672607421875, + "step": 98700 + }, + { + "epoch": 0.8534729487855703, + "grad_norm": 3.4663575432785674, + "learning_rate": 3.6844120545739103e-06, + "loss": 0.04725494384765625, + "step": 98705 + }, + { + "epoch": 0.8535161823071137, + "grad_norm": 3.4309081266815307, + "learning_rate": 3.684213692481228e-06, + "loss": 0.28829498291015626, + "step": 98710 + }, + { + "epoch": 0.8535594158286569, + "grad_norm": 5.775419972608558, + "learning_rate": 3.6840153272330133e-06, + "loss": 0.2072265625, + "step": 98715 + }, + { + "epoch": 0.8536026493502001, + "grad_norm": 3.417676557472473, + "learning_rate": 3.6838169588301796e-06, + "loss": 0.0642852783203125, + "step": 98720 + }, + { + "epoch": 0.8536458828717435, + "grad_norm": 2.9918632564916177, + "learning_rate": 3.683618587273643e-06, + "loss": 0.06486549377441406, + "step": 98725 + }, + { + "epoch": 0.8536891163932867, + "grad_norm": 0.9314008322757823, + "learning_rate": 3.6834202125643186e-06, + "loss": 0.10314254760742188, + "step": 98730 + }, + { + "epoch": 0.8537323499148299, + "grad_norm": 0.8643856679000873, + "learning_rate": 3.68322183470312e-06, + "loss": 0.131524658203125, + "step": 98735 + }, + { + "epoch": 0.8537755834363733, + "grad_norm": 4.815767974165202, + "learning_rate": 3.6830234536909616e-06, + "loss": 0.08428230285644531, + "step": 98740 + }, + { + "epoch": 0.8538188169579165, + "grad_norm": 23.148655538322785, + "learning_rate": 3.6828250695287602e-06, + "loss": 0.32778244018554686, + "step": 98745 + }, + { + "epoch": 0.8538620504794597, + "grad_norm": 0.42139828022740516, + "learning_rate": 3.6826266822174305e-06, + "loss": 0.0667938232421875, + "step": 98750 + }, + { + "epoch": 0.8539052840010031, + "grad_norm": 11.019291800241302, + "learning_rate": 3.682428291757886e-06, + "loss": 0.1278564453125, + "step": 98755 + }, + { + "epoch": 0.8539485175225463, + "grad_norm": 2.3421000904893847, + "learning_rate": 3.6822298981510432e-06, + "loss": 0.2562877655029297, + "step": 98760 + }, + { + "epoch": 0.8539917510440895, + "grad_norm": 7.11150994210121, + "learning_rate": 3.6820315013978157e-06, + "loss": 0.13678398132324218, + "step": 98765 + }, + { + "epoch": 0.8540349845656328, + "grad_norm": 0.6733661241230862, + "learning_rate": 3.68183310149912e-06, + "loss": 0.03348236083984375, + "step": 98770 + }, + { + "epoch": 0.8540782180871761, + "grad_norm": 0.8621588737610812, + "learning_rate": 3.6816346984558697e-06, + "loss": 0.204150390625, + "step": 98775 + }, + { + "epoch": 0.8541214516087193, + "grad_norm": 6.250515131659706, + "learning_rate": 3.6814362922689804e-06, + "loss": 0.13179397583007812, + "step": 98780 + }, + { + "epoch": 0.8541646851302626, + "grad_norm": 18.262820210611846, + "learning_rate": 3.681237882939367e-06, + "loss": 0.056536865234375, + "step": 98785 + }, + { + "epoch": 0.8542079186518059, + "grad_norm": 0.3922200502828833, + "learning_rate": 3.6810394704679453e-06, + "loss": 0.10999374389648438, + "step": 98790 + }, + { + "epoch": 0.8542511521733491, + "grad_norm": 6.890919245203244, + "learning_rate": 3.6808410548556295e-06, + "loss": 0.330401611328125, + "step": 98795 + }, + { + "epoch": 0.8542943856948924, + "grad_norm": 1.0798941670382673, + "learning_rate": 3.680642636103335e-06, + "loss": 0.07179641723632812, + "step": 98800 + }, + { + "epoch": 0.8543376192164357, + "grad_norm": 1.0484004599199666, + "learning_rate": 3.680444214211976e-06, + "loss": 0.041387939453125, + "step": 98805 + }, + { + "epoch": 0.8543808527379789, + "grad_norm": 22.751899854631723, + "learning_rate": 3.6802457891824686e-06, + "loss": 0.11081295013427735, + "step": 98810 + }, + { + "epoch": 0.8544240862595222, + "grad_norm": 0.5414337712361392, + "learning_rate": 3.680047361015729e-06, + "loss": 0.06972541809082031, + "step": 98815 + }, + { + "epoch": 0.8544673197810655, + "grad_norm": 25.951458618633964, + "learning_rate": 3.6798489297126692e-06, + "loss": 0.15065536499023438, + "step": 98820 + }, + { + "epoch": 0.8545105533026087, + "grad_norm": 0.36077596991008337, + "learning_rate": 3.6796504952742076e-06, + "loss": 0.08226509094238281, + "step": 98825 + }, + { + "epoch": 0.854553786824152, + "grad_norm": 4.239811736962412, + "learning_rate": 3.6794520577012583e-06, + "loss": 0.05037384033203125, + "step": 98830 + }, + { + "epoch": 0.8545970203456953, + "grad_norm": 0.3725042223218349, + "learning_rate": 3.6792536169947347e-06, + "loss": 0.11169967651367188, + "step": 98835 + }, + { + "epoch": 0.8546402538672385, + "grad_norm": 4.0199741436473095, + "learning_rate": 3.679055173155554e-06, + "loss": 0.38118896484375, + "step": 98840 + }, + { + "epoch": 0.8546834873887817, + "grad_norm": 6.74193937111747, + "learning_rate": 3.6788567261846316e-06, + "loss": 0.04378833770751953, + "step": 98845 + }, + { + "epoch": 0.854726720910325, + "grad_norm": 2.8348652565855748, + "learning_rate": 3.678658276082881e-06, + "loss": 0.22269287109375, + "step": 98850 + }, + { + "epoch": 0.8547699544318683, + "grad_norm": 0.16905879172329377, + "learning_rate": 3.6784598228512187e-06, + "loss": 0.042205810546875, + "step": 98855 + }, + { + "epoch": 0.8548131879534115, + "grad_norm": 2.813041699329864, + "learning_rate": 3.67826136649056e-06, + "loss": 0.10533599853515625, + "step": 98860 + }, + { + "epoch": 0.8548564214749548, + "grad_norm": 28.911326301850906, + "learning_rate": 3.6780629070018196e-06, + "loss": 0.3079845428466797, + "step": 98865 + }, + { + "epoch": 0.8548996549964981, + "grad_norm": 1.8877517567123188, + "learning_rate": 3.6778644443859126e-06, + "loss": 0.057201385498046875, + "step": 98870 + }, + { + "epoch": 0.8549428885180413, + "grad_norm": 53.129655468738854, + "learning_rate": 3.677665978643755e-06, + "loss": 0.4449270248413086, + "step": 98875 + }, + { + "epoch": 0.8549861220395846, + "grad_norm": 3.047038315440402, + "learning_rate": 3.677467509776262e-06, + "loss": 0.10641937255859375, + "step": 98880 + }, + { + "epoch": 0.8550293555611279, + "grad_norm": 6.813215444768657, + "learning_rate": 3.677269037784348e-06, + "loss": 0.027420234680175782, + "step": 98885 + }, + { + "epoch": 0.8550725890826711, + "grad_norm": 1.0743030871171564, + "learning_rate": 3.6770705626689295e-06, + "loss": 0.08173828125, + "step": 98890 + }, + { + "epoch": 0.8551158226042144, + "grad_norm": 23.874010489715296, + "learning_rate": 3.676872084430922e-06, + "loss": 0.08010711669921874, + "step": 98895 + }, + { + "epoch": 0.8551590561257577, + "grad_norm": 2.3125163730984877, + "learning_rate": 3.676673603071239e-06, + "loss": 0.02951202392578125, + "step": 98900 + }, + { + "epoch": 0.8552022896473009, + "grad_norm": 4.453597947133364, + "learning_rate": 3.676475118590798e-06, + "loss": 0.07342629432678223, + "step": 98905 + }, + { + "epoch": 0.8552455231688442, + "grad_norm": 3.2564941262145006, + "learning_rate": 3.6762766309905135e-06, + "loss": 0.08145675659179688, + "step": 98910 + }, + { + "epoch": 0.8552887566903875, + "grad_norm": 23.730544731652493, + "learning_rate": 3.6760781402713006e-06, + "loss": 0.06605300903320313, + "step": 98915 + }, + { + "epoch": 0.8553319902119307, + "grad_norm": 1.090065093191987, + "learning_rate": 3.675879646434075e-06, + "loss": 0.36146087646484376, + "step": 98920 + }, + { + "epoch": 0.855375223733474, + "grad_norm": 10.312489351563903, + "learning_rate": 3.675681149479753e-06, + "loss": 0.11201858520507812, + "step": 98925 + }, + { + "epoch": 0.8554184572550172, + "grad_norm": 12.573974216833356, + "learning_rate": 3.675482649409249e-06, + "loss": 0.2823360443115234, + "step": 98930 + }, + { + "epoch": 0.8554616907765605, + "grad_norm": 1.6079791771313028, + "learning_rate": 3.6752841462234774e-06, + "loss": 0.0247833251953125, + "step": 98935 + }, + { + "epoch": 0.8555049242981038, + "grad_norm": 8.89655361032651, + "learning_rate": 3.6750856399233564e-06, + "loss": 0.025156402587890626, + "step": 98940 + }, + { + "epoch": 0.855548157819647, + "grad_norm": 57.254409755536216, + "learning_rate": 3.6748871305097993e-06, + "loss": 0.14617595672607422, + "step": 98945 + }, + { + "epoch": 0.8555913913411903, + "grad_norm": 24.066251136638137, + "learning_rate": 3.6746886179837223e-06, + "loss": 0.1079620361328125, + "step": 98950 + }, + { + "epoch": 0.8556346248627336, + "grad_norm": 17.84392838301263, + "learning_rate": 3.6744901023460418e-06, + "loss": 0.13344459533691405, + "step": 98955 + }, + { + "epoch": 0.8556778583842768, + "grad_norm": 6.985410662205238, + "learning_rate": 3.6742915835976726e-06, + "loss": 0.12972145080566405, + "step": 98960 + }, + { + "epoch": 0.8557210919058201, + "grad_norm": 5.366131683905709, + "learning_rate": 3.6740930617395295e-06, + "loss": 0.13920936584472657, + "step": 98965 + }, + { + "epoch": 0.8557643254273634, + "grad_norm": 0.31687465054652936, + "learning_rate": 3.6738945367725294e-06, + "loss": 0.3550224304199219, + "step": 98970 + }, + { + "epoch": 0.8558075589489066, + "grad_norm": 27.52181472328777, + "learning_rate": 3.6736960086975867e-06, + "loss": 0.29616241455078124, + "step": 98975 + }, + { + "epoch": 0.8558507924704499, + "grad_norm": 26.848187280214148, + "learning_rate": 3.673497477515617e-06, + "loss": 0.19516983032226562, + "step": 98980 + }, + { + "epoch": 0.8558940259919932, + "grad_norm": 42.072813918373214, + "learning_rate": 3.6732989432275377e-06, + "loss": 0.3467041015625, + "step": 98985 + }, + { + "epoch": 0.8559372595135364, + "grad_norm": 0.08356852390765081, + "learning_rate": 3.6731004058342628e-06, + "loss": 0.0117584228515625, + "step": 98990 + }, + { + "epoch": 0.8559804930350797, + "grad_norm": 3.3533414904273986, + "learning_rate": 3.6729018653367078e-06, + "loss": 0.05022125244140625, + "step": 98995 + }, + { + "epoch": 0.856023726556623, + "grad_norm": 15.333544668656728, + "learning_rate": 3.6727033217357897e-06, + "loss": 0.1268228530883789, + "step": 99000 + }, + { + "epoch": 0.8560669600781662, + "grad_norm": 20.207091272422936, + "learning_rate": 3.672504775032422e-06, + "loss": 0.09752197265625, + "step": 99005 + }, + { + "epoch": 0.8561101935997095, + "grad_norm": 15.288588601639152, + "learning_rate": 3.672306225227522e-06, + "loss": 0.15885772705078124, + "step": 99010 + }, + { + "epoch": 0.8561534271212528, + "grad_norm": 27.391649175542348, + "learning_rate": 3.672107672322006e-06, + "loss": 0.3474620819091797, + "step": 99015 + }, + { + "epoch": 0.856196660642796, + "grad_norm": 6.294924580988004, + "learning_rate": 3.671909116316789e-06, + "loss": 0.06078033447265625, + "step": 99020 + }, + { + "epoch": 0.8562398941643392, + "grad_norm": 2.737170781366495, + "learning_rate": 3.671710557212786e-06, + "loss": 0.36162109375, + "step": 99025 + }, + { + "epoch": 0.8562831276858826, + "grad_norm": 43.24919600930568, + "learning_rate": 3.671511995010913e-06, + "loss": 0.1570465087890625, + "step": 99030 + }, + { + "epoch": 0.8563263612074258, + "grad_norm": 70.41370954885542, + "learning_rate": 3.671313429712086e-06, + "loss": 0.3712249755859375, + "step": 99035 + }, + { + "epoch": 0.856369594728969, + "grad_norm": 20.650443058762683, + "learning_rate": 3.671114861317221e-06, + "loss": 0.31957321166992186, + "step": 99040 + }, + { + "epoch": 0.8564128282505123, + "grad_norm": 50.94929442094642, + "learning_rate": 3.670916289827233e-06, + "loss": 0.24777793884277344, + "step": 99045 + }, + { + "epoch": 0.8564560617720556, + "grad_norm": 0.8412900380582976, + "learning_rate": 3.67071771524304e-06, + "loss": 0.15097808837890625, + "step": 99050 + }, + { + "epoch": 0.8564992952935988, + "grad_norm": 0.48477835875970837, + "learning_rate": 3.670519137565555e-06, + "loss": 0.2601646423339844, + "step": 99055 + }, + { + "epoch": 0.8565425288151421, + "grad_norm": 9.795771058642027, + "learning_rate": 3.6703205567956946e-06, + "loss": 0.21046714782714843, + "step": 99060 + }, + { + "epoch": 0.8565857623366854, + "grad_norm": 8.240620085541181, + "learning_rate": 3.6701219729343755e-06, + "loss": 0.11878242492675781, + "step": 99065 + }, + { + "epoch": 0.8566289958582286, + "grad_norm": 3.272311606379228, + "learning_rate": 3.6699233859825136e-06, + "loss": 0.03701095581054688, + "step": 99070 + }, + { + "epoch": 0.8566722293797719, + "grad_norm": 1.454364823903692, + "learning_rate": 3.6697247959410227e-06, + "loss": 0.08014907836914062, + "step": 99075 + }, + { + "epoch": 0.8567154629013152, + "grad_norm": 9.600729379418995, + "learning_rate": 3.669526202810822e-06, + "loss": 0.42503814697265624, + "step": 99080 + }, + { + "epoch": 0.8567586964228584, + "grad_norm": 3.4352893634018273, + "learning_rate": 3.6693276065928245e-06, + "loss": 0.0980804443359375, + "step": 99085 + }, + { + "epoch": 0.8568019299444017, + "grad_norm": 4.053549558637871, + "learning_rate": 3.6691290072879468e-06, + "loss": 0.23173980712890624, + "step": 99090 + }, + { + "epoch": 0.856845163465945, + "grad_norm": 0.588370227983191, + "learning_rate": 3.6689304048971065e-06, + "loss": 0.055294036865234375, + "step": 99095 + }, + { + "epoch": 0.8568883969874882, + "grad_norm": 1.1995203558803227, + "learning_rate": 3.668731799421218e-06, + "loss": 0.0954010009765625, + "step": 99100 + }, + { + "epoch": 0.8569316305090314, + "grad_norm": 26.436796624428997, + "learning_rate": 3.668533190861198e-06, + "loss": 0.46330108642578127, + "step": 99105 + }, + { + "epoch": 0.8569748640305748, + "grad_norm": 2.8340218079772654, + "learning_rate": 3.668334579217961e-06, + "loss": 0.05623435974121094, + "step": 99110 + }, + { + "epoch": 0.857018097552118, + "grad_norm": 2.7584437120702345, + "learning_rate": 3.668135964492423e-06, + "loss": 0.09141807556152344, + "step": 99115 + }, + { + "epoch": 0.8570613310736612, + "grad_norm": 21.083642554296954, + "learning_rate": 3.667937346685503e-06, + "loss": 0.27501220703125, + "step": 99120 + }, + { + "epoch": 0.8571045645952046, + "grad_norm": 8.631389647780665, + "learning_rate": 3.667738725798114e-06, + "loss": 0.0971588134765625, + "step": 99125 + }, + { + "epoch": 0.8571477981167478, + "grad_norm": 32.98489135194454, + "learning_rate": 3.6675401018311737e-06, + "loss": 0.1646759033203125, + "step": 99130 + }, + { + "epoch": 0.857191031638291, + "grad_norm": 3.250793023018658, + "learning_rate": 3.667341474785597e-06, + "loss": 0.22598876953125, + "step": 99135 + }, + { + "epoch": 0.8572342651598344, + "grad_norm": 3.5182891685549484, + "learning_rate": 3.6671428446622997e-06, + "loss": 0.036449432373046875, + "step": 99140 + }, + { + "epoch": 0.8572774986813776, + "grad_norm": 16.366779189300644, + "learning_rate": 3.666944211462199e-06, + "loss": 0.1607513427734375, + "step": 99145 + }, + { + "epoch": 0.8573207322029208, + "grad_norm": 8.279204184531954, + "learning_rate": 3.6667455751862107e-06, + "loss": 0.08684120178222657, + "step": 99150 + }, + { + "epoch": 0.8573639657244642, + "grad_norm": 1.4958286774927, + "learning_rate": 3.6665469358352512e-06, + "loss": 0.19161911010742189, + "step": 99155 + }, + { + "epoch": 0.8574071992460074, + "grad_norm": 0.9334455152839242, + "learning_rate": 3.6663482934102355e-06, + "loss": 0.08244476318359376, + "step": 99160 + }, + { + "epoch": 0.8574504327675506, + "grad_norm": 7.136629341605849, + "learning_rate": 3.666149647912081e-06, + "loss": 0.1847076416015625, + "step": 99165 + }, + { + "epoch": 0.857493666289094, + "grad_norm": 30.15562978931556, + "learning_rate": 3.6659509993417024e-06, + "loss": 0.10137100219726562, + "step": 99170 + }, + { + "epoch": 0.8575368998106372, + "grad_norm": 27.907608555392567, + "learning_rate": 3.6657523477000166e-06, + "loss": 0.16825103759765625, + "step": 99175 + }, + { + "epoch": 0.8575801333321804, + "grad_norm": 1.3339687045644053, + "learning_rate": 3.6655536929879405e-06, + "loss": 0.0623291015625, + "step": 99180 + }, + { + "epoch": 0.8576233668537238, + "grad_norm": 5.9211805655533025, + "learning_rate": 3.665355035206389e-06, + "loss": 0.06888580322265625, + "step": 99185 + }, + { + "epoch": 0.857666600375267, + "grad_norm": 3.0041386860139574, + "learning_rate": 3.6651563743562794e-06, + "loss": 0.20275421142578126, + "step": 99190 + }, + { + "epoch": 0.8577098338968102, + "grad_norm": 0.5207962462396575, + "learning_rate": 3.6649577104385273e-06, + "loss": 0.20257844924926757, + "step": 99195 + }, + { + "epoch": 0.8577530674183534, + "grad_norm": 0.2516232953280649, + "learning_rate": 3.664759043454048e-06, + "loss": 0.05768518447875977, + "step": 99200 + }, + { + "epoch": 0.8577963009398968, + "grad_norm": 0.9950527713967753, + "learning_rate": 3.66456037340376e-06, + "loss": 0.14402999877929687, + "step": 99205 + }, + { + "epoch": 0.85783953446144, + "grad_norm": 1.8471504236592848, + "learning_rate": 3.6643617002885778e-06, + "loss": 0.1586456298828125, + "step": 99210 + }, + { + "epoch": 0.8578827679829832, + "grad_norm": 0.5223477341627653, + "learning_rate": 3.664163024109418e-06, + "loss": 0.0945159912109375, + "step": 99215 + }, + { + "epoch": 0.8579260015045266, + "grad_norm": 35.19849718992371, + "learning_rate": 3.663964344867197e-06, + "loss": 0.243402099609375, + "step": 99220 + }, + { + "epoch": 0.8579692350260698, + "grad_norm": 38.59354673435668, + "learning_rate": 3.6637656625628315e-06, + "loss": 0.2334991455078125, + "step": 99225 + }, + { + "epoch": 0.858012468547613, + "grad_norm": 7.3929082735105665, + "learning_rate": 3.6635669771972366e-06, + "loss": 0.04127044677734375, + "step": 99230 + }, + { + "epoch": 0.8580557020691564, + "grad_norm": 1.1007523387480944, + "learning_rate": 3.66336828877133e-06, + "loss": 0.1679656982421875, + "step": 99235 + }, + { + "epoch": 0.8580989355906996, + "grad_norm": 3.643170004277672, + "learning_rate": 3.6631695972860274e-06, + "loss": 0.0246246337890625, + "step": 99240 + }, + { + "epoch": 0.8581421691122428, + "grad_norm": 1.9452523268979343, + "learning_rate": 3.6629709027422457e-06, + "loss": 0.14178466796875, + "step": 99245 + }, + { + "epoch": 0.8581854026337862, + "grad_norm": 32.86362806387038, + "learning_rate": 3.6627722051409e-06, + "loss": 0.261517333984375, + "step": 99250 + }, + { + "epoch": 0.8582286361553294, + "grad_norm": 0.7703275682591452, + "learning_rate": 3.6625735044829082e-06, + "loss": 0.12191696166992187, + "step": 99255 + }, + { + "epoch": 0.8582718696768726, + "grad_norm": 25.483092038977396, + "learning_rate": 3.662374800769185e-06, + "loss": 0.09503173828125, + "step": 99260 + }, + { + "epoch": 0.858315103198416, + "grad_norm": 18.192027541783485, + "learning_rate": 3.662176094000648e-06, + "loss": 0.25516719818115235, + "step": 99265 + }, + { + "epoch": 0.8583583367199592, + "grad_norm": 8.527434728690032, + "learning_rate": 3.6619773841782144e-06, + "loss": 0.04538803100585938, + "step": 99270 + }, + { + "epoch": 0.8584015702415024, + "grad_norm": 0.8355854308508437, + "learning_rate": 3.6617786713027987e-06, + "loss": 0.04853515625, + "step": 99275 + }, + { + "epoch": 0.8584448037630457, + "grad_norm": 110.3929210995476, + "learning_rate": 3.661579955375319e-06, + "loss": 0.1819122314453125, + "step": 99280 + }, + { + "epoch": 0.858488037284589, + "grad_norm": 9.410015913955863, + "learning_rate": 3.66138123639669e-06, + "loss": 0.082489013671875, + "step": 99285 + }, + { + "epoch": 0.8585312708061322, + "grad_norm": 9.47235902100779, + "learning_rate": 3.6611825143678292e-06, + "loss": 0.3391548156738281, + "step": 99290 + }, + { + "epoch": 0.8585745043276755, + "grad_norm": 0.11002310603149053, + "learning_rate": 3.6609837892896536e-06, + "loss": 0.0598480224609375, + "step": 99295 + }, + { + "epoch": 0.8586177378492188, + "grad_norm": 15.507728753246457, + "learning_rate": 3.660785061163079e-06, + "loss": 0.1281494140625, + "step": 99300 + }, + { + "epoch": 0.858660971370762, + "grad_norm": 0.34113547725640125, + "learning_rate": 3.660586329989022e-06, + "loss": 0.2217620849609375, + "step": 99305 + }, + { + "epoch": 0.8587042048923053, + "grad_norm": 16.398896072661255, + "learning_rate": 3.6603875957684e-06, + "loss": 0.1522045135498047, + "step": 99310 + }, + { + "epoch": 0.8587474384138486, + "grad_norm": 13.504320972231117, + "learning_rate": 3.6601888585021272e-06, + "loss": 0.28764801025390624, + "step": 99315 + }, + { + "epoch": 0.8587906719353918, + "grad_norm": 38.679119840096035, + "learning_rate": 3.659990118191123e-06, + "loss": 0.17983360290527345, + "step": 99320 + }, + { + "epoch": 0.858833905456935, + "grad_norm": 1.9538593163382199, + "learning_rate": 3.6597913748363022e-06, + "loss": 0.04821720123291016, + "step": 99325 + }, + { + "epoch": 0.8588771389784784, + "grad_norm": 9.89396857096615, + "learning_rate": 3.6595926284385822e-06, + "loss": 0.129150390625, + "step": 99330 + }, + { + "epoch": 0.8589203725000216, + "grad_norm": 12.536513574160107, + "learning_rate": 3.659393878998879e-06, + "loss": 0.04070892333984375, + "step": 99335 + }, + { + "epoch": 0.8589636060215649, + "grad_norm": 1.815764469722617, + "learning_rate": 3.6591951265181092e-06, + "loss": 0.09800567626953124, + "step": 99340 + }, + { + "epoch": 0.8590068395431082, + "grad_norm": 0.8619834906144961, + "learning_rate": 3.65899637099719e-06, + "loss": 0.09238624572753906, + "step": 99345 + }, + { + "epoch": 0.8590500730646514, + "grad_norm": 7.442531606578804, + "learning_rate": 3.6587976124370375e-06, + "loss": 0.1391357421875, + "step": 99350 + }, + { + "epoch": 0.8590933065861946, + "grad_norm": 1.5001620729477008, + "learning_rate": 3.6585988508385697e-06, + "loss": 0.1829925537109375, + "step": 99355 + }, + { + "epoch": 0.859136540107738, + "grad_norm": 29.101672795548144, + "learning_rate": 3.658400086202701e-06, + "loss": 0.09498367309570313, + "step": 99360 + }, + { + "epoch": 0.8591797736292812, + "grad_norm": 0.368321566786785, + "learning_rate": 3.6582013185303497e-06, + "loss": 0.23197021484375, + "step": 99365 + }, + { + "epoch": 0.8592230071508244, + "grad_norm": 0.5456737177494839, + "learning_rate": 3.658002547822432e-06, + "loss": 0.14669303894042968, + "step": 99370 + }, + { + "epoch": 0.8592662406723677, + "grad_norm": 25.053021613662708, + "learning_rate": 3.6578037740798653e-06, + "loss": 0.11493358612060547, + "step": 99375 + }, + { + "epoch": 0.859309474193911, + "grad_norm": 2.709584136693148, + "learning_rate": 3.6576049973035645e-06, + "loss": 0.09907493591308594, + "step": 99380 + }, + { + "epoch": 0.8593527077154542, + "grad_norm": 1.5602185677311757, + "learning_rate": 3.657406217494449e-06, + "loss": 0.12145462036132812, + "step": 99385 + }, + { + "epoch": 0.8593959412369975, + "grad_norm": 0.3207521143933478, + "learning_rate": 3.657207434653434e-06, + "loss": 0.20123138427734374, + "step": 99390 + }, + { + "epoch": 0.8594391747585408, + "grad_norm": 1.755650193471016, + "learning_rate": 3.6570086487814352e-06, + "loss": 0.0502471923828125, + "step": 99395 + }, + { + "epoch": 0.859482408280084, + "grad_norm": 0.07889286899929313, + "learning_rate": 3.6568098598793707e-06, + "loss": 0.07590370178222657, + "step": 99400 + }, + { + "epoch": 0.8595256418016273, + "grad_norm": 9.229003581419182, + "learning_rate": 3.6566110679481575e-06, + "loss": 0.303956413269043, + "step": 99405 + }, + { + "epoch": 0.8595688753231706, + "grad_norm": 0.9015580820525648, + "learning_rate": 3.6564122729887122e-06, + "loss": 0.09214324951171875, + "step": 99410 + }, + { + "epoch": 0.8596121088447138, + "grad_norm": 7.906206975113359, + "learning_rate": 3.656213475001952e-06, + "loss": 0.05262451171875, + "step": 99415 + }, + { + "epoch": 0.8596553423662571, + "grad_norm": 1.0902519079891728, + "learning_rate": 3.6560146739887932e-06, + "loss": 0.1063232421875, + "step": 99420 + }, + { + "epoch": 0.8596985758878004, + "grad_norm": 3.6361194465081796, + "learning_rate": 3.6558158699501517e-06, + "loss": 0.09023818969726563, + "step": 99425 + }, + { + "epoch": 0.8597418094093436, + "grad_norm": 11.48967480628369, + "learning_rate": 3.655617062886945e-06, + "loss": 0.10335693359375, + "step": 99430 + }, + { + "epoch": 0.8597850429308869, + "grad_norm": 18.927351836591594, + "learning_rate": 3.655418252800092e-06, + "loss": 0.191864013671875, + "step": 99435 + }, + { + "epoch": 0.8598282764524302, + "grad_norm": 21.479857932371942, + "learning_rate": 3.6552194396905064e-06, + "loss": 0.08702545166015625, + "step": 99440 + }, + { + "epoch": 0.8598715099739734, + "grad_norm": 2.0133833295276746, + "learning_rate": 3.6550206235591075e-06, + "loss": 0.04880599975585938, + "step": 99445 + }, + { + "epoch": 0.8599147434955167, + "grad_norm": 6.6851558737274175, + "learning_rate": 3.654821804406812e-06, + "loss": 0.3411407470703125, + "step": 99450 + }, + { + "epoch": 0.8599579770170599, + "grad_norm": 0.9644486943865191, + "learning_rate": 3.6546229822345352e-06, + "loss": 0.21787109375, + "step": 99455 + }, + { + "epoch": 0.8600012105386032, + "grad_norm": 16.45283406623162, + "learning_rate": 3.6544241570431953e-06, + "loss": 0.12417449951171874, + "step": 99460 + }, + { + "epoch": 0.8600444440601465, + "grad_norm": 23.97777188761897, + "learning_rate": 3.654225328833709e-06, + "loss": 0.52169189453125, + "step": 99465 + }, + { + "epoch": 0.8600876775816897, + "grad_norm": 7.169417066687929, + "learning_rate": 3.654026497606993e-06, + "loss": 0.23518447875976561, + "step": 99470 + }, + { + "epoch": 0.860130911103233, + "grad_norm": 0.7531157121677302, + "learning_rate": 3.6538276633639647e-06, + "loss": 0.05797576904296875, + "step": 99475 + }, + { + "epoch": 0.8601741446247763, + "grad_norm": 27.63157239708744, + "learning_rate": 3.653628826105542e-06, + "loss": 0.24141082763671876, + "step": 99480 + }, + { + "epoch": 0.8602173781463195, + "grad_norm": 29.644215308494775, + "learning_rate": 3.6534299858326396e-06, + "loss": 0.18251953125, + "step": 99485 + }, + { + "epoch": 0.8602606116678628, + "grad_norm": 3.5916039009704575, + "learning_rate": 3.653231142546176e-06, + "loss": 0.21141433715820312, + "step": 99490 + }, + { + "epoch": 0.8603038451894061, + "grad_norm": 12.526757980357528, + "learning_rate": 3.6530322962470688e-06, + "loss": 0.11859893798828125, + "step": 99495 + }, + { + "epoch": 0.8603470787109493, + "grad_norm": 4.222157594009258, + "learning_rate": 3.652833446936234e-06, + "loss": 0.0663442611694336, + "step": 99500 + }, + { + "epoch": 0.8603903122324926, + "grad_norm": 40.681168692359115, + "learning_rate": 3.6526345946145884e-06, + "loss": 0.4327831268310547, + "step": 99505 + }, + { + "epoch": 0.8604335457540359, + "grad_norm": 8.638741711758502, + "learning_rate": 3.652435739283051e-06, + "loss": 0.11646728515625, + "step": 99510 + }, + { + "epoch": 0.8604767792755791, + "grad_norm": 5.151311833746438, + "learning_rate": 3.6522368809425367e-06, + "loss": 0.185968017578125, + "step": 99515 + }, + { + "epoch": 0.8605200127971224, + "grad_norm": 10.424053735385435, + "learning_rate": 3.6520380195939635e-06, + "loss": 0.048928451538085935, + "step": 99520 + }, + { + "epoch": 0.8605632463186657, + "grad_norm": 4.235309007764313, + "learning_rate": 3.6518391552382485e-06, + "loss": 0.0776824951171875, + "step": 99525 + }, + { + "epoch": 0.8606064798402089, + "grad_norm": 14.802409288364238, + "learning_rate": 3.65164028787631e-06, + "loss": 0.21904296875, + "step": 99530 + }, + { + "epoch": 0.8606497133617522, + "grad_norm": 3.707028689150336, + "learning_rate": 3.6514414175090627e-06, + "loss": 0.03556137084960938, + "step": 99535 + }, + { + "epoch": 0.8606929468832955, + "grad_norm": 15.919944475921046, + "learning_rate": 3.651242544137425e-06, + "loss": 0.09430770874023438, + "step": 99540 + }, + { + "epoch": 0.8607361804048387, + "grad_norm": 2.0503470774103376, + "learning_rate": 3.6510436677623156e-06, + "loss": 0.2592315673828125, + "step": 99545 + }, + { + "epoch": 0.8607794139263819, + "grad_norm": 29.84752965794069, + "learning_rate": 3.650844788384651e-06, + "loss": 0.1847808837890625, + "step": 99550 + }, + { + "epoch": 0.8608226474479252, + "grad_norm": 12.762517439957923, + "learning_rate": 3.6506459060053453e-06, + "loss": 0.21710205078125, + "step": 99555 + }, + { + "epoch": 0.8608658809694685, + "grad_norm": 29.38263023821441, + "learning_rate": 3.6504470206253197e-06, + "loss": 0.1059722900390625, + "step": 99560 + }, + { + "epoch": 0.8609091144910117, + "grad_norm": 1.410233990834089, + "learning_rate": 3.6502481322454895e-06, + "loss": 0.254730224609375, + "step": 99565 + }, + { + "epoch": 0.860952348012555, + "grad_norm": 7.416859822449647, + "learning_rate": 3.650049240866772e-06, + "loss": 0.18867645263671876, + "step": 99570 + }, + { + "epoch": 0.8609955815340983, + "grad_norm": 15.897361955734564, + "learning_rate": 3.6498503464900853e-06, + "loss": 0.1008209228515625, + "step": 99575 + }, + { + "epoch": 0.8610388150556415, + "grad_norm": 6.500853687107848, + "learning_rate": 3.649651449116347e-06, + "loss": 0.2219970703125, + "step": 99580 + }, + { + "epoch": 0.8610820485771848, + "grad_norm": 3.7467940285778014, + "learning_rate": 3.649452548746472e-06, + "loss": 0.2094867706298828, + "step": 99585 + }, + { + "epoch": 0.8611252820987281, + "grad_norm": 51.746631594283045, + "learning_rate": 3.6492536453813802e-06, + "loss": 0.2053985595703125, + "step": 99590 + }, + { + "epoch": 0.8611685156202713, + "grad_norm": 2.8903407877482956, + "learning_rate": 3.649054739021988e-06, + "loss": 0.197137451171875, + "step": 99595 + }, + { + "epoch": 0.8612117491418146, + "grad_norm": 3.321227667252735, + "learning_rate": 3.6488558296692124e-06, + "loss": 0.01994171142578125, + "step": 99600 + }, + { + "epoch": 0.8612549826633579, + "grad_norm": 35.990305703027644, + "learning_rate": 3.6486569173239713e-06, + "loss": 0.2622314453125, + "step": 99605 + }, + { + "epoch": 0.8612982161849011, + "grad_norm": 13.041107368136611, + "learning_rate": 3.6484580019871816e-06, + "loss": 0.1689910888671875, + "step": 99610 + }, + { + "epoch": 0.8613414497064444, + "grad_norm": 1.9049874842443209, + "learning_rate": 3.6482590836597605e-06, + "loss": 0.035695648193359374, + "step": 99615 + }, + { + "epoch": 0.8613846832279877, + "grad_norm": 6.796419728011115, + "learning_rate": 3.648060162342627e-06, + "loss": 0.1266357421875, + "step": 99620 + }, + { + "epoch": 0.8614279167495309, + "grad_norm": 23.761488134853217, + "learning_rate": 3.647861238036696e-06, + "loss": 0.1608428955078125, + "step": 99625 + }, + { + "epoch": 0.8614711502710741, + "grad_norm": 1.3676611431727148, + "learning_rate": 3.6476623107428874e-06, + "loss": 0.18073348999023436, + "step": 99630 + }, + { + "epoch": 0.8615143837926175, + "grad_norm": 3.099645546423644, + "learning_rate": 3.647463380462116e-06, + "loss": 0.0823944091796875, + "step": 99635 + }, + { + "epoch": 0.8615576173141607, + "grad_norm": 3.5958369594328983, + "learning_rate": 3.6472644471953018e-06, + "loss": 0.05441780090332031, + "step": 99640 + }, + { + "epoch": 0.8616008508357039, + "grad_norm": 3.51539130589177, + "learning_rate": 3.6470655109433616e-06, + "loss": 0.14817543029785157, + "step": 99645 + }, + { + "epoch": 0.8616440843572473, + "grad_norm": 0.3135240355017921, + "learning_rate": 3.646866571707211e-06, + "loss": 0.0754638671875, + "step": 99650 + }, + { + "epoch": 0.8616873178787905, + "grad_norm": 21.45025811546398, + "learning_rate": 3.6466676294877694e-06, + "loss": 0.3444873809814453, + "step": 99655 + }, + { + "epoch": 0.8617305514003337, + "grad_norm": 12.43148205836113, + "learning_rate": 3.646468684285954e-06, + "loss": 0.4046875, + "step": 99660 + }, + { + "epoch": 0.8617737849218771, + "grad_norm": 25.28719526266654, + "learning_rate": 3.6462697361026825e-06, + "loss": 0.05851593017578125, + "step": 99665 + }, + { + "epoch": 0.8618170184434203, + "grad_norm": 9.81638371293277, + "learning_rate": 3.646070784938872e-06, + "loss": 0.16956405639648436, + "step": 99670 + }, + { + "epoch": 0.8618602519649635, + "grad_norm": 15.08172657336606, + "learning_rate": 3.64587183079544e-06, + "loss": 0.15782661437988282, + "step": 99675 + }, + { + "epoch": 0.8619034854865069, + "grad_norm": 25.13725554576609, + "learning_rate": 3.6456728736733042e-06, + "loss": 0.1618183135986328, + "step": 99680 + }, + { + "epoch": 0.8619467190080501, + "grad_norm": 14.728074456784174, + "learning_rate": 3.6454739135733814e-06, + "loss": 0.13233489990234376, + "step": 99685 + }, + { + "epoch": 0.8619899525295933, + "grad_norm": 2.4383460607752356, + "learning_rate": 3.645274950496591e-06, + "loss": 0.024000930786132812, + "step": 99690 + }, + { + "epoch": 0.8620331860511367, + "grad_norm": 5.248634010660204, + "learning_rate": 3.6450759844438484e-06, + "loss": 0.09850921630859374, + "step": 99695 + }, + { + "epoch": 0.8620764195726799, + "grad_norm": 10.10244537378734, + "learning_rate": 3.6448770154160734e-06, + "loss": 0.091796875, + "step": 99700 + }, + { + "epoch": 0.8621196530942231, + "grad_norm": 43.10709106681292, + "learning_rate": 3.6446780434141827e-06, + "loss": 0.3607738494873047, + "step": 99705 + }, + { + "epoch": 0.8621628866157665, + "grad_norm": 0.7432284090386523, + "learning_rate": 3.6444790684390933e-06, + "loss": 0.020954322814941407, + "step": 99710 + }, + { + "epoch": 0.8622061201373097, + "grad_norm": 9.171615904578854, + "learning_rate": 3.6442800904917238e-06, + "loss": 0.0830780029296875, + "step": 99715 + }, + { + "epoch": 0.8622493536588529, + "grad_norm": 2.5758230895969882, + "learning_rate": 3.644081109572991e-06, + "loss": 0.219354248046875, + "step": 99720 + }, + { + "epoch": 0.8622925871803961, + "grad_norm": 1.3049775887051243, + "learning_rate": 3.6438821256838133e-06, + "loss": 0.20632171630859375, + "step": 99725 + }, + { + "epoch": 0.8623358207019395, + "grad_norm": 15.385583899462654, + "learning_rate": 3.6436831388251083e-06, + "loss": 0.21878433227539062, + "step": 99730 + }, + { + "epoch": 0.8623790542234827, + "grad_norm": 26.375990232911068, + "learning_rate": 3.643484148997793e-06, + "loss": 0.3064178466796875, + "step": 99735 + }, + { + "epoch": 0.8624222877450259, + "grad_norm": 3.6515474431004904, + "learning_rate": 3.643285156202786e-06, + "loss": 0.042083740234375, + "step": 99740 + }, + { + "epoch": 0.8624655212665693, + "grad_norm": 26.65143715441742, + "learning_rate": 3.6430861604410044e-06, + "loss": 0.4100013732910156, + "step": 99745 + }, + { + "epoch": 0.8625087547881125, + "grad_norm": 12.715446168791106, + "learning_rate": 3.6428871617133667e-06, + "loss": 0.11178970336914062, + "step": 99750 + }, + { + "epoch": 0.8625519883096557, + "grad_norm": 0.1708186765453017, + "learning_rate": 3.64268816002079e-06, + "loss": 0.07411346435546876, + "step": 99755 + }, + { + "epoch": 0.8625952218311991, + "grad_norm": 21.73104011491802, + "learning_rate": 3.642489155364192e-06, + "loss": 0.12313232421875, + "step": 99760 + }, + { + "epoch": 0.8626384553527423, + "grad_norm": 1.768734676662629, + "learning_rate": 3.6422901477444913e-06, + "loss": 0.07135200500488281, + "step": 99765 + }, + { + "epoch": 0.8626816888742855, + "grad_norm": 14.179053785617953, + "learning_rate": 3.6420911371626047e-06, + "loss": 0.448724365234375, + "step": 99770 + }, + { + "epoch": 0.8627249223958289, + "grad_norm": 3.8452224456455597, + "learning_rate": 3.641892123619451e-06, + "loss": 0.08869132995605469, + "step": 99775 + }, + { + "epoch": 0.8627681559173721, + "grad_norm": 0.7826457563939678, + "learning_rate": 3.6416931071159473e-06, + "loss": 0.16040878295898436, + "step": 99780 + }, + { + "epoch": 0.8628113894389153, + "grad_norm": 0.4600114274552123, + "learning_rate": 3.641494087653012e-06, + "loss": 0.05609474182128906, + "step": 99785 + }, + { + "epoch": 0.8628546229604587, + "grad_norm": 8.944515578217729, + "learning_rate": 3.6412950652315623e-06, + "loss": 0.043231201171875, + "step": 99790 + }, + { + "epoch": 0.8628978564820019, + "grad_norm": 9.70167392211567, + "learning_rate": 3.641096039852516e-06, + "loss": 0.1851957321166992, + "step": 99795 + }, + { + "epoch": 0.8629410900035451, + "grad_norm": 1.2424646468749416, + "learning_rate": 3.6408970115167926e-06, + "loss": 0.07832298278808594, + "step": 99800 + }, + { + "epoch": 0.8629843235250884, + "grad_norm": 4.25813785394158, + "learning_rate": 3.640697980225308e-06, + "loss": 0.0501129150390625, + "step": 99805 + }, + { + "epoch": 0.8630275570466317, + "grad_norm": 47.485650350375685, + "learning_rate": 3.640498945978981e-06, + "loss": 0.1502349853515625, + "step": 99810 + }, + { + "epoch": 0.8630707905681749, + "grad_norm": 0.25310835477270777, + "learning_rate": 3.6402999087787297e-06, + "loss": 0.05827903747558594, + "step": 99815 + }, + { + "epoch": 0.8631140240897182, + "grad_norm": 5.172483264448454, + "learning_rate": 3.640100868625471e-06, + "loss": 0.28063411712646485, + "step": 99820 + }, + { + "epoch": 0.8631572576112615, + "grad_norm": 43.624808972278025, + "learning_rate": 3.639901825520124e-06, + "loss": 0.097833251953125, + "step": 99825 + }, + { + "epoch": 0.8632004911328047, + "grad_norm": 0.4403551649360387, + "learning_rate": 3.639702779463607e-06, + "loss": 0.07122726440429687, + "step": 99830 + }, + { + "epoch": 0.863243724654348, + "grad_norm": 1.4625783712938678, + "learning_rate": 3.639503730456837e-06, + "loss": 0.09935646057128907, + "step": 99835 + }, + { + "epoch": 0.8632869581758913, + "grad_norm": 7.536029363064273, + "learning_rate": 3.6393046785007323e-06, + "loss": 0.135302734375, + "step": 99840 + }, + { + "epoch": 0.8633301916974345, + "grad_norm": 0.10035424408955233, + "learning_rate": 3.639105623596211e-06, + "loss": 0.06510467529296875, + "step": 99845 + }, + { + "epoch": 0.8633734252189778, + "grad_norm": 5.698995199410221, + "learning_rate": 3.638906565744191e-06, + "loss": 0.5415412902832031, + "step": 99850 + }, + { + "epoch": 0.8634166587405211, + "grad_norm": 20.747035213047212, + "learning_rate": 3.638707504945589e-06, + "loss": 0.30817108154296874, + "step": 99855 + }, + { + "epoch": 0.8634598922620643, + "grad_norm": 4.196010524255187, + "learning_rate": 3.638508441201326e-06, + "loss": 0.146783447265625, + "step": 99860 + }, + { + "epoch": 0.8635031257836076, + "grad_norm": 1.0445476411462573, + "learning_rate": 3.638309374512319e-06, + "loss": 0.1157012939453125, + "step": 99865 + }, + { + "epoch": 0.8635463593051509, + "grad_norm": 5.080923185761142, + "learning_rate": 3.638110304879484e-06, + "loss": 0.0634368896484375, + "step": 99870 + }, + { + "epoch": 0.8635895928266941, + "grad_norm": 4.062933274751851, + "learning_rate": 3.6379112323037414e-06, + "loss": 0.35913887023925783, + "step": 99875 + }, + { + "epoch": 0.8636328263482373, + "grad_norm": 2.3106391368295824, + "learning_rate": 3.6377121567860084e-06, + "loss": 0.11296043395996094, + "step": 99880 + }, + { + "epoch": 0.8636760598697807, + "grad_norm": 1.6971390470710734, + "learning_rate": 3.637513078327204e-06, + "loss": 0.265313720703125, + "step": 99885 + }, + { + "epoch": 0.8637192933913239, + "grad_norm": 9.166172388342897, + "learning_rate": 3.637313996928245e-06, + "loss": 0.21145782470703126, + "step": 99890 + }, + { + "epoch": 0.8637625269128671, + "grad_norm": 0.3770697366734355, + "learning_rate": 3.63711491259005e-06, + "loss": 0.25881805419921877, + "step": 99895 + }, + { + "epoch": 0.8638057604344104, + "grad_norm": 0.5924028160730488, + "learning_rate": 3.636915825313538e-06, + "loss": 0.30564117431640625, + "step": 99900 + }, + { + "epoch": 0.8638489939559537, + "grad_norm": 32.25923046312668, + "learning_rate": 3.636716735099626e-06, + "loss": 0.3978546142578125, + "step": 99905 + }, + { + "epoch": 0.8638922274774969, + "grad_norm": 7.936824061309965, + "learning_rate": 3.636517641949232e-06, + "loss": 0.15621337890625, + "step": 99910 + }, + { + "epoch": 0.8639354609990402, + "grad_norm": 17.388336741929468, + "learning_rate": 3.6363185458632757e-06, + "loss": 0.08317413330078124, + "step": 99915 + }, + { + "epoch": 0.8639786945205835, + "grad_norm": 1.1983587917752765, + "learning_rate": 3.6361194468426744e-06, + "loss": 0.14173583984375, + "step": 99920 + }, + { + "epoch": 0.8640219280421267, + "grad_norm": 9.870121294590392, + "learning_rate": 3.635920344888347e-06, + "loss": 0.24534568786621094, + "step": 99925 + }, + { + "epoch": 0.86406516156367, + "grad_norm": 1.159112419113973, + "learning_rate": 3.6357212400012106e-06, + "loss": 0.060894775390625, + "step": 99930 + }, + { + "epoch": 0.8641083950852133, + "grad_norm": 1.9458981888366476, + "learning_rate": 3.635522132182183e-06, + "loss": 0.12945175170898438, + "step": 99935 + }, + { + "epoch": 0.8641516286067565, + "grad_norm": 1.3949183001090995, + "learning_rate": 3.6353230214321847e-06, + "loss": 0.026533126831054688, + "step": 99940 + }, + { + "epoch": 0.8641948621282998, + "grad_norm": 45.06919921716946, + "learning_rate": 3.635123907752133e-06, + "loss": 0.12824249267578125, + "step": 99945 + }, + { + "epoch": 0.8642380956498431, + "grad_norm": 9.780407446317799, + "learning_rate": 3.634924791142945e-06, + "loss": 0.8239532470703125, + "step": 99950 + }, + { + "epoch": 0.8642813291713863, + "grad_norm": 18.6601819803161, + "learning_rate": 3.63472567160554e-06, + "loss": 0.42256851196289064, + "step": 99955 + }, + { + "epoch": 0.8643245626929296, + "grad_norm": 2.2936130209767946, + "learning_rate": 3.6345265491408368e-06, + "loss": 0.0603179931640625, + "step": 99960 + }, + { + "epoch": 0.8643677962144729, + "grad_norm": 4.152919134547556, + "learning_rate": 3.6343274237497526e-06, + "loss": 0.1119873046875, + "step": 99965 + }, + { + "epoch": 0.8644110297360161, + "grad_norm": 3.047459333175666, + "learning_rate": 3.6341282954332073e-06, + "loss": 0.0719883918762207, + "step": 99970 + }, + { + "epoch": 0.8644542632575594, + "grad_norm": 16.572463408657843, + "learning_rate": 3.633929164192118e-06, + "loss": 0.2827583312988281, + "step": 99975 + }, + { + "epoch": 0.8644974967791026, + "grad_norm": 0.2133633952376349, + "learning_rate": 3.6337300300274036e-06, + "loss": 0.26595001220703124, + "step": 99980 + }, + { + "epoch": 0.8645407303006459, + "grad_norm": 9.007740062815325, + "learning_rate": 3.633530892939982e-06, + "loss": 0.039878463745117186, + "step": 99985 + }, + { + "epoch": 0.8645839638221892, + "grad_norm": 24.669649676682692, + "learning_rate": 3.633331752930772e-06, + "loss": 0.15145301818847656, + "step": 99990 + }, + { + "epoch": 0.8646271973437324, + "grad_norm": 1.1774595789117819, + "learning_rate": 3.633132610000692e-06, + "loss": 0.26077117919921877, + "step": 99995 + }, + { + "epoch": 0.8646704308652757, + "grad_norm": 31.405310985284956, + "learning_rate": 3.6329334641506595e-06, + "loss": 0.2587921142578125, + "step": 100000 + }, + { + "epoch": 0.864713664386819, + "grad_norm": 0.9560510358799229, + "learning_rate": 3.6327343153815947e-06, + "loss": 0.22724685668945313, + "step": 100005 + }, + { + "epoch": 0.8647568979083622, + "grad_norm": 15.081977671868945, + "learning_rate": 3.632535163694415e-06, + "loss": 0.0951171875, + "step": 100010 + }, + { + "epoch": 0.8648001314299055, + "grad_norm": 8.866935380815507, + "learning_rate": 3.6323360090900388e-06, + "loss": 0.7194869995117188, + "step": 100015 + }, + { + "epoch": 0.8648433649514488, + "grad_norm": 4.384642482209038, + "learning_rate": 3.632136851569385e-06, + "loss": 0.09219512939453126, + "step": 100020 + }, + { + "epoch": 0.864886598472992, + "grad_norm": 1.7997816903983137, + "learning_rate": 3.631937691133372e-06, + "loss": 0.05974273681640625, + "step": 100025 + }, + { + "epoch": 0.8649298319945353, + "grad_norm": 15.121883009945593, + "learning_rate": 3.6317385277829176e-06, + "loss": 0.08702392578125, + "step": 100030 + }, + { + "epoch": 0.8649730655160786, + "grad_norm": 1.0849917243857008, + "learning_rate": 3.631539361518942e-06, + "loss": 0.20424652099609375, + "step": 100035 + }, + { + "epoch": 0.8650162990376218, + "grad_norm": 2.018631609786138, + "learning_rate": 3.6313401923423622e-06, + "loss": 0.4216148376464844, + "step": 100040 + }, + { + "epoch": 0.8650595325591651, + "grad_norm": 4.259728191378396, + "learning_rate": 3.6311410202540967e-06, + "loss": 0.123736572265625, + "step": 100045 + }, + { + "epoch": 0.8651027660807084, + "grad_norm": 0.099645646308935, + "learning_rate": 3.630941845255065e-06, + "loss": 0.13331756591796876, + "step": 100050 + }, + { + "epoch": 0.8651459996022516, + "grad_norm": 9.005938757074746, + "learning_rate": 3.6307426673461854e-06, + "loss": 0.1325439453125, + "step": 100055 + }, + { + "epoch": 0.8651892331237949, + "grad_norm": 23.57197027238272, + "learning_rate": 3.6305434865283755e-06, + "loss": 0.14014892578125, + "step": 100060 + }, + { + "epoch": 0.8652324666453381, + "grad_norm": 8.840327751996826, + "learning_rate": 3.630344302802556e-06, + "loss": 0.1192901611328125, + "step": 100065 + }, + { + "epoch": 0.8652757001668814, + "grad_norm": 20.953788699893607, + "learning_rate": 3.630145116169644e-06, + "loss": 0.094757080078125, + "step": 100070 + }, + { + "epoch": 0.8653189336884246, + "grad_norm": 0.6384931323895267, + "learning_rate": 3.6299459266305577e-06, + "loss": 0.13909149169921875, + "step": 100075 + }, + { + "epoch": 0.865362167209968, + "grad_norm": 24.70370745558531, + "learning_rate": 3.6297467341862165e-06, + "loss": 0.2801490783691406, + "step": 100080 + }, + { + "epoch": 0.8654054007315112, + "grad_norm": 61.75990086842527, + "learning_rate": 3.62954753883754e-06, + "loss": 0.030249786376953126, + "step": 100085 + }, + { + "epoch": 0.8654486342530544, + "grad_norm": 7.626947550950345, + "learning_rate": 3.629348340585445e-06, + "loss": 0.05674591064453125, + "step": 100090 + }, + { + "epoch": 0.8654918677745977, + "grad_norm": 30.706499327272983, + "learning_rate": 3.629149139430851e-06, + "loss": 0.09070892333984375, + "step": 100095 + }, + { + "epoch": 0.865535101296141, + "grad_norm": 0.9310437530346626, + "learning_rate": 3.6289499353746775e-06, + "loss": 0.17506809234619142, + "step": 100100 + }, + { + "epoch": 0.8655783348176842, + "grad_norm": 3.676519380998686, + "learning_rate": 3.6287507284178416e-06, + "loss": 0.10963363647460937, + "step": 100105 + }, + { + "epoch": 0.8656215683392275, + "grad_norm": 3.8209337714183094, + "learning_rate": 3.628551518561263e-06, + "loss": 0.183837890625, + "step": 100110 + }, + { + "epoch": 0.8656648018607708, + "grad_norm": 1.643523844490075, + "learning_rate": 3.6283523058058616e-06, + "loss": 0.1360321044921875, + "step": 100115 + }, + { + "epoch": 0.865708035382314, + "grad_norm": 1.3152129036361158, + "learning_rate": 3.6281530901525537e-06, + "loss": 0.11571807861328125, + "step": 100120 + }, + { + "epoch": 0.8657512689038573, + "grad_norm": 5.602691543403685, + "learning_rate": 3.6279538716022594e-06, + "loss": 0.09066886901855468, + "step": 100125 + }, + { + "epoch": 0.8657945024254006, + "grad_norm": 2.8317083378081382, + "learning_rate": 3.6277546501558975e-06, + "loss": 0.33326263427734376, + "step": 100130 + }, + { + "epoch": 0.8658377359469438, + "grad_norm": 5.513385803653576, + "learning_rate": 3.6275554258143864e-06, + "loss": 0.06377925872802734, + "step": 100135 + }, + { + "epoch": 0.8658809694684871, + "grad_norm": 3.5823010224011647, + "learning_rate": 3.627356198578645e-06, + "loss": 0.1272918701171875, + "step": 100140 + }, + { + "epoch": 0.8659242029900304, + "grad_norm": 17.633738057651836, + "learning_rate": 3.6271569684495927e-06, + "loss": 0.10619049072265625, + "step": 100145 + }, + { + "epoch": 0.8659674365115736, + "grad_norm": 8.29099412978052, + "learning_rate": 3.626957735428148e-06, + "loss": 0.09256420135498047, + "step": 100150 + }, + { + "epoch": 0.8660106700331168, + "grad_norm": 0.5344679322764085, + "learning_rate": 3.6267584995152295e-06, + "loss": 0.2182098388671875, + "step": 100155 + }, + { + "epoch": 0.8660539035546602, + "grad_norm": 11.246016186221398, + "learning_rate": 3.6265592607117556e-06, + "loss": 0.12065963745117188, + "step": 100160 + }, + { + "epoch": 0.8660971370762034, + "grad_norm": 3.6573958965423197, + "learning_rate": 3.626360019018646e-06, + "loss": 0.1439971923828125, + "step": 100165 + }, + { + "epoch": 0.8661403705977466, + "grad_norm": 11.417010640424442, + "learning_rate": 3.6261607744368203e-06, + "loss": 0.0836181640625, + "step": 100170 + }, + { + "epoch": 0.86618360411929, + "grad_norm": 16.617864897668728, + "learning_rate": 3.625961526967195e-06, + "loss": 0.0984710693359375, + "step": 100175 + }, + { + "epoch": 0.8662268376408332, + "grad_norm": 7.050047931710661, + "learning_rate": 3.6257622766106913e-06, + "loss": 0.09199371337890624, + "step": 100180 + }, + { + "epoch": 0.8662700711623764, + "grad_norm": 1.6301241909511697, + "learning_rate": 3.6255630233682267e-06, + "loss": 0.06841964721679687, + "step": 100185 + }, + { + "epoch": 0.8663133046839198, + "grad_norm": 2.6391902103356344, + "learning_rate": 3.625363767240721e-06, + "loss": 0.11695518493652343, + "step": 100190 + }, + { + "epoch": 0.866356538205463, + "grad_norm": 9.911721492777199, + "learning_rate": 3.625164508229093e-06, + "loss": 0.20245513916015626, + "step": 100195 + }, + { + "epoch": 0.8663997717270062, + "grad_norm": 0.17960498580871828, + "learning_rate": 3.624965246334262e-06, + "loss": 0.06719207763671875, + "step": 100200 + }, + { + "epoch": 0.8664430052485496, + "grad_norm": 43.54900119714648, + "learning_rate": 3.624765981557146e-06, + "loss": 0.14079132080078124, + "step": 100205 + }, + { + "epoch": 0.8664862387700928, + "grad_norm": 15.317216494080215, + "learning_rate": 3.6245667138986645e-06, + "loss": 0.19847183227539061, + "step": 100210 + }, + { + "epoch": 0.866529472291636, + "grad_norm": 18.38508851458028, + "learning_rate": 3.6243674433597363e-06, + "loss": 0.11251907348632813, + "step": 100215 + }, + { + "epoch": 0.8665727058131794, + "grad_norm": 42.64831893035609, + "learning_rate": 3.6241681699412806e-06, + "loss": 0.4671062469482422, + "step": 100220 + }, + { + "epoch": 0.8666159393347226, + "grad_norm": 15.671187824963745, + "learning_rate": 3.623968893644217e-06, + "loss": 0.14060134887695314, + "step": 100225 + }, + { + "epoch": 0.8666591728562658, + "grad_norm": 5.067012015787299, + "learning_rate": 3.623769614469464e-06, + "loss": 0.5149276733398438, + "step": 100230 + }, + { + "epoch": 0.866702406377809, + "grad_norm": 3.7070019282990865, + "learning_rate": 3.6235703324179403e-06, + "loss": 0.5281356811523438, + "step": 100235 + }, + { + "epoch": 0.8667456398993524, + "grad_norm": 5.367581372640845, + "learning_rate": 3.6233710474905646e-06, + "loss": 0.2007659912109375, + "step": 100240 + }, + { + "epoch": 0.8667888734208956, + "grad_norm": 7.543121634321979, + "learning_rate": 3.6231717596882575e-06, + "loss": 0.05796680450439453, + "step": 100245 + }, + { + "epoch": 0.8668321069424388, + "grad_norm": 0.3204809976392452, + "learning_rate": 3.6229724690119375e-06, + "loss": 0.06873550415039062, + "step": 100250 + }, + { + "epoch": 0.8668753404639822, + "grad_norm": 28.838868458727678, + "learning_rate": 3.622773175462523e-06, + "loss": 0.322857666015625, + "step": 100255 + }, + { + "epoch": 0.8669185739855254, + "grad_norm": 12.43455165680192, + "learning_rate": 3.622573879040934e-06, + "loss": 0.1174285888671875, + "step": 100260 + }, + { + "epoch": 0.8669618075070686, + "grad_norm": 13.761174306367433, + "learning_rate": 3.6223745797480894e-06, + "loss": 0.06397781372070313, + "step": 100265 + }, + { + "epoch": 0.867005041028612, + "grad_norm": 1.8378919113442063, + "learning_rate": 3.6221752775849082e-06, + "loss": 0.1612534523010254, + "step": 100270 + }, + { + "epoch": 0.8670482745501552, + "grad_norm": 4.414495055975251, + "learning_rate": 3.6219759725523086e-06, + "loss": 0.21709709167480468, + "step": 100275 + }, + { + "epoch": 0.8670915080716984, + "grad_norm": 5.6435071286715175, + "learning_rate": 3.6217766646512123e-06, + "loss": 0.18117103576660157, + "step": 100280 + }, + { + "epoch": 0.8671347415932418, + "grad_norm": 10.594216215951937, + "learning_rate": 3.6215773538825356e-06, + "loss": 0.19485321044921874, + "step": 100285 + }, + { + "epoch": 0.867177975114785, + "grad_norm": 4.037512626126407, + "learning_rate": 3.6213780402472e-06, + "loss": 0.3881950378417969, + "step": 100290 + }, + { + "epoch": 0.8672212086363282, + "grad_norm": 18.024994091339295, + "learning_rate": 3.6211787237461244e-06, + "loss": 0.21560020446777345, + "step": 100295 + }, + { + "epoch": 0.8672644421578716, + "grad_norm": 16.57931999835965, + "learning_rate": 3.620979404380226e-06, + "loss": 0.09823417663574219, + "step": 100300 + }, + { + "epoch": 0.8673076756794148, + "grad_norm": 11.627036173468431, + "learning_rate": 3.620780082150426e-06, + "loss": 0.113641357421875, + "step": 100305 + }, + { + "epoch": 0.867350909200958, + "grad_norm": 3.594643055834642, + "learning_rate": 3.620580757057643e-06, + "loss": 0.22778816223144532, + "step": 100310 + }, + { + "epoch": 0.8673941427225014, + "grad_norm": 1.2641106682963366, + "learning_rate": 3.6203814291027963e-06, + "loss": 0.0812469482421875, + "step": 100315 + }, + { + "epoch": 0.8674373762440446, + "grad_norm": 0.24615838849513288, + "learning_rate": 3.620182098286806e-06, + "loss": 0.025695037841796876, + "step": 100320 + }, + { + "epoch": 0.8674806097655878, + "grad_norm": 13.358159186543679, + "learning_rate": 3.6199827646105903e-06, + "loss": 0.1924356460571289, + "step": 100325 + }, + { + "epoch": 0.8675238432871311, + "grad_norm": 7.632323172298017, + "learning_rate": 3.619783428075068e-06, + "loss": 0.087042236328125, + "step": 100330 + }, + { + "epoch": 0.8675670768086744, + "grad_norm": 17.81393939638177, + "learning_rate": 3.61958408868116e-06, + "loss": 0.39130172729492185, + "step": 100335 + }, + { + "epoch": 0.8676103103302176, + "grad_norm": 56.62048017771266, + "learning_rate": 3.619384746429785e-06, + "loss": 0.18841476440429689, + "step": 100340 + }, + { + "epoch": 0.8676535438517609, + "grad_norm": 57.228054655877024, + "learning_rate": 3.6191854013218627e-06, + "loss": 0.42454376220703127, + "step": 100345 + }, + { + "epoch": 0.8676967773733042, + "grad_norm": 5.207390619783738, + "learning_rate": 3.618986053358311e-06, + "loss": 0.16213226318359375, + "step": 100350 + }, + { + "epoch": 0.8677400108948474, + "grad_norm": 0.9237344032388793, + "learning_rate": 3.6187867025400515e-06, + "loss": 0.12541351318359376, + "step": 100355 + }, + { + "epoch": 0.8677832444163907, + "grad_norm": 12.990465780172537, + "learning_rate": 3.618587348868001e-06, + "loss": 0.1683868408203125, + "step": 100360 + }, + { + "epoch": 0.867826477937934, + "grad_norm": 0.5409248206741645, + "learning_rate": 3.6183879923430805e-06, + "loss": 0.260418701171875, + "step": 100365 + }, + { + "epoch": 0.8678697114594772, + "grad_norm": 3.0210540206593466, + "learning_rate": 3.6181886329662103e-06, + "loss": 0.08362236022949218, + "step": 100370 + }, + { + "epoch": 0.8679129449810205, + "grad_norm": 0.34266653908301237, + "learning_rate": 3.617989270738309e-06, + "loss": 0.09687538146972656, + "step": 100375 + }, + { + "epoch": 0.8679561785025638, + "grad_norm": 3.638583372728757, + "learning_rate": 3.6177899056602944e-06, + "loss": 0.053228759765625, + "step": 100380 + }, + { + "epoch": 0.867999412024107, + "grad_norm": 8.109878827623314, + "learning_rate": 3.6175905377330873e-06, + "loss": 0.116424560546875, + "step": 100385 + }, + { + "epoch": 0.8680426455456502, + "grad_norm": 61.57624958722208, + "learning_rate": 3.6173911669576087e-06, + "loss": 0.1244842529296875, + "step": 100390 + }, + { + "epoch": 0.8680858790671936, + "grad_norm": 9.0864932725061, + "learning_rate": 3.6171917933347754e-06, + "loss": 0.09341583251953126, + "step": 100395 + }, + { + "epoch": 0.8681291125887368, + "grad_norm": 12.646721501495895, + "learning_rate": 3.6169924168655083e-06, + "loss": 0.08817901611328124, + "step": 100400 + }, + { + "epoch": 0.86817234611028, + "grad_norm": 14.427018467296076, + "learning_rate": 3.6167930375507276e-06, + "loss": 0.09105987548828125, + "step": 100405 + }, + { + "epoch": 0.8682155796318233, + "grad_norm": 2.7784029388530045, + "learning_rate": 3.616593655391351e-06, + "loss": 0.07883758544921875, + "step": 100410 + }, + { + "epoch": 0.8682588131533666, + "grad_norm": 29.66884659424678, + "learning_rate": 3.6163942703882985e-06, + "loss": 0.416412353515625, + "step": 100415 + }, + { + "epoch": 0.8683020466749098, + "grad_norm": 6.4693246974917455, + "learning_rate": 3.616194882542491e-06, + "loss": 0.3149364471435547, + "step": 100420 + }, + { + "epoch": 0.8683452801964531, + "grad_norm": 2.338149687706514, + "learning_rate": 3.615995491854847e-06, + "loss": 0.2884849548339844, + "step": 100425 + }, + { + "epoch": 0.8683885137179964, + "grad_norm": 6.224661502239574, + "learning_rate": 3.615796098326286e-06, + "loss": 0.09085006713867187, + "step": 100430 + }, + { + "epoch": 0.8684317472395396, + "grad_norm": 6.878624511317983, + "learning_rate": 3.6155967019577282e-06, + "loss": 0.020095062255859376, + "step": 100435 + }, + { + "epoch": 0.8684749807610829, + "grad_norm": 4.0651218803467595, + "learning_rate": 3.615397302750092e-06, + "loss": 0.0191986083984375, + "step": 100440 + }, + { + "epoch": 0.8685182142826262, + "grad_norm": 2.816226424499126, + "learning_rate": 3.615197900704298e-06, + "loss": 0.04365692138671875, + "step": 100445 + }, + { + "epoch": 0.8685614478041694, + "grad_norm": 3.5932024997323984, + "learning_rate": 3.614998495821266e-06, + "loss": 0.1490997314453125, + "step": 100450 + }, + { + "epoch": 0.8686046813257127, + "grad_norm": 0.5839128035537561, + "learning_rate": 3.614799088101916e-06, + "loss": 0.0667724609375, + "step": 100455 + }, + { + "epoch": 0.868647914847256, + "grad_norm": 24.71455297259154, + "learning_rate": 3.614599677547165e-06, + "loss": 0.12690887451171876, + "step": 100460 + }, + { + "epoch": 0.8686911483687992, + "grad_norm": 34.411423124293044, + "learning_rate": 3.6144002641579367e-06, + "loss": 0.16238441467285156, + "step": 100465 + }, + { + "epoch": 0.8687343818903425, + "grad_norm": 59.77858224333604, + "learning_rate": 3.614200847935147e-06, + "loss": 0.18950653076171875, + "step": 100470 + }, + { + "epoch": 0.8687776154118858, + "grad_norm": 10.984760224726125, + "learning_rate": 3.6140014288797175e-06, + "loss": 0.308819580078125, + "step": 100475 + }, + { + "epoch": 0.868820848933429, + "grad_norm": 9.869221546514966, + "learning_rate": 3.6138020069925688e-06, + "loss": 0.14744796752929687, + "step": 100480 + }, + { + "epoch": 0.8688640824549723, + "grad_norm": 20.386730576001685, + "learning_rate": 3.6136025822746194e-06, + "loss": 0.1775909423828125, + "step": 100485 + }, + { + "epoch": 0.8689073159765156, + "grad_norm": 0.7132915114105353, + "learning_rate": 3.6134031547267884e-06, + "loss": 0.037103271484375, + "step": 100490 + }, + { + "epoch": 0.8689505494980588, + "grad_norm": 3.8489969714555503, + "learning_rate": 3.613203724349996e-06, + "loss": 0.2997222900390625, + "step": 100495 + }, + { + "epoch": 0.8689937830196021, + "grad_norm": 0.7371500730121943, + "learning_rate": 3.6130042911451624e-06, + "loss": 0.3211212158203125, + "step": 100500 + }, + { + "epoch": 0.8690370165411453, + "grad_norm": 0.4089301666249937, + "learning_rate": 3.612804855113208e-06, + "loss": 0.16439857482910156, + "step": 100505 + }, + { + "epoch": 0.8690802500626886, + "grad_norm": 0.2414884725277509, + "learning_rate": 3.61260541625505e-06, + "loss": 0.014413738250732422, + "step": 100510 + }, + { + "epoch": 0.8691234835842319, + "grad_norm": 34.513891341550625, + "learning_rate": 3.612405974571612e-06, + "loss": 0.15042724609375, + "step": 100515 + }, + { + "epoch": 0.8691667171057751, + "grad_norm": 16.66154863469796, + "learning_rate": 3.612206530063811e-06, + "loss": 0.04456253051757812, + "step": 100520 + }, + { + "epoch": 0.8692099506273184, + "grad_norm": 1.1485002096838448, + "learning_rate": 3.612007082732567e-06, + "loss": 0.14338226318359376, + "step": 100525 + }, + { + "epoch": 0.8692531841488617, + "grad_norm": 2.9834729159596836, + "learning_rate": 3.6118076325788005e-06, + "loss": 0.1509674072265625, + "step": 100530 + }, + { + "epoch": 0.8692964176704049, + "grad_norm": 17.01550207275728, + "learning_rate": 3.6116081796034318e-06, + "loss": 0.09769134521484375, + "step": 100535 + }, + { + "epoch": 0.8693396511919482, + "grad_norm": 1.8280014750477807, + "learning_rate": 3.6114087238073798e-06, + "loss": 0.1465911865234375, + "step": 100540 + }, + { + "epoch": 0.8693828847134915, + "grad_norm": 1.3367270211968476, + "learning_rate": 3.6112092651915653e-06, + "loss": 0.02023735046386719, + "step": 100545 + }, + { + "epoch": 0.8694261182350347, + "grad_norm": 18.2509390518845, + "learning_rate": 3.6110098037569077e-06, + "loss": 0.09395828247070312, + "step": 100550 + }, + { + "epoch": 0.869469351756578, + "grad_norm": 2.5015643873014226, + "learning_rate": 3.610810339504326e-06, + "loss": 0.08817558288574219, + "step": 100555 + }, + { + "epoch": 0.8695125852781213, + "grad_norm": 2.8339363166392695, + "learning_rate": 3.610610872434741e-06, + "loss": 0.41307964324951174, + "step": 100560 + }, + { + "epoch": 0.8695558187996645, + "grad_norm": 0.863209536320399, + "learning_rate": 3.6104114025490737e-06, + "loss": 0.3281513214111328, + "step": 100565 + }, + { + "epoch": 0.8695990523212078, + "grad_norm": 0.3853236131067141, + "learning_rate": 3.6102119298482422e-06, + "loss": 0.1645843505859375, + "step": 100570 + }, + { + "epoch": 0.869642285842751, + "grad_norm": 10.341044998819019, + "learning_rate": 3.6100124543331675e-06, + "loss": 0.19566726684570312, + "step": 100575 + }, + { + "epoch": 0.8696855193642943, + "grad_norm": 1.5168940500475179, + "learning_rate": 3.6098129760047694e-06, + "loss": 0.035001373291015624, + "step": 100580 + }, + { + "epoch": 0.8697287528858375, + "grad_norm": 0.4952793862058429, + "learning_rate": 3.609613494863967e-06, + "loss": 0.10293197631835938, + "step": 100585 + }, + { + "epoch": 0.8697719864073808, + "grad_norm": 3.6600953887097383, + "learning_rate": 3.609414010911682e-06, + "loss": 0.342877197265625, + "step": 100590 + }, + { + "epoch": 0.8698152199289241, + "grad_norm": 43.48051941923755, + "learning_rate": 3.6092145241488324e-06, + "loss": 0.4229240417480469, + "step": 100595 + }, + { + "epoch": 0.8698584534504673, + "grad_norm": 29.39381485787889, + "learning_rate": 3.60901503457634e-06, + "loss": 0.20551300048828125, + "step": 100600 + }, + { + "epoch": 0.8699016869720106, + "grad_norm": 25.176814244401758, + "learning_rate": 3.6088155421951237e-06, + "loss": 0.20219268798828124, + "step": 100605 + }, + { + "epoch": 0.8699449204935539, + "grad_norm": 0.8272736045743606, + "learning_rate": 3.608616047006104e-06, + "loss": 0.1108367919921875, + "step": 100610 + }, + { + "epoch": 0.8699881540150971, + "grad_norm": 4.193146498281373, + "learning_rate": 3.608416549010201e-06, + "loss": 0.09903316497802735, + "step": 100615 + }, + { + "epoch": 0.8700313875366404, + "grad_norm": 3.879605473040301, + "learning_rate": 3.6082170482083346e-06, + "loss": 0.14749298095703126, + "step": 100620 + }, + { + "epoch": 0.8700746210581837, + "grad_norm": 0.3846880385135658, + "learning_rate": 3.6080175446014253e-06, + "loss": 0.08094100952148438, + "step": 100625 + }, + { + "epoch": 0.8701178545797269, + "grad_norm": 35.05248512637155, + "learning_rate": 3.6078180381903923e-06, + "loss": 0.30181427001953126, + "step": 100630 + }, + { + "epoch": 0.8701610881012702, + "grad_norm": 7.741699100811906, + "learning_rate": 3.6076185289761566e-06, + "loss": 0.16954193115234376, + "step": 100635 + }, + { + "epoch": 0.8702043216228135, + "grad_norm": 9.365635018948401, + "learning_rate": 3.607419016959637e-06, + "loss": 0.13538360595703125, + "step": 100640 + }, + { + "epoch": 0.8702475551443567, + "grad_norm": 14.818710333450248, + "learning_rate": 3.6072195021417554e-06, + "loss": 0.10098133087158204, + "step": 100645 + }, + { + "epoch": 0.8702907886659, + "grad_norm": 7.4148678018425205, + "learning_rate": 3.607019984523431e-06, + "loss": 0.1363250732421875, + "step": 100650 + }, + { + "epoch": 0.8703340221874433, + "grad_norm": 4.563529126799491, + "learning_rate": 3.606820464105584e-06, + "loss": 0.04750518798828125, + "step": 100655 + }, + { + "epoch": 0.8703772557089865, + "grad_norm": 1.1904348365330168, + "learning_rate": 3.6066209408891348e-06, + "loss": 0.06490020751953125, + "step": 100660 + }, + { + "epoch": 0.8704204892305298, + "grad_norm": 0.27109732157414673, + "learning_rate": 3.606421414875003e-06, + "loss": 0.23091583251953124, + "step": 100665 + }, + { + "epoch": 0.8704637227520731, + "grad_norm": 1.5860374717500811, + "learning_rate": 3.606221886064109e-06, + "loss": 0.14062461853027344, + "step": 100670 + }, + { + "epoch": 0.8705069562736163, + "grad_norm": 0.443323029602676, + "learning_rate": 3.6060223544573743e-06, + "loss": 0.146014404296875, + "step": 100675 + }, + { + "epoch": 0.8705501897951595, + "grad_norm": 10.031716798582666, + "learning_rate": 3.605822820055718e-06, + "loss": 0.13746109008789062, + "step": 100680 + }, + { + "epoch": 0.8705934233167029, + "grad_norm": 5.156634281054544, + "learning_rate": 3.6056232828600593e-06, + "loss": 0.250555419921875, + "step": 100685 + }, + { + "epoch": 0.8706366568382461, + "grad_norm": 2.08129888176367, + "learning_rate": 3.6054237428713205e-06, + "loss": 0.37310791015625, + "step": 100690 + }, + { + "epoch": 0.8706798903597893, + "grad_norm": 2.849489311327214, + "learning_rate": 3.60522420009042e-06, + "loss": 0.045477294921875, + "step": 100695 + }, + { + "epoch": 0.8707231238813327, + "grad_norm": 4.258434086317555, + "learning_rate": 3.605024654518279e-06, + "loss": 0.15269622802734376, + "step": 100700 + }, + { + "epoch": 0.8707663574028759, + "grad_norm": 0.40256865784409046, + "learning_rate": 3.6048251061558185e-06, + "loss": 0.0706085205078125, + "step": 100705 + }, + { + "epoch": 0.8708095909244191, + "grad_norm": 0.5459738641700101, + "learning_rate": 3.6046255550039584e-06, + "loss": 0.09029474258422851, + "step": 100710 + }, + { + "epoch": 0.8708528244459625, + "grad_norm": 2.9786570145403646, + "learning_rate": 3.6044260010636176e-06, + "loss": 0.18319091796875, + "step": 100715 + }, + { + "epoch": 0.8708960579675057, + "grad_norm": 2.9833368520522114, + "learning_rate": 3.604226444335718e-06, + "loss": 0.12178306579589844, + "step": 100720 + }, + { + "epoch": 0.8709392914890489, + "grad_norm": 21.7633649040049, + "learning_rate": 3.6040268848211788e-06, + "loss": 0.1848541259765625, + "step": 100725 + }, + { + "epoch": 0.8709825250105923, + "grad_norm": 15.550064251815392, + "learning_rate": 3.6038273225209214e-06, + "loss": 0.350653076171875, + "step": 100730 + }, + { + "epoch": 0.8710257585321355, + "grad_norm": 0.7415911182847831, + "learning_rate": 3.603627757435866e-06, + "loss": 0.03828125, + "step": 100735 + }, + { + "epoch": 0.8710689920536787, + "grad_norm": 2.736662187766631, + "learning_rate": 3.603428189566933e-06, + "loss": 0.07059173583984375, + "step": 100740 + }, + { + "epoch": 0.871112225575222, + "grad_norm": 3.589194035325373, + "learning_rate": 3.603228618915042e-06, + "loss": 0.19585418701171875, + "step": 100745 + }, + { + "epoch": 0.8711554590967653, + "grad_norm": 3.98383358940503, + "learning_rate": 3.6030290454811137e-06, + "loss": 0.25041542053222654, + "step": 100750 + }, + { + "epoch": 0.8711986926183085, + "grad_norm": 0.6087362425620677, + "learning_rate": 3.602829469266069e-06, + "loss": 0.07393875122070312, + "step": 100755 + }, + { + "epoch": 0.8712419261398517, + "grad_norm": 9.623086820952961, + "learning_rate": 3.602629890270828e-06, + "loss": 0.18807373046875, + "step": 100760 + }, + { + "epoch": 0.8712851596613951, + "grad_norm": 3.008554461204689, + "learning_rate": 3.6024303084963116e-06, + "loss": 0.050205230712890625, + "step": 100765 + }, + { + "epoch": 0.8713283931829383, + "grad_norm": 35.67248827092504, + "learning_rate": 3.60223072394344e-06, + "loss": 0.0868804931640625, + "step": 100770 + }, + { + "epoch": 0.8713716267044815, + "grad_norm": 4.62834099760176, + "learning_rate": 3.6020311366131327e-06, + "loss": 0.23239288330078126, + "step": 100775 + }, + { + "epoch": 0.8714148602260249, + "grad_norm": 5.787335347001624, + "learning_rate": 3.601831546506311e-06, + "loss": 0.46418914794921873, + "step": 100780 + }, + { + "epoch": 0.8714580937475681, + "grad_norm": 32.9503636305785, + "learning_rate": 3.6016319536238954e-06, + "loss": 0.2290802001953125, + "step": 100785 + }, + { + "epoch": 0.8715013272691113, + "grad_norm": 0.1345282358047597, + "learning_rate": 3.601432357966807e-06, + "loss": 0.09250869750976562, + "step": 100790 + }, + { + "epoch": 0.8715445607906547, + "grad_norm": 7.0544211441930935, + "learning_rate": 3.601232759535965e-06, + "loss": 0.07985076904296876, + "step": 100795 + }, + { + "epoch": 0.8715877943121979, + "grad_norm": 7.840875853840199, + "learning_rate": 3.6010331583322916e-06, + "loss": 0.1158111572265625, + "step": 100800 + }, + { + "epoch": 0.8716310278337411, + "grad_norm": 7.390954814991234, + "learning_rate": 3.600833554356706e-06, + "loss": 0.07766609191894532, + "step": 100805 + }, + { + "epoch": 0.8716742613552845, + "grad_norm": 4.299823413624123, + "learning_rate": 3.6006339476101283e-06, + "loss": 0.20422821044921874, + "step": 100810 + }, + { + "epoch": 0.8717174948768277, + "grad_norm": 4.47020322302151, + "learning_rate": 3.6004343380934804e-06, + "loss": 0.1148956298828125, + "step": 100815 + }, + { + "epoch": 0.8717607283983709, + "grad_norm": 2.585568382355863, + "learning_rate": 3.600234725807683e-06, + "loss": 0.03338356018066406, + "step": 100820 + }, + { + "epoch": 0.8718039619199143, + "grad_norm": 17.4983839520733, + "learning_rate": 3.600035110753656e-06, + "loss": 0.11882095336914063, + "step": 100825 + }, + { + "epoch": 0.8718471954414575, + "grad_norm": 4.037871947755392, + "learning_rate": 3.5998354929323197e-06, + "loss": 0.0612945556640625, + "step": 100830 + }, + { + "epoch": 0.8718904289630007, + "grad_norm": 9.584264559663483, + "learning_rate": 3.599635872344595e-06, + "loss": 0.13663787841796876, + "step": 100835 + }, + { + "epoch": 0.8719336624845441, + "grad_norm": 2.7313568381507687, + "learning_rate": 3.5994362489914025e-06, + "loss": 0.2405517578125, + "step": 100840 + }, + { + "epoch": 0.8719768960060873, + "grad_norm": 0.3231167977606285, + "learning_rate": 3.599236622873663e-06, + "loss": 0.10433120727539062, + "step": 100845 + }, + { + "epoch": 0.8720201295276305, + "grad_norm": 0.689526704503462, + "learning_rate": 3.5990369939922983e-06, + "loss": 0.17262763977050782, + "step": 100850 + }, + { + "epoch": 0.8720633630491738, + "grad_norm": 9.535923491452623, + "learning_rate": 3.5988373623482275e-06, + "loss": 0.30660400390625, + "step": 100855 + }, + { + "epoch": 0.8721065965707171, + "grad_norm": 0.6471056137733894, + "learning_rate": 3.5986377279423702e-06, + "loss": 0.058597564697265625, + "step": 100860 + }, + { + "epoch": 0.8721498300922603, + "grad_norm": 6.400509365955627, + "learning_rate": 3.5984380907756497e-06, + "loss": 0.31662445068359374, + "step": 100865 + }, + { + "epoch": 0.8721930636138036, + "grad_norm": 3.1747406245384857, + "learning_rate": 3.5982384508489854e-06, + "loss": 0.05580463409423828, + "step": 100870 + }, + { + "epoch": 0.8722362971353469, + "grad_norm": 5.993372953127072, + "learning_rate": 3.5980388081632985e-06, + "loss": 0.4608612060546875, + "step": 100875 + }, + { + "epoch": 0.8722795306568901, + "grad_norm": 5.902867932999362, + "learning_rate": 3.5978391627195097e-06, + "loss": 0.07095775604248047, + "step": 100880 + }, + { + "epoch": 0.8723227641784334, + "grad_norm": 21.611187741432268, + "learning_rate": 3.5976395145185397e-06, + "loss": 0.10908737182617187, + "step": 100885 + }, + { + "epoch": 0.8723659976999767, + "grad_norm": 6.759505672292593, + "learning_rate": 3.5974398635613075e-06, + "loss": 0.226727294921875, + "step": 100890 + }, + { + "epoch": 0.8724092312215199, + "grad_norm": 28.959409294712728, + "learning_rate": 3.5972402098487364e-06, + "loss": 0.14319686889648436, + "step": 100895 + }, + { + "epoch": 0.8724524647430631, + "grad_norm": 0.2981387356228585, + "learning_rate": 3.5970405533817464e-06, + "loss": 0.3176296234130859, + "step": 100900 + }, + { + "epoch": 0.8724956982646065, + "grad_norm": 2.7053713617907276, + "learning_rate": 3.5968408941612576e-06, + "loss": 0.11339035034179687, + "step": 100905 + }, + { + "epoch": 0.8725389317861497, + "grad_norm": 14.22865690930774, + "learning_rate": 3.596641232188192e-06, + "loss": 0.14810943603515625, + "step": 100910 + }, + { + "epoch": 0.872582165307693, + "grad_norm": 11.797739727193438, + "learning_rate": 3.5964415674634694e-06, + "loss": 0.28074111938476565, + "step": 100915 + }, + { + "epoch": 0.8726253988292363, + "grad_norm": 3.5595425341151667, + "learning_rate": 3.5962418999880114e-06, + "loss": 0.15098876953125, + "step": 100920 + }, + { + "epoch": 0.8726686323507795, + "grad_norm": 0.08232555207621357, + "learning_rate": 3.5960422297627376e-06, + "loss": 0.23621368408203125, + "step": 100925 + }, + { + "epoch": 0.8727118658723227, + "grad_norm": 26.782186914840903, + "learning_rate": 3.595842556788571e-06, + "loss": 0.18599624633789064, + "step": 100930 + }, + { + "epoch": 0.872755099393866, + "grad_norm": 27.585657719743455, + "learning_rate": 3.59564288106643e-06, + "loss": 0.215875244140625, + "step": 100935 + }, + { + "epoch": 0.8727983329154093, + "grad_norm": 0.3296725315727957, + "learning_rate": 3.5954432025972367e-06, + "loss": 0.19420013427734376, + "step": 100940 + }, + { + "epoch": 0.8728415664369525, + "grad_norm": 4.018295703655455, + "learning_rate": 3.595243521381913e-06, + "loss": 0.10286636352539062, + "step": 100945 + }, + { + "epoch": 0.8728847999584958, + "grad_norm": 18.312458001705867, + "learning_rate": 3.5950438374213773e-06, + "loss": 0.2843742370605469, + "step": 100950 + }, + { + "epoch": 0.8729280334800391, + "grad_norm": 1.3607416758083877, + "learning_rate": 3.5948441507165528e-06, + "loss": 0.14872894287109376, + "step": 100955 + }, + { + "epoch": 0.8729712670015823, + "grad_norm": 7.216895528114752, + "learning_rate": 3.59464446126836e-06, + "loss": 0.4464366912841797, + "step": 100960 + }, + { + "epoch": 0.8730145005231256, + "grad_norm": 2.052264547862304, + "learning_rate": 3.5944447690777194e-06, + "loss": 0.30111083984375, + "step": 100965 + }, + { + "epoch": 0.8730577340446689, + "grad_norm": 0.606734262570417, + "learning_rate": 3.5942450741455512e-06, + "loss": 0.14995479583740234, + "step": 100970 + }, + { + "epoch": 0.8731009675662121, + "grad_norm": 0.9596328897677816, + "learning_rate": 3.5940453764727787e-06, + "loss": 0.0631744384765625, + "step": 100975 + }, + { + "epoch": 0.8731442010877554, + "grad_norm": 4.017444341228418, + "learning_rate": 3.59384567606032e-06, + "loss": 0.06550674438476563, + "step": 100980 + }, + { + "epoch": 0.8731874346092987, + "grad_norm": 0.2763620350959409, + "learning_rate": 3.5936459729090977e-06, + "loss": 0.032411956787109376, + "step": 100985 + }, + { + "epoch": 0.8732306681308419, + "grad_norm": 1.3203537881341294, + "learning_rate": 3.593446267020033e-06, + "loss": 0.06835098266601562, + "step": 100990 + }, + { + "epoch": 0.8732739016523852, + "grad_norm": 3.571389105358171, + "learning_rate": 3.593246558394047e-06, + "loss": 0.04085235595703125, + "step": 100995 + }, + { + "epoch": 0.8733171351739285, + "grad_norm": 39.779760687990404, + "learning_rate": 3.5930468470320597e-06, + "loss": 0.19575347900390624, + "step": 101000 + }, + { + "epoch": 0.8733603686954717, + "grad_norm": 1.8425058949806155, + "learning_rate": 3.592847132934993e-06, + "loss": 0.040679931640625, + "step": 101005 + }, + { + "epoch": 0.873403602217015, + "grad_norm": 4.85226461839162, + "learning_rate": 3.5926474161037676e-06, + "loss": 0.10084266662597656, + "step": 101010 + }, + { + "epoch": 0.8734468357385583, + "grad_norm": 0.4306367206316716, + "learning_rate": 3.5924476965393037e-06, + "loss": 0.10836601257324219, + "step": 101015 + }, + { + "epoch": 0.8734900692601015, + "grad_norm": 0.4908777375314338, + "learning_rate": 3.592247974242525e-06, + "loss": 0.43837432861328124, + "step": 101020 + }, + { + "epoch": 0.8735333027816448, + "grad_norm": 2.0636017065722982, + "learning_rate": 3.5920482492143507e-06, + "loss": 0.07698440551757812, + "step": 101025 + }, + { + "epoch": 0.873576536303188, + "grad_norm": 3.3836013731346104, + "learning_rate": 3.591848521455701e-06, + "loss": 0.03259735107421875, + "step": 101030 + }, + { + "epoch": 0.8736197698247313, + "grad_norm": 2.908625566219144, + "learning_rate": 3.5916487909674986e-06, + "loss": 0.18316574096679689, + "step": 101035 + }, + { + "epoch": 0.8736630033462746, + "grad_norm": 1.7756276137226201, + "learning_rate": 3.591449057750665e-06, + "loss": 0.054290771484375, + "step": 101040 + }, + { + "epoch": 0.8737062368678178, + "grad_norm": 0.30061024587895907, + "learning_rate": 3.591249321806121e-06, + "loss": 0.160162353515625, + "step": 101045 + }, + { + "epoch": 0.8737494703893611, + "grad_norm": 26.706566158389542, + "learning_rate": 3.5910495831347862e-06, + "loss": 0.09518470764160156, + "step": 101050 + }, + { + "epoch": 0.8737927039109044, + "grad_norm": 25.785971503424033, + "learning_rate": 3.5908498417375834e-06, + "loss": 0.59322509765625, + "step": 101055 + }, + { + "epoch": 0.8738359374324476, + "grad_norm": 0.31899995444435636, + "learning_rate": 3.590650097615433e-06, + "loss": 0.10245628356933593, + "step": 101060 + }, + { + "epoch": 0.8738791709539909, + "grad_norm": 23.82389210649001, + "learning_rate": 3.590450350769256e-06, + "loss": 0.07741737365722656, + "step": 101065 + }, + { + "epoch": 0.8739224044755342, + "grad_norm": 0.7341692587407285, + "learning_rate": 3.590250601199976e-06, + "loss": 0.30545654296875, + "step": 101070 + }, + { + "epoch": 0.8739656379970774, + "grad_norm": 48.515466055222234, + "learning_rate": 3.5900508489085116e-06, + "loss": 0.5578006744384766, + "step": 101075 + }, + { + "epoch": 0.8740088715186207, + "grad_norm": 14.654259405879147, + "learning_rate": 3.589851093895784e-06, + "loss": 0.16437530517578125, + "step": 101080 + }, + { + "epoch": 0.874052105040164, + "grad_norm": 0.6006489846729225, + "learning_rate": 3.5896513361627165e-06, + "loss": 0.19444503784179687, + "step": 101085 + }, + { + "epoch": 0.8740953385617072, + "grad_norm": 21.815224382444203, + "learning_rate": 3.5894515757102285e-06, + "loss": 0.12037277221679688, + "step": 101090 + }, + { + "epoch": 0.8741385720832505, + "grad_norm": 4.978051543663957, + "learning_rate": 3.5892518125392417e-06, + "loss": 0.110235595703125, + "step": 101095 + }, + { + "epoch": 0.8741818056047937, + "grad_norm": 3.8518397423519586, + "learning_rate": 3.5890520466506776e-06, + "loss": 0.06089324951171875, + "step": 101100 + }, + { + "epoch": 0.874225039126337, + "grad_norm": 0.2598764298154745, + "learning_rate": 3.5888522780454585e-06, + "loss": 0.2500213623046875, + "step": 101105 + }, + { + "epoch": 0.8742682726478802, + "grad_norm": 11.037050129511337, + "learning_rate": 3.588652506724504e-06, + "loss": 0.2303863525390625, + "step": 101110 + }, + { + "epoch": 0.8743115061694235, + "grad_norm": 1.8871794777074968, + "learning_rate": 3.588452732688736e-06, + "loss": 0.1348541259765625, + "step": 101115 + }, + { + "epoch": 0.8743547396909668, + "grad_norm": 2.519393612027648, + "learning_rate": 3.588252955939076e-06, + "loss": 0.033809661865234375, + "step": 101120 + }, + { + "epoch": 0.87439797321251, + "grad_norm": 2.2093142914993757, + "learning_rate": 3.5880531764764454e-06, + "loss": 0.2540069580078125, + "step": 101125 + }, + { + "epoch": 0.8744412067340533, + "grad_norm": 6.8750104619502785, + "learning_rate": 3.5878533943017653e-06, + "loss": 0.09310150146484375, + "step": 101130 + }, + { + "epoch": 0.8744844402555966, + "grad_norm": 43.53788443822023, + "learning_rate": 3.5876536094159577e-06, + "loss": 0.20842132568359376, + "step": 101135 + }, + { + "epoch": 0.8745276737771398, + "grad_norm": 1.0193750848359717, + "learning_rate": 3.587453821819944e-06, + "loss": 0.0782562255859375, + "step": 101140 + }, + { + "epoch": 0.8745709072986831, + "grad_norm": 15.533996976295743, + "learning_rate": 3.5872540315146447e-06, + "loss": 0.08006134033203124, + "step": 101145 + }, + { + "epoch": 0.8746141408202264, + "grad_norm": 7.389317245921459, + "learning_rate": 3.5870542385009808e-06, + "loss": 0.04894390106201172, + "step": 101150 + }, + { + "epoch": 0.8746573743417696, + "grad_norm": 36.312885313642674, + "learning_rate": 3.5868544427798756e-06, + "loss": 0.09130859375, + "step": 101155 + }, + { + "epoch": 0.8747006078633129, + "grad_norm": 30.52951703669858, + "learning_rate": 3.5866546443522494e-06, + "loss": 0.20424652099609375, + "step": 101160 + }, + { + "epoch": 0.8747438413848562, + "grad_norm": 1.8243933618334003, + "learning_rate": 3.586454843219024e-06, + "loss": 0.13333587646484374, + "step": 101165 + }, + { + "epoch": 0.8747870749063994, + "grad_norm": 0.14170892580342295, + "learning_rate": 3.5862550393811207e-06, + "loss": 0.1836761474609375, + "step": 101170 + }, + { + "epoch": 0.8748303084279427, + "grad_norm": 0.48112734699829557, + "learning_rate": 3.586055232839461e-06, + "loss": 0.0856414794921875, + "step": 101175 + }, + { + "epoch": 0.874873541949486, + "grad_norm": 2.108453100855991, + "learning_rate": 3.5858554235949657e-06, + "loss": 0.3091289520263672, + "step": 101180 + }, + { + "epoch": 0.8749167754710292, + "grad_norm": 19.681060525466982, + "learning_rate": 3.5856556116485577e-06, + "loss": 0.2182018280029297, + "step": 101185 + }, + { + "epoch": 0.8749600089925725, + "grad_norm": 35.65274774858315, + "learning_rate": 3.585455797001157e-06, + "loss": 0.37795867919921877, + "step": 101190 + }, + { + "epoch": 0.8750032425141158, + "grad_norm": 23.299820139611384, + "learning_rate": 3.585255979653687e-06, + "loss": 0.3669647216796875, + "step": 101195 + }, + { + "epoch": 0.875046476035659, + "grad_norm": 4.53778614367368, + "learning_rate": 3.5850561596070678e-06, + "loss": 0.07480316162109375, + "step": 101200 + }, + { + "epoch": 0.8750897095572022, + "grad_norm": 0.16835287151315656, + "learning_rate": 3.5848563368622204e-06, + "loss": 0.2310314178466797, + "step": 101205 + }, + { + "epoch": 0.8751329430787456, + "grad_norm": 340.70429170296285, + "learning_rate": 3.584656511420067e-06, + "loss": 0.1426422119140625, + "step": 101210 + }, + { + "epoch": 0.8751761766002888, + "grad_norm": 6.182742672091343, + "learning_rate": 3.5844566832815307e-06, + "loss": 0.33092269897460935, + "step": 101215 + }, + { + "epoch": 0.875219410121832, + "grad_norm": 1.5965889284307357, + "learning_rate": 3.5842568524475323e-06, + "loss": 0.07257881164550781, + "step": 101220 + }, + { + "epoch": 0.8752626436433754, + "grad_norm": 0.8237356657139386, + "learning_rate": 3.584057018918991e-06, + "loss": 0.2558483123779297, + "step": 101225 + }, + { + "epoch": 0.8753058771649186, + "grad_norm": 11.686390425723287, + "learning_rate": 3.5838571826968316e-06, + "loss": 0.08653564453125, + "step": 101230 + }, + { + "epoch": 0.8753491106864618, + "grad_norm": 14.559485605102761, + "learning_rate": 3.5836573437819745e-06, + "loss": 0.09152069091796874, + "step": 101235 + }, + { + "epoch": 0.8753923442080052, + "grad_norm": 0.03663698182137616, + "learning_rate": 3.5834575021753403e-06, + "loss": 0.03874187469482422, + "step": 101240 + }, + { + "epoch": 0.8754355777295484, + "grad_norm": 51.11907843450184, + "learning_rate": 3.5832576578778528e-06, + "loss": 0.193731689453125, + "step": 101245 + }, + { + "epoch": 0.8754788112510916, + "grad_norm": 12.057119092085033, + "learning_rate": 3.5830578108904325e-06, + "loss": 0.07875518798828125, + "step": 101250 + }, + { + "epoch": 0.875522044772635, + "grad_norm": 2.7381938399806813, + "learning_rate": 3.5828579612140002e-06, + "loss": 0.194158935546875, + "step": 101255 + }, + { + "epoch": 0.8755652782941782, + "grad_norm": 0.7952619966931915, + "learning_rate": 3.582658108849479e-06, + "loss": 0.081951904296875, + "step": 101260 + }, + { + "epoch": 0.8756085118157214, + "grad_norm": 4.112262740679304, + "learning_rate": 3.58245825379779e-06, + "loss": 0.03918399810791016, + "step": 101265 + }, + { + "epoch": 0.8756517453372648, + "grad_norm": 0.11062169030569054, + "learning_rate": 3.582258396059855e-06, + "loss": 0.006484222412109375, + "step": 101270 + }, + { + "epoch": 0.875694978858808, + "grad_norm": 1.2078959915529182, + "learning_rate": 3.5820585356365957e-06, + "loss": 0.235198974609375, + "step": 101275 + }, + { + "epoch": 0.8757382123803512, + "grad_norm": 0.7625651120244276, + "learning_rate": 3.5818586725289348e-06, + "loss": 0.088995361328125, + "step": 101280 + }, + { + "epoch": 0.8757814459018944, + "grad_norm": 11.832540883629731, + "learning_rate": 3.5816588067377913e-06, + "loss": 0.1696624755859375, + "step": 101285 + }, + { + "epoch": 0.8758246794234378, + "grad_norm": 14.099185602758594, + "learning_rate": 3.58145893826409e-06, + "loss": 0.322259521484375, + "step": 101290 + }, + { + "epoch": 0.875867912944981, + "grad_norm": 5.445554723406695, + "learning_rate": 3.5812590671087516e-06, + "loss": 0.08798370361328126, + "step": 101295 + }, + { + "epoch": 0.8759111464665242, + "grad_norm": 5.7095930420005265, + "learning_rate": 3.5810591932726976e-06, + "loss": 0.137410306930542, + "step": 101300 + }, + { + "epoch": 0.8759543799880676, + "grad_norm": 4.734219894128999, + "learning_rate": 3.580859316756849e-06, + "loss": 0.09842643737792969, + "step": 101305 + }, + { + "epoch": 0.8759976135096108, + "grad_norm": 3.862247138035238, + "learning_rate": 3.5806594375621296e-06, + "loss": 0.08259735107421876, + "step": 101310 + }, + { + "epoch": 0.876040847031154, + "grad_norm": 12.185521096004408, + "learning_rate": 3.5804595556894596e-06, + "loss": 0.19311866760253907, + "step": 101315 + }, + { + "epoch": 0.8760840805526974, + "grad_norm": 2.9388144408308365, + "learning_rate": 3.5802596711397615e-06, + "loss": 0.07694244384765625, + "step": 101320 + }, + { + "epoch": 0.8761273140742406, + "grad_norm": 2.821480398119184, + "learning_rate": 3.5800597839139576e-06, + "loss": 0.05678825378417969, + "step": 101325 + }, + { + "epoch": 0.8761705475957838, + "grad_norm": 3.6372611194511926, + "learning_rate": 3.5798598940129687e-06, + "loss": 0.06365032196044922, + "step": 101330 + }, + { + "epoch": 0.8762137811173272, + "grad_norm": 0.36115618203037914, + "learning_rate": 3.5796600014377173e-06, + "loss": 0.1400909423828125, + "step": 101335 + }, + { + "epoch": 0.8762570146388704, + "grad_norm": 22.392522003094932, + "learning_rate": 3.5794601061891256e-06, + "loss": 0.09854087829589844, + "step": 101340 + }, + { + "epoch": 0.8763002481604136, + "grad_norm": 3.0631890599142904, + "learning_rate": 3.579260208268114e-06, + "loss": 0.06380062103271485, + "step": 101345 + }, + { + "epoch": 0.876343481681957, + "grad_norm": 17.070250630524374, + "learning_rate": 3.5790603076756065e-06, + "loss": 0.2984569549560547, + "step": 101350 + }, + { + "epoch": 0.8763867152035002, + "grad_norm": 1.3285891035424076, + "learning_rate": 3.5788604044125234e-06, + "loss": 0.07791748046875, + "step": 101355 + }, + { + "epoch": 0.8764299487250434, + "grad_norm": 0.1469111119556094, + "learning_rate": 3.578660498479788e-06, + "loss": 0.066766357421875, + "step": 101360 + }, + { + "epoch": 0.8764731822465868, + "grad_norm": 0.22255078559255306, + "learning_rate": 3.578460589878321e-06, + "loss": 0.17732982635498046, + "step": 101365 + }, + { + "epoch": 0.87651641576813, + "grad_norm": 0.41982146449093355, + "learning_rate": 3.578260678609046e-06, + "loss": 0.19860992431640626, + "step": 101370 + }, + { + "epoch": 0.8765596492896732, + "grad_norm": 1.453163088678669, + "learning_rate": 3.5780607646728818e-06, + "loss": 0.1419891357421875, + "step": 101375 + }, + { + "epoch": 0.8766028828112165, + "grad_norm": 3.286323205625941, + "learning_rate": 3.577860848070754e-06, + "loss": 0.1360595703125, + "step": 101380 + }, + { + "epoch": 0.8766461163327598, + "grad_norm": 3.2611467397476877, + "learning_rate": 3.577660928803582e-06, + "loss": 0.30423431396484374, + "step": 101385 + }, + { + "epoch": 0.876689349854303, + "grad_norm": 1.9858698067134979, + "learning_rate": 3.5774610068722896e-06, + "loss": 0.07761611938476562, + "step": 101390 + }, + { + "epoch": 0.8767325833758463, + "grad_norm": 1.2232664729238756, + "learning_rate": 3.577261082277798e-06, + "loss": 0.06662330627441407, + "step": 101395 + }, + { + "epoch": 0.8767758168973896, + "grad_norm": 4.1020675000515, + "learning_rate": 3.577061155021029e-06, + "loss": 0.0848358154296875, + "step": 101400 + }, + { + "epoch": 0.8768190504189328, + "grad_norm": 4.729180170102494, + "learning_rate": 3.5768612251029046e-06, + "loss": 0.171990966796875, + "step": 101405 + }, + { + "epoch": 0.876862283940476, + "grad_norm": 2.484522894035191, + "learning_rate": 3.5766612925243483e-06, + "loss": 0.07844390869140624, + "step": 101410 + }, + { + "epoch": 0.8769055174620194, + "grad_norm": 14.361610941486411, + "learning_rate": 3.5764613572862805e-06, + "loss": 0.13904037475585937, + "step": 101415 + }, + { + "epoch": 0.8769487509835626, + "grad_norm": 14.48892705733945, + "learning_rate": 3.5762614193896236e-06, + "loss": 0.15247802734375, + "step": 101420 + }, + { + "epoch": 0.8769919845051058, + "grad_norm": 6.441024356176173, + "learning_rate": 3.5760614788353005e-06, + "loss": 0.05004615783691406, + "step": 101425 + }, + { + "epoch": 0.8770352180266492, + "grad_norm": 2.080205662577626, + "learning_rate": 3.5758615356242314e-06, + "loss": 0.6852127075195312, + "step": 101430 + }, + { + "epoch": 0.8770784515481924, + "grad_norm": 1.7337136204751693, + "learning_rate": 3.575661589757341e-06, + "loss": 0.24479217529296876, + "step": 101435 + }, + { + "epoch": 0.8771216850697356, + "grad_norm": 17.452269181475927, + "learning_rate": 3.5754616412355505e-06, + "loss": 0.3136474609375, + "step": 101440 + }, + { + "epoch": 0.877164918591279, + "grad_norm": 1.108738187674609, + "learning_rate": 3.5752616900597807e-06, + "loss": 0.3142425537109375, + "step": 101445 + }, + { + "epoch": 0.8772081521128222, + "grad_norm": 14.969883045231121, + "learning_rate": 3.5750617362309562e-06, + "loss": 0.05867767333984375, + "step": 101450 + }, + { + "epoch": 0.8772513856343654, + "grad_norm": 0.37382818466784257, + "learning_rate": 3.5748617797499967e-06, + "loss": 0.1497112274169922, + "step": 101455 + }, + { + "epoch": 0.8772946191559087, + "grad_norm": 7.713276580253324, + "learning_rate": 3.5746618206178257e-06, + "loss": 0.26551284790039065, + "step": 101460 + }, + { + "epoch": 0.877337852677452, + "grad_norm": 1.867352984199584, + "learning_rate": 3.574461858835365e-06, + "loss": 0.17701873779296876, + "step": 101465 + }, + { + "epoch": 0.8773810861989952, + "grad_norm": 7.9613380955282125, + "learning_rate": 3.5742618944035374e-06, + "loss": 0.21169586181640626, + "step": 101470 + }, + { + "epoch": 0.8774243197205385, + "grad_norm": 3.891907417561949, + "learning_rate": 3.574061927323265e-06, + "loss": 0.0909423828125, + "step": 101475 + }, + { + "epoch": 0.8774675532420818, + "grad_norm": 15.260501108406999, + "learning_rate": 3.573861957595468e-06, + "loss": 0.12506561279296874, + "step": 101480 + }, + { + "epoch": 0.877510786763625, + "grad_norm": 0.3056409815297064, + "learning_rate": 3.5736619852210716e-06, + "loss": 0.09248085021972656, + "step": 101485 + }, + { + "epoch": 0.8775540202851683, + "grad_norm": 3.4495826054769445, + "learning_rate": 3.573462010200997e-06, + "loss": 0.04079704284667969, + "step": 101490 + }, + { + "epoch": 0.8775972538067116, + "grad_norm": 13.847068432307399, + "learning_rate": 3.573262032536166e-06, + "loss": 0.29104766845703123, + "step": 101495 + }, + { + "epoch": 0.8776404873282548, + "grad_norm": 1.2275022261039636, + "learning_rate": 3.5730620522275013e-06, + "loss": 0.038678741455078124, + "step": 101500 + }, + { + "epoch": 0.8776837208497981, + "grad_norm": 30.774504175498993, + "learning_rate": 3.5728620692759253e-06, + "loss": 0.2406097412109375, + "step": 101505 + }, + { + "epoch": 0.8777269543713414, + "grad_norm": 3.1127902076911167, + "learning_rate": 3.5726620836823595e-06, + "loss": 0.1747039794921875, + "step": 101510 + }, + { + "epoch": 0.8777701878928846, + "grad_norm": 13.371569083033537, + "learning_rate": 3.572462095447726e-06, + "loss": 0.1163330078125, + "step": 101515 + }, + { + "epoch": 0.8778134214144279, + "grad_norm": 22.323187933180208, + "learning_rate": 3.5722621045729494e-06, + "loss": 0.1414276123046875, + "step": 101520 + }, + { + "epoch": 0.8778566549359712, + "grad_norm": 1.1963315755270665, + "learning_rate": 3.5720621110589496e-06, + "loss": 0.1186065673828125, + "step": 101525 + }, + { + "epoch": 0.8778998884575144, + "grad_norm": 2.14887103289354, + "learning_rate": 3.5718621149066506e-06, + "loss": 0.06127853393554687, + "step": 101530 + }, + { + "epoch": 0.8779431219790577, + "grad_norm": 1.9721869673288108, + "learning_rate": 3.5716621161169747e-06, + "loss": 0.0809173583984375, + "step": 101535 + }, + { + "epoch": 0.877986355500601, + "grad_norm": 42.54663716818522, + "learning_rate": 3.571462114690842e-06, + "loss": 0.31136112213134765, + "step": 101540 + }, + { + "epoch": 0.8780295890221442, + "grad_norm": 17.770898656315094, + "learning_rate": 3.571262110629177e-06, + "loss": 0.11820831298828124, + "step": 101545 + }, + { + "epoch": 0.8780728225436875, + "grad_norm": 33.2563985374273, + "learning_rate": 3.571062103932902e-06, + "loss": 0.302606201171875, + "step": 101550 + }, + { + "epoch": 0.8781160560652307, + "grad_norm": 2.697367612049156, + "learning_rate": 3.570862094602939e-06, + "loss": 0.2094573974609375, + "step": 101555 + }, + { + "epoch": 0.878159289586774, + "grad_norm": 2.4576649836679145, + "learning_rate": 3.57066208264021e-06, + "loss": 0.047852706909179685, + "step": 101560 + }, + { + "epoch": 0.8782025231083173, + "grad_norm": 5.191395619080606, + "learning_rate": 3.5704620680456386e-06, + "loss": 0.086932373046875, + "step": 101565 + }, + { + "epoch": 0.8782457566298605, + "grad_norm": 1.3965580919906084, + "learning_rate": 3.570262050820146e-06, + "loss": 0.037329483032226565, + "step": 101570 + }, + { + "epoch": 0.8782889901514038, + "grad_norm": 3.726383163961197, + "learning_rate": 3.570062030964655e-06, + "loss": 0.1973358154296875, + "step": 101575 + }, + { + "epoch": 0.878332223672947, + "grad_norm": 1.5034263944731487, + "learning_rate": 3.5698620084800887e-06, + "loss": 0.20080413818359374, + "step": 101580 + }, + { + "epoch": 0.8783754571944903, + "grad_norm": 1.048338908556383, + "learning_rate": 3.5696619833673694e-06, + "loss": 0.0346282958984375, + "step": 101585 + }, + { + "epoch": 0.8784186907160336, + "grad_norm": 9.504469175754265, + "learning_rate": 3.569461955627419e-06, + "loss": 0.0757080078125, + "step": 101590 + }, + { + "epoch": 0.8784619242375769, + "grad_norm": 16.967743682906924, + "learning_rate": 3.5692619252611607e-06, + "loss": 0.1106353759765625, + "step": 101595 + }, + { + "epoch": 0.8785051577591201, + "grad_norm": 5.09942524883917, + "learning_rate": 3.5690618922695162e-06, + "loss": 0.04017906188964844, + "step": 101600 + }, + { + "epoch": 0.8785483912806634, + "grad_norm": 32.571467753531465, + "learning_rate": 3.568861856653409e-06, + "loss": 0.6247303009033203, + "step": 101605 + }, + { + "epoch": 0.8785916248022066, + "grad_norm": 30.507392400063083, + "learning_rate": 3.5686618184137608e-06, + "loss": 0.153045654296875, + "step": 101610 + }, + { + "epoch": 0.8786348583237499, + "grad_norm": 4.547735828798684, + "learning_rate": 3.568461777551495e-06, + "loss": 0.04881973266601562, + "step": 101615 + }, + { + "epoch": 0.8786780918452932, + "grad_norm": 0.8048389729759912, + "learning_rate": 3.5682617340675336e-06, + "loss": 0.08512306213378906, + "step": 101620 + }, + { + "epoch": 0.8787213253668364, + "grad_norm": 0.6875409967750059, + "learning_rate": 3.5680616879627993e-06, + "loss": 0.10732879638671874, + "step": 101625 + }, + { + "epoch": 0.8787645588883797, + "grad_norm": 11.216688725499578, + "learning_rate": 3.5678616392382137e-06, + "loss": 0.10264205932617188, + "step": 101630 + }, + { + "epoch": 0.8788077924099229, + "grad_norm": 17.247311640338825, + "learning_rate": 3.5676615878947017e-06, + "loss": 0.2760345458984375, + "step": 101635 + }, + { + "epoch": 0.8788510259314662, + "grad_norm": 17.998715839183795, + "learning_rate": 3.5674615339331837e-06, + "loss": 0.19362030029296876, + "step": 101640 + }, + { + "epoch": 0.8788942594530095, + "grad_norm": 6.298653694422525, + "learning_rate": 3.5672614773545846e-06, + "loss": 0.13597869873046875, + "step": 101645 + }, + { + "epoch": 0.8789374929745527, + "grad_norm": 33.13047572283895, + "learning_rate": 3.567061418159825e-06, + "loss": 0.23078155517578125, + "step": 101650 + }, + { + "epoch": 0.878980726496096, + "grad_norm": 68.73803344919378, + "learning_rate": 3.5668613563498264e-06, + "loss": 0.20349960327148436, + "step": 101655 + }, + { + "epoch": 0.8790239600176393, + "grad_norm": 9.412732608332535, + "learning_rate": 3.566661291925516e-06, + "loss": 0.12421607971191406, + "step": 101660 + }, + { + "epoch": 0.8790671935391825, + "grad_norm": 11.959201115816272, + "learning_rate": 3.5664612248878125e-06, + "loss": 0.26992034912109375, + "step": 101665 + }, + { + "epoch": 0.8791104270607258, + "grad_norm": 2.3534925341338844, + "learning_rate": 3.5662611552376405e-06, + "loss": 0.12491226196289062, + "step": 101670 + }, + { + "epoch": 0.8791536605822691, + "grad_norm": 0.3330049266774537, + "learning_rate": 3.566061082975922e-06, + "loss": 0.08915138244628906, + "step": 101675 + }, + { + "epoch": 0.8791968941038123, + "grad_norm": 40.07579849081915, + "learning_rate": 3.5658610081035786e-06, + "loss": 0.1878204345703125, + "step": 101680 + }, + { + "epoch": 0.8792401276253556, + "grad_norm": 3.45254745818708, + "learning_rate": 3.5656609306215354e-06, + "loss": 0.07742843627929688, + "step": 101685 + }, + { + "epoch": 0.8792833611468989, + "grad_norm": 109.1582118690888, + "learning_rate": 3.5654608505307132e-06, + "loss": 0.3653472900390625, + "step": 101690 + }, + { + "epoch": 0.8793265946684421, + "grad_norm": 1.7592338396600133, + "learning_rate": 3.5652607678320367e-06, + "loss": 0.1429229736328125, + "step": 101695 + }, + { + "epoch": 0.8793698281899854, + "grad_norm": 1.0902289722934435, + "learning_rate": 3.565060682526427e-06, + "loss": 0.04779129028320313, + "step": 101700 + }, + { + "epoch": 0.8794130617115287, + "grad_norm": 1.6180644959249906, + "learning_rate": 3.564860594614807e-06, + "loss": 0.277044677734375, + "step": 101705 + }, + { + "epoch": 0.8794562952330719, + "grad_norm": 2.09108124216466, + "learning_rate": 3.5646605040980994e-06, + "loss": 0.07780971527099609, + "step": 101710 + }, + { + "epoch": 0.8794995287546151, + "grad_norm": 6.784776431325465, + "learning_rate": 3.5644604109772284e-06, + "loss": 0.3074951171875, + "step": 101715 + }, + { + "epoch": 0.8795427622761585, + "grad_norm": 39.643059838749195, + "learning_rate": 3.564260315253115e-06, + "loss": 0.35509605407714845, + "step": 101720 + }, + { + "epoch": 0.8795859957977017, + "grad_norm": 22.170213252410942, + "learning_rate": 3.564060216926684e-06, + "loss": 0.0951019287109375, + "step": 101725 + }, + { + "epoch": 0.8796292293192449, + "grad_norm": 15.207411543475846, + "learning_rate": 3.563860115998857e-06, + "loss": 0.10286865234375, + "step": 101730 + }, + { + "epoch": 0.8796724628407883, + "grad_norm": 16.63107471088997, + "learning_rate": 3.563660012470556e-06, + "loss": 0.34539260864257815, + "step": 101735 + }, + { + "epoch": 0.8797156963623315, + "grad_norm": 0.2165121007793559, + "learning_rate": 3.563459906342705e-06, + "loss": 0.041679763793945314, + "step": 101740 + }, + { + "epoch": 0.8797589298838747, + "grad_norm": 0.8803436119090311, + "learning_rate": 3.563259797616228e-06, + "loss": 0.22846565246582032, + "step": 101745 + }, + { + "epoch": 0.8798021634054181, + "grad_norm": 43.11520409475488, + "learning_rate": 3.563059686292045e-06, + "loss": 0.3732881546020508, + "step": 101750 + }, + { + "epoch": 0.8798453969269613, + "grad_norm": 5.652618527504383, + "learning_rate": 3.5628595723710814e-06, + "loss": 0.03963813781738281, + "step": 101755 + }, + { + "epoch": 0.8798886304485045, + "grad_norm": 0.03865236897447805, + "learning_rate": 3.562659455854259e-06, + "loss": 0.12637062072753907, + "step": 101760 + }, + { + "epoch": 0.8799318639700479, + "grad_norm": 25.399073605806052, + "learning_rate": 3.562459336742501e-06, + "loss": 0.15162982940673828, + "step": 101765 + }, + { + "epoch": 0.8799750974915911, + "grad_norm": 4.9325980395425795, + "learning_rate": 3.5622592150367296e-06, + "loss": 0.1155181884765625, + "step": 101770 + }, + { + "epoch": 0.8800183310131343, + "grad_norm": 1.934844444656028, + "learning_rate": 3.562059090737869e-06, + "loss": 0.2329925537109375, + "step": 101775 + }, + { + "epoch": 0.8800615645346777, + "grad_norm": 29.90696763154105, + "learning_rate": 3.561858963846841e-06, + "loss": 0.3512092590332031, + "step": 101780 + }, + { + "epoch": 0.8801047980562209, + "grad_norm": 1.4740490837360565, + "learning_rate": 3.56165883436457e-06, + "loss": 0.100537109375, + "step": 101785 + }, + { + "epoch": 0.8801480315777641, + "grad_norm": 25.605614312527738, + "learning_rate": 3.5614587022919774e-06, + "loss": 0.1994293212890625, + "step": 101790 + }, + { + "epoch": 0.8801912650993075, + "grad_norm": 38.67315301524677, + "learning_rate": 3.5612585676299865e-06, + "loss": 0.32297821044921876, + "step": 101795 + }, + { + "epoch": 0.8802344986208507, + "grad_norm": 0.7223946903654747, + "learning_rate": 3.561058430379522e-06, + "loss": 0.1947784423828125, + "step": 101800 + }, + { + "epoch": 0.8802777321423939, + "grad_norm": 9.751493888809424, + "learning_rate": 3.5608582905415045e-06, + "loss": 0.12694664001464845, + "step": 101805 + }, + { + "epoch": 0.8803209656639371, + "grad_norm": 1.847938129196579, + "learning_rate": 3.5606581481168586e-06, + "loss": 0.03956184387207031, + "step": 101810 + }, + { + "epoch": 0.8803641991854805, + "grad_norm": 0.15771181439594087, + "learning_rate": 3.5604580031065066e-06, + "loss": 0.021432876586914062, + "step": 101815 + }, + { + "epoch": 0.8804074327070237, + "grad_norm": 4.011124146675865, + "learning_rate": 3.560257855511372e-06, + "loss": 0.09148788452148438, + "step": 101820 + }, + { + "epoch": 0.8804506662285669, + "grad_norm": 0.6308278055379105, + "learning_rate": 3.5600577053323776e-06, + "loss": 0.02714691162109375, + "step": 101825 + }, + { + "epoch": 0.8804938997501103, + "grad_norm": 8.18970560800291, + "learning_rate": 3.559857552570446e-06, + "loss": 0.0395965576171875, + "step": 101830 + }, + { + "epoch": 0.8805371332716535, + "grad_norm": 0.5436865793947603, + "learning_rate": 3.559657397226502e-06, + "loss": 0.06828079223632813, + "step": 101835 + }, + { + "epoch": 0.8805803667931967, + "grad_norm": 2.1872588388435803, + "learning_rate": 3.559457239301467e-06, + "loss": 0.0194671630859375, + "step": 101840 + }, + { + "epoch": 0.8806236003147401, + "grad_norm": 16.115193182942797, + "learning_rate": 3.559257078796264e-06, + "loss": 0.5649642944335938, + "step": 101845 + }, + { + "epoch": 0.8806668338362833, + "grad_norm": 0.8641380827728737, + "learning_rate": 3.559056915711818e-06, + "loss": 0.0931121826171875, + "step": 101850 + }, + { + "epoch": 0.8807100673578265, + "grad_norm": 0.31636213740279207, + "learning_rate": 3.55885675004905e-06, + "loss": 0.1569000244140625, + "step": 101855 + }, + { + "epoch": 0.8807533008793699, + "grad_norm": 4.762657922933808, + "learning_rate": 3.5586565818088833e-06, + "loss": 0.19491653442382811, + "step": 101860 + }, + { + "epoch": 0.8807965344009131, + "grad_norm": 21.604255292454877, + "learning_rate": 3.558456410992243e-06, + "loss": 0.29626922607421874, + "step": 101865 + }, + { + "epoch": 0.8808397679224563, + "grad_norm": 10.27552980203877, + "learning_rate": 3.5582562376000515e-06, + "loss": 0.0855133056640625, + "step": 101870 + }, + { + "epoch": 0.8808830014439997, + "grad_norm": 1.0833335880334654, + "learning_rate": 3.558056061633231e-06, + "loss": 0.16785736083984376, + "step": 101875 + }, + { + "epoch": 0.8809262349655429, + "grad_norm": 6.711833601686437, + "learning_rate": 3.557855883092705e-06, + "loss": 0.1394805908203125, + "step": 101880 + }, + { + "epoch": 0.8809694684870861, + "grad_norm": 3.0124751062144908, + "learning_rate": 3.557655701979397e-06, + "loss": 0.049151611328125, + "step": 101885 + }, + { + "epoch": 0.8810127020086294, + "grad_norm": 68.08912812592493, + "learning_rate": 3.5574555182942305e-06, + "loss": 0.1923095703125, + "step": 101890 + }, + { + "epoch": 0.8810559355301727, + "grad_norm": 16.84989544648189, + "learning_rate": 3.557255332038128e-06, + "loss": 0.3061737060546875, + "step": 101895 + }, + { + "epoch": 0.8810991690517159, + "grad_norm": 1.6894183332962167, + "learning_rate": 3.557055143212014e-06, + "loss": 0.07907257080078126, + "step": 101900 + }, + { + "epoch": 0.8811424025732592, + "grad_norm": 1.1179306840411498, + "learning_rate": 3.5568549518168095e-06, + "loss": 0.13328094482421876, + "step": 101905 + }, + { + "epoch": 0.8811856360948025, + "grad_norm": 0.9444639130844471, + "learning_rate": 3.5566547578534398e-06, + "loss": 0.13880767822265624, + "step": 101910 + }, + { + "epoch": 0.8812288696163457, + "grad_norm": 1.0054930348915458, + "learning_rate": 3.556454561322828e-06, + "loss": 0.124615478515625, + "step": 101915 + }, + { + "epoch": 0.881272103137889, + "grad_norm": 12.891837310419088, + "learning_rate": 3.556254362225897e-06, + "loss": 0.0873504638671875, + "step": 101920 + }, + { + "epoch": 0.8813153366594323, + "grad_norm": 14.047924108077284, + "learning_rate": 3.556054160563569e-06, + "loss": 0.2684593200683594, + "step": 101925 + }, + { + "epoch": 0.8813585701809755, + "grad_norm": 7.4883297499774475, + "learning_rate": 3.5558539563367693e-06, + "loss": 0.158282470703125, + "step": 101930 + }, + { + "epoch": 0.8814018037025187, + "grad_norm": 19.169773970703123, + "learning_rate": 3.55565374954642e-06, + "loss": 0.2320892333984375, + "step": 101935 + }, + { + "epoch": 0.8814450372240621, + "grad_norm": 3.817982870451789, + "learning_rate": 3.5554535401934442e-06, + "loss": 0.0747161865234375, + "step": 101940 + }, + { + "epoch": 0.8814882707456053, + "grad_norm": 0.4561378956651293, + "learning_rate": 3.5552533282787663e-06, + "loss": 0.03403263092041016, + "step": 101945 + }, + { + "epoch": 0.8815315042671485, + "grad_norm": 5.47511106709058, + "learning_rate": 3.5550531138033097e-06, + "loss": 0.08565101623535157, + "step": 101950 + }, + { + "epoch": 0.8815747377886919, + "grad_norm": 4.740687746477542, + "learning_rate": 3.554852896767996e-06, + "loss": 0.10922508239746094, + "step": 101955 + }, + { + "epoch": 0.8816179713102351, + "grad_norm": 2.376090323950065, + "learning_rate": 3.554652677173751e-06, + "loss": 0.017716217041015624, + "step": 101960 + }, + { + "epoch": 0.8816612048317783, + "grad_norm": 4.889288099239614, + "learning_rate": 3.5544524550214956e-06, + "loss": 0.09061260223388672, + "step": 101965 + }, + { + "epoch": 0.8817044383533217, + "grad_norm": 0.9297206593505668, + "learning_rate": 3.5542522303121542e-06, + "loss": 0.07343063354492188, + "step": 101970 + }, + { + "epoch": 0.8817476718748649, + "grad_norm": 6.075662685378685, + "learning_rate": 3.554052003046652e-06, + "loss": 0.148370361328125, + "step": 101975 + }, + { + "epoch": 0.8817909053964081, + "grad_norm": 1.990197200556104, + "learning_rate": 3.5538517732259107e-06, + "loss": 0.1937896728515625, + "step": 101980 + }, + { + "epoch": 0.8818341389179514, + "grad_norm": 14.364259201385204, + "learning_rate": 3.553651540850854e-06, + "loss": 0.1305816650390625, + "step": 101985 + }, + { + "epoch": 0.8818773724394947, + "grad_norm": 1.7991457352568836, + "learning_rate": 3.5534513059224043e-06, + "loss": 0.0203125, + "step": 101990 + }, + { + "epoch": 0.8819206059610379, + "grad_norm": 3.7013198118518424, + "learning_rate": 3.553251068441486e-06, + "loss": 0.21518478393554688, + "step": 101995 + }, + { + "epoch": 0.8819638394825812, + "grad_norm": 43.92024460501172, + "learning_rate": 3.5530508284090236e-06, + "loss": 0.17639808654785155, + "step": 102000 + }, + { + "epoch": 0.8820070730041245, + "grad_norm": 30.949315284844207, + "learning_rate": 3.5528505858259394e-06, + "loss": 0.182763671875, + "step": 102005 + }, + { + "epoch": 0.8820503065256677, + "grad_norm": 4.573660782081177, + "learning_rate": 3.5526503406931573e-06, + "loss": 0.8120525360107422, + "step": 102010 + }, + { + "epoch": 0.882093540047211, + "grad_norm": 30.496454791264725, + "learning_rate": 3.5524500930116e-06, + "loss": 0.14827117919921876, + "step": 102015 + }, + { + "epoch": 0.8821367735687543, + "grad_norm": 0.9179700890680039, + "learning_rate": 3.552249842782192e-06, + "loss": 0.0853302001953125, + "step": 102020 + }, + { + "epoch": 0.8821800070902975, + "grad_norm": 1.0656602479796302, + "learning_rate": 3.552049590005856e-06, + "loss": 0.36363525390625, + "step": 102025 + }, + { + "epoch": 0.8822232406118408, + "grad_norm": 2.1290795203499626, + "learning_rate": 3.551849334683517e-06, + "loss": 0.0370635986328125, + "step": 102030 + }, + { + "epoch": 0.8822664741333841, + "grad_norm": 1.6747088421571776, + "learning_rate": 3.551649076816097e-06, + "loss": 0.074713134765625, + "step": 102035 + }, + { + "epoch": 0.8823097076549273, + "grad_norm": 8.666637562055856, + "learning_rate": 3.5514488164045205e-06, + "loss": 0.08282470703125, + "step": 102040 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.1289227145400684, + "learning_rate": 3.5512485534497116e-06, + "loss": 0.030712890625, + "step": 102045 + }, + { + "epoch": 0.8823961746980139, + "grad_norm": 5.5449636816361245, + "learning_rate": 3.5510482879525918e-06, + "loss": 0.32034454345703123, + "step": 102050 + }, + { + "epoch": 0.8824394082195571, + "grad_norm": 3.7384688502337284, + "learning_rate": 3.5508480199140864e-06, + "loss": 0.27437772750854494, + "step": 102055 + }, + { + "epoch": 0.8824826417411004, + "grad_norm": 2.268048744907366, + "learning_rate": 3.5506477493351187e-06, + "loss": 0.05029296875, + "step": 102060 + }, + { + "epoch": 0.8825258752626436, + "grad_norm": 14.385777855315116, + "learning_rate": 3.5504474762166128e-06, + "loss": 0.091796875, + "step": 102065 + }, + { + "epoch": 0.8825691087841869, + "grad_norm": 6.307489339489128, + "learning_rate": 3.550247200559491e-06, + "loss": 0.10646133422851563, + "step": 102070 + }, + { + "epoch": 0.8826123423057302, + "grad_norm": 37.04797144388934, + "learning_rate": 3.5500469223646774e-06, + "loss": 0.4322498321533203, + "step": 102075 + }, + { + "epoch": 0.8826555758272734, + "grad_norm": 2.1559718007455966, + "learning_rate": 3.5498466416330965e-06, + "loss": 0.3894866943359375, + "step": 102080 + }, + { + "epoch": 0.8826988093488167, + "grad_norm": 23.62333506597152, + "learning_rate": 3.549646358365671e-06, + "loss": 0.18422622680664064, + "step": 102085 + }, + { + "epoch": 0.88274204287036, + "grad_norm": 0.19034960371999607, + "learning_rate": 3.5494460725633266e-06, + "loss": 0.017539405822753908, + "step": 102090 + }, + { + "epoch": 0.8827852763919032, + "grad_norm": 22.095522259270613, + "learning_rate": 3.5492457842269844e-06, + "loss": 0.1384124755859375, + "step": 102095 + }, + { + "epoch": 0.8828285099134465, + "grad_norm": 2.6716773427806535, + "learning_rate": 3.5490454933575686e-06, + "loss": 0.1061065673828125, + "step": 102100 + }, + { + "epoch": 0.8828717434349898, + "grad_norm": 0.5210106879121682, + "learning_rate": 3.548845199956004e-06, + "loss": 0.13619384765625, + "step": 102105 + }, + { + "epoch": 0.882914976956533, + "grad_norm": 7.703721154840961, + "learning_rate": 3.5486449040232143e-06, + "loss": 0.38456268310546876, + "step": 102110 + }, + { + "epoch": 0.8829582104780763, + "grad_norm": 17.126977193317018, + "learning_rate": 3.548444605560122e-06, + "loss": 0.06295623779296874, + "step": 102115 + }, + { + "epoch": 0.8830014439996196, + "grad_norm": 2.735266686923094, + "learning_rate": 3.5482443045676522e-06, + "loss": 0.23781261444091797, + "step": 102120 + }, + { + "epoch": 0.8830446775211628, + "grad_norm": 0.23740521019491326, + "learning_rate": 3.5480440010467287e-06, + "loss": 0.0919830322265625, + "step": 102125 + }, + { + "epoch": 0.8830879110427061, + "grad_norm": 7.3940549941155655, + "learning_rate": 3.5478436949982733e-06, + "loss": 0.2643096923828125, + "step": 102130 + }, + { + "epoch": 0.8831311445642493, + "grad_norm": 7.827781637473362, + "learning_rate": 3.5476433864232117e-06, + "loss": 0.378936767578125, + "step": 102135 + }, + { + "epoch": 0.8831743780857926, + "grad_norm": 4.387018605135554, + "learning_rate": 3.5474430753224673e-06, + "loss": 0.3643310546875, + "step": 102140 + }, + { + "epoch": 0.8832176116073359, + "grad_norm": 2.97133925270106, + "learning_rate": 3.547242761696964e-06, + "loss": 0.18978347778320312, + "step": 102145 + }, + { + "epoch": 0.8832608451288791, + "grad_norm": 17.38786604743324, + "learning_rate": 3.5470424455476252e-06, + "loss": 0.1266510009765625, + "step": 102150 + }, + { + "epoch": 0.8833040786504224, + "grad_norm": 0.6627532128728333, + "learning_rate": 3.5468421268753753e-06, + "loss": 0.07996063232421875, + "step": 102155 + }, + { + "epoch": 0.8833473121719656, + "grad_norm": 6.213638604365339, + "learning_rate": 3.5466418056811373e-06, + "loss": 0.14765968322753906, + "step": 102160 + }, + { + "epoch": 0.8833905456935089, + "grad_norm": 13.389059638436036, + "learning_rate": 3.5464414819658353e-06, + "loss": 0.38787078857421875, + "step": 102165 + }, + { + "epoch": 0.8834337792150522, + "grad_norm": 24.745131718570548, + "learning_rate": 3.5462411557303947e-06, + "loss": 0.09127120971679688, + "step": 102170 + }, + { + "epoch": 0.8834770127365954, + "grad_norm": 3.4609843447623505, + "learning_rate": 3.546040826975738e-06, + "loss": 0.024969863891601562, + "step": 102175 + }, + { + "epoch": 0.8835202462581387, + "grad_norm": 1.5201768855665674, + "learning_rate": 3.5458404957027884e-06, + "loss": 0.35773696899414065, + "step": 102180 + }, + { + "epoch": 0.883563479779682, + "grad_norm": 1.183894908399828, + "learning_rate": 3.5456401619124708e-06, + "loss": 0.024631500244140625, + "step": 102185 + }, + { + "epoch": 0.8836067133012252, + "grad_norm": 23.998982733327697, + "learning_rate": 3.5454398256057094e-06, + "loss": 0.20500946044921875, + "step": 102190 + }, + { + "epoch": 0.8836499468227685, + "grad_norm": 6.46871810610965, + "learning_rate": 3.545239486783427e-06, + "loss": 0.03528594970703125, + "step": 102195 + }, + { + "epoch": 0.8836931803443118, + "grad_norm": 12.164721061770383, + "learning_rate": 3.545039145446549e-06, + "loss": 0.05586233139038086, + "step": 102200 + }, + { + "epoch": 0.883736413865855, + "grad_norm": 5.039096846647937, + "learning_rate": 3.544838801595999e-06, + "loss": 0.10323333740234375, + "step": 102205 + }, + { + "epoch": 0.8837796473873983, + "grad_norm": 3.4217117310646468, + "learning_rate": 3.5446384552326996e-06, + "loss": 0.1342803955078125, + "step": 102210 + }, + { + "epoch": 0.8838228809089416, + "grad_norm": 0.18781557937470136, + "learning_rate": 3.544438106357576e-06, + "loss": 0.04784736633300781, + "step": 102215 + }, + { + "epoch": 0.8838661144304848, + "grad_norm": 0.7301376436959013, + "learning_rate": 3.5442377549715525e-06, + "loss": 0.062744140625, + "step": 102220 + }, + { + "epoch": 0.8839093479520281, + "grad_norm": 7.097395671081739, + "learning_rate": 3.5440374010755514e-06, + "loss": 0.22936172485351564, + "step": 102225 + }, + { + "epoch": 0.8839525814735714, + "grad_norm": 2.6301999512080623, + "learning_rate": 3.5438370446704994e-06, + "loss": 0.0760833740234375, + "step": 102230 + }, + { + "epoch": 0.8839958149951146, + "grad_norm": 2.1831764163065297, + "learning_rate": 3.543636685757319e-06, + "loss": 0.1056365966796875, + "step": 102235 + }, + { + "epoch": 0.8840390485166578, + "grad_norm": 26.800121275630726, + "learning_rate": 3.543436324336933e-06, + "loss": 0.25761566162109373, + "step": 102240 + }, + { + "epoch": 0.8840822820382012, + "grad_norm": 5.164099397685939, + "learning_rate": 3.543235960410267e-06, + "loss": 0.03503341674804687, + "step": 102245 + }, + { + "epoch": 0.8841255155597444, + "grad_norm": 14.668495945103963, + "learning_rate": 3.543035593978245e-06, + "loss": 0.196630859375, + "step": 102250 + }, + { + "epoch": 0.8841687490812876, + "grad_norm": 2.381304986447472, + "learning_rate": 3.542835225041791e-06, + "loss": 0.10712966918945313, + "step": 102255 + }, + { + "epoch": 0.884211982602831, + "grad_norm": 89.49457101661915, + "learning_rate": 3.5426348536018282e-06, + "loss": 0.3893402099609375, + "step": 102260 + }, + { + "epoch": 0.8842552161243742, + "grad_norm": 3.103106953371401, + "learning_rate": 3.5424344796592826e-06, + "loss": 0.28418846130371095, + "step": 102265 + }, + { + "epoch": 0.8842984496459174, + "grad_norm": 1.3765948737484253, + "learning_rate": 3.5422341032150764e-06, + "loss": 0.14233551025390626, + "step": 102270 + }, + { + "epoch": 0.8843416831674608, + "grad_norm": 13.842940884375677, + "learning_rate": 3.5420337242701337e-06, + "loss": 0.130865478515625, + "step": 102275 + }, + { + "epoch": 0.884384916689004, + "grad_norm": 13.781336306679087, + "learning_rate": 3.5418333428253804e-06, + "loss": 0.34635162353515625, + "step": 102280 + }, + { + "epoch": 0.8844281502105472, + "grad_norm": 4.511009414424014, + "learning_rate": 3.5416329588817397e-06, + "loss": 0.05001678466796875, + "step": 102285 + }, + { + "epoch": 0.8844713837320906, + "grad_norm": 7.8522062368156, + "learning_rate": 3.541432572440135e-06, + "loss": 0.125054931640625, + "step": 102290 + }, + { + "epoch": 0.8845146172536338, + "grad_norm": 13.021317522354055, + "learning_rate": 3.5412321835014914e-06, + "loss": 0.12070465087890625, + "step": 102295 + }, + { + "epoch": 0.884557850775177, + "grad_norm": 25.672917930658254, + "learning_rate": 3.5410317920667324e-06, + "loss": 0.08935089111328125, + "step": 102300 + }, + { + "epoch": 0.8846010842967204, + "grad_norm": 2.3629015350352276, + "learning_rate": 3.5408313981367825e-06, + "loss": 0.036210250854492185, + "step": 102305 + }, + { + "epoch": 0.8846443178182636, + "grad_norm": 4.2449260805640785, + "learning_rate": 3.540631001712567e-06, + "loss": 0.18351821899414061, + "step": 102310 + }, + { + "epoch": 0.8846875513398068, + "grad_norm": 5.921349628544897, + "learning_rate": 3.5404306027950085e-06, + "loss": 0.1305419921875, + "step": 102315 + }, + { + "epoch": 0.8847307848613502, + "grad_norm": 10.846586576426358, + "learning_rate": 3.5402302013850313e-06, + "loss": 0.069049072265625, + "step": 102320 + }, + { + "epoch": 0.8847740183828934, + "grad_norm": 25.55620042897571, + "learning_rate": 3.5400297974835613e-06, + "loss": 0.16668701171875, + "step": 102325 + }, + { + "epoch": 0.8848172519044366, + "grad_norm": 0.7688241507072058, + "learning_rate": 3.5398293910915206e-06, + "loss": 0.17338409423828124, + "step": 102330 + }, + { + "epoch": 0.8848604854259798, + "grad_norm": 22.994401462723992, + "learning_rate": 3.5396289822098353e-06, + "loss": 0.2041534423828125, + "step": 102335 + }, + { + "epoch": 0.8849037189475232, + "grad_norm": 1.1912272150933665, + "learning_rate": 3.5394285708394275e-06, + "loss": 0.07698574066162109, + "step": 102340 + }, + { + "epoch": 0.8849469524690664, + "grad_norm": 1.5057798427006572, + "learning_rate": 3.5392281569812246e-06, + "loss": 0.23085708618164064, + "step": 102345 + }, + { + "epoch": 0.8849901859906096, + "grad_norm": 1.872141031731945, + "learning_rate": 3.539027740636148e-06, + "loss": 0.06446533203125, + "step": 102350 + }, + { + "epoch": 0.885033419512153, + "grad_norm": 17.317070234474585, + "learning_rate": 3.538827321805123e-06, + "loss": 0.0922760009765625, + "step": 102355 + }, + { + "epoch": 0.8850766530336962, + "grad_norm": 30.195792843775536, + "learning_rate": 3.5386269004890735e-06, + "loss": 0.1166046142578125, + "step": 102360 + }, + { + "epoch": 0.8851198865552394, + "grad_norm": 14.930525413466038, + "learning_rate": 3.538426476688926e-06, + "loss": 0.1309732437133789, + "step": 102365 + }, + { + "epoch": 0.8851631200767828, + "grad_norm": 0.12767101442163442, + "learning_rate": 3.5382260504056014e-06, + "loss": 0.13384170532226564, + "step": 102370 + }, + { + "epoch": 0.885206353598326, + "grad_norm": 7.931843940923768, + "learning_rate": 3.538025621640027e-06, + "loss": 0.14735450744628906, + "step": 102375 + }, + { + "epoch": 0.8852495871198692, + "grad_norm": 17.867587130338553, + "learning_rate": 3.537825190393126e-06, + "loss": 0.12789154052734375, + "step": 102380 + }, + { + "epoch": 0.8852928206414126, + "grad_norm": 13.178602387953012, + "learning_rate": 3.5376247566658224e-06, + "loss": 0.141571044921875, + "step": 102385 + }, + { + "epoch": 0.8853360541629558, + "grad_norm": 74.42772340864677, + "learning_rate": 3.53742432045904e-06, + "loss": 0.44880027770996095, + "step": 102390 + }, + { + "epoch": 0.885379287684499, + "grad_norm": 1.6818135367463787, + "learning_rate": 3.5372238817737056e-06, + "loss": 0.06220550537109375, + "step": 102395 + }, + { + "epoch": 0.8854225212060424, + "grad_norm": 0.16211647914616345, + "learning_rate": 3.5370234406107413e-06, + "loss": 0.12173843383789062, + "step": 102400 + }, + { + "epoch": 0.8854657547275856, + "grad_norm": 0.21209781407474476, + "learning_rate": 3.536822996971072e-06, + "loss": 0.04910011291503906, + "step": 102405 + }, + { + "epoch": 0.8855089882491288, + "grad_norm": 30.585818391801528, + "learning_rate": 3.536622550855624e-06, + "loss": 0.4830068588256836, + "step": 102410 + }, + { + "epoch": 0.885552221770672, + "grad_norm": 24.23433822926106, + "learning_rate": 3.5364221022653187e-06, + "loss": 0.19359893798828126, + "step": 102415 + }, + { + "epoch": 0.8855954552922154, + "grad_norm": 7.528613382124228, + "learning_rate": 3.5362216512010822e-06, + "loss": 0.0370335578918457, + "step": 102420 + }, + { + "epoch": 0.8856386888137586, + "grad_norm": 0.13880913935430708, + "learning_rate": 3.53602119766384e-06, + "loss": 0.10435905456542968, + "step": 102425 + }, + { + "epoch": 0.8856819223353019, + "grad_norm": 1.2386227223723492, + "learning_rate": 3.535820741654515e-06, + "loss": 0.17010078430175782, + "step": 102430 + }, + { + "epoch": 0.8857251558568452, + "grad_norm": 0.08724462715960454, + "learning_rate": 3.5356202831740315e-06, + "loss": 0.23714752197265626, + "step": 102435 + }, + { + "epoch": 0.8857683893783884, + "grad_norm": 4.891701459055476, + "learning_rate": 3.5354198222233145e-06, + "loss": 0.0805999755859375, + "step": 102440 + }, + { + "epoch": 0.8858116228999316, + "grad_norm": 50.6444076858931, + "learning_rate": 3.535219358803289e-06, + "loss": 0.11913871765136719, + "step": 102445 + }, + { + "epoch": 0.885854856421475, + "grad_norm": 11.457561388764018, + "learning_rate": 3.5350188929148785e-06, + "loss": 0.1655609130859375, + "step": 102450 + }, + { + "epoch": 0.8858980899430182, + "grad_norm": 6.511850954257005, + "learning_rate": 3.534818424559009e-06, + "loss": 0.06964244842529296, + "step": 102455 + }, + { + "epoch": 0.8859413234645614, + "grad_norm": 19.28074706221991, + "learning_rate": 3.534617953736604e-06, + "loss": 0.1259002685546875, + "step": 102460 + }, + { + "epoch": 0.8859845569861048, + "grad_norm": 0.8684198944267518, + "learning_rate": 3.5344174804485877e-06, + "loss": 0.0165435791015625, + "step": 102465 + }, + { + "epoch": 0.886027790507648, + "grad_norm": 1.66387389972233, + "learning_rate": 3.534217004695886e-06, + "loss": 0.16210556030273438, + "step": 102470 + }, + { + "epoch": 0.8860710240291912, + "grad_norm": 2.1004348880937003, + "learning_rate": 3.5340165264794223e-06, + "loss": 0.0427581787109375, + "step": 102475 + }, + { + "epoch": 0.8861142575507346, + "grad_norm": 4.286192430070307, + "learning_rate": 3.5338160458001205e-06, + "loss": 0.153802490234375, + "step": 102480 + }, + { + "epoch": 0.8861574910722778, + "grad_norm": 23.51224874858234, + "learning_rate": 3.533615562658908e-06, + "loss": 0.17445449829101561, + "step": 102485 + }, + { + "epoch": 0.886200724593821, + "grad_norm": 0.9626669575387776, + "learning_rate": 3.533415077056707e-06, + "loss": 0.18279266357421875, + "step": 102490 + }, + { + "epoch": 0.8862439581153644, + "grad_norm": 9.483906380396501, + "learning_rate": 3.5332145889944424e-06, + "loss": 0.1817913055419922, + "step": 102495 + }, + { + "epoch": 0.8862871916369076, + "grad_norm": 0.15997042967536546, + "learning_rate": 3.5330140984730395e-06, + "loss": 0.06743049621582031, + "step": 102500 + }, + { + "epoch": 0.8863304251584508, + "grad_norm": 14.411957990022094, + "learning_rate": 3.5328136054934225e-06, + "loss": 0.1437957763671875, + "step": 102505 + }, + { + "epoch": 0.8863736586799941, + "grad_norm": 10.620430363384887, + "learning_rate": 3.532613110056517e-06, + "loss": 0.07627716064453124, + "step": 102510 + }, + { + "epoch": 0.8864168922015374, + "grad_norm": 14.320008101030647, + "learning_rate": 3.5324126121632464e-06, + "loss": 0.15676116943359375, + "step": 102515 + }, + { + "epoch": 0.8864601257230806, + "grad_norm": 1.0505378946212456, + "learning_rate": 3.532212111814536e-06, + "loss": 0.2287311553955078, + "step": 102520 + }, + { + "epoch": 0.8865033592446239, + "grad_norm": 8.943545530299183, + "learning_rate": 3.53201160901131e-06, + "loss": 0.18006668090820313, + "step": 102525 + }, + { + "epoch": 0.8865465927661672, + "grad_norm": 12.041242812031197, + "learning_rate": 3.5318111037544933e-06, + "loss": 0.08473052978515624, + "step": 102530 + }, + { + "epoch": 0.8865898262877104, + "grad_norm": 8.385846748834789, + "learning_rate": 3.5316105960450114e-06, + "loss": 0.258038330078125, + "step": 102535 + }, + { + "epoch": 0.8866330598092537, + "grad_norm": 2.9541299763955147, + "learning_rate": 3.5314100858837885e-06, + "loss": 0.07529029846191407, + "step": 102540 + }, + { + "epoch": 0.886676293330797, + "grad_norm": 18.26744480453724, + "learning_rate": 3.531209573271749e-06, + "loss": 0.21860275268554688, + "step": 102545 + }, + { + "epoch": 0.8867195268523402, + "grad_norm": 0.9524184304869231, + "learning_rate": 3.531009058209818e-06, + "loss": 0.08935394287109374, + "step": 102550 + }, + { + "epoch": 0.8867627603738835, + "grad_norm": 2.243334436950814, + "learning_rate": 3.5308085406989197e-06, + "loss": 0.078265380859375, + "step": 102555 + }, + { + "epoch": 0.8868059938954268, + "grad_norm": 47.52486086048568, + "learning_rate": 3.530608020739979e-06, + "loss": 0.30717926025390624, + "step": 102560 + }, + { + "epoch": 0.88684922741697, + "grad_norm": 4.089508877266293, + "learning_rate": 3.530407498333922e-06, + "loss": 0.1660898208618164, + "step": 102565 + }, + { + "epoch": 0.8868924609385133, + "grad_norm": 1.420318703873718, + "learning_rate": 3.5302069734816724e-06, + "loss": 0.14432373046875, + "step": 102570 + }, + { + "epoch": 0.8869356944600566, + "grad_norm": 4.900783251688077, + "learning_rate": 3.5300064461841542e-06, + "loss": 0.0656005859375, + "step": 102575 + }, + { + "epoch": 0.8869789279815998, + "grad_norm": 2.3659935841093986, + "learning_rate": 3.529805916442294e-06, + "loss": 0.02229766845703125, + "step": 102580 + }, + { + "epoch": 0.8870221615031431, + "grad_norm": 9.891092363477428, + "learning_rate": 3.5296053842570153e-06, + "loss": 0.121868896484375, + "step": 102585 + }, + { + "epoch": 0.8870653950246863, + "grad_norm": 0.0625182540137649, + "learning_rate": 3.529404849629243e-06, + "loss": 0.04558906555175781, + "step": 102590 + }, + { + "epoch": 0.8871086285462296, + "grad_norm": 5.94227373461823, + "learning_rate": 3.5292043125599027e-06, + "loss": 0.14615478515625, + "step": 102595 + }, + { + "epoch": 0.8871518620677729, + "grad_norm": 2.3744929939685746, + "learning_rate": 3.5290037730499195e-06, + "loss": 0.09447174072265625, + "step": 102600 + }, + { + "epoch": 0.8871950955893161, + "grad_norm": 31.197288276697964, + "learning_rate": 3.528803231100217e-06, + "loss": 0.05932884216308594, + "step": 102605 + }, + { + "epoch": 0.8872383291108594, + "grad_norm": 5.3197200391306, + "learning_rate": 3.52860268671172e-06, + "loss": 0.34259033203125, + "step": 102610 + }, + { + "epoch": 0.8872815626324027, + "grad_norm": 3.0412460386002547, + "learning_rate": 3.5284021398853555e-06, + "loss": 0.10373497009277344, + "step": 102615 + }, + { + "epoch": 0.8873247961539459, + "grad_norm": 1.2395059153486165, + "learning_rate": 3.5282015906220466e-06, + "loss": 0.04138946533203125, + "step": 102620 + }, + { + "epoch": 0.8873680296754892, + "grad_norm": 16.73208178920937, + "learning_rate": 3.5280010389227176e-06, + "loss": 0.19077777862548828, + "step": 102625 + }, + { + "epoch": 0.8874112631970325, + "grad_norm": 0.18176059872330078, + "learning_rate": 3.5278004847882963e-06, + "loss": 0.26531219482421875, + "step": 102630 + }, + { + "epoch": 0.8874544967185757, + "grad_norm": 0.2398345663502306, + "learning_rate": 3.5275999282197047e-06, + "loss": 0.057108306884765626, + "step": 102635 + }, + { + "epoch": 0.887497730240119, + "grad_norm": 22.79399033503597, + "learning_rate": 3.5273993692178688e-06, + "loss": 0.21614990234375, + "step": 102640 + }, + { + "epoch": 0.8875409637616622, + "grad_norm": 12.135369414808757, + "learning_rate": 3.5271988077837137e-06, + "loss": 0.086474609375, + "step": 102645 + }, + { + "epoch": 0.8875841972832055, + "grad_norm": 1.5847707728602405, + "learning_rate": 3.5269982439181646e-06, + "loss": 0.1749034881591797, + "step": 102650 + }, + { + "epoch": 0.8876274308047488, + "grad_norm": 3.5886913180991735, + "learning_rate": 3.5267976776221455e-06, + "loss": 0.08815879821777343, + "step": 102655 + }, + { + "epoch": 0.887670664326292, + "grad_norm": 9.125550591358367, + "learning_rate": 3.5265971088965826e-06, + "loss": 0.2313201904296875, + "step": 102660 + }, + { + "epoch": 0.8877138978478353, + "grad_norm": 3.8915581012237443, + "learning_rate": 3.526396537742401e-06, + "loss": 0.1871723175048828, + "step": 102665 + }, + { + "epoch": 0.8877571313693786, + "grad_norm": 0.19982524396993342, + "learning_rate": 3.5261959641605233e-06, + "loss": 0.14723434448242187, + "step": 102670 + }, + { + "epoch": 0.8878003648909218, + "grad_norm": 50.86487425195838, + "learning_rate": 3.5259953881518772e-06, + "loss": 0.2942935943603516, + "step": 102675 + }, + { + "epoch": 0.8878435984124651, + "grad_norm": 2.9071648539705084, + "learning_rate": 3.5257948097173877e-06, + "loss": 0.3270111083984375, + "step": 102680 + }, + { + "epoch": 0.8878868319340083, + "grad_norm": 8.403996214858283, + "learning_rate": 3.5255942288579787e-06, + "loss": 0.20135650634765626, + "step": 102685 + }, + { + "epoch": 0.8879300654555516, + "grad_norm": 3.261428087289502, + "learning_rate": 3.5253936455745745e-06, + "loss": 0.15764312744140624, + "step": 102690 + }, + { + "epoch": 0.8879732989770949, + "grad_norm": 0.7678605636849024, + "learning_rate": 3.5251930598681025e-06, + "loss": 0.2157318115234375, + "step": 102695 + }, + { + "epoch": 0.8880165324986381, + "grad_norm": 0.7757647771361511, + "learning_rate": 3.524992471739485e-06, + "loss": 0.3965057373046875, + "step": 102700 + }, + { + "epoch": 0.8880597660201814, + "grad_norm": 9.782521535129515, + "learning_rate": 3.5247918811896496e-06, + "loss": 0.19683837890625, + "step": 102705 + }, + { + "epoch": 0.8881029995417247, + "grad_norm": 5.332537202703479, + "learning_rate": 3.5245912882195207e-06, + "loss": 0.7533462524414063, + "step": 102710 + }, + { + "epoch": 0.8881462330632679, + "grad_norm": 10.086431753488553, + "learning_rate": 3.524390692830023e-06, + "loss": 0.05200042724609375, + "step": 102715 + }, + { + "epoch": 0.8881894665848112, + "grad_norm": 3.987021728230105, + "learning_rate": 3.524190095022081e-06, + "loss": 0.08584365844726563, + "step": 102720 + }, + { + "epoch": 0.8882327001063545, + "grad_norm": 6.31477865873, + "learning_rate": 3.5239894947966204e-06, + "loss": 0.09575214385986328, + "step": 102725 + }, + { + "epoch": 0.8882759336278977, + "grad_norm": 0.6343219273752511, + "learning_rate": 3.5237888921545674e-06, + "loss": 0.22900962829589844, + "step": 102730 + }, + { + "epoch": 0.888319167149441, + "grad_norm": 0.9201433342101918, + "learning_rate": 3.5235882870968464e-06, + "loss": 0.40165252685546876, + "step": 102735 + }, + { + "epoch": 0.8883624006709843, + "grad_norm": 12.287010536123066, + "learning_rate": 3.5233876796243826e-06, + "loss": 0.13325347900390624, + "step": 102740 + }, + { + "epoch": 0.8884056341925275, + "grad_norm": 3.861371037777479, + "learning_rate": 3.523187069738101e-06, + "loss": 0.13587646484375, + "step": 102745 + }, + { + "epoch": 0.8884488677140708, + "grad_norm": 0.5171639802431136, + "learning_rate": 3.522986457438926e-06, + "loss": 0.039057159423828126, + "step": 102750 + }, + { + "epoch": 0.8884921012356141, + "grad_norm": 0.8470786379828267, + "learning_rate": 3.522785842727784e-06, + "loss": 0.05233001708984375, + "step": 102755 + }, + { + "epoch": 0.8885353347571573, + "grad_norm": 3.4913534714478986, + "learning_rate": 3.5225852256056006e-06, + "loss": 0.08708343505859376, + "step": 102760 + }, + { + "epoch": 0.8885785682787005, + "grad_norm": 0.11251677491390712, + "learning_rate": 3.5223846060733e-06, + "loss": 0.1130340576171875, + "step": 102765 + }, + { + "epoch": 0.8886218018002439, + "grad_norm": 32.29069611263616, + "learning_rate": 3.5221839841318075e-06, + "loss": 0.1142059326171875, + "step": 102770 + }, + { + "epoch": 0.8886650353217871, + "grad_norm": 0.3049625647645763, + "learning_rate": 3.521983359782049e-06, + "loss": 0.022177886962890626, + "step": 102775 + }, + { + "epoch": 0.8887082688433303, + "grad_norm": 0.14127787346681447, + "learning_rate": 3.5217827330249488e-06, + "loss": 0.2148406982421875, + "step": 102780 + }, + { + "epoch": 0.8887515023648737, + "grad_norm": 4.522934611452241, + "learning_rate": 3.521582103861433e-06, + "loss": 0.09207687377929688, + "step": 102785 + }, + { + "epoch": 0.8887947358864169, + "grad_norm": 1.0442632486319163, + "learning_rate": 3.5213814722924263e-06, + "loss": 0.5361557006835938, + "step": 102790 + }, + { + "epoch": 0.8888379694079601, + "grad_norm": 7.319441529374449, + "learning_rate": 3.5211808383188554e-06, + "loss": 0.07345809936523437, + "step": 102795 + }, + { + "epoch": 0.8888812029295035, + "grad_norm": 6.475479711465377, + "learning_rate": 3.5209802019416435e-06, + "loss": 0.0681549072265625, + "step": 102800 + }, + { + "epoch": 0.8889244364510467, + "grad_norm": 3.4210977212502236, + "learning_rate": 3.520779563161717e-06, + "loss": 0.1384002685546875, + "step": 102805 + }, + { + "epoch": 0.8889676699725899, + "grad_norm": 3.476745388745459, + "learning_rate": 3.5205789219800013e-06, + "loss": 0.06737060546875, + "step": 102810 + }, + { + "epoch": 0.8890109034941333, + "grad_norm": 17.331035780498375, + "learning_rate": 3.5203782783974215e-06, + "loss": 0.1008941650390625, + "step": 102815 + }, + { + "epoch": 0.8890541370156765, + "grad_norm": 6.936020580323903, + "learning_rate": 3.520177632414904e-06, + "loss": 0.045733642578125, + "step": 102820 + }, + { + "epoch": 0.8890973705372197, + "grad_norm": 7.991035123754767, + "learning_rate": 3.5199769840333724e-06, + "loss": 0.08838462829589844, + "step": 102825 + }, + { + "epoch": 0.889140604058763, + "grad_norm": 7.771115367778047, + "learning_rate": 3.5197763332537526e-06, + "loss": 0.041742134094238284, + "step": 102830 + }, + { + "epoch": 0.8891838375803063, + "grad_norm": 6.4562947630367535, + "learning_rate": 3.5195756800769716e-06, + "loss": 0.187384033203125, + "step": 102835 + }, + { + "epoch": 0.8892270711018495, + "grad_norm": 13.279570622338408, + "learning_rate": 3.5193750245039517e-06, + "loss": 0.10601348876953125, + "step": 102840 + }, + { + "epoch": 0.8892703046233928, + "grad_norm": 2.8075138429828383, + "learning_rate": 3.5191743665356205e-06, + "loss": 0.04993133544921875, + "step": 102845 + }, + { + "epoch": 0.8893135381449361, + "grad_norm": 1.6617139039378745, + "learning_rate": 3.518973706172904e-06, + "loss": 0.05054931640625, + "step": 102850 + }, + { + "epoch": 0.8893567716664793, + "grad_norm": 0.825085927399086, + "learning_rate": 3.518773043416726e-06, + "loss": 0.47245330810546876, + "step": 102855 + }, + { + "epoch": 0.8894000051880225, + "grad_norm": 15.894594954642638, + "learning_rate": 3.518572378268013e-06, + "loss": 0.050237274169921874, + "step": 102860 + }, + { + "epoch": 0.8894432387095659, + "grad_norm": 1.1125872346548251, + "learning_rate": 3.5183717107276895e-06, + "loss": 0.28780364990234375, + "step": 102865 + }, + { + "epoch": 0.8894864722311091, + "grad_norm": 55.79578828605443, + "learning_rate": 3.518171040796681e-06, + "loss": 0.46532669067382815, + "step": 102870 + }, + { + "epoch": 0.8895297057526523, + "grad_norm": 2.5137990341794723, + "learning_rate": 3.5179703684759145e-06, + "loss": 0.0580780029296875, + "step": 102875 + }, + { + "epoch": 0.8895729392741957, + "grad_norm": 0.5554233072804444, + "learning_rate": 3.5177696937663138e-06, + "loss": 0.11977462768554688, + "step": 102880 + }, + { + "epoch": 0.8896161727957389, + "grad_norm": 6.287140412181198, + "learning_rate": 3.5175690166688056e-06, + "loss": 0.25574684143066406, + "step": 102885 + }, + { + "epoch": 0.8896594063172821, + "grad_norm": 25.888068304072622, + "learning_rate": 3.517368337184315e-06, + "loss": 0.2691051483154297, + "step": 102890 + }, + { + "epoch": 0.8897026398388255, + "grad_norm": 0.8586455201273858, + "learning_rate": 3.5171676553137665e-06, + "loss": 0.0501861572265625, + "step": 102895 + }, + { + "epoch": 0.8897458733603687, + "grad_norm": 19.812164656656773, + "learning_rate": 3.5169669710580862e-06, + "loss": 0.4310333251953125, + "step": 102900 + }, + { + "epoch": 0.8897891068819119, + "grad_norm": 13.478263898719366, + "learning_rate": 3.5167662844182005e-06, + "loss": 0.14791011810302734, + "step": 102905 + }, + { + "epoch": 0.8898323404034553, + "grad_norm": 5.463716025325904, + "learning_rate": 3.5165655953950343e-06, + "loss": 0.1911041259765625, + "step": 102910 + }, + { + "epoch": 0.8898755739249985, + "grad_norm": 2.1790489904377504, + "learning_rate": 3.5163649039895134e-06, + "loss": 0.1827301025390625, + "step": 102915 + }, + { + "epoch": 0.8899188074465417, + "grad_norm": 1.8024470140924587, + "learning_rate": 3.5161642102025636e-06, + "loss": 0.2368896484375, + "step": 102920 + }, + { + "epoch": 0.8899620409680851, + "grad_norm": 2.1456692681158747, + "learning_rate": 3.5159635140351085e-06, + "loss": 0.1175628662109375, + "step": 102925 + }, + { + "epoch": 0.8900052744896283, + "grad_norm": 12.17001018038693, + "learning_rate": 3.5157628154880762e-06, + "loss": 0.0899169921875, + "step": 102930 + }, + { + "epoch": 0.8900485080111715, + "grad_norm": 2.118901460001233, + "learning_rate": 3.515562114562392e-06, + "loss": 0.149200439453125, + "step": 102935 + }, + { + "epoch": 0.8900917415327148, + "grad_norm": 10.747852286558443, + "learning_rate": 3.515361411258981e-06, + "loss": 0.11504974365234374, + "step": 102940 + }, + { + "epoch": 0.8901349750542581, + "grad_norm": 0.39108906199418847, + "learning_rate": 3.515160705578767e-06, + "loss": 0.14014129638671874, + "step": 102945 + }, + { + "epoch": 0.8901782085758013, + "grad_norm": 13.53581748243925, + "learning_rate": 3.514959997522678e-06, + "loss": 0.06708984375, + "step": 102950 + }, + { + "epoch": 0.8902214420973446, + "grad_norm": 9.395742205588624, + "learning_rate": 3.51475928709164e-06, + "loss": 0.2467184066772461, + "step": 102955 + }, + { + "epoch": 0.8902646756188879, + "grad_norm": 0.1832922412345889, + "learning_rate": 3.5145585742865763e-06, + "loss": 0.019428062438964843, + "step": 102960 + }, + { + "epoch": 0.8903079091404311, + "grad_norm": 0.03804310460769525, + "learning_rate": 3.514357859108415e-06, + "loss": 0.20803146362304686, + "step": 102965 + }, + { + "epoch": 0.8903511426619743, + "grad_norm": 12.566493593796364, + "learning_rate": 3.514157141558081e-06, + "loss": 0.11069221496582031, + "step": 102970 + }, + { + "epoch": 0.8903943761835177, + "grad_norm": 0.5067666400437985, + "learning_rate": 3.513956421636498e-06, + "loss": 0.12983245849609376, + "step": 102975 + }, + { + "epoch": 0.8904376097050609, + "grad_norm": 11.842062086568239, + "learning_rate": 3.5137556993445947e-06, + "loss": 0.04672927856445312, + "step": 102980 + }, + { + "epoch": 0.8904808432266041, + "grad_norm": 0.5494337092692592, + "learning_rate": 3.5135549746832956e-06, + "loss": 0.13285179138183595, + "step": 102985 + }, + { + "epoch": 0.8905240767481475, + "grad_norm": 20.757653342410812, + "learning_rate": 3.5133542476535256e-06, + "loss": 0.12749786376953126, + "step": 102990 + }, + { + "epoch": 0.8905673102696907, + "grad_norm": 10.341034112043456, + "learning_rate": 3.513153518256212e-06, + "loss": 0.1538116455078125, + "step": 102995 + }, + { + "epoch": 0.8906105437912339, + "grad_norm": 5.684493642683359, + "learning_rate": 3.5129527864922796e-06, + "loss": 0.074932861328125, + "step": 103000 + }, + { + "epoch": 0.8906537773127773, + "grad_norm": 55.99339937563329, + "learning_rate": 3.5127520523626535e-06, + "loss": 0.29107208251953126, + "step": 103005 + }, + { + "epoch": 0.8906970108343205, + "grad_norm": 0.21873455665153296, + "learning_rate": 3.51255131586826e-06, + "loss": 0.03445510864257813, + "step": 103010 + }, + { + "epoch": 0.8907402443558637, + "grad_norm": 4.447629948290647, + "learning_rate": 3.5123505770100265e-06, + "loss": 0.15279464721679686, + "step": 103015 + }, + { + "epoch": 0.8907834778774071, + "grad_norm": 0.4148464160794674, + "learning_rate": 3.5121498357888768e-06, + "loss": 0.282720947265625, + "step": 103020 + }, + { + "epoch": 0.8908267113989503, + "grad_norm": 1.4038115475091684, + "learning_rate": 3.511949092205737e-06, + "loss": 0.036565399169921874, + "step": 103025 + }, + { + "epoch": 0.8908699449204935, + "grad_norm": 0.9224107561612216, + "learning_rate": 3.5117483462615337e-06, + "loss": 0.2690155029296875, + "step": 103030 + }, + { + "epoch": 0.8909131784420368, + "grad_norm": 0.9481660623910134, + "learning_rate": 3.511547597957192e-06, + "loss": 0.12056427001953125, + "step": 103035 + }, + { + "epoch": 0.8909564119635801, + "grad_norm": 16.309776519842426, + "learning_rate": 3.5113468472936375e-06, + "loss": 0.266998291015625, + "step": 103040 + }, + { + "epoch": 0.8909996454851233, + "grad_norm": 3.1976005359814668, + "learning_rate": 3.511146094271797e-06, + "loss": 0.0829864501953125, + "step": 103045 + }, + { + "epoch": 0.8910428790066666, + "grad_norm": 33.81535580780105, + "learning_rate": 3.5109453388925966e-06, + "loss": 0.2684186935424805, + "step": 103050 + }, + { + "epoch": 0.8910861125282099, + "grad_norm": 40.04377278961801, + "learning_rate": 3.5107445811569607e-06, + "loss": 0.22532272338867188, + "step": 103055 + }, + { + "epoch": 0.8911293460497531, + "grad_norm": 29.218707132536842, + "learning_rate": 3.510543821065816e-06, + "loss": 0.10371932983398438, + "step": 103060 + }, + { + "epoch": 0.8911725795712964, + "grad_norm": 16.395406356653737, + "learning_rate": 3.510343058620088e-06, + "loss": 0.23482208251953124, + "step": 103065 + }, + { + "epoch": 0.8912158130928397, + "grad_norm": 52.894634659349656, + "learning_rate": 3.510142293820703e-06, + "loss": 0.3586452484130859, + "step": 103070 + }, + { + "epoch": 0.8912590466143829, + "grad_norm": 27.102982349121515, + "learning_rate": 3.509941526668587e-06, + "loss": 0.24171600341796876, + "step": 103075 + }, + { + "epoch": 0.8913022801359262, + "grad_norm": 4.5770460634817836, + "learning_rate": 3.5097407571646657e-06, + "loss": 0.10423355102539063, + "step": 103080 + }, + { + "epoch": 0.8913455136574695, + "grad_norm": 11.336788793956234, + "learning_rate": 3.509539985309865e-06, + "loss": 0.15217132568359376, + "step": 103085 + }, + { + "epoch": 0.8913887471790127, + "grad_norm": 2.1986701922133025, + "learning_rate": 3.5093392111051113e-06, + "loss": 0.05160255432128906, + "step": 103090 + }, + { + "epoch": 0.891431980700556, + "grad_norm": 3.3866792913660704, + "learning_rate": 3.5091384345513294e-06, + "loss": 0.4348121643066406, + "step": 103095 + }, + { + "epoch": 0.8914752142220993, + "grad_norm": 6.100229549231265, + "learning_rate": 3.508937655649446e-06, + "loss": 0.29596900939941406, + "step": 103100 + }, + { + "epoch": 0.8915184477436425, + "grad_norm": 19.673344103989113, + "learning_rate": 3.5087368744003883e-06, + "loss": 0.24117050170898438, + "step": 103105 + }, + { + "epoch": 0.8915616812651858, + "grad_norm": 1.5214107134594725, + "learning_rate": 3.5085360908050805e-06, + "loss": 0.095654296875, + "step": 103110 + }, + { + "epoch": 0.891604914786729, + "grad_norm": 6.219862477620611, + "learning_rate": 3.5083353048644493e-06, + "loss": 0.08723678588867187, + "step": 103115 + }, + { + "epoch": 0.8916481483082723, + "grad_norm": 3.9926692915956834, + "learning_rate": 3.5081345165794203e-06, + "loss": 0.02552337646484375, + "step": 103120 + }, + { + "epoch": 0.8916913818298156, + "grad_norm": 25.89370258992231, + "learning_rate": 3.5079337259509196e-06, + "loss": 0.098406982421875, + "step": 103125 + }, + { + "epoch": 0.8917346153513588, + "grad_norm": 3.1630918278251183, + "learning_rate": 3.507732932979874e-06, + "loss": 0.029919815063476563, + "step": 103130 + }, + { + "epoch": 0.8917778488729021, + "grad_norm": 14.714852437043222, + "learning_rate": 3.507532137667208e-06, + "loss": 0.0694793701171875, + "step": 103135 + }, + { + "epoch": 0.8918210823944454, + "grad_norm": 5.513725387681501, + "learning_rate": 3.5073313400138495e-06, + "loss": 0.03540802001953125, + "step": 103140 + }, + { + "epoch": 0.8918643159159886, + "grad_norm": 3.7357399444600037, + "learning_rate": 3.507130540020724e-06, + "loss": 0.16532440185546876, + "step": 103145 + }, + { + "epoch": 0.8919075494375319, + "grad_norm": 19.182973218099356, + "learning_rate": 3.5069297376887555e-06, + "loss": 0.21095771789550782, + "step": 103150 + }, + { + "epoch": 0.8919507829590752, + "grad_norm": 1.5643842509557575, + "learning_rate": 3.5067289330188736e-06, + "loss": 0.16482086181640626, + "step": 103155 + }, + { + "epoch": 0.8919940164806184, + "grad_norm": 7.168849230485674, + "learning_rate": 3.506528126012003e-06, + "loss": 0.09579315185546874, + "step": 103160 + }, + { + "epoch": 0.8920372500021617, + "grad_norm": 8.964155420604268, + "learning_rate": 3.5063273166690677e-06, + "loss": 0.04667816162109375, + "step": 103165 + }, + { + "epoch": 0.892080483523705, + "grad_norm": 0.22024825718949634, + "learning_rate": 3.506126504990997e-06, + "loss": 0.0526458740234375, + "step": 103170 + }, + { + "epoch": 0.8921237170452482, + "grad_norm": 15.508852765091776, + "learning_rate": 3.505925690978715e-06, + "loss": 0.4494926452636719, + "step": 103175 + }, + { + "epoch": 0.8921669505667915, + "grad_norm": 1.2968516050335173, + "learning_rate": 3.5057248746331478e-06, + "loss": 0.04616546630859375, + "step": 103180 + }, + { + "epoch": 0.8922101840883347, + "grad_norm": 11.873363163582393, + "learning_rate": 3.505524055955223e-06, + "loss": 0.09429702758789063, + "step": 103185 + }, + { + "epoch": 0.892253417609878, + "grad_norm": 37.13833716003552, + "learning_rate": 3.5053232349458664e-06, + "loss": 0.19507598876953125, + "step": 103190 + }, + { + "epoch": 0.8922966511314212, + "grad_norm": 10.87875224587134, + "learning_rate": 3.5051224116060027e-06, + "loss": 0.13218841552734376, + "step": 103195 + }, + { + "epoch": 0.8923398846529645, + "grad_norm": 2.898216154428331, + "learning_rate": 3.504921585936559e-06, + "loss": 0.12173919677734375, + "step": 103200 + }, + { + "epoch": 0.8923831181745078, + "grad_norm": 0.10257961365435277, + "learning_rate": 3.504720757938462e-06, + "loss": 0.2576385498046875, + "step": 103205 + }, + { + "epoch": 0.892426351696051, + "grad_norm": 27.77183357204441, + "learning_rate": 3.504519927612638e-06, + "loss": 0.17277679443359376, + "step": 103210 + }, + { + "epoch": 0.8924695852175943, + "grad_norm": 1.5290721296905487, + "learning_rate": 3.5043190949600115e-06, + "loss": 0.0535980224609375, + "step": 103215 + }, + { + "epoch": 0.8925128187391376, + "grad_norm": 2.980459471522609, + "learning_rate": 3.5041182599815108e-06, + "loss": 0.12919921875, + "step": 103220 + }, + { + "epoch": 0.8925560522606808, + "grad_norm": 18.50295646228552, + "learning_rate": 3.5039174226780616e-06, + "loss": 0.118963623046875, + "step": 103225 + }, + { + "epoch": 0.8925992857822241, + "grad_norm": 3.203733807745206, + "learning_rate": 3.5037165830505888e-06, + "loss": 0.063372802734375, + "step": 103230 + }, + { + "epoch": 0.8926425193037674, + "grad_norm": 0.7676239609322921, + "learning_rate": 3.5035157411000196e-06, + "loss": 0.12580642700195313, + "step": 103235 + }, + { + "epoch": 0.8926857528253106, + "grad_norm": 1.2641649419819367, + "learning_rate": 3.5033148968272804e-06, + "loss": 0.14253997802734375, + "step": 103240 + }, + { + "epoch": 0.8927289863468539, + "grad_norm": 9.16958444125858, + "learning_rate": 3.5031140502332973e-06, + "loss": 0.23739681243896485, + "step": 103245 + }, + { + "epoch": 0.8927722198683972, + "grad_norm": 2.8874984056319057, + "learning_rate": 3.502913201318997e-06, + "loss": 0.10773296356201172, + "step": 103250 + }, + { + "epoch": 0.8928154533899404, + "grad_norm": 1.2795972951873786, + "learning_rate": 3.5027123500853056e-06, + "loss": 0.0350677490234375, + "step": 103255 + }, + { + "epoch": 0.8928586869114837, + "grad_norm": 1.52691327081589, + "learning_rate": 3.502511496533149e-06, + "loss": 0.1845001220703125, + "step": 103260 + }, + { + "epoch": 0.892901920433027, + "grad_norm": 6.03168258411888, + "learning_rate": 3.5023106406634538e-06, + "loss": 0.15419845581054686, + "step": 103265 + }, + { + "epoch": 0.8929451539545702, + "grad_norm": 9.788024916589556, + "learning_rate": 3.5021097824771464e-06, + "loss": 0.0599151611328125, + "step": 103270 + }, + { + "epoch": 0.8929883874761135, + "grad_norm": 4.789115188355143, + "learning_rate": 3.501908921975153e-06, + "loss": 0.09192848205566406, + "step": 103275 + }, + { + "epoch": 0.8930316209976568, + "grad_norm": 0.10110543420432963, + "learning_rate": 3.5017080591584002e-06, + "loss": 0.13875808715820312, + "step": 103280 + }, + { + "epoch": 0.8930748545192, + "grad_norm": 4.036614130426427, + "learning_rate": 3.5015071940278135e-06, + "loss": 0.1575927734375, + "step": 103285 + }, + { + "epoch": 0.8931180880407432, + "grad_norm": 4.530091460958457, + "learning_rate": 3.5013063265843207e-06, + "loss": 0.3307762145996094, + "step": 103290 + }, + { + "epoch": 0.8931613215622866, + "grad_norm": 8.09377073296585, + "learning_rate": 3.5011054568288464e-06, + "loss": 0.17898521423339844, + "step": 103295 + }, + { + "epoch": 0.8932045550838298, + "grad_norm": 3.4077731473175668, + "learning_rate": 3.5009045847623194e-06, + "loss": 0.24094810485839843, + "step": 103300 + }, + { + "epoch": 0.893247788605373, + "grad_norm": 13.090162001146348, + "learning_rate": 3.500703710385664e-06, + "loss": 0.1344991683959961, + "step": 103305 + }, + { + "epoch": 0.8932910221269164, + "grad_norm": 4.806663873786169, + "learning_rate": 3.500502833699807e-06, + "loss": 0.24295120239257811, + "step": 103310 + }, + { + "epoch": 0.8933342556484596, + "grad_norm": 30.436628502816166, + "learning_rate": 3.500301954705676e-06, + "loss": 0.09773635864257812, + "step": 103315 + }, + { + "epoch": 0.8933774891700028, + "grad_norm": 2.856511863509687, + "learning_rate": 3.500101073404196e-06, + "loss": 0.047336578369140625, + "step": 103320 + }, + { + "epoch": 0.8934207226915462, + "grad_norm": 24.76330029084567, + "learning_rate": 3.499900189796294e-06, + "loss": 0.22479248046875, + "step": 103325 + }, + { + "epoch": 0.8934639562130894, + "grad_norm": 11.257402649208586, + "learning_rate": 3.4996993038828966e-06, + "loss": 0.1484954833984375, + "step": 103330 + }, + { + "epoch": 0.8935071897346326, + "grad_norm": 4.037894785571029, + "learning_rate": 3.4994984156649306e-06, + "loss": 0.11052684783935547, + "step": 103335 + }, + { + "epoch": 0.893550423256176, + "grad_norm": 52.699796535605806, + "learning_rate": 3.499297525143321e-06, + "loss": 0.39630279541015623, + "step": 103340 + }, + { + "epoch": 0.8935936567777192, + "grad_norm": 9.121827698687301, + "learning_rate": 3.4990966323189957e-06, + "loss": 0.074755859375, + "step": 103345 + }, + { + "epoch": 0.8936368902992624, + "grad_norm": 3.2926302248500723, + "learning_rate": 3.498895737192881e-06, + "loss": 0.17292556762695313, + "step": 103350 + }, + { + "epoch": 0.8936801238208057, + "grad_norm": 16.061470086983363, + "learning_rate": 3.498694839765904e-06, + "loss": 0.5939521789550781, + "step": 103355 + }, + { + "epoch": 0.893723357342349, + "grad_norm": 17.353726584232835, + "learning_rate": 3.4984939400389894e-06, + "loss": 0.09475231170654297, + "step": 103360 + }, + { + "epoch": 0.8937665908638922, + "grad_norm": 0.6616017686408779, + "learning_rate": 3.4982930380130655e-06, + "loss": 0.13200454711914061, + "step": 103365 + }, + { + "epoch": 0.8938098243854354, + "grad_norm": 4.00423546755181, + "learning_rate": 3.498092133689058e-06, + "loss": 0.08490657806396484, + "step": 103370 + }, + { + "epoch": 0.8938530579069788, + "grad_norm": 6.0698438539318875, + "learning_rate": 3.497891227067893e-06, + "loss": 0.24834442138671875, + "step": 103375 + }, + { + "epoch": 0.893896291428522, + "grad_norm": 0.3423014262511932, + "learning_rate": 3.4976903181504985e-06, + "loss": 0.37170867919921874, + "step": 103380 + }, + { + "epoch": 0.8939395249500652, + "grad_norm": 0.41344572036576877, + "learning_rate": 3.4974894069378e-06, + "loss": 0.06688423156738281, + "step": 103385 + }, + { + "epoch": 0.8939827584716086, + "grad_norm": 2.0440071753104383, + "learning_rate": 3.497288493430724e-06, + "loss": 0.016162109375, + "step": 103390 + }, + { + "epoch": 0.8940259919931518, + "grad_norm": 11.505047236298758, + "learning_rate": 3.4970875776301977e-06, + "loss": 0.1080596923828125, + "step": 103395 + }, + { + "epoch": 0.894069225514695, + "grad_norm": 0.4050152813603518, + "learning_rate": 3.496886659537147e-06, + "loss": 0.1591033935546875, + "step": 103400 + }, + { + "epoch": 0.8941124590362384, + "grad_norm": 4.864586545028007, + "learning_rate": 3.496685739152499e-06, + "loss": 0.0794107437133789, + "step": 103405 + }, + { + "epoch": 0.8941556925577816, + "grad_norm": 0.525696652461726, + "learning_rate": 3.49648481647718e-06, + "loss": 0.07129440307617188, + "step": 103410 + }, + { + "epoch": 0.8941989260793248, + "grad_norm": 3.919550145889767, + "learning_rate": 3.4962838915121187e-06, + "loss": 0.4815185546875, + "step": 103415 + }, + { + "epoch": 0.8942421596008682, + "grad_norm": 14.098894031683024, + "learning_rate": 3.4960829642582382e-06, + "loss": 0.28897247314453123, + "step": 103420 + }, + { + "epoch": 0.8942853931224114, + "grad_norm": 2.6233198280389556, + "learning_rate": 3.495882034716467e-06, + "loss": 0.21072845458984374, + "step": 103425 + }, + { + "epoch": 0.8943286266439546, + "grad_norm": 0.5477038333321547, + "learning_rate": 3.4956811028877317e-06, + "loss": 0.10326690673828125, + "step": 103430 + }, + { + "epoch": 0.894371860165498, + "grad_norm": 7.035154213198692, + "learning_rate": 3.495480168772959e-06, + "loss": 0.05743331909179687, + "step": 103435 + }, + { + "epoch": 0.8944150936870412, + "grad_norm": 1.4434378066330993, + "learning_rate": 3.495279232373076e-06, + "loss": 0.15582046508789063, + "step": 103440 + }, + { + "epoch": 0.8944583272085844, + "grad_norm": 34.93092702808316, + "learning_rate": 3.4950782936890094e-06, + "loss": 0.37218246459960935, + "step": 103445 + }, + { + "epoch": 0.8945015607301278, + "grad_norm": 7.183526370460374, + "learning_rate": 3.494877352721684e-06, + "loss": 0.085443115234375, + "step": 103450 + }, + { + "epoch": 0.894544794251671, + "grad_norm": 2.901425999527272, + "learning_rate": 3.494676409472029e-06, + "loss": 0.16406707763671874, + "step": 103455 + }, + { + "epoch": 0.8945880277732142, + "grad_norm": 8.087423303739714, + "learning_rate": 3.4944754639409695e-06, + "loss": 0.17590713500976562, + "step": 103460 + }, + { + "epoch": 0.8946312612947575, + "grad_norm": 3.7718761495560558, + "learning_rate": 3.494274516129434e-06, + "loss": 0.0509490966796875, + "step": 103465 + }, + { + "epoch": 0.8946744948163008, + "grad_norm": 47.32176085942549, + "learning_rate": 3.4940735660383466e-06, + "loss": 0.21419677734375, + "step": 103470 + }, + { + "epoch": 0.894717728337844, + "grad_norm": 1.8533136225376667, + "learning_rate": 3.493872613668637e-06, + "loss": 0.31121673583984377, + "step": 103475 + }, + { + "epoch": 0.8947609618593872, + "grad_norm": 3.2714827620737585, + "learning_rate": 3.49367165902123e-06, + "loss": 0.21199722290039064, + "step": 103480 + }, + { + "epoch": 0.8948041953809306, + "grad_norm": 0.41850660986569094, + "learning_rate": 3.4934707020970525e-06, + "loss": 0.05123443603515625, + "step": 103485 + }, + { + "epoch": 0.8948474289024738, + "grad_norm": 51.19184316166783, + "learning_rate": 3.4932697428970314e-06, + "loss": 0.47150726318359376, + "step": 103490 + }, + { + "epoch": 0.894890662424017, + "grad_norm": 3.770844370864399, + "learning_rate": 3.493068781422095e-06, + "loss": 0.47213134765625, + "step": 103495 + }, + { + "epoch": 0.8949338959455604, + "grad_norm": 4.00843591144031, + "learning_rate": 3.4928678176731685e-06, + "loss": 0.11311874389648438, + "step": 103500 + }, + { + "epoch": 0.8949771294671036, + "grad_norm": 0.20500904981137313, + "learning_rate": 3.4926668516511788e-06, + "loss": 0.08944549560546874, + "step": 103505 + }, + { + "epoch": 0.8950203629886468, + "grad_norm": 14.72228357523145, + "learning_rate": 3.4924658833570536e-06, + "loss": 0.14821701049804686, + "step": 103510 + }, + { + "epoch": 0.8950635965101902, + "grad_norm": 1.2211728359639107, + "learning_rate": 3.492264912791719e-06, + "loss": 0.07129058837890626, + "step": 103515 + }, + { + "epoch": 0.8951068300317334, + "grad_norm": 0.23687491853519352, + "learning_rate": 3.4920639399561017e-06, + "loss": 0.01675758361816406, + "step": 103520 + }, + { + "epoch": 0.8951500635532766, + "grad_norm": 5.946692458732817, + "learning_rate": 3.4918629648511295e-06, + "loss": 0.012497329711914062, + "step": 103525 + }, + { + "epoch": 0.89519329707482, + "grad_norm": 35.4525247155794, + "learning_rate": 3.4916619874777285e-06, + "loss": 0.441455078125, + "step": 103530 + }, + { + "epoch": 0.8952365305963632, + "grad_norm": 4.819218873988789, + "learning_rate": 3.4914610078368263e-06, + "loss": 0.13603668212890624, + "step": 103535 + }, + { + "epoch": 0.8952797641179064, + "grad_norm": 18.060005393446275, + "learning_rate": 3.49126002592935e-06, + "loss": 0.11382007598876953, + "step": 103540 + }, + { + "epoch": 0.8953229976394497, + "grad_norm": 22.59330583530386, + "learning_rate": 3.491059041756224e-06, + "loss": 0.258807373046875, + "step": 103545 + }, + { + "epoch": 0.895366231160993, + "grad_norm": 1.1453248170436425, + "learning_rate": 3.490858055318378e-06, + "loss": 0.08352813720703126, + "step": 103550 + }, + { + "epoch": 0.8954094646825362, + "grad_norm": 3.464668275022272, + "learning_rate": 3.490657066616738e-06, + "loss": 0.06953125, + "step": 103555 + }, + { + "epoch": 0.8954526982040795, + "grad_norm": 11.784947310309981, + "learning_rate": 3.4904560756522318e-06, + "loss": 0.0480708122253418, + "step": 103560 + }, + { + "epoch": 0.8954959317256228, + "grad_norm": 1.4044459746751259, + "learning_rate": 3.490255082425784e-06, + "loss": 0.1874217987060547, + "step": 103565 + }, + { + "epoch": 0.895539165247166, + "grad_norm": 22.467074084179252, + "learning_rate": 3.4900540869383236e-06, + "loss": 0.1608642578125, + "step": 103570 + }, + { + "epoch": 0.8955823987687093, + "grad_norm": 3.6567984315851296, + "learning_rate": 3.4898530891907775e-06, + "loss": 0.165740966796875, + "step": 103575 + }, + { + "epoch": 0.8956256322902526, + "grad_norm": 49.612190248301204, + "learning_rate": 3.489652089184072e-06, + "loss": 0.1035247802734375, + "step": 103580 + }, + { + "epoch": 0.8956688658117958, + "grad_norm": 0.6177425959593527, + "learning_rate": 3.4894510869191345e-06, + "loss": 0.2250753402709961, + "step": 103585 + }, + { + "epoch": 0.8957120993333391, + "grad_norm": 0.6405553150606731, + "learning_rate": 3.4892500823968923e-06, + "loss": 0.1411041259765625, + "step": 103590 + }, + { + "epoch": 0.8957553328548824, + "grad_norm": 0.2560831598506516, + "learning_rate": 3.489049075618271e-06, + "loss": 0.14966926574707032, + "step": 103595 + }, + { + "epoch": 0.8957985663764256, + "grad_norm": 16.94572679005635, + "learning_rate": 3.4888480665841982e-06, + "loss": 0.4137073516845703, + "step": 103600 + }, + { + "epoch": 0.8958417998979689, + "grad_norm": 2.751135301739487, + "learning_rate": 3.4886470552956027e-06, + "loss": 0.0272003173828125, + "step": 103605 + }, + { + "epoch": 0.8958850334195122, + "grad_norm": 13.77522769886376, + "learning_rate": 3.4884460417534095e-06, + "loss": 0.04090194702148438, + "step": 103610 + }, + { + "epoch": 0.8959282669410554, + "grad_norm": 1.181298367636766, + "learning_rate": 3.4882450259585466e-06, + "loss": 0.007702255249023437, + "step": 103615 + }, + { + "epoch": 0.8959715004625987, + "grad_norm": 1.5442008441059007, + "learning_rate": 3.488044007911941e-06, + "loss": 0.08826332092285157, + "step": 103620 + }, + { + "epoch": 0.896014733984142, + "grad_norm": 27.55300819748928, + "learning_rate": 3.4878429876145182e-06, + "loss": 0.214208984375, + "step": 103625 + }, + { + "epoch": 0.8960579675056852, + "grad_norm": 3.3118630573835794, + "learning_rate": 3.4876419650672078e-06, + "loss": 0.20397186279296875, + "step": 103630 + }, + { + "epoch": 0.8961012010272285, + "grad_norm": 0.14731039921198055, + "learning_rate": 3.487440940270935e-06, + "loss": 0.3369232177734375, + "step": 103635 + }, + { + "epoch": 0.8961444345487717, + "grad_norm": 2.3527530659670166, + "learning_rate": 3.487239913226629e-06, + "loss": 0.050031280517578124, + "step": 103640 + }, + { + "epoch": 0.896187668070315, + "grad_norm": 18.665145550264214, + "learning_rate": 3.487038883935214e-06, + "loss": 0.46771240234375, + "step": 103645 + }, + { + "epoch": 0.8962309015918583, + "grad_norm": 1.377376292003318, + "learning_rate": 3.48683785239762e-06, + "loss": 0.042940521240234376, + "step": 103650 + }, + { + "epoch": 0.8962741351134015, + "grad_norm": 1.8090207186813647, + "learning_rate": 3.4866368186147716e-06, + "loss": 0.042799758911132815, + "step": 103655 + }, + { + "epoch": 0.8963173686349448, + "grad_norm": 2.0156912334113026, + "learning_rate": 3.4864357825875975e-06, + "loss": 0.057234668731689455, + "step": 103660 + }, + { + "epoch": 0.896360602156488, + "grad_norm": 2.949504273781094, + "learning_rate": 3.4862347443170254e-06, + "loss": 0.21556472778320312, + "step": 103665 + }, + { + "epoch": 0.8964038356780313, + "grad_norm": 5.177242357219483, + "learning_rate": 3.486033703803981e-06, + "loss": 0.1433135986328125, + "step": 103670 + }, + { + "epoch": 0.8964470691995746, + "grad_norm": 0.7364715730601366, + "learning_rate": 3.485832661049392e-06, + "loss": 0.1980438232421875, + "step": 103675 + }, + { + "epoch": 0.8964903027211178, + "grad_norm": 40.01650039590277, + "learning_rate": 3.4856316160541864e-06, + "loss": 0.05094833374023437, + "step": 103680 + }, + { + "epoch": 0.8965335362426611, + "grad_norm": 1.175738404307269, + "learning_rate": 3.4854305688192897e-06, + "loss": 0.33814849853515627, + "step": 103685 + }, + { + "epoch": 0.8965767697642044, + "grad_norm": 0.8318773730151505, + "learning_rate": 3.4852295193456307e-06, + "loss": 0.10349960327148437, + "step": 103690 + }, + { + "epoch": 0.8966200032857476, + "grad_norm": 4.895184856173584, + "learning_rate": 3.485028467634136e-06, + "loss": 0.13482666015625, + "step": 103695 + }, + { + "epoch": 0.8966632368072909, + "grad_norm": 1.452581621541031, + "learning_rate": 3.4848274136857328e-06, + "loss": 0.11764984130859375, + "step": 103700 + }, + { + "epoch": 0.8967064703288342, + "grad_norm": 2.223377619845525, + "learning_rate": 3.484626357501348e-06, + "loss": 0.0842315673828125, + "step": 103705 + }, + { + "epoch": 0.8967497038503774, + "grad_norm": 12.51445393825231, + "learning_rate": 3.4844252990819102e-06, + "loss": 0.12413406372070312, + "step": 103710 + }, + { + "epoch": 0.8967929373719207, + "grad_norm": 27.1833945362479, + "learning_rate": 3.484224238428345e-06, + "loss": 0.2737335205078125, + "step": 103715 + }, + { + "epoch": 0.8968361708934639, + "grad_norm": 0.3141201582694486, + "learning_rate": 3.4840231755415805e-06, + "loss": 0.2541511535644531, + "step": 103720 + }, + { + "epoch": 0.8968794044150072, + "grad_norm": 1.638708375888913, + "learning_rate": 3.4838221104225445e-06, + "loss": 0.1275177001953125, + "step": 103725 + }, + { + "epoch": 0.8969226379365505, + "grad_norm": 47.611135198370725, + "learning_rate": 3.4836210430721627e-06, + "loss": 0.20169448852539062, + "step": 103730 + }, + { + "epoch": 0.8969658714580937, + "grad_norm": 2.821281365853871, + "learning_rate": 3.483419973491364e-06, + "loss": 0.108734130859375, + "step": 103735 + }, + { + "epoch": 0.897009104979637, + "grad_norm": 25.55800069021372, + "learning_rate": 3.4832189016810753e-06, + "loss": 0.2087890625, + "step": 103740 + }, + { + "epoch": 0.8970523385011803, + "grad_norm": 33.43449916603235, + "learning_rate": 3.483017827642223e-06, + "loss": 0.097613525390625, + "step": 103745 + }, + { + "epoch": 0.8970955720227235, + "grad_norm": 1.8947851897602528, + "learning_rate": 3.4828167513757356e-06, + "loss": 0.2750244140625, + "step": 103750 + }, + { + "epoch": 0.8971388055442668, + "grad_norm": 4.572564443423334, + "learning_rate": 3.4826156728825393e-06, + "loss": 0.09649810791015626, + "step": 103755 + }, + { + "epoch": 0.8971820390658101, + "grad_norm": 19.532363222935043, + "learning_rate": 3.4824145921635636e-06, + "loss": 0.5132080078125, + "step": 103760 + }, + { + "epoch": 0.8972252725873533, + "grad_norm": 1.3594899197796713, + "learning_rate": 3.482213509219734e-06, + "loss": 0.11216278076171875, + "step": 103765 + }, + { + "epoch": 0.8972685061088966, + "grad_norm": 10.692346291293571, + "learning_rate": 3.482012424051977e-06, + "loss": 0.1183868408203125, + "step": 103770 + }, + { + "epoch": 0.8973117396304399, + "grad_norm": 4.090976162492265, + "learning_rate": 3.481811336661222e-06, + "loss": 0.18288536071777345, + "step": 103775 + }, + { + "epoch": 0.8973549731519831, + "grad_norm": 36.97445580292768, + "learning_rate": 3.4816102470483968e-06, + "loss": 0.11964645385742187, + "step": 103780 + }, + { + "epoch": 0.8973982066735264, + "grad_norm": 16.152824032394484, + "learning_rate": 3.481409155214426e-06, + "loss": 0.3655364990234375, + "step": 103785 + }, + { + "epoch": 0.8974414401950697, + "grad_norm": 6.003543512279005, + "learning_rate": 3.4812080611602397e-06, + "loss": 0.1309659004211426, + "step": 103790 + }, + { + "epoch": 0.8974846737166129, + "grad_norm": 0.6115713477975221, + "learning_rate": 3.481006964886764e-06, + "loss": 0.07870769500732422, + "step": 103795 + }, + { + "epoch": 0.8975279072381562, + "grad_norm": 1.8153642443700648, + "learning_rate": 3.4808058663949262e-06, + "loss": 0.0469573974609375, + "step": 103800 + }, + { + "epoch": 0.8975711407596995, + "grad_norm": 3.010386503257527, + "learning_rate": 3.480604765685656e-06, + "loss": 0.624072265625, + "step": 103805 + }, + { + "epoch": 0.8976143742812427, + "grad_norm": 7.263049578955417, + "learning_rate": 3.480403662759878e-06, + "loss": 0.15974655151367187, + "step": 103810 + }, + { + "epoch": 0.8976576078027859, + "grad_norm": 4.7973270148519935, + "learning_rate": 3.480202557618521e-06, + "loss": 0.16694049835205077, + "step": 103815 + }, + { + "epoch": 0.8977008413243293, + "grad_norm": 4.586854970184069, + "learning_rate": 3.4800014502625113e-06, + "loss": 0.09929275512695312, + "step": 103820 + }, + { + "epoch": 0.8977440748458725, + "grad_norm": 51.40652914818979, + "learning_rate": 3.4798003406927777e-06, + "loss": 0.3766014099121094, + "step": 103825 + }, + { + "epoch": 0.8977873083674157, + "grad_norm": 3.606477320132878, + "learning_rate": 3.4795992289102477e-06, + "loss": 0.4503753662109375, + "step": 103830 + }, + { + "epoch": 0.897830541888959, + "grad_norm": 0.11925612264252969, + "learning_rate": 3.4793981149158483e-06, + "loss": 0.10757942199707031, + "step": 103835 + }, + { + "epoch": 0.8978737754105023, + "grad_norm": 1.6878350397473727, + "learning_rate": 3.4791969987105077e-06, + "loss": 0.0372894287109375, + "step": 103840 + }, + { + "epoch": 0.8979170089320455, + "grad_norm": 0.28808510507914376, + "learning_rate": 3.478995880295153e-06, + "loss": 0.1904937744140625, + "step": 103845 + }, + { + "epoch": 0.8979602424535889, + "grad_norm": 10.803660132716006, + "learning_rate": 3.47879475967071e-06, + "loss": 0.1674407958984375, + "step": 103850 + }, + { + "epoch": 0.8980034759751321, + "grad_norm": 0.21615179579350433, + "learning_rate": 3.4785936368381084e-06, + "loss": 0.23646774291992187, + "step": 103855 + }, + { + "epoch": 0.8980467094966753, + "grad_norm": 2.874275529348038, + "learning_rate": 3.478392511798276e-06, + "loss": 0.055487060546875, + "step": 103860 + }, + { + "epoch": 0.8980899430182187, + "grad_norm": 1.6870554205175947, + "learning_rate": 3.478191384552139e-06, + "loss": 0.15437088012695313, + "step": 103865 + }, + { + "epoch": 0.8981331765397619, + "grad_norm": 5.473822703530698, + "learning_rate": 3.4779902551006266e-06, + "loss": 0.1915752410888672, + "step": 103870 + }, + { + "epoch": 0.8981764100613051, + "grad_norm": 8.406794727411082, + "learning_rate": 3.477789123444665e-06, + "loss": 0.269049072265625, + "step": 103875 + }, + { + "epoch": 0.8982196435828484, + "grad_norm": 2.1440077246396836, + "learning_rate": 3.4775879895851815e-06, + "loss": 0.030515289306640624, + "step": 103880 + }, + { + "epoch": 0.8982628771043917, + "grad_norm": 2.1480778899619324, + "learning_rate": 3.4773868535231043e-06, + "loss": 0.2548057556152344, + "step": 103885 + }, + { + "epoch": 0.8983061106259349, + "grad_norm": 23.828136950067574, + "learning_rate": 3.477185715259362e-06, + "loss": 0.342694091796875, + "step": 103890 + }, + { + "epoch": 0.8983493441474781, + "grad_norm": 22.956846930344977, + "learning_rate": 3.4769845747948808e-06, + "loss": 0.19602432250976562, + "step": 103895 + }, + { + "epoch": 0.8983925776690215, + "grad_norm": 12.089485083519165, + "learning_rate": 3.476783432130589e-06, + "loss": 0.12049942016601563, + "step": 103900 + }, + { + "epoch": 0.8984358111905647, + "grad_norm": 0.4260290337945546, + "learning_rate": 3.4765822872674143e-06, + "loss": 0.12990226745605468, + "step": 103905 + }, + { + "epoch": 0.8984790447121079, + "grad_norm": 7.115996112299003, + "learning_rate": 3.4763811402062835e-06, + "loss": 0.05236968994140625, + "step": 103910 + }, + { + "epoch": 0.8985222782336513, + "grad_norm": 15.263080198460337, + "learning_rate": 3.4761799909481254e-06, + "loss": 0.10960540771484376, + "step": 103915 + }, + { + "epoch": 0.8985655117551945, + "grad_norm": 0.5529597282824495, + "learning_rate": 3.4759788394938675e-06, + "loss": 0.013987922668457031, + "step": 103920 + }, + { + "epoch": 0.8986087452767377, + "grad_norm": 7.939497008166319, + "learning_rate": 3.4757776858444377e-06, + "loss": 0.1364501953125, + "step": 103925 + }, + { + "epoch": 0.8986519787982811, + "grad_norm": 2.1352317887870593, + "learning_rate": 3.4755765300007623e-06, + "loss": 0.23687515258789063, + "step": 103930 + }, + { + "epoch": 0.8986952123198243, + "grad_norm": 6.879062480589737, + "learning_rate": 3.4753753719637705e-06, + "loss": 0.06307373046875, + "step": 103935 + }, + { + "epoch": 0.8987384458413675, + "grad_norm": 8.206331334073424, + "learning_rate": 3.4751742117343894e-06, + "loss": 0.344677734375, + "step": 103940 + }, + { + "epoch": 0.8987816793629109, + "grad_norm": 1.1813973245750369, + "learning_rate": 3.4749730493135463e-06, + "loss": 0.04578399658203125, + "step": 103945 + }, + { + "epoch": 0.8988249128844541, + "grad_norm": 8.749572687630135, + "learning_rate": 3.47477188470217e-06, + "loss": 0.045542144775390626, + "step": 103950 + }, + { + "epoch": 0.8988681464059973, + "grad_norm": 1.5238416084278987, + "learning_rate": 3.474570717901188e-06, + "loss": 0.1097076416015625, + "step": 103955 + }, + { + "epoch": 0.8989113799275407, + "grad_norm": 0.1847737220103169, + "learning_rate": 3.474369548911527e-06, + "loss": 0.142584228515625, + "step": 103960 + }, + { + "epoch": 0.8989546134490839, + "grad_norm": 9.044837243562542, + "learning_rate": 3.4741683777341168e-06, + "loss": 0.03407211303710937, + "step": 103965 + }, + { + "epoch": 0.8989978469706271, + "grad_norm": 0.03251270454432755, + "learning_rate": 3.473967204369882e-06, + "loss": 0.2637022018432617, + "step": 103970 + }, + { + "epoch": 0.8990410804921705, + "grad_norm": 2.2670192499188566, + "learning_rate": 3.4737660288197535e-06, + "loss": 0.09003715515136719, + "step": 103975 + }, + { + "epoch": 0.8990843140137137, + "grad_norm": 3.5976238525088795, + "learning_rate": 3.473564851084658e-06, + "loss": 0.0798675537109375, + "step": 103980 + }, + { + "epoch": 0.8991275475352569, + "grad_norm": 16.591025552698415, + "learning_rate": 3.473363671165524e-06, + "loss": 0.10345802307128907, + "step": 103985 + }, + { + "epoch": 0.8991707810568002, + "grad_norm": 3.6089101343782715, + "learning_rate": 3.473162489063278e-06, + "loss": 0.0293365478515625, + "step": 103990 + }, + { + "epoch": 0.8992140145783435, + "grad_norm": 0.6898470111018261, + "learning_rate": 3.4729613047788475e-06, + "loss": 0.09939727783203126, + "step": 103995 + }, + { + "epoch": 0.8992572480998867, + "grad_norm": 2.451728160211166, + "learning_rate": 3.4727601183131616e-06, + "loss": 0.1275970458984375, + "step": 104000 + }, + { + "epoch": 0.89930048162143, + "grad_norm": 26.449485360850453, + "learning_rate": 3.4725589296671486e-06, + "loss": 0.43281917572021483, + "step": 104005 + }, + { + "epoch": 0.8993437151429733, + "grad_norm": 6.686735887077024, + "learning_rate": 3.4723577388417345e-06, + "loss": 0.23984909057617188, + "step": 104010 + }, + { + "epoch": 0.8993869486645165, + "grad_norm": 3.0637724010427787, + "learning_rate": 3.4721565458378496e-06, + "loss": 0.08150787353515625, + "step": 104015 + }, + { + "epoch": 0.8994301821860597, + "grad_norm": 1.90445962902067, + "learning_rate": 3.471955350656419e-06, + "loss": 0.20167236328125, + "step": 104020 + }, + { + "epoch": 0.8994734157076031, + "grad_norm": 10.38738726498998, + "learning_rate": 3.4717541532983724e-06, + "loss": 0.0369140625, + "step": 104025 + }, + { + "epoch": 0.8995166492291463, + "grad_norm": 1.4042883786757594, + "learning_rate": 3.471552953764638e-06, + "loss": 0.061658477783203124, + "step": 104030 + }, + { + "epoch": 0.8995598827506895, + "grad_norm": 6.861751723919351, + "learning_rate": 3.471351752056143e-06, + "loss": 0.15644683837890624, + "step": 104035 + }, + { + "epoch": 0.8996031162722329, + "grad_norm": 0.6264622992976642, + "learning_rate": 3.471150548173814e-06, + "loss": 0.05292778015136719, + "step": 104040 + }, + { + "epoch": 0.8996463497937761, + "grad_norm": 0.07101469287611252, + "learning_rate": 3.470949342118582e-06, + "loss": 0.08651542663574219, + "step": 104045 + }, + { + "epoch": 0.8996895833153193, + "grad_norm": 1.097810959014648, + "learning_rate": 3.4707481338913722e-06, + "loss": 0.20378494262695312, + "step": 104050 + }, + { + "epoch": 0.8997328168368627, + "grad_norm": 26.804179260145098, + "learning_rate": 3.470546923493114e-06, + "loss": 0.18716773986816407, + "step": 104055 + }, + { + "epoch": 0.8997760503584059, + "grad_norm": 5.769543330090067, + "learning_rate": 3.470345710924735e-06, + "loss": 0.035171890258789064, + "step": 104060 + }, + { + "epoch": 0.8998192838799491, + "grad_norm": 0.887557561767532, + "learning_rate": 3.470144496187163e-06, + "loss": 0.15785751342773438, + "step": 104065 + }, + { + "epoch": 0.8998625174014924, + "grad_norm": 4.765571543677817, + "learning_rate": 3.469943279281327e-06, + "loss": 0.0606689453125, + "step": 104070 + }, + { + "epoch": 0.8999057509230357, + "grad_norm": 0.3221648839184494, + "learning_rate": 3.4697420602081525e-06, + "loss": 0.11696701049804688, + "step": 104075 + }, + { + "epoch": 0.8999489844445789, + "grad_norm": 2.8515724938614317, + "learning_rate": 3.46954083896857e-06, + "loss": 0.059228134155273435, + "step": 104080 + }, + { + "epoch": 0.8999922179661222, + "grad_norm": 28.703915605320816, + "learning_rate": 3.4693396155635067e-06, + "loss": 0.114691162109375, + "step": 104085 + }, + { + "epoch": 0.9000354514876655, + "grad_norm": 0.42189667796432256, + "learning_rate": 3.46913838999389e-06, + "loss": 0.247802734375, + "step": 104090 + }, + { + "epoch": 0.9000786850092087, + "grad_norm": 5.041273737855924, + "learning_rate": 3.4689371622606497e-06, + "loss": 0.25765018463134765, + "step": 104095 + }, + { + "epoch": 0.900121918530752, + "grad_norm": 46.0131171700872, + "learning_rate": 3.468735932364712e-06, + "loss": 0.36341209411621095, + "step": 104100 + }, + { + "epoch": 0.9001651520522953, + "grad_norm": 9.254338877734659, + "learning_rate": 3.468534700307005e-06, + "loss": 0.08892860412597656, + "step": 104105 + }, + { + "epoch": 0.9002083855738385, + "grad_norm": 18.406939639614095, + "learning_rate": 3.4683334660884577e-06, + "loss": 0.13761062622070314, + "step": 104110 + }, + { + "epoch": 0.9002516190953818, + "grad_norm": 8.03109620724176, + "learning_rate": 3.468132229709998e-06, + "loss": 0.3822177886962891, + "step": 104115 + }, + { + "epoch": 0.9002948526169251, + "grad_norm": 2.2656178631011796, + "learning_rate": 3.4679309911725535e-06, + "loss": 0.064825439453125, + "step": 104120 + }, + { + "epoch": 0.9003380861384683, + "grad_norm": 15.483290268366321, + "learning_rate": 3.4677297504770536e-06, + "loss": 0.042813873291015624, + "step": 104125 + }, + { + "epoch": 0.9003813196600116, + "grad_norm": 2.8684732014146332, + "learning_rate": 3.467528507624425e-06, + "loss": 0.0476043701171875, + "step": 104130 + }, + { + "epoch": 0.9004245531815549, + "grad_norm": 1.3903632319536814, + "learning_rate": 3.467327262615596e-06, + "loss": 0.10119285583496093, + "step": 104135 + }, + { + "epoch": 0.9004677867030981, + "grad_norm": 7.368376950258173, + "learning_rate": 3.4671260154514942e-06, + "loss": 0.09224395751953125, + "step": 104140 + }, + { + "epoch": 0.9005110202246414, + "grad_norm": 2.170092101259056, + "learning_rate": 3.4669247661330494e-06, + "loss": 0.13197174072265624, + "step": 104145 + }, + { + "epoch": 0.9005542537461847, + "grad_norm": 1.2012787732721273, + "learning_rate": 3.466723514661189e-06, + "loss": 0.41121711730957033, + "step": 104150 + }, + { + "epoch": 0.9005974872677279, + "grad_norm": 29.55607298956508, + "learning_rate": 3.4665222610368404e-06, + "loss": 0.1542278289794922, + "step": 104155 + }, + { + "epoch": 0.9006407207892712, + "grad_norm": 0.06156239055973473, + "learning_rate": 3.466321005260933e-06, + "loss": 0.05218048095703125, + "step": 104160 + }, + { + "epoch": 0.9006839543108144, + "grad_norm": 0.17299649576696274, + "learning_rate": 3.4661197473343937e-06, + "loss": 0.052294921875, + "step": 104165 + }, + { + "epoch": 0.9007271878323577, + "grad_norm": 3.850422781971357, + "learning_rate": 3.465918487258151e-06, + "loss": 0.06082763671875, + "step": 104170 + }, + { + "epoch": 0.900770421353901, + "grad_norm": 0.14559505425871366, + "learning_rate": 3.4657172250331345e-06, + "loss": 0.24199209213256836, + "step": 104175 + }, + { + "epoch": 0.9008136548754442, + "grad_norm": 0.19561663374670554, + "learning_rate": 3.465515960660271e-06, + "loss": 0.17946968078613282, + "step": 104180 + }, + { + "epoch": 0.9008568883969875, + "grad_norm": 53.591814052209415, + "learning_rate": 3.4653146941404887e-06, + "loss": 0.28709716796875, + "step": 104185 + }, + { + "epoch": 0.9009001219185307, + "grad_norm": 31.78493788028496, + "learning_rate": 3.465113425474716e-06, + "loss": 0.16409454345703126, + "step": 104190 + }, + { + "epoch": 0.900943355440074, + "grad_norm": 3.515862787387612, + "learning_rate": 3.4649121546638814e-06, + "loss": 0.28395843505859375, + "step": 104195 + }, + { + "epoch": 0.9009865889616173, + "grad_norm": 14.597839962512698, + "learning_rate": 3.464710881708913e-06, + "loss": 0.12530670166015626, + "step": 104200 + }, + { + "epoch": 0.9010298224831605, + "grad_norm": 25.336133993037464, + "learning_rate": 3.464509606610739e-06, + "loss": 0.16867599487304688, + "step": 104205 + }, + { + "epoch": 0.9010730560047038, + "grad_norm": 1.878469638439174, + "learning_rate": 3.4643083293702886e-06, + "loss": 0.05891075134277344, + "step": 104210 + }, + { + "epoch": 0.9011162895262471, + "grad_norm": 3.3621834846568714, + "learning_rate": 3.4641070499884888e-06, + "loss": 0.08643035888671875, + "step": 104215 + }, + { + "epoch": 0.9011595230477903, + "grad_norm": 35.63321170846054, + "learning_rate": 3.4639057684662677e-06, + "loss": 0.26235160827636717, + "step": 104220 + }, + { + "epoch": 0.9012027565693336, + "grad_norm": 0.9583760280569846, + "learning_rate": 3.463704484804555e-06, + "loss": 0.05033416748046875, + "step": 104225 + }, + { + "epoch": 0.9012459900908769, + "grad_norm": 16.16173023159185, + "learning_rate": 3.4635031990042765e-06, + "loss": 0.2623077392578125, + "step": 104230 + }, + { + "epoch": 0.9012892236124201, + "grad_norm": 4.521866777670958, + "learning_rate": 3.4633019110663644e-06, + "loss": 0.11402587890625, + "step": 104235 + }, + { + "epoch": 0.9013324571339634, + "grad_norm": 6.221092667258027, + "learning_rate": 3.463100620991744e-06, + "loss": 0.2307891845703125, + "step": 104240 + }, + { + "epoch": 0.9013756906555066, + "grad_norm": 15.301019627570922, + "learning_rate": 3.462899328781344e-06, + "loss": 0.13306961059570313, + "step": 104245 + }, + { + "epoch": 0.9014189241770499, + "grad_norm": 0.6813746813021511, + "learning_rate": 3.462698034436093e-06, + "loss": 0.0404449462890625, + "step": 104250 + }, + { + "epoch": 0.9014621576985932, + "grad_norm": 10.978699206708312, + "learning_rate": 3.462496737956921e-06, + "loss": 0.17384281158447265, + "step": 104255 + }, + { + "epoch": 0.9015053912201364, + "grad_norm": 3.3702084040608136, + "learning_rate": 3.462295439344754e-06, + "loss": 0.13664016723632813, + "step": 104260 + }, + { + "epoch": 0.9015486247416797, + "grad_norm": 1.4807623437253423, + "learning_rate": 3.4620941386005205e-06, + "loss": 0.27919349670410154, + "step": 104265 + }, + { + "epoch": 0.901591858263223, + "grad_norm": 0.5983929607784854, + "learning_rate": 3.461892835725151e-06, + "loss": 0.04026527404785156, + "step": 104270 + }, + { + "epoch": 0.9016350917847662, + "grad_norm": 28.137705667326955, + "learning_rate": 3.4616915307195717e-06, + "loss": 0.30230045318603516, + "step": 104275 + }, + { + "epoch": 0.9016783253063095, + "grad_norm": 4.634550189690072, + "learning_rate": 3.461490223584712e-06, + "loss": 0.11920051574707032, + "step": 104280 + }, + { + "epoch": 0.9017215588278528, + "grad_norm": 0.47149530450964733, + "learning_rate": 3.4612889143215003e-06, + "loss": 0.01572113037109375, + "step": 104285 + }, + { + "epoch": 0.901764792349396, + "grad_norm": 1.7225879140546518, + "learning_rate": 3.4610876029308654e-06, + "loss": 0.08541259765625, + "step": 104290 + }, + { + "epoch": 0.9018080258709393, + "grad_norm": 75.89298086151125, + "learning_rate": 3.4608862894137344e-06, + "loss": 0.4042484283447266, + "step": 104295 + }, + { + "epoch": 0.9018512593924826, + "grad_norm": 27.41098999093479, + "learning_rate": 3.460684973771037e-06, + "loss": 0.21476287841796876, + "step": 104300 + }, + { + "epoch": 0.9018944929140258, + "grad_norm": 0.559935553271372, + "learning_rate": 3.460483656003701e-06, + "loss": 0.2084259033203125, + "step": 104305 + }, + { + "epoch": 0.9019377264355691, + "grad_norm": 12.51574031530131, + "learning_rate": 3.4602823361126546e-06, + "loss": 0.07419891357421875, + "step": 104310 + }, + { + "epoch": 0.9019809599571124, + "grad_norm": 11.344867282364437, + "learning_rate": 3.4600810140988276e-06, + "loss": 0.2612476348876953, + "step": 104315 + }, + { + "epoch": 0.9020241934786556, + "grad_norm": 2.0173505798842446, + "learning_rate": 3.4598796899631476e-06, + "loss": 0.06015968322753906, + "step": 104320 + }, + { + "epoch": 0.9020674270001989, + "grad_norm": 15.41141763949605, + "learning_rate": 3.4596783637065424e-06, + "loss": 0.1333831787109375, + "step": 104325 + }, + { + "epoch": 0.9021106605217422, + "grad_norm": 4.559748134844184, + "learning_rate": 3.4594770353299417e-06, + "loss": 0.03323974609375, + "step": 104330 + }, + { + "epoch": 0.9021538940432854, + "grad_norm": 45.00413093969009, + "learning_rate": 3.4592757048342737e-06, + "loss": 0.31325569152832033, + "step": 104335 + }, + { + "epoch": 0.9021971275648286, + "grad_norm": 18.651545334501865, + "learning_rate": 3.4590743722204664e-06, + "loss": 0.2441089630126953, + "step": 104340 + }, + { + "epoch": 0.902240361086372, + "grad_norm": 3.384532680478749, + "learning_rate": 3.4588730374894486e-06, + "loss": 0.03728065490722656, + "step": 104345 + }, + { + "epoch": 0.9022835946079152, + "grad_norm": 0.12971251652472685, + "learning_rate": 3.458671700642149e-06, + "loss": 0.015087127685546875, + "step": 104350 + }, + { + "epoch": 0.9023268281294584, + "grad_norm": 14.443159847808243, + "learning_rate": 3.4584703616794966e-06, + "loss": 0.05201263427734375, + "step": 104355 + }, + { + "epoch": 0.9023700616510018, + "grad_norm": 28.062279352716782, + "learning_rate": 3.458269020602418e-06, + "loss": 0.21886749267578126, + "step": 104360 + }, + { + "epoch": 0.902413295172545, + "grad_norm": 2.6402588285719077, + "learning_rate": 3.4580676774118443e-06, + "loss": 0.1769012451171875, + "step": 104365 + }, + { + "epoch": 0.9024565286940882, + "grad_norm": 0.8206165127909654, + "learning_rate": 3.457866332108703e-06, + "loss": 0.066131591796875, + "step": 104370 + }, + { + "epoch": 0.9024997622156316, + "grad_norm": 1.1946551350540677, + "learning_rate": 3.4576649846939216e-06, + "loss": 0.29190216064453123, + "step": 104375 + }, + { + "epoch": 0.9025429957371748, + "grad_norm": 5.204836061230079, + "learning_rate": 3.457463635168431e-06, + "loss": 0.30193405151367186, + "step": 104380 + }, + { + "epoch": 0.902586229258718, + "grad_norm": 0.5558059713871898, + "learning_rate": 3.457262283533158e-06, + "loss": 0.0277069091796875, + "step": 104385 + }, + { + "epoch": 0.9026294627802613, + "grad_norm": 3.47200775237443, + "learning_rate": 3.457060929789032e-06, + "loss": 0.16081771850585938, + "step": 104390 + }, + { + "epoch": 0.9026726963018046, + "grad_norm": 14.39351194781821, + "learning_rate": 3.4568595739369807e-06, + "loss": 0.26703720092773436, + "step": 104395 + }, + { + "epoch": 0.9027159298233478, + "grad_norm": 4.259108893217989, + "learning_rate": 3.4566582159779343e-06, + "loss": 0.0673583984375, + "step": 104400 + }, + { + "epoch": 0.9027591633448911, + "grad_norm": 2.292492850157927, + "learning_rate": 3.4564568559128196e-06, + "loss": 0.21649322509765626, + "step": 104405 + }, + { + "epoch": 0.9028023968664344, + "grad_norm": 1.461104357550842, + "learning_rate": 3.456255493742567e-06, + "loss": 0.155718994140625, + "step": 104410 + }, + { + "epoch": 0.9028456303879776, + "grad_norm": 7.077948031135885, + "learning_rate": 3.4560541294681036e-06, + "loss": 0.14272651672363282, + "step": 104415 + }, + { + "epoch": 0.9028888639095208, + "grad_norm": 2.048293309379691, + "learning_rate": 3.4558527630903594e-06, + "loss": 0.509808349609375, + "step": 104420 + }, + { + "epoch": 0.9029320974310642, + "grad_norm": 16.76696817513201, + "learning_rate": 3.455651394610262e-06, + "loss": 0.19185333251953124, + "step": 104425 + }, + { + "epoch": 0.9029753309526074, + "grad_norm": 2.5476661507987726, + "learning_rate": 3.4554500240287413e-06, + "loss": 0.23417434692382813, + "step": 104430 + }, + { + "epoch": 0.9030185644741506, + "grad_norm": 0.9407343213059856, + "learning_rate": 3.4552486513467257e-06, + "loss": 0.02281646728515625, + "step": 104435 + }, + { + "epoch": 0.903061797995694, + "grad_norm": 1.5474253992666525, + "learning_rate": 3.4550472765651424e-06, + "loss": 0.03500633239746094, + "step": 104440 + }, + { + "epoch": 0.9031050315172372, + "grad_norm": 4.763762648979937, + "learning_rate": 3.4548458996849215e-06, + "loss": 0.03906402587890625, + "step": 104445 + }, + { + "epoch": 0.9031482650387804, + "grad_norm": 0.04137159503291364, + "learning_rate": 3.454644520706992e-06, + "loss": 0.1464252471923828, + "step": 104450 + }, + { + "epoch": 0.9031914985603238, + "grad_norm": 1.0620652204206422, + "learning_rate": 3.454443139632282e-06, + "loss": 0.01495208740234375, + "step": 104455 + }, + { + "epoch": 0.903234732081867, + "grad_norm": 19.316988489170292, + "learning_rate": 3.4542417564617212e-06, + "loss": 0.09264965057373047, + "step": 104460 + }, + { + "epoch": 0.9032779656034102, + "grad_norm": 6.5602820891916265, + "learning_rate": 3.4540403711962367e-06, + "loss": 0.05631999969482422, + "step": 104465 + }, + { + "epoch": 0.9033211991249536, + "grad_norm": 1.2680574139287109, + "learning_rate": 3.4538389838367575e-06, + "loss": 0.104302978515625, + "step": 104470 + }, + { + "epoch": 0.9033644326464968, + "grad_norm": 20.197075329092957, + "learning_rate": 3.4536375943842138e-06, + "loss": 0.07909660339355469, + "step": 104475 + }, + { + "epoch": 0.90340766616804, + "grad_norm": 17.973961592021382, + "learning_rate": 3.453436202839534e-06, + "loss": 0.1205322265625, + "step": 104480 + }, + { + "epoch": 0.9034508996895834, + "grad_norm": 10.363392974834047, + "learning_rate": 3.453234809203645e-06, + "loss": 0.278216552734375, + "step": 104485 + }, + { + "epoch": 0.9034941332111266, + "grad_norm": 0.03279908671903103, + "learning_rate": 3.453033413477478e-06, + "loss": 0.22412281036376952, + "step": 104490 + }, + { + "epoch": 0.9035373667326698, + "grad_norm": 0.3573328530935297, + "learning_rate": 3.4528320156619617e-06, + "loss": 0.02917022705078125, + "step": 104495 + }, + { + "epoch": 0.9035806002542132, + "grad_norm": 8.36369604279291, + "learning_rate": 3.4526306157580237e-06, + "loss": 0.0415130615234375, + "step": 104500 + }, + { + "epoch": 0.9036238337757564, + "grad_norm": 18.904485759886775, + "learning_rate": 3.452429213766592e-06, + "loss": 0.12343368530273438, + "step": 104505 + }, + { + "epoch": 0.9036670672972996, + "grad_norm": 6.275605414529274, + "learning_rate": 3.452227809688598e-06, + "loss": 0.21057357788085937, + "step": 104510 + }, + { + "epoch": 0.9037103008188428, + "grad_norm": 0.07929170025165758, + "learning_rate": 3.45202640352497e-06, + "loss": 0.28183326721191404, + "step": 104515 + }, + { + "epoch": 0.9037535343403862, + "grad_norm": 4.481604108177341, + "learning_rate": 3.451824995276635e-06, + "loss": 0.192437744140625, + "step": 104520 + }, + { + "epoch": 0.9037967678619294, + "grad_norm": 27.930084556263374, + "learning_rate": 3.4516235849445236e-06, + "loss": 0.21934814453125, + "step": 104525 + }, + { + "epoch": 0.9038400013834726, + "grad_norm": 1.5834496413369927, + "learning_rate": 3.4514221725295634e-06, + "loss": 0.059679412841796876, + "step": 104530 + }, + { + "epoch": 0.903883234905016, + "grad_norm": 57.16750728181045, + "learning_rate": 3.4512207580326843e-06, + "loss": 0.3047187805175781, + "step": 104535 + }, + { + "epoch": 0.9039264684265592, + "grad_norm": 7.131233311167843, + "learning_rate": 3.4510193414548154e-06, + "loss": 0.1490020751953125, + "step": 104540 + }, + { + "epoch": 0.9039697019481024, + "grad_norm": 12.841401769737159, + "learning_rate": 3.450817922796885e-06, + "loss": 0.26633453369140625, + "step": 104545 + }, + { + "epoch": 0.9040129354696458, + "grad_norm": 12.494715426007842, + "learning_rate": 3.4506165020598217e-06, + "loss": 0.1315408706665039, + "step": 104550 + }, + { + "epoch": 0.904056168991189, + "grad_norm": 0.85075827984187, + "learning_rate": 3.4504150792445556e-06, + "loss": 0.17982177734375, + "step": 104555 + }, + { + "epoch": 0.9040994025127322, + "grad_norm": 12.600134256479425, + "learning_rate": 3.4502136543520144e-06, + "loss": 0.055192184448242185, + "step": 104560 + }, + { + "epoch": 0.9041426360342756, + "grad_norm": 5.75518149334325, + "learning_rate": 3.450012227383127e-06, + "loss": 0.1322784423828125, + "step": 104565 + }, + { + "epoch": 0.9041858695558188, + "grad_norm": 23.914601131875816, + "learning_rate": 3.449810798338824e-06, + "loss": 0.27361373901367186, + "step": 104570 + }, + { + "epoch": 0.904229103077362, + "grad_norm": 2.112472274279052, + "learning_rate": 3.4496093672200343e-06, + "loss": 0.2230560302734375, + "step": 104575 + }, + { + "epoch": 0.9042723365989054, + "grad_norm": 11.024843970599264, + "learning_rate": 3.449407934027684e-06, + "loss": 0.0894134521484375, + "step": 104580 + }, + { + "epoch": 0.9043155701204486, + "grad_norm": 2.814405792333337, + "learning_rate": 3.4492064987627053e-06, + "loss": 0.01772003173828125, + "step": 104585 + }, + { + "epoch": 0.9043588036419918, + "grad_norm": 15.359942397996818, + "learning_rate": 3.449005061426025e-06, + "loss": 0.20720138549804687, + "step": 104590 + }, + { + "epoch": 0.9044020371635351, + "grad_norm": 2.362875885391953, + "learning_rate": 3.448803622018574e-06, + "loss": 0.169464111328125, + "step": 104595 + }, + { + "epoch": 0.9044452706850784, + "grad_norm": 17.79156329004523, + "learning_rate": 3.448602180541279e-06, + "loss": 0.312353515625, + "step": 104600 + }, + { + "epoch": 0.9044885042066216, + "grad_norm": 41.10139364830875, + "learning_rate": 3.4484007369950714e-06, + "loss": 0.20072174072265625, + "step": 104605 + }, + { + "epoch": 0.9045317377281649, + "grad_norm": 31.04519333967451, + "learning_rate": 3.4481992913808793e-06, + "loss": 0.20811767578125, + "step": 104610 + }, + { + "epoch": 0.9045749712497082, + "grad_norm": 1.1922264988871234, + "learning_rate": 3.4479978436996303e-06, + "loss": 0.1811356544494629, + "step": 104615 + }, + { + "epoch": 0.9046182047712514, + "grad_norm": 6.507860737922265, + "learning_rate": 3.4477963939522562e-06, + "loss": 0.04930686950683594, + "step": 104620 + }, + { + "epoch": 0.9046614382927947, + "grad_norm": 21.425036434075352, + "learning_rate": 3.447594942139685e-06, + "loss": 0.13798904418945312, + "step": 104625 + }, + { + "epoch": 0.904704671814338, + "grad_norm": 7.604539498019669, + "learning_rate": 3.447393488262845e-06, + "loss": 0.4070770263671875, + "step": 104630 + }, + { + "epoch": 0.9047479053358812, + "grad_norm": 2.0990211290138276, + "learning_rate": 3.4471920323226655e-06, + "loss": 0.129888916015625, + "step": 104635 + }, + { + "epoch": 0.9047911388574245, + "grad_norm": 6.770125298319245, + "learning_rate": 3.4469905743200756e-06, + "loss": 0.19438705444335938, + "step": 104640 + }, + { + "epoch": 0.9048343723789678, + "grad_norm": 2.2478296704145504, + "learning_rate": 3.4467891142560046e-06, + "loss": 0.1140380859375, + "step": 104645 + }, + { + "epoch": 0.904877605900511, + "grad_norm": 35.46417749301363, + "learning_rate": 3.4465876521313827e-06, + "loss": 0.14818267822265624, + "step": 104650 + }, + { + "epoch": 0.9049208394220543, + "grad_norm": 15.407077195945881, + "learning_rate": 3.4463861879471375e-06, + "loss": 0.19990615844726561, + "step": 104655 + }, + { + "epoch": 0.9049640729435976, + "grad_norm": 5.12762740433358, + "learning_rate": 3.4461847217041983e-06, + "loss": 0.07407379150390625, + "step": 104660 + }, + { + "epoch": 0.9050073064651408, + "grad_norm": 0.7782221771857767, + "learning_rate": 3.4459832534034946e-06, + "loss": 0.07113304138183593, + "step": 104665 + }, + { + "epoch": 0.905050539986684, + "grad_norm": 1.1143359037422171, + "learning_rate": 3.4457817830459556e-06, + "loss": 0.4330940246582031, + "step": 104670 + }, + { + "epoch": 0.9050937735082273, + "grad_norm": 1.7952207002807838, + "learning_rate": 3.4455803106325113e-06, + "loss": 0.17471466064453126, + "step": 104675 + }, + { + "epoch": 0.9051370070297706, + "grad_norm": 0.24679226775531055, + "learning_rate": 3.4453788361640893e-06, + "loss": 0.0379608154296875, + "step": 104680 + }, + { + "epoch": 0.9051802405513139, + "grad_norm": 13.177484688715504, + "learning_rate": 3.4451773596416196e-06, + "loss": 0.12576522827148437, + "step": 104685 + }, + { + "epoch": 0.9052234740728571, + "grad_norm": 30.08524505627902, + "learning_rate": 3.444975881066032e-06, + "loss": 0.2363372802734375, + "step": 104690 + }, + { + "epoch": 0.9052667075944004, + "grad_norm": 1.3736036403173335, + "learning_rate": 3.4447744004382535e-06, + "loss": 0.053794479370117186, + "step": 104695 + }, + { + "epoch": 0.9053099411159437, + "grad_norm": 4.52817334393793, + "learning_rate": 3.4445729177592146e-06, + "loss": 0.26718902587890625, + "step": 104700 + }, + { + "epoch": 0.9053531746374869, + "grad_norm": 1.112257981262415, + "learning_rate": 3.4443714330298457e-06, + "loss": 0.042066001892089845, + "step": 104705 + }, + { + "epoch": 0.9053964081590302, + "grad_norm": 0.15400702547746095, + "learning_rate": 3.444169946251074e-06, + "loss": 0.061215972900390624, + "step": 104710 + }, + { + "epoch": 0.9054396416805734, + "grad_norm": 3.427537066781378, + "learning_rate": 3.4439684574238314e-06, + "loss": 0.06654586791992187, + "step": 104715 + }, + { + "epoch": 0.9054828752021167, + "grad_norm": 22.64134860565184, + "learning_rate": 3.4437669665490444e-06, + "loss": 0.3724372863769531, + "step": 104720 + }, + { + "epoch": 0.90552610872366, + "grad_norm": 3.940175995396679, + "learning_rate": 3.4435654736276433e-06, + "loss": 0.062200927734375, + "step": 104725 + }, + { + "epoch": 0.9055693422452032, + "grad_norm": 2.6362487670146626, + "learning_rate": 3.4433639786605573e-06, + "loss": 0.0898406982421875, + "step": 104730 + }, + { + "epoch": 0.9056125757667465, + "grad_norm": 4.35293506410459, + "learning_rate": 3.4431624816487168e-06, + "loss": 0.050055313110351565, + "step": 104735 + }, + { + "epoch": 0.9056558092882898, + "grad_norm": 38.630439906920685, + "learning_rate": 3.4429609825930493e-06, + "loss": 0.2893280029296875, + "step": 104740 + }, + { + "epoch": 0.905699042809833, + "grad_norm": 5.0299000961158455, + "learning_rate": 3.4427594814944853e-06, + "loss": 0.08162345886230468, + "step": 104745 + }, + { + "epoch": 0.9057422763313763, + "grad_norm": 18.170653811492855, + "learning_rate": 3.442557978353954e-06, + "loss": 0.1120361328125, + "step": 104750 + }, + { + "epoch": 0.9057855098529196, + "grad_norm": 8.374369325835465, + "learning_rate": 3.4423564731723833e-06, + "loss": 0.06364021301269532, + "step": 104755 + }, + { + "epoch": 0.9058287433744628, + "grad_norm": 1.7647584631892346, + "learning_rate": 3.4421549659507045e-06, + "loss": 0.08468360900878906, + "step": 104760 + }, + { + "epoch": 0.9058719768960061, + "grad_norm": 16.383069199946796, + "learning_rate": 3.441953456689845e-06, + "loss": 0.3978607177734375, + "step": 104765 + }, + { + "epoch": 0.9059152104175493, + "grad_norm": 24.62076986060565, + "learning_rate": 3.441751945390737e-06, + "loss": 0.13904266357421874, + "step": 104770 + }, + { + "epoch": 0.9059584439390926, + "grad_norm": 24.7560036220183, + "learning_rate": 3.4415504320543067e-06, + "loss": 0.29883270263671874, + "step": 104775 + }, + { + "epoch": 0.9060016774606359, + "grad_norm": 3.870874078754747, + "learning_rate": 3.4413489166814854e-06, + "loss": 0.12839622497558595, + "step": 104780 + }, + { + "epoch": 0.9060449109821791, + "grad_norm": 0.6839470582843388, + "learning_rate": 3.4411473992732012e-06, + "loss": 0.19197120666503906, + "step": 104785 + }, + { + "epoch": 0.9060881445037224, + "grad_norm": 0.9597487115385818, + "learning_rate": 3.440945879830384e-06, + "loss": 0.20811920166015624, + "step": 104790 + }, + { + "epoch": 0.9061313780252657, + "grad_norm": 18.054388833350636, + "learning_rate": 3.440744358353965e-06, + "loss": 0.1541473388671875, + "step": 104795 + }, + { + "epoch": 0.9061746115468089, + "grad_norm": 1.0752290899309034, + "learning_rate": 3.4405428348448714e-06, + "loss": 0.2964679718017578, + "step": 104800 + }, + { + "epoch": 0.9062178450683522, + "grad_norm": 5.786043748201559, + "learning_rate": 3.4403413093040325e-06, + "loss": 0.13329544067382812, + "step": 104805 + }, + { + "epoch": 0.9062610785898955, + "grad_norm": 52.459456161757004, + "learning_rate": 3.4401397817323787e-06, + "loss": 0.3004669189453125, + "step": 104810 + }, + { + "epoch": 0.9063043121114387, + "grad_norm": 10.51813435098923, + "learning_rate": 3.4399382521308384e-06, + "loss": 0.057696533203125, + "step": 104815 + }, + { + "epoch": 0.906347545632982, + "grad_norm": 0.2764059405141139, + "learning_rate": 3.439736720500342e-06, + "loss": 0.033489227294921875, + "step": 104820 + }, + { + "epoch": 0.9063907791545253, + "grad_norm": 1.9440152673464104, + "learning_rate": 3.4395351868418197e-06, + "loss": 0.2530517578125, + "step": 104825 + }, + { + "epoch": 0.9064340126760685, + "grad_norm": 12.501323657168012, + "learning_rate": 3.4393336511562e-06, + "loss": 0.1110748291015625, + "step": 104830 + }, + { + "epoch": 0.9064772461976118, + "grad_norm": 15.642229311775226, + "learning_rate": 3.439132113444411e-06, + "loss": 0.12618179321289064, + "step": 104835 + }, + { + "epoch": 0.9065204797191551, + "grad_norm": 3.0608110180358445, + "learning_rate": 3.4389305737073844e-06, + "loss": 0.14139461517333984, + "step": 104840 + }, + { + "epoch": 0.9065637132406983, + "grad_norm": 17.567686887188245, + "learning_rate": 3.4387290319460483e-06, + "loss": 0.46901702880859375, + "step": 104845 + }, + { + "epoch": 0.9066069467622415, + "grad_norm": 0.9641976939870768, + "learning_rate": 3.4385274881613335e-06, + "loss": 0.2251556396484375, + "step": 104850 + }, + { + "epoch": 0.9066501802837849, + "grad_norm": 48.61310554770724, + "learning_rate": 3.438325942354168e-06, + "loss": 0.2699981689453125, + "step": 104855 + }, + { + "epoch": 0.9066934138053281, + "grad_norm": 6.287573759449751, + "learning_rate": 3.4381243945254822e-06, + "loss": 0.22280426025390626, + "step": 104860 + }, + { + "epoch": 0.9067366473268713, + "grad_norm": 4.916677316412489, + "learning_rate": 3.4379228446762046e-06, + "loss": 0.21364765167236327, + "step": 104865 + }, + { + "epoch": 0.9067798808484147, + "grad_norm": 0.6134316804491171, + "learning_rate": 3.437721292807266e-06, + "loss": 0.052190399169921874, + "step": 104870 + }, + { + "epoch": 0.9068231143699579, + "grad_norm": 15.72229081449664, + "learning_rate": 3.437519738919596e-06, + "loss": 0.2949371337890625, + "step": 104875 + }, + { + "epoch": 0.9068663478915011, + "grad_norm": 3.284499148401227, + "learning_rate": 3.4373181830141235e-06, + "loss": 0.03210601806640625, + "step": 104880 + }, + { + "epoch": 0.9069095814130445, + "grad_norm": 8.020428052083902, + "learning_rate": 3.437116625091778e-06, + "loss": 0.3363525390625, + "step": 104885 + }, + { + "epoch": 0.9069528149345877, + "grad_norm": 1.3704054561907983, + "learning_rate": 3.436915065153489e-06, + "loss": 0.13927459716796875, + "step": 104890 + }, + { + "epoch": 0.9069960484561309, + "grad_norm": 0.14210610061452061, + "learning_rate": 3.4367135032001865e-06, + "loss": 0.16347274780273438, + "step": 104895 + }, + { + "epoch": 0.9070392819776742, + "grad_norm": 22.329504152833795, + "learning_rate": 3.4365119392328005e-06, + "loss": 0.14951324462890625, + "step": 104900 + }, + { + "epoch": 0.9070825154992175, + "grad_norm": 2.3105464318942097, + "learning_rate": 3.436310373252259e-06, + "loss": 0.1821258544921875, + "step": 104905 + }, + { + "epoch": 0.9071257490207607, + "grad_norm": 18.887745993223096, + "learning_rate": 3.4361088052594937e-06, + "loss": 0.24576034545898437, + "step": 104910 + }, + { + "epoch": 0.907168982542304, + "grad_norm": 15.216490718921852, + "learning_rate": 3.435907235255432e-06, + "loss": 0.45731353759765625, + "step": 104915 + }, + { + "epoch": 0.9072122160638473, + "grad_norm": 43.462987300338575, + "learning_rate": 3.4357056632410057e-06, + "loss": 0.15295944213867188, + "step": 104920 + }, + { + "epoch": 0.9072554495853905, + "grad_norm": 19.072976199722493, + "learning_rate": 3.435504089217143e-06, + "loss": 0.12332611083984375, + "step": 104925 + }, + { + "epoch": 0.9072986831069338, + "grad_norm": 10.009022881940211, + "learning_rate": 3.435302513184773e-06, + "loss": 0.08905410766601562, + "step": 104930 + }, + { + "epoch": 0.9073419166284771, + "grad_norm": 1.3410181339398055, + "learning_rate": 3.435100935144828e-06, + "loss": 0.240252685546875, + "step": 104935 + }, + { + "epoch": 0.9073851501500203, + "grad_norm": 5.36153863459923, + "learning_rate": 3.434899355098235e-06, + "loss": 0.08445243835449219, + "step": 104940 + }, + { + "epoch": 0.9074283836715635, + "grad_norm": 0.1303075681094601, + "learning_rate": 3.434697773045925e-06, + "loss": 0.05196866989135742, + "step": 104945 + }, + { + "epoch": 0.9074716171931069, + "grad_norm": 34.69888680399642, + "learning_rate": 3.4344961889888263e-06, + "loss": 0.20124359130859376, + "step": 104950 + }, + { + "epoch": 0.9075148507146501, + "grad_norm": 7.677790401362035, + "learning_rate": 3.4342946029278695e-06, + "loss": 0.14776153564453126, + "step": 104955 + }, + { + "epoch": 0.9075580842361933, + "grad_norm": 9.371806213367835, + "learning_rate": 3.434093014863986e-06, + "loss": 0.08755340576171874, + "step": 104960 + }, + { + "epoch": 0.9076013177577367, + "grad_norm": 2.0111028750817668, + "learning_rate": 3.4338914247981025e-06, + "loss": 0.14471435546875, + "step": 104965 + }, + { + "epoch": 0.9076445512792799, + "grad_norm": 0.9965027189792273, + "learning_rate": 3.4336898327311506e-06, + "loss": 0.15671539306640625, + "step": 104970 + }, + { + "epoch": 0.9076877848008231, + "grad_norm": 18.149356187724973, + "learning_rate": 3.43348823866406e-06, + "loss": 0.7049720764160157, + "step": 104975 + }, + { + "epoch": 0.9077310183223665, + "grad_norm": 28.567303100649184, + "learning_rate": 3.4332866425977586e-06, + "loss": 0.2187713623046875, + "step": 104980 + }, + { + "epoch": 0.9077742518439097, + "grad_norm": 3.956800380902225, + "learning_rate": 3.433085044533178e-06, + "loss": 0.12683868408203125, + "step": 104985 + }, + { + "epoch": 0.9078174853654529, + "grad_norm": 0.4607574721178984, + "learning_rate": 3.432883444471248e-06, + "loss": 0.11170501708984375, + "step": 104990 + }, + { + "epoch": 0.9078607188869963, + "grad_norm": 4.185203476712933, + "learning_rate": 3.432681842412897e-06, + "loss": 0.22493362426757812, + "step": 104995 + }, + { + "epoch": 0.9079039524085395, + "grad_norm": 15.843538745469383, + "learning_rate": 3.4324802383590567e-06, + "loss": 0.47379817962646487, + "step": 105000 + }, + { + "epoch": 0.9079471859300827, + "grad_norm": 4.5439895142347595, + "learning_rate": 3.432278632310655e-06, + "loss": 0.1254608154296875, + "step": 105005 + }, + { + "epoch": 0.9079904194516261, + "grad_norm": 36.18571535705982, + "learning_rate": 3.4320770242686223e-06, + "loss": 0.1509246826171875, + "step": 105010 + }, + { + "epoch": 0.9080336529731693, + "grad_norm": 7.894850107839203, + "learning_rate": 3.4318754142338888e-06, + "loss": 0.096484375, + "step": 105015 + }, + { + "epoch": 0.9080768864947125, + "grad_norm": 0.15417933390818034, + "learning_rate": 3.4316738022073847e-06, + "loss": 0.058134841918945315, + "step": 105020 + }, + { + "epoch": 0.9081201200162557, + "grad_norm": 5.779491428170528, + "learning_rate": 3.431472188190038e-06, + "loss": 0.23214950561523437, + "step": 105025 + }, + { + "epoch": 0.9081633535377991, + "grad_norm": 10.183803199845762, + "learning_rate": 3.43127057218278e-06, + "loss": 0.12466201782226563, + "step": 105030 + }, + { + "epoch": 0.9082065870593423, + "grad_norm": 13.199149515099412, + "learning_rate": 3.431068954186541e-06, + "loss": 0.5957006454467774, + "step": 105035 + }, + { + "epoch": 0.9082498205808855, + "grad_norm": 1.1536952301586982, + "learning_rate": 3.4308673342022494e-06, + "loss": 0.19283828735351563, + "step": 105040 + }, + { + "epoch": 0.9082930541024289, + "grad_norm": 6.151821390531237, + "learning_rate": 3.4306657122308353e-06, + "loss": 0.09636650085449219, + "step": 105045 + }, + { + "epoch": 0.9083362876239721, + "grad_norm": 0.10108260030502188, + "learning_rate": 3.4304640882732297e-06, + "loss": 0.05543594360351563, + "step": 105050 + }, + { + "epoch": 0.9083795211455153, + "grad_norm": 8.183506908008837, + "learning_rate": 3.430262462330362e-06, + "loss": 0.06621551513671875, + "step": 105055 + }, + { + "epoch": 0.9084227546670587, + "grad_norm": 23.290690522468935, + "learning_rate": 3.430060834403161e-06, + "loss": 0.1333221435546875, + "step": 105060 + }, + { + "epoch": 0.9084659881886019, + "grad_norm": 7.806795292397929, + "learning_rate": 3.429859204492558e-06, + "loss": 0.08345947265625, + "step": 105065 + }, + { + "epoch": 0.9085092217101451, + "grad_norm": 6.0112570658625675, + "learning_rate": 3.429657572599482e-06, + "loss": 0.073175048828125, + "step": 105070 + }, + { + "epoch": 0.9085524552316885, + "grad_norm": 30.19689471855563, + "learning_rate": 3.4294559387248637e-06, + "loss": 0.14581642150878907, + "step": 105075 + }, + { + "epoch": 0.9085956887532317, + "grad_norm": 5.380386706474991, + "learning_rate": 3.429254302869632e-06, + "loss": 0.1125732421875, + "step": 105080 + }, + { + "epoch": 0.9086389222747749, + "grad_norm": 9.009142231356797, + "learning_rate": 3.4290526650347182e-06, + "loss": 0.18890113830566407, + "step": 105085 + }, + { + "epoch": 0.9086821557963183, + "grad_norm": 0.3782444337209358, + "learning_rate": 3.4288510252210506e-06, + "loss": 0.028456878662109376, + "step": 105090 + }, + { + "epoch": 0.9087253893178615, + "grad_norm": 10.568872719244165, + "learning_rate": 3.42864938342956e-06, + "loss": 0.23539352416992188, + "step": 105095 + }, + { + "epoch": 0.9087686228394047, + "grad_norm": 21.124608953420655, + "learning_rate": 3.428447739661177e-06, + "loss": 0.12888565063476562, + "step": 105100 + }, + { + "epoch": 0.9088118563609481, + "grad_norm": 19.677845932533348, + "learning_rate": 3.4282460939168296e-06, + "loss": 0.11546630859375, + "step": 105105 + }, + { + "epoch": 0.9088550898824913, + "grad_norm": 0.48043661689631745, + "learning_rate": 3.4280444461974507e-06, + "loss": 0.07560272216796875, + "step": 105110 + }, + { + "epoch": 0.9088983234040345, + "grad_norm": 0.4362381020091963, + "learning_rate": 3.4278427965039684e-06, + "loss": 0.11307220458984375, + "step": 105115 + }, + { + "epoch": 0.9089415569255778, + "grad_norm": 30.470393352659148, + "learning_rate": 3.4276411448373113e-06, + "loss": 0.075042724609375, + "step": 105120 + }, + { + "epoch": 0.9089847904471211, + "grad_norm": 8.352651419869582, + "learning_rate": 3.427439491198412e-06, + "loss": 0.1949798583984375, + "step": 105125 + }, + { + "epoch": 0.9090280239686643, + "grad_norm": 1.145554749667822, + "learning_rate": 3.4272378355882e-06, + "loss": 0.008653640747070312, + "step": 105130 + }, + { + "epoch": 0.9090712574902076, + "grad_norm": 54.854069455503854, + "learning_rate": 3.427036178007605e-06, + "loss": 0.4649017333984375, + "step": 105135 + }, + { + "epoch": 0.9091144910117509, + "grad_norm": 19.40974569632974, + "learning_rate": 3.426834518457556e-06, + "loss": 0.177838134765625, + "step": 105140 + }, + { + "epoch": 0.9091577245332941, + "grad_norm": 26.968005592036835, + "learning_rate": 3.426632856938985e-06, + "loss": 0.2188018798828125, + "step": 105145 + }, + { + "epoch": 0.9092009580548374, + "grad_norm": 6.048252580624694, + "learning_rate": 3.42643119345282e-06, + "loss": 0.17626724243164063, + "step": 105150 + }, + { + "epoch": 0.9092441915763807, + "grad_norm": 16.352843315974127, + "learning_rate": 3.4262295279999914e-06, + "loss": 0.23447113037109374, + "step": 105155 + }, + { + "epoch": 0.9092874250979239, + "grad_norm": 14.839528606225135, + "learning_rate": 3.426027860581431e-06, + "loss": 0.27647247314453127, + "step": 105160 + }, + { + "epoch": 0.9093306586194672, + "grad_norm": 28.588246093589394, + "learning_rate": 3.425826191198068e-06, + "loss": 0.14067459106445312, + "step": 105165 + }, + { + "epoch": 0.9093738921410105, + "grad_norm": 10.286801830993014, + "learning_rate": 3.4256245198508314e-06, + "loss": 0.38521728515625, + "step": 105170 + }, + { + "epoch": 0.9094171256625537, + "grad_norm": 0.037772423086561174, + "learning_rate": 3.425422846540653e-06, + "loss": 0.15859146118164064, + "step": 105175 + }, + { + "epoch": 0.909460359184097, + "grad_norm": 1.5555786505581284, + "learning_rate": 3.425221171268461e-06, + "loss": 0.10137901306152344, + "step": 105180 + }, + { + "epoch": 0.9095035927056403, + "grad_norm": 8.517487113930805, + "learning_rate": 3.4250194940351866e-06, + "loss": 0.207342529296875, + "step": 105185 + }, + { + "epoch": 0.9095468262271835, + "grad_norm": 8.377628619908764, + "learning_rate": 3.4248178148417604e-06, + "loss": 0.082879638671875, + "step": 105190 + }, + { + "epoch": 0.9095900597487268, + "grad_norm": 4.6904785391981125, + "learning_rate": 3.4246161336891127e-06, + "loss": 0.1101287841796875, + "step": 105195 + }, + { + "epoch": 0.90963329327027, + "grad_norm": 6.019291453372076, + "learning_rate": 3.424414450578172e-06, + "loss": 0.08108673095703126, + "step": 105200 + }, + { + "epoch": 0.9096765267918133, + "grad_norm": 23.914607784014567, + "learning_rate": 3.424212765509869e-06, + "loss": 0.25603408813476564, + "step": 105205 + }, + { + "epoch": 0.9097197603133566, + "grad_norm": 9.351303073324253, + "learning_rate": 3.4240110784851346e-06, + "loss": 0.35728912353515624, + "step": 105210 + }, + { + "epoch": 0.9097629938348998, + "grad_norm": 16.218214279411143, + "learning_rate": 3.4238093895048988e-06, + "loss": 0.1494354248046875, + "step": 105215 + }, + { + "epoch": 0.9098062273564431, + "grad_norm": 9.419608581102356, + "learning_rate": 3.4236076985700908e-06, + "loss": 0.4780845642089844, + "step": 105220 + }, + { + "epoch": 0.9098494608779863, + "grad_norm": 1.7120285318887563, + "learning_rate": 3.4234060056816428e-06, + "loss": 0.30965213775634765, + "step": 105225 + }, + { + "epoch": 0.9098926943995296, + "grad_norm": 6.5369379342908855, + "learning_rate": 3.4232043108404835e-06, + "loss": 0.170928955078125, + "step": 105230 + }, + { + "epoch": 0.9099359279210729, + "grad_norm": 14.071018070779763, + "learning_rate": 3.423002614047542e-06, + "loss": 0.16793212890625, + "step": 105235 + }, + { + "epoch": 0.9099791614426161, + "grad_norm": 0.22533409930179918, + "learning_rate": 3.4228009153037504e-06, + "loss": 0.3070281982421875, + "step": 105240 + }, + { + "epoch": 0.9100223949641594, + "grad_norm": 10.897348730554224, + "learning_rate": 3.422599214610039e-06, + "loss": 0.19766387939453126, + "step": 105245 + }, + { + "epoch": 0.9100656284857027, + "grad_norm": 7.395552565994681, + "learning_rate": 3.422397511967336e-06, + "loss": 0.07647247314453125, + "step": 105250 + }, + { + "epoch": 0.910108862007246, + "grad_norm": 2.7831945568656886, + "learning_rate": 3.4221958073765745e-06, + "loss": 0.19010982513427735, + "step": 105255 + }, + { + "epoch": 0.9101520955287892, + "grad_norm": 10.345348283051363, + "learning_rate": 3.4219941008386835e-06, + "loss": 0.679803466796875, + "step": 105260 + }, + { + "epoch": 0.9101953290503325, + "grad_norm": 28.449811051809395, + "learning_rate": 3.421792392354591e-06, + "loss": 0.10259628295898438, + "step": 105265 + }, + { + "epoch": 0.9102385625718757, + "grad_norm": 6.457496362614111, + "learning_rate": 3.4215906819252306e-06, + "loss": 0.0665283203125, + "step": 105270 + }, + { + "epoch": 0.910281796093419, + "grad_norm": 0.4929156961251263, + "learning_rate": 3.421388969551531e-06, + "loss": 0.13720207214355468, + "step": 105275 + }, + { + "epoch": 0.9103250296149623, + "grad_norm": 2.043476154867059, + "learning_rate": 3.4211872552344227e-06, + "loss": 0.06793403625488281, + "step": 105280 + }, + { + "epoch": 0.9103682631365055, + "grad_norm": 0.2358133487568337, + "learning_rate": 3.4209855389748365e-06, + "loss": 0.12038459777832031, + "step": 105285 + }, + { + "epoch": 0.9104114966580488, + "grad_norm": 3.3670085530966296, + "learning_rate": 3.420783820773701e-06, + "loss": 0.09789810180664063, + "step": 105290 + }, + { + "epoch": 0.910454730179592, + "grad_norm": 0.09694024596771783, + "learning_rate": 3.4205821006319486e-06, + "loss": 0.27141571044921875, + "step": 105295 + }, + { + "epoch": 0.9104979637011353, + "grad_norm": 10.342524570277956, + "learning_rate": 3.4203803785505084e-06, + "loss": 0.054071044921875, + "step": 105300 + }, + { + "epoch": 0.9105411972226786, + "grad_norm": 6.9282688918809425, + "learning_rate": 3.4201786545303114e-06, + "loss": 0.2546875, + "step": 105305 + }, + { + "epoch": 0.9105844307442218, + "grad_norm": 20.300209629200914, + "learning_rate": 3.419976928572288e-06, + "loss": 0.377276611328125, + "step": 105310 + }, + { + "epoch": 0.9106276642657651, + "grad_norm": 1.1835167622677192, + "learning_rate": 3.419775200677367e-06, + "loss": 0.11983261108398438, + "step": 105315 + }, + { + "epoch": 0.9106708977873084, + "grad_norm": 26.72874783229066, + "learning_rate": 3.4195734708464798e-06, + "loss": 0.42366790771484375, + "step": 105320 + }, + { + "epoch": 0.9107141313088516, + "grad_norm": 1.9040952014469046, + "learning_rate": 3.419371739080558e-06, + "loss": 0.03680896759033203, + "step": 105325 + }, + { + "epoch": 0.9107573648303949, + "grad_norm": 37.8194644739554, + "learning_rate": 3.4191700053805297e-06, + "loss": 0.3395416259765625, + "step": 105330 + }, + { + "epoch": 0.9108005983519382, + "grad_norm": 0.16915350652128308, + "learning_rate": 3.418968269747327e-06, + "loss": 0.08136138916015626, + "step": 105335 + }, + { + "epoch": 0.9108438318734814, + "grad_norm": 34.648072527618325, + "learning_rate": 3.41876653218188e-06, + "loss": 0.337359619140625, + "step": 105340 + }, + { + "epoch": 0.9108870653950247, + "grad_norm": 2.2276301510272165, + "learning_rate": 3.418564792685118e-06, + "loss": 0.107867431640625, + "step": 105345 + }, + { + "epoch": 0.910930298916568, + "grad_norm": 12.526666157965867, + "learning_rate": 3.418363051257972e-06, + "loss": 0.1657695770263672, + "step": 105350 + }, + { + "epoch": 0.9109735324381112, + "grad_norm": 3.611407453129624, + "learning_rate": 3.4181613079013736e-06, + "loss": 0.10118408203125, + "step": 105355 + }, + { + "epoch": 0.9110167659596545, + "grad_norm": 5.501185406559246, + "learning_rate": 3.4179595626162514e-06, + "loss": 0.07253446578979492, + "step": 105360 + }, + { + "epoch": 0.9110599994811978, + "grad_norm": 1.6353127967907977, + "learning_rate": 3.4177578154035368e-06, + "loss": 0.25460128784179686, + "step": 105365 + }, + { + "epoch": 0.911103233002741, + "grad_norm": 1.8690699194362455, + "learning_rate": 3.41755606626416e-06, + "loss": 0.196795654296875, + "step": 105370 + }, + { + "epoch": 0.9111464665242842, + "grad_norm": 2.509716558775389, + "learning_rate": 3.417354315199052e-06, + "loss": 0.132275390625, + "step": 105375 + }, + { + "epoch": 0.9111897000458276, + "grad_norm": 56.41322472569521, + "learning_rate": 3.4171525622091415e-06, + "loss": 0.3612548828125, + "step": 105380 + }, + { + "epoch": 0.9112329335673708, + "grad_norm": 0.7232996903800851, + "learning_rate": 3.4169508072953617e-06, + "loss": 0.12350921630859375, + "step": 105385 + }, + { + "epoch": 0.911276167088914, + "grad_norm": 5.640881649278038, + "learning_rate": 3.4167490504586418e-06, + "loss": 0.036480712890625, + "step": 105390 + }, + { + "epoch": 0.9113194006104574, + "grad_norm": 13.993740984752478, + "learning_rate": 3.4165472916999105e-06, + "loss": 0.06616096496582032, + "step": 105395 + }, + { + "epoch": 0.9113626341320006, + "grad_norm": 2.126369863067045, + "learning_rate": 3.4163455310201013e-06, + "loss": 0.3805229187011719, + "step": 105400 + }, + { + "epoch": 0.9114058676535438, + "grad_norm": 19.760430127955356, + "learning_rate": 3.4161437684201416e-06, + "loss": 0.3061168670654297, + "step": 105405 + }, + { + "epoch": 0.9114491011750872, + "grad_norm": 4.7868989271020075, + "learning_rate": 3.415942003900965e-06, + "loss": 0.067901611328125, + "step": 105410 + }, + { + "epoch": 0.9114923346966304, + "grad_norm": 18.976381178266042, + "learning_rate": 3.4157402374635006e-06, + "loss": 0.07011299133300782, + "step": 105415 + }, + { + "epoch": 0.9115355682181736, + "grad_norm": 1.6826160458072088, + "learning_rate": 3.415538469108679e-06, + "loss": 0.07848968505859374, + "step": 105420 + }, + { + "epoch": 0.911578801739717, + "grad_norm": 23.30174531512152, + "learning_rate": 3.4153366988374298e-06, + "loss": 0.31485443115234374, + "step": 105425 + }, + { + "epoch": 0.9116220352612602, + "grad_norm": 0.12226219725961283, + "learning_rate": 3.415134926650686e-06, + "loss": 0.262451171875, + "step": 105430 + }, + { + "epoch": 0.9116652687828034, + "grad_norm": 4.771217251679379, + "learning_rate": 3.4149331525493755e-06, + "loss": 0.05964508056640625, + "step": 105435 + }, + { + "epoch": 0.9117085023043467, + "grad_norm": 53.0822377678019, + "learning_rate": 3.4147313765344295e-06, + "loss": 0.23848419189453124, + "step": 105440 + }, + { + "epoch": 0.91175173582589, + "grad_norm": 25.762832340894473, + "learning_rate": 3.4145295986067798e-06, + "loss": 0.372607421875, + "step": 105445 + }, + { + "epoch": 0.9117949693474332, + "grad_norm": 13.37897827627763, + "learning_rate": 3.4143278187673568e-06, + "loss": 0.12548828125, + "step": 105450 + }, + { + "epoch": 0.9118382028689765, + "grad_norm": 6.100078824480115, + "learning_rate": 3.4141260370170893e-06, + "loss": 0.10978240966796875, + "step": 105455 + }, + { + "epoch": 0.9118814363905198, + "grad_norm": 12.176807572891903, + "learning_rate": 3.4139242533569094e-06, + "loss": 0.15255241394042968, + "step": 105460 + }, + { + "epoch": 0.911924669912063, + "grad_norm": 0.9019056463998824, + "learning_rate": 3.413722467787748e-06, + "loss": 0.2080596923828125, + "step": 105465 + }, + { + "epoch": 0.9119679034336062, + "grad_norm": 0.663498922925047, + "learning_rate": 3.413520680310535e-06, + "loss": 0.05290641784667969, + "step": 105470 + }, + { + "epoch": 0.9120111369551496, + "grad_norm": 2.3056923246627843, + "learning_rate": 3.4133188909262006e-06, + "loss": 0.6527420043945312, + "step": 105475 + }, + { + "epoch": 0.9120543704766928, + "grad_norm": 16.547534317600565, + "learning_rate": 3.4131170996356765e-06, + "loss": 0.13162078857421874, + "step": 105480 + }, + { + "epoch": 0.912097603998236, + "grad_norm": 0.5889143959875667, + "learning_rate": 3.412915306439892e-06, + "loss": 0.05446014404296875, + "step": 105485 + }, + { + "epoch": 0.9121408375197794, + "grad_norm": 55.97447155299211, + "learning_rate": 3.4127135113397796e-06, + "loss": 0.60966796875, + "step": 105490 + }, + { + "epoch": 0.9121840710413226, + "grad_norm": 1.640522551855013, + "learning_rate": 3.412511714336269e-06, + "loss": 0.16770973205566406, + "step": 105495 + }, + { + "epoch": 0.9122273045628658, + "grad_norm": 2.06548024664772, + "learning_rate": 3.4123099154302906e-06, + "loss": 0.2136260986328125, + "step": 105500 + }, + { + "epoch": 0.9122705380844092, + "grad_norm": 2.1108399428167477, + "learning_rate": 3.4121081146227745e-06, + "loss": 0.08660659790039063, + "step": 105505 + }, + { + "epoch": 0.9123137716059524, + "grad_norm": 1.516910468187372, + "learning_rate": 3.4119063119146537e-06, + "loss": 0.0885009765625, + "step": 105510 + }, + { + "epoch": 0.9123570051274956, + "grad_norm": 5.325697902618711, + "learning_rate": 3.4117045073068557e-06, + "loss": 0.1329803466796875, + "step": 105515 + }, + { + "epoch": 0.912400238649039, + "grad_norm": 8.038475974753148, + "learning_rate": 3.4115027008003135e-06, + "loss": 0.397613525390625, + "step": 105520 + }, + { + "epoch": 0.9124434721705822, + "grad_norm": 49.147151175614525, + "learning_rate": 3.411300892395958e-06, + "loss": 0.48489532470703123, + "step": 105525 + }, + { + "epoch": 0.9124867056921254, + "grad_norm": 3.953263772356033, + "learning_rate": 3.4110990820947183e-06, + "loss": 0.1781402587890625, + "step": 105530 + }, + { + "epoch": 0.9125299392136688, + "grad_norm": 5.7210266061804695, + "learning_rate": 3.4108972698975262e-06, + "loss": 0.2513092041015625, + "step": 105535 + }, + { + "epoch": 0.912573172735212, + "grad_norm": 7.035224808546646, + "learning_rate": 3.4106954558053123e-06, + "loss": 0.108587646484375, + "step": 105540 + }, + { + "epoch": 0.9126164062567552, + "grad_norm": 19.07537951286964, + "learning_rate": 3.410493639819007e-06, + "loss": 0.12242279052734376, + "step": 105545 + }, + { + "epoch": 0.9126596397782984, + "grad_norm": 17.772991916545784, + "learning_rate": 3.410291821939542e-06, + "loss": 0.2414865493774414, + "step": 105550 + }, + { + "epoch": 0.9127028732998418, + "grad_norm": 2.336478594157576, + "learning_rate": 3.410090002167847e-06, + "loss": 0.1771453857421875, + "step": 105555 + }, + { + "epoch": 0.912746106821385, + "grad_norm": 0.5587817881828301, + "learning_rate": 3.409888180504853e-06, + "loss": 0.23260498046875, + "step": 105560 + }, + { + "epoch": 0.9127893403429282, + "grad_norm": 2.441728304324736, + "learning_rate": 3.4096863569514905e-06, + "loss": 0.1061126708984375, + "step": 105565 + }, + { + "epoch": 0.9128325738644716, + "grad_norm": 4.982636992476136, + "learning_rate": 3.409484531508691e-06, + "loss": 0.24449310302734376, + "step": 105570 + }, + { + "epoch": 0.9128758073860148, + "grad_norm": 0.7568292207210007, + "learning_rate": 3.409282704177385e-06, + "loss": 0.012031745910644532, + "step": 105575 + }, + { + "epoch": 0.912919040907558, + "grad_norm": 0.6318988680998905, + "learning_rate": 3.4090808749585037e-06, + "loss": 0.15601272583007814, + "step": 105580 + }, + { + "epoch": 0.9129622744291014, + "grad_norm": 28.206958990561258, + "learning_rate": 3.4088790438529774e-06, + "loss": 0.13055572509765626, + "step": 105585 + }, + { + "epoch": 0.9130055079506446, + "grad_norm": 13.694640458280102, + "learning_rate": 3.4086772108617372e-06, + "loss": 0.12767810821533204, + "step": 105590 + }, + { + "epoch": 0.9130487414721878, + "grad_norm": 5.610886242189032, + "learning_rate": 3.4084753759857136e-06, + "loss": 0.0503204345703125, + "step": 105595 + }, + { + "epoch": 0.9130919749937312, + "grad_norm": 0.6351974186299906, + "learning_rate": 3.4082735392258375e-06, + "loss": 0.1312957763671875, + "step": 105600 + }, + { + "epoch": 0.9131352085152744, + "grad_norm": 6.225112748676752, + "learning_rate": 3.40807170058304e-06, + "loss": 0.025681686401367188, + "step": 105605 + }, + { + "epoch": 0.9131784420368176, + "grad_norm": 1.0379448640796372, + "learning_rate": 3.407869860058252e-06, + "loss": 0.13017807006835938, + "step": 105610 + }, + { + "epoch": 0.913221675558361, + "grad_norm": 21.96982254565921, + "learning_rate": 3.407668017652404e-06, + "loss": 0.4426750183105469, + "step": 105615 + }, + { + "epoch": 0.9132649090799042, + "grad_norm": 2.1203346202139195, + "learning_rate": 3.4074661733664277e-06, + "loss": 0.4654022216796875, + "step": 105620 + }, + { + "epoch": 0.9133081426014474, + "grad_norm": 40.88714814278635, + "learning_rate": 3.4072643272012536e-06, + "loss": 0.22188644409179686, + "step": 105625 + }, + { + "epoch": 0.9133513761229908, + "grad_norm": 1.536605113901793, + "learning_rate": 3.4070624791578115e-06, + "loss": 0.08362045288085937, + "step": 105630 + }, + { + "epoch": 0.913394609644534, + "grad_norm": 2.1434922374402654, + "learning_rate": 3.4068606292370326e-06, + "loss": 0.10768604278564453, + "step": 105635 + }, + { + "epoch": 0.9134378431660772, + "grad_norm": 6.529278273858602, + "learning_rate": 3.40665877743985e-06, + "loss": 0.184234619140625, + "step": 105640 + }, + { + "epoch": 0.9134810766876205, + "grad_norm": 1.4378872443107598, + "learning_rate": 3.4064569237671933e-06, + "loss": 0.2811851501464844, + "step": 105645 + }, + { + "epoch": 0.9135243102091638, + "grad_norm": 8.096833303095135, + "learning_rate": 3.4062550682199914e-06, + "loss": 0.10963935852050781, + "step": 105650 + }, + { + "epoch": 0.913567543730707, + "grad_norm": 15.622249614160365, + "learning_rate": 3.4060532107991783e-06, + "loss": 0.3155967712402344, + "step": 105655 + }, + { + "epoch": 0.9136107772522503, + "grad_norm": 40.56953821368832, + "learning_rate": 3.405851351505683e-06, + "loss": 0.3200469970703125, + "step": 105660 + }, + { + "epoch": 0.9136540107737936, + "grad_norm": 5.121018987205918, + "learning_rate": 3.405649490340437e-06, + "loss": 0.08448333740234375, + "step": 105665 + }, + { + "epoch": 0.9136972442953368, + "grad_norm": 26.21835714118052, + "learning_rate": 3.4054476273043725e-06, + "loss": 0.19437332153320314, + "step": 105670 + }, + { + "epoch": 0.9137404778168801, + "grad_norm": 1.6356923493600024, + "learning_rate": 3.4052457623984195e-06, + "loss": 0.10609283447265624, + "step": 105675 + }, + { + "epoch": 0.9137837113384234, + "grad_norm": 15.652835799067253, + "learning_rate": 3.4050438956235074e-06, + "loss": 0.12231111526489258, + "step": 105680 + }, + { + "epoch": 0.9138269448599666, + "grad_norm": 38.53860835118127, + "learning_rate": 3.404842026980569e-06, + "loss": 0.19092903137207032, + "step": 105685 + }, + { + "epoch": 0.9138701783815099, + "grad_norm": 2.5505665016920225, + "learning_rate": 3.4046401564705357e-06, + "loss": 0.143438720703125, + "step": 105690 + }, + { + "epoch": 0.9139134119030532, + "grad_norm": 2.81143302602765, + "learning_rate": 3.404438284094337e-06, + "loss": 0.2658718109130859, + "step": 105695 + }, + { + "epoch": 0.9139566454245964, + "grad_norm": 25.405670358213076, + "learning_rate": 3.4042364098529056e-06, + "loss": 0.14552001953125, + "step": 105700 + }, + { + "epoch": 0.9139998789461397, + "grad_norm": 6.929367703988879, + "learning_rate": 3.404034533747171e-06, + "loss": 0.08223419189453125, + "step": 105705 + }, + { + "epoch": 0.914043112467683, + "grad_norm": 1.5695110902674143, + "learning_rate": 3.4038326557780645e-06, + "loss": 0.030419921875, + "step": 105710 + }, + { + "epoch": 0.9140863459892262, + "grad_norm": 16.323580250821177, + "learning_rate": 3.4036307759465177e-06, + "loss": 0.3905914306640625, + "step": 105715 + }, + { + "epoch": 0.9141295795107695, + "grad_norm": 54.292052055433416, + "learning_rate": 3.4034288942534614e-06, + "loss": 0.17145614624023436, + "step": 105720 + }, + { + "epoch": 0.9141728130323127, + "grad_norm": 0.12090477959902435, + "learning_rate": 3.4032270106998273e-06, + "loss": 0.08408126831054688, + "step": 105725 + }, + { + "epoch": 0.914216046553856, + "grad_norm": 0.6785138015052304, + "learning_rate": 3.403025125286545e-06, + "loss": 0.22098770141601562, + "step": 105730 + }, + { + "epoch": 0.9142592800753992, + "grad_norm": 10.805164013432655, + "learning_rate": 3.402823238014547e-06, + "loss": 0.05694160461425781, + "step": 105735 + }, + { + "epoch": 0.9143025135969425, + "grad_norm": 3.4364228872745124, + "learning_rate": 3.402621348884763e-06, + "loss": 0.061630630493164064, + "step": 105740 + }, + { + "epoch": 0.9143457471184858, + "grad_norm": 6.48055699165536, + "learning_rate": 3.402419457898126e-06, + "loss": 0.2620674133300781, + "step": 105745 + }, + { + "epoch": 0.914388980640029, + "grad_norm": 3.674498930332634, + "learning_rate": 3.4022175650555652e-06, + "loss": 0.0837677001953125, + "step": 105750 + }, + { + "epoch": 0.9144322141615723, + "grad_norm": 7.538521206817681, + "learning_rate": 3.402015670358014e-06, + "loss": 0.257672119140625, + "step": 105755 + }, + { + "epoch": 0.9144754476831156, + "grad_norm": 3.313047120580138, + "learning_rate": 3.4018137738064e-06, + "loss": 0.010843276977539062, + "step": 105760 + }, + { + "epoch": 0.9145186812046588, + "grad_norm": 0.06540323364564574, + "learning_rate": 3.4016118754016577e-06, + "loss": 0.07532081604003907, + "step": 105765 + }, + { + "epoch": 0.9145619147262021, + "grad_norm": 16.94536890405463, + "learning_rate": 3.401409975144716e-06, + "loss": 0.12950592041015624, + "step": 105770 + }, + { + "epoch": 0.9146051482477454, + "grad_norm": 4.919160311615514, + "learning_rate": 3.4012080730365072e-06, + "loss": 0.11902618408203125, + "step": 105775 + }, + { + "epoch": 0.9146483817692886, + "grad_norm": 0.7340592260778943, + "learning_rate": 3.401006169077963e-06, + "loss": 0.05404510498046875, + "step": 105780 + }, + { + "epoch": 0.9146916152908319, + "grad_norm": 14.898310297639378, + "learning_rate": 3.400804263270014e-06, + "loss": 0.091815185546875, + "step": 105785 + }, + { + "epoch": 0.9147348488123752, + "grad_norm": 0.6668296710238234, + "learning_rate": 3.4006023556135893e-06, + "loss": 0.0867279052734375, + "step": 105790 + }, + { + "epoch": 0.9147780823339184, + "grad_norm": 6.765782484033862, + "learning_rate": 3.4004004461096237e-06, + "loss": 0.0381103515625, + "step": 105795 + }, + { + "epoch": 0.9148213158554617, + "grad_norm": 7.169533832892295, + "learning_rate": 3.400198534759046e-06, + "loss": 0.199737548828125, + "step": 105800 + }, + { + "epoch": 0.914864549377005, + "grad_norm": 0.8046174107577402, + "learning_rate": 3.3999966215627873e-06, + "loss": 0.07628669738769531, + "step": 105805 + }, + { + "epoch": 0.9149077828985482, + "grad_norm": 52.388556231274656, + "learning_rate": 3.399794706521781e-06, + "loss": 0.244451904296875, + "step": 105810 + }, + { + "epoch": 0.9149510164200915, + "grad_norm": 19.258272094140455, + "learning_rate": 3.399592789636956e-06, + "loss": 0.13884315490722657, + "step": 105815 + }, + { + "epoch": 0.9149942499416347, + "grad_norm": 9.374972205240113, + "learning_rate": 3.3993908709092447e-06, + "loss": 0.1354736328125, + "step": 105820 + }, + { + "epoch": 0.915037483463178, + "grad_norm": 5.61533438024466, + "learning_rate": 3.3991889503395776e-06, + "loss": 0.191644287109375, + "step": 105825 + }, + { + "epoch": 0.9150807169847213, + "grad_norm": 1.8000871550897781, + "learning_rate": 3.398987027928886e-06, + "loss": 0.08214111328125, + "step": 105830 + }, + { + "epoch": 0.9151239505062645, + "grad_norm": 0.1586329361357811, + "learning_rate": 3.3987851036781026e-06, + "loss": 0.07913379669189453, + "step": 105835 + }, + { + "epoch": 0.9151671840278078, + "grad_norm": 17.592685323831947, + "learning_rate": 3.3985831775881566e-06, + "loss": 0.09443359375, + "step": 105840 + }, + { + "epoch": 0.9152104175493511, + "grad_norm": 23.742317755267905, + "learning_rate": 3.398381249659981e-06, + "loss": 0.13983001708984374, + "step": 105845 + }, + { + "epoch": 0.9152536510708943, + "grad_norm": 11.14912657160473, + "learning_rate": 3.3981793198945053e-06, + "loss": 0.21781768798828124, + "step": 105850 + }, + { + "epoch": 0.9152968845924376, + "grad_norm": 5.145905674064877, + "learning_rate": 3.397977388292662e-06, + "loss": 0.1590810775756836, + "step": 105855 + }, + { + "epoch": 0.9153401181139809, + "grad_norm": 1.5308696239347308, + "learning_rate": 3.3977754548553826e-06, + "loss": 0.13677520751953126, + "step": 105860 + }, + { + "epoch": 0.9153833516355241, + "grad_norm": 7.624970689127544, + "learning_rate": 3.397573519583598e-06, + "loss": 0.15055389404296876, + "step": 105865 + }, + { + "epoch": 0.9154265851570674, + "grad_norm": 6.94082182503984, + "learning_rate": 3.3973715824782394e-06, + "loss": 0.04661407470703125, + "step": 105870 + }, + { + "epoch": 0.9154698186786107, + "grad_norm": 5.307948741961355, + "learning_rate": 3.3971696435402376e-06, + "loss": 0.13646621704101564, + "step": 105875 + }, + { + "epoch": 0.9155130522001539, + "grad_norm": 0.8565605656209262, + "learning_rate": 3.3969677027705253e-06, + "loss": 0.10251083374023437, + "step": 105880 + }, + { + "epoch": 0.9155562857216972, + "grad_norm": 12.012751085166785, + "learning_rate": 3.396765760170032e-06, + "loss": 0.1697967529296875, + "step": 105885 + }, + { + "epoch": 0.9155995192432405, + "grad_norm": 12.53367967693737, + "learning_rate": 3.396563815739691e-06, + "loss": 0.175811767578125, + "step": 105890 + }, + { + "epoch": 0.9156427527647837, + "grad_norm": 6.955916816075134, + "learning_rate": 3.3963618694804327e-06, + "loss": 0.06670265197753907, + "step": 105895 + }, + { + "epoch": 0.9156859862863269, + "grad_norm": 7.087950827195141, + "learning_rate": 3.3961599213931884e-06, + "loss": 0.08565387725830079, + "step": 105900 + }, + { + "epoch": 0.9157292198078703, + "grad_norm": 9.881852236902795, + "learning_rate": 3.395957971478889e-06, + "loss": 0.2639984130859375, + "step": 105905 + }, + { + "epoch": 0.9157724533294135, + "grad_norm": 5.113621542969155, + "learning_rate": 3.3957560197384666e-06, + "loss": 0.30358352661132815, + "step": 105910 + }, + { + "epoch": 0.9158156868509567, + "grad_norm": 26.576817766654294, + "learning_rate": 3.3955540661728525e-06, + "loss": 0.186602783203125, + "step": 105915 + }, + { + "epoch": 0.9158589203725, + "grad_norm": 3.0228399012020533, + "learning_rate": 3.395352110782978e-06, + "loss": 0.06550922393798828, + "step": 105920 + }, + { + "epoch": 0.9159021538940433, + "grad_norm": 6.311507066720635, + "learning_rate": 3.3951501535697745e-06, + "loss": 0.17220458984375, + "step": 105925 + }, + { + "epoch": 0.9159453874155865, + "grad_norm": 13.592957187585279, + "learning_rate": 3.394948194534174e-06, + "loss": 0.21042461395263673, + "step": 105930 + }, + { + "epoch": 0.9159886209371298, + "grad_norm": 38.10070820143708, + "learning_rate": 3.394746233677107e-06, + "loss": 0.30018310546875, + "step": 105935 + }, + { + "epoch": 0.9160318544586731, + "grad_norm": 0.49851102081939636, + "learning_rate": 3.394544270999504e-06, + "loss": 0.1517852783203125, + "step": 105940 + }, + { + "epoch": 0.9160750879802163, + "grad_norm": 11.789078950041336, + "learning_rate": 3.3943423065022992e-06, + "loss": 0.34845542907714844, + "step": 105945 + }, + { + "epoch": 0.9161183215017596, + "grad_norm": 29.847734717901464, + "learning_rate": 3.394140340186422e-06, + "loss": 0.1431488037109375, + "step": 105950 + }, + { + "epoch": 0.9161615550233029, + "grad_norm": 1.1161469346876935, + "learning_rate": 3.3939383720528045e-06, + "loss": 0.09831562042236328, + "step": 105955 + }, + { + "epoch": 0.9162047885448461, + "grad_norm": 0.166771861483655, + "learning_rate": 3.3937364021023783e-06, + "loss": 0.06774673461914063, + "step": 105960 + }, + { + "epoch": 0.9162480220663894, + "grad_norm": 0.10088152733010476, + "learning_rate": 3.3935344303360734e-06, + "loss": 0.2623687744140625, + "step": 105965 + }, + { + "epoch": 0.9162912555879327, + "grad_norm": 14.790753467040815, + "learning_rate": 3.393332456754824e-06, + "loss": 0.06979141235351563, + "step": 105970 + }, + { + "epoch": 0.9163344891094759, + "grad_norm": 38.56591275760824, + "learning_rate": 3.3931304813595592e-06, + "loss": 0.550628662109375, + "step": 105975 + }, + { + "epoch": 0.9163777226310192, + "grad_norm": 1.4741775072929988, + "learning_rate": 3.3929285041512113e-06, + "loss": 0.16771621704101564, + "step": 105980 + }, + { + "epoch": 0.9164209561525625, + "grad_norm": 2.2774548059729116, + "learning_rate": 3.3927265251307126e-06, + "loss": 0.2570518493652344, + "step": 105985 + }, + { + "epoch": 0.9164641896741057, + "grad_norm": 52.04207215475518, + "learning_rate": 3.3925245442989935e-06, + "loss": 0.1923166275024414, + "step": 105990 + }, + { + "epoch": 0.9165074231956489, + "grad_norm": 6.264100109415104, + "learning_rate": 3.3923225616569855e-06, + "loss": 0.09266357421875, + "step": 105995 + }, + { + "epoch": 0.9165506567171923, + "grad_norm": 10.841532635691191, + "learning_rate": 3.3921205772056203e-06, + "loss": 0.1289642333984375, + "step": 106000 + }, + { + "epoch": 0.9165938902387355, + "grad_norm": 1.4569238405267726, + "learning_rate": 3.391918590945831e-06, + "loss": 0.06827774047851562, + "step": 106005 + }, + { + "epoch": 0.9166371237602787, + "grad_norm": 1.0493014692968652, + "learning_rate": 3.3917166028785468e-06, + "loss": 0.042032623291015626, + "step": 106010 + }, + { + "epoch": 0.9166803572818221, + "grad_norm": 5.8543873576519605, + "learning_rate": 3.3915146130046998e-06, + "loss": 0.05904998779296875, + "step": 106015 + }, + { + "epoch": 0.9167235908033653, + "grad_norm": 5.7634239455570535, + "learning_rate": 3.3913126213252234e-06, + "loss": 0.15178070068359376, + "step": 106020 + }, + { + "epoch": 0.9167668243249085, + "grad_norm": 0.29931484525377716, + "learning_rate": 3.3911106278410463e-06, + "loss": 0.07260475158691407, + "step": 106025 + }, + { + "epoch": 0.9168100578464519, + "grad_norm": 15.19458675593947, + "learning_rate": 3.390908632553102e-06, + "loss": 0.23577880859375, + "step": 106030 + }, + { + "epoch": 0.9168532913679951, + "grad_norm": 16.27173944923791, + "learning_rate": 3.390706635462322e-06, + "loss": 0.2693115234375, + "step": 106035 + }, + { + "epoch": 0.9168965248895383, + "grad_norm": 4.889073213464894, + "learning_rate": 3.390504636569638e-06, + "loss": 0.2100830078125, + "step": 106040 + }, + { + "epoch": 0.9169397584110817, + "grad_norm": 8.328304816208426, + "learning_rate": 3.3903026358759798e-06, + "loss": 0.3207870006561279, + "step": 106045 + }, + { + "epoch": 0.9169829919326249, + "grad_norm": 0.2529776715270526, + "learning_rate": 3.3901006333822817e-06, + "loss": 0.12840728759765624, + "step": 106050 + }, + { + "epoch": 0.9170262254541681, + "grad_norm": 1.546689873641719, + "learning_rate": 3.389898629089473e-06, + "loss": 0.11267433166503907, + "step": 106055 + }, + { + "epoch": 0.9170694589757115, + "grad_norm": 11.533741003558145, + "learning_rate": 3.389696622998486e-06, + "loss": 0.20989341735839845, + "step": 106060 + }, + { + "epoch": 0.9171126924972547, + "grad_norm": 15.018619669991894, + "learning_rate": 3.389494615110253e-06, + "loss": 0.12695579528808593, + "step": 106065 + }, + { + "epoch": 0.9171559260187979, + "grad_norm": 0.8148647571267644, + "learning_rate": 3.389292605425706e-06, + "loss": 0.10212783813476563, + "step": 106070 + }, + { + "epoch": 0.9171991595403411, + "grad_norm": 1.5739191001282757, + "learning_rate": 3.3890905939457758e-06, + "loss": 0.05042724609375, + "step": 106075 + }, + { + "epoch": 0.9172423930618845, + "grad_norm": 1.676469268133225, + "learning_rate": 3.388888580671393e-06, + "loss": 0.04675369262695313, + "step": 106080 + }, + { + "epoch": 0.9172856265834277, + "grad_norm": 0.6888044165717725, + "learning_rate": 3.3886865656034908e-06, + "loss": 0.16668777465820311, + "step": 106085 + }, + { + "epoch": 0.917328860104971, + "grad_norm": 3.8140516067758026, + "learning_rate": 3.3884845487430013e-06, + "loss": 0.23295135498046876, + "step": 106090 + }, + { + "epoch": 0.9173720936265143, + "grad_norm": 4.814582657824117, + "learning_rate": 3.3882825300908546e-06, + "loss": 0.19855270385742188, + "step": 106095 + }, + { + "epoch": 0.9174153271480575, + "grad_norm": 5.390254930043287, + "learning_rate": 3.3880805096479837e-06, + "loss": 0.029378890991210938, + "step": 106100 + }, + { + "epoch": 0.9174585606696007, + "grad_norm": 7.157679938451265, + "learning_rate": 3.3878784874153195e-06, + "loss": 0.0646392822265625, + "step": 106105 + }, + { + "epoch": 0.9175017941911441, + "grad_norm": 16.256993038080296, + "learning_rate": 3.3876764633937933e-06, + "loss": 0.076397705078125, + "step": 106110 + }, + { + "epoch": 0.9175450277126873, + "grad_norm": 2.9954230836237854, + "learning_rate": 3.3874744375843385e-06, + "loss": 0.34357461929321287, + "step": 106115 + }, + { + "epoch": 0.9175882612342305, + "grad_norm": 98.47086448222039, + "learning_rate": 3.387272409987886e-06, + "loss": 0.1233438491821289, + "step": 106120 + }, + { + "epoch": 0.9176314947557739, + "grad_norm": 12.284075536593825, + "learning_rate": 3.3870703806053663e-06, + "loss": 0.161090087890625, + "step": 106125 + }, + { + "epoch": 0.9176747282773171, + "grad_norm": 9.574650230729068, + "learning_rate": 3.386868349437713e-06, + "loss": 0.45557861328125, + "step": 106130 + }, + { + "epoch": 0.9177179617988603, + "grad_norm": 0.9905268876036777, + "learning_rate": 3.3866663164858567e-06, + "loss": 0.008509063720703125, + "step": 106135 + }, + { + "epoch": 0.9177611953204037, + "grad_norm": 20.253347711230738, + "learning_rate": 3.386464281750729e-06, + "loss": 0.11795806884765625, + "step": 106140 + }, + { + "epoch": 0.9178044288419469, + "grad_norm": 12.334167798319983, + "learning_rate": 3.386262245233264e-06, + "loss": 0.19301414489746094, + "step": 106145 + }, + { + "epoch": 0.9178476623634901, + "grad_norm": 0.7705068456388122, + "learning_rate": 3.3860602069343903e-06, + "loss": 0.0778879165649414, + "step": 106150 + }, + { + "epoch": 0.9178908958850334, + "grad_norm": 5.559889937021929, + "learning_rate": 3.3858581668550414e-06, + "loss": 0.1044647216796875, + "step": 106155 + }, + { + "epoch": 0.9179341294065767, + "grad_norm": 12.988564298037252, + "learning_rate": 3.385656124996149e-06, + "loss": 0.09178924560546875, + "step": 106160 + }, + { + "epoch": 0.9179773629281199, + "grad_norm": 2.6450924021705475, + "learning_rate": 3.3854540813586437e-06, + "loss": 0.08992919921875, + "step": 106165 + }, + { + "epoch": 0.9180205964496632, + "grad_norm": 15.823228552671932, + "learning_rate": 3.385252035943459e-06, + "loss": 0.056623077392578124, + "step": 106170 + }, + { + "epoch": 0.9180638299712065, + "grad_norm": 47.801143061206055, + "learning_rate": 3.385049988751525e-06, + "loss": 0.2664939880371094, + "step": 106175 + }, + { + "epoch": 0.9181070634927497, + "grad_norm": 3.4006986128481573, + "learning_rate": 3.384847939783776e-06, + "loss": 0.136029052734375, + "step": 106180 + }, + { + "epoch": 0.918150297014293, + "grad_norm": 3.0073459799376243, + "learning_rate": 3.3846458890411413e-06, + "loss": 0.10673236846923828, + "step": 106185 + }, + { + "epoch": 0.9181935305358363, + "grad_norm": 3.5111963008160805, + "learning_rate": 3.384443836524554e-06, + "loss": 0.056461334228515625, + "step": 106190 + }, + { + "epoch": 0.9182367640573795, + "grad_norm": 1.018986029172459, + "learning_rate": 3.384241782234945e-06, + "loss": 0.03379669189453125, + "step": 106195 + }, + { + "epoch": 0.9182799975789228, + "grad_norm": 19.983909066092156, + "learning_rate": 3.384039726173248e-06, + "loss": 0.14506988525390624, + "step": 106200 + }, + { + "epoch": 0.9183232311004661, + "grad_norm": 2.9058254997651107, + "learning_rate": 3.383837668340393e-06, + "loss": 0.39206771850585936, + "step": 106205 + }, + { + "epoch": 0.9183664646220093, + "grad_norm": 1.406563775368662, + "learning_rate": 3.3836356087373126e-06, + "loss": 0.029215240478515626, + "step": 106210 + }, + { + "epoch": 0.9184096981435526, + "grad_norm": 3.155318583776259, + "learning_rate": 3.38343354736494e-06, + "loss": 0.17525177001953124, + "step": 106215 + }, + { + "epoch": 0.9184529316650959, + "grad_norm": 6.39901402315111, + "learning_rate": 3.3832314842242036e-06, + "loss": 0.05419921875, + "step": 106220 + }, + { + "epoch": 0.9184961651866391, + "grad_norm": 0.74908523741808, + "learning_rate": 3.3830294193160383e-06, + "loss": 0.016218948364257812, + "step": 106225 + }, + { + "epoch": 0.9185393987081824, + "grad_norm": 31.43034268807685, + "learning_rate": 3.3828273526413756e-06, + "loss": 0.1904937744140625, + "step": 106230 + }, + { + "epoch": 0.9185826322297257, + "grad_norm": 0.7209853908064666, + "learning_rate": 3.382625284201147e-06, + "loss": 0.06602325439453124, + "step": 106235 + }, + { + "epoch": 0.9186258657512689, + "grad_norm": 1.000955821534805, + "learning_rate": 3.382423213996284e-06, + "loss": 0.2112579345703125, + "step": 106240 + }, + { + "epoch": 0.9186690992728122, + "grad_norm": 5.029190944270782, + "learning_rate": 3.382221142027719e-06, + "loss": 0.10139274597167969, + "step": 106245 + }, + { + "epoch": 0.9187123327943554, + "grad_norm": 34.496114172027255, + "learning_rate": 3.3820190682963837e-06, + "loss": 0.1413036346435547, + "step": 106250 + }, + { + "epoch": 0.9187555663158987, + "grad_norm": 2.5505830901658286, + "learning_rate": 3.3818169928032106e-06, + "loss": 0.060027694702148436, + "step": 106255 + }, + { + "epoch": 0.918798799837442, + "grad_norm": 8.535868062333005, + "learning_rate": 3.381614915549131e-06, + "loss": 0.05097980499267578, + "step": 106260 + }, + { + "epoch": 0.9188420333589852, + "grad_norm": 0.9872791900209116, + "learning_rate": 3.3814128365350774e-06, + "loss": 0.19600830078125, + "step": 106265 + }, + { + "epoch": 0.9188852668805285, + "grad_norm": 36.960808268117475, + "learning_rate": 3.381210755761981e-06, + "loss": 0.3394500732421875, + "step": 106270 + }, + { + "epoch": 0.9189285004020717, + "grad_norm": 7.448369138639257, + "learning_rate": 3.381008673230775e-06, + "loss": 0.09896240234375, + "step": 106275 + }, + { + "epoch": 0.918971733923615, + "grad_norm": 18.526090298497653, + "learning_rate": 3.3808065889423903e-06, + "loss": 0.16208133697509766, + "step": 106280 + }, + { + "epoch": 0.9190149674451583, + "grad_norm": 6.674785359151043, + "learning_rate": 3.3806045028977587e-06, + "loss": 0.09519424438476562, + "step": 106285 + }, + { + "epoch": 0.9190582009667015, + "grad_norm": 24.873954990014806, + "learning_rate": 3.380402415097814e-06, + "loss": 0.15899658203125, + "step": 106290 + }, + { + "epoch": 0.9191014344882448, + "grad_norm": 32.91146661873785, + "learning_rate": 3.3802003255434865e-06, + "loss": 0.19812088012695311, + "step": 106295 + }, + { + "epoch": 0.9191446680097881, + "grad_norm": 6.355507932707047, + "learning_rate": 3.379998234235708e-06, + "loss": 0.22122955322265625, + "step": 106300 + }, + { + "epoch": 0.9191879015313313, + "grad_norm": 31.718883401009062, + "learning_rate": 3.3797961411754117e-06, + "loss": 0.30628128051757814, + "step": 106305 + }, + { + "epoch": 0.9192311350528746, + "grad_norm": 0.7219270535152378, + "learning_rate": 3.379594046363529e-06, + "loss": 0.0447357177734375, + "step": 106310 + }, + { + "epoch": 0.9192743685744179, + "grad_norm": 33.19308573619236, + "learning_rate": 3.379391949800992e-06, + "loss": 0.1135894775390625, + "step": 106315 + }, + { + "epoch": 0.9193176020959611, + "grad_norm": 10.926850324362622, + "learning_rate": 3.3791898514887337e-06, + "loss": 0.3530719757080078, + "step": 106320 + }, + { + "epoch": 0.9193608356175044, + "grad_norm": 0.12739488221338285, + "learning_rate": 3.3789877514276846e-06, + "loss": 0.09336090087890625, + "step": 106325 + }, + { + "epoch": 0.9194040691390476, + "grad_norm": 0.6354080865496079, + "learning_rate": 3.3787856496187776e-06, + "loss": 0.2117828369140625, + "step": 106330 + }, + { + "epoch": 0.9194473026605909, + "grad_norm": 10.351282771615834, + "learning_rate": 3.3785835460629443e-06, + "loss": 0.08013992309570313, + "step": 106335 + }, + { + "epoch": 0.9194905361821342, + "grad_norm": 22.266464623461253, + "learning_rate": 3.3783814407611173e-06, + "loss": 0.20561599731445312, + "step": 106340 + }, + { + "epoch": 0.9195337697036774, + "grad_norm": 24.291280969243168, + "learning_rate": 3.378179333714229e-06, + "loss": 0.20460968017578124, + "step": 106345 + }, + { + "epoch": 0.9195770032252207, + "grad_norm": 13.655185314027555, + "learning_rate": 3.3779772249232105e-06, + "loss": 0.07589454650878906, + "step": 106350 + }, + { + "epoch": 0.919620236746764, + "grad_norm": 0.8225678094520907, + "learning_rate": 3.377775114388995e-06, + "loss": 0.222393798828125, + "step": 106355 + }, + { + "epoch": 0.9196634702683072, + "grad_norm": 2.228708604429135, + "learning_rate": 3.3775730021125137e-06, + "loss": 0.08650722503662109, + "step": 106360 + }, + { + "epoch": 0.9197067037898505, + "grad_norm": 0.5451348101362901, + "learning_rate": 3.3773708880946986e-06, + "loss": 0.05003814697265625, + "step": 106365 + }, + { + "epoch": 0.9197499373113938, + "grad_norm": 0.8752978768283931, + "learning_rate": 3.3771687723364837e-06, + "loss": 0.04582366943359375, + "step": 106370 + }, + { + "epoch": 0.919793170832937, + "grad_norm": 20.048557640728266, + "learning_rate": 3.3769666548387985e-06, + "loss": 0.164154052734375, + "step": 106375 + }, + { + "epoch": 0.9198364043544803, + "grad_norm": 0.9531675650692913, + "learning_rate": 3.3767645356025765e-06, + "loss": 0.028498458862304687, + "step": 106380 + }, + { + "epoch": 0.9198796378760236, + "grad_norm": 0.18050923662750024, + "learning_rate": 3.3765624146287507e-06, + "loss": 0.2723842620849609, + "step": 106385 + }, + { + "epoch": 0.9199228713975668, + "grad_norm": 1.7631811380438054, + "learning_rate": 3.376360291918251e-06, + "loss": 0.017425537109375, + "step": 106390 + }, + { + "epoch": 0.9199661049191101, + "grad_norm": 5.436557244396602, + "learning_rate": 3.376158167472011e-06, + "loss": 0.0817108154296875, + "step": 106395 + }, + { + "epoch": 0.9200093384406534, + "grad_norm": 48.00259443180208, + "learning_rate": 3.375956041290964e-06, + "loss": 0.10057525634765625, + "step": 106400 + }, + { + "epoch": 0.9200525719621966, + "grad_norm": 4.82021143211363, + "learning_rate": 3.3757539133760402e-06, + "loss": 0.272607421875, + "step": 106405 + }, + { + "epoch": 0.9200958054837399, + "grad_norm": 9.39207663473748, + "learning_rate": 3.3755517837281722e-06, + "loss": 0.1945209503173828, + "step": 106410 + }, + { + "epoch": 0.9201390390052832, + "grad_norm": 5.502904440083629, + "learning_rate": 3.375349652348294e-06, + "loss": 0.17153053283691405, + "step": 106415 + }, + { + "epoch": 0.9201822725268264, + "grad_norm": 0.2851842713714626, + "learning_rate": 3.3751475192373346e-06, + "loss": 0.3018951416015625, + "step": 106420 + }, + { + "epoch": 0.9202255060483696, + "grad_norm": 4.3709602532764915, + "learning_rate": 3.3749453843962284e-06, + "loss": 0.17541275024414063, + "step": 106425 + }, + { + "epoch": 0.920268739569913, + "grad_norm": 0.14141655646919432, + "learning_rate": 3.374743247825908e-06, + "loss": 0.018495941162109376, + "step": 106430 + }, + { + "epoch": 0.9203119730914562, + "grad_norm": 40.943518049119284, + "learning_rate": 3.3745411095273043e-06, + "loss": 0.5451629638671875, + "step": 106435 + }, + { + "epoch": 0.9203552066129994, + "grad_norm": 0.6509063313192744, + "learning_rate": 3.3743389695013506e-06, + "loss": 0.1542449951171875, + "step": 106440 + }, + { + "epoch": 0.9203984401345428, + "grad_norm": 0.5649379079945214, + "learning_rate": 3.3741368277489778e-06, + "loss": 0.019347381591796876, + "step": 106445 + }, + { + "epoch": 0.920441673656086, + "grad_norm": 4.833755318487319, + "learning_rate": 3.373934684271119e-06, + "loss": 0.08656539916992187, + "step": 106450 + }, + { + "epoch": 0.9204849071776292, + "grad_norm": 1.3331860888792506, + "learning_rate": 3.373732539068708e-06, + "loss": 0.0477020263671875, + "step": 106455 + }, + { + "epoch": 0.9205281406991725, + "grad_norm": 46.90680833241083, + "learning_rate": 3.3735303921426735e-06, + "loss": 0.6853965759277344, + "step": 106460 + }, + { + "epoch": 0.9205713742207158, + "grad_norm": 29.592922148878536, + "learning_rate": 3.373328243493951e-06, + "loss": 0.3127758026123047, + "step": 106465 + }, + { + "epoch": 0.920614607742259, + "grad_norm": 0.7709218728690064, + "learning_rate": 3.373126093123472e-06, + "loss": 0.015027618408203125, + "step": 106470 + }, + { + "epoch": 0.9206578412638023, + "grad_norm": 43.391403597025125, + "learning_rate": 3.3729239410321676e-06, + "loss": 0.33983097076416013, + "step": 106475 + }, + { + "epoch": 0.9207010747853456, + "grad_norm": 4.070944028691771, + "learning_rate": 3.372721787220971e-06, + "loss": 0.34038543701171875, + "step": 106480 + }, + { + "epoch": 0.9207443083068888, + "grad_norm": 16.878768136819993, + "learning_rate": 3.3725196316908147e-06, + "loss": 0.12689056396484374, + "step": 106485 + }, + { + "epoch": 0.9207875418284321, + "grad_norm": 19.313139265892957, + "learning_rate": 3.3723174744426306e-06, + "loss": 0.08548860549926758, + "step": 106490 + }, + { + "epoch": 0.9208307753499754, + "grad_norm": 74.0690307245934, + "learning_rate": 3.3721153154773514e-06, + "loss": 0.10216751098632812, + "step": 106495 + }, + { + "epoch": 0.9208740088715186, + "grad_norm": 1.1684866401689695, + "learning_rate": 3.3719131547959098e-06, + "loss": 0.187542724609375, + "step": 106500 + }, + { + "epoch": 0.9209172423930618, + "grad_norm": 6.1342224342623135, + "learning_rate": 3.3717109923992366e-06, + "loss": 0.29857940673828126, + "step": 106505 + }, + { + "epoch": 0.9209604759146052, + "grad_norm": 16.52807520973658, + "learning_rate": 3.371508828288265e-06, + "loss": 0.11440582275390625, + "step": 106510 + }, + { + "epoch": 0.9210037094361484, + "grad_norm": 4.290601626906495, + "learning_rate": 3.3713066624639285e-06, + "loss": 0.054864501953125, + "step": 106515 + }, + { + "epoch": 0.9210469429576916, + "grad_norm": 30.222391561507983, + "learning_rate": 3.3711044949271582e-06, + "loss": 0.11351585388183594, + "step": 106520 + }, + { + "epoch": 0.921090176479235, + "grad_norm": 9.059958126041195, + "learning_rate": 3.370902325678886e-06, + "loss": 0.16309051513671874, + "step": 106525 + }, + { + "epoch": 0.9211334100007782, + "grad_norm": 39.24835440860065, + "learning_rate": 3.3707001547200455e-06, + "loss": 0.199432373046875, + "step": 106530 + }, + { + "epoch": 0.9211766435223214, + "grad_norm": 0.3024167234237827, + "learning_rate": 3.370497982051569e-06, + "loss": 0.02482757568359375, + "step": 106535 + }, + { + "epoch": 0.9212198770438648, + "grad_norm": 24.720711163071016, + "learning_rate": 3.370295807674388e-06, + "loss": 0.27612991333007814, + "step": 106540 + }, + { + "epoch": 0.921263110565408, + "grad_norm": 23.17226948536282, + "learning_rate": 3.3700936315894363e-06, + "loss": 0.08492355346679688, + "step": 106545 + }, + { + "epoch": 0.9213063440869512, + "grad_norm": 13.425706524435558, + "learning_rate": 3.3698914537976454e-06, + "loss": 0.15981884002685548, + "step": 106550 + }, + { + "epoch": 0.9213495776084946, + "grad_norm": 11.62564790578213, + "learning_rate": 3.3696892742999466e-06, + "loss": 0.1448822021484375, + "step": 106555 + }, + { + "epoch": 0.9213928111300378, + "grad_norm": 12.21526578614958, + "learning_rate": 3.3694870930972743e-06, + "loss": 0.1349273681640625, + "step": 106560 + }, + { + "epoch": 0.921436044651581, + "grad_norm": 1.9565211187718485, + "learning_rate": 3.36928491019056e-06, + "loss": 0.04227581024169922, + "step": 106565 + }, + { + "epoch": 0.9214792781731244, + "grad_norm": 45.827105873205525, + "learning_rate": 3.3690827255807364e-06, + "loss": 0.43996734619140626, + "step": 106570 + }, + { + "epoch": 0.9215225116946676, + "grad_norm": 17.681065910791332, + "learning_rate": 3.3688805392687363e-06, + "loss": 0.034946441650390625, + "step": 106575 + }, + { + "epoch": 0.9215657452162108, + "grad_norm": 14.506679822576755, + "learning_rate": 3.3686783512554918e-06, + "loss": 0.17444305419921874, + "step": 106580 + }, + { + "epoch": 0.9216089787377542, + "grad_norm": 0.07522214321892383, + "learning_rate": 3.3684761615419345e-06, + "loss": 0.03621406555175781, + "step": 106585 + }, + { + "epoch": 0.9216522122592974, + "grad_norm": 5.339836956734435, + "learning_rate": 3.3682739701289974e-06, + "loss": 0.10560455322265624, + "step": 106590 + }, + { + "epoch": 0.9216954457808406, + "grad_norm": 0.2230742535217041, + "learning_rate": 3.368071777017615e-06, + "loss": 0.08022117614746094, + "step": 106595 + }, + { + "epoch": 0.9217386793023838, + "grad_norm": 0.6751669875910655, + "learning_rate": 3.367869582208717e-06, + "loss": 0.15668182373046874, + "step": 106600 + }, + { + "epoch": 0.9217819128239272, + "grad_norm": 7.357655385641153, + "learning_rate": 3.367667385703237e-06, + "loss": 0.10226402282714844, + "step": 106605 + }, + { + "epoch": 0.9218251463454704, + "grad_norm": 4.0744183735296895, + "learning_rate": 3.367465187502108e-06, + "loss": 0.30927581787109376, + "step": 106610 + }, + { + "epoch": 0.9218683798670136, + "grad_norm": 1.1377103752046767, + "learning_rate": 3.367262987606261e-06, + "loss": 0.11388206481933594, + "step": 106615 + }, + { + "epoch": 0.921911613388557, + "grad_norm": 5.665249740513407, + "learning_rate": 3.36706078601663e-06, + "loss": 0.0994659423828125, + "step": 106620 + }, + { + "epoch": 0.9219548469101002, + "grad_norm": 3.5796670633044454, + "learning_rate": 3.3668585827341474e-06, + "loss": 0.050574111938476565, + "step": 106625 + }, + { + "epoch": 0.9219980804316434, + "grad_norm": 2.449992696548071, + "learning_rate": 3.3666563777597457e-06, + "loss": 0.3144866943359375, + "step": 106630 + }, + { + "epoch": 0.9220413139531868, + "grad_norm": 0.49255275319329567, + "learning_rate": 3.3664541710943567e-06, + "loss": 0.06649246215820312, + "step": 106635 + }, + { + "epoch": 0.92208454747473, + "grad_norm": 0.1819038624168325, + "learning_rate": 3.3662519627389133e-06, + "loss": 0.14930877685546876, + "step": 106640 + }, + { + "epoch": 0.9221277809962732, + "grad_norm": 1.3038738013384947, + "learning_rate": 3.3660497526943486e-06, + "loss": 0.12586135864257814, + "step": 106645 + }, + { + "epoch": 0.9221710145178166, + "grad_norm": 4.289258042989628, + "learning_rate": 3.3658475409615947e-06, + "loss": 0.022379302978515626, + "step": 106650 + }, + { + "epoch": 0.9222142480393598, + "grad_norm": 26.017276658355826, + "learning_rate": 3.3656453275415838e-06, + "loss": 0.42218017578125, + "step": 106655 + }, + { + "epoch": 0.922257481560903, + "grad_norm": 5.753791611095678, + "learning_rate": 3.3654431124352497e-06, + "loss": 0.047591781616210936, + "step": 106660 + }, + { + "epoch": 0.9223007150824464, + "grad_norm": 3.8947284408117167, + "learning_rate": 3.3652408956435233e-06, + "loss": 0.0742532730102539, + "step": 106665 + }, + { + "epoch": 0.9223439486039896, + "grad_norm": 0.3755956661720871, + "learning_rate": 3.3650386771673394e-06, + "loss": 0.059619140625, + "step": 106670 + }, + { + "epoch": 0.9223871821255328, + "grad_norm": 27.589935552555414, + "learning_rate": 3.3648364570076288e-06, + "loss": 0.116094970703125, + "step": 106675 + }, + { + "epoch": 0.9224304156470761, + "grad_norm": 1.4838222691706853, + "learning_rate": 3.3646342351653245e-06, + "loss": 0.038100814819335936, + "step": 106680 + }, + { + "epoch": 0.9224736491686194, + "grad_norm": 21.149574355787664, + "learning_rate": 3.3644320116413595e-06, + "loss": 0.0931121826171875, + "step": 106685 + }, + { + "epoch": 0.9225168826901626, + "grad_norm": 2.7963613572968327, + "learning_rate": 3.3642297864366664e-06, + "loss": 0.184228515625, + "step": 106690 + }, + { + "epoch": 0.9225601162117059, + "grad_norm": 1.1553458024644807, + "learning_rate": 3.364027559552178e-06, + "loss": 0.38658962249755857, + "step": 106695 + }, + { + "epoch": 0.9226033497332492, + "grad_norm": 10.751068913975532, + "learning_rate": 3.363825330988826e-06, + "loss": 0.4778289794921875, + "step": 106700 + }, + { + "epoch": 0.9226465832547924, + "grad_norm": 1.576848764889923, + "learning_rate": 3.3636231007475436e-06, + "loss": 0.0334503173828125, + "step": 106705 + }, + { + "epoch": 0.9226898167763357, + "grad_norm": 0.3182704602181321, + "learning_rate": 3.363420868829264e-06, + "loss": 0.0362091064453125, + "step": 106710 + }, + { + "epoch": 0.922733050297879, + "grad_norm": 9.307050964103647, + "learning_rate": 3.3632186352349193e-06, + "loss": 0.08947744369506835, + "step": 106715 + }, + { + "epoch": 0.9227762838194222, + "grad_norm": 33.12229379808931, + "learning_rate": 3.3630163999654423e-06, + "loss": 0.39900970458984375, + "step": 106720 + }, + { + "epoch": 0.9228195173409655, + "grad_norm": 24.825295309753336, + "learning_rate": 3.3628141630217662e-06, + "loss": 0.301617431640625, + "step": 106725 + }, + { + "epoch": 0.9228627508625088, + "grad_norm": 0.53430497899059, + "learning_rate": 3.3626119244048223e-06, + "loss": 0.20172119140625, + "step": 106730 + }, + { + "epoch": 0.922905984384052, + "grad_norm": 1.2981165799163277, + "learning_rate": 3.3624096841155448e-06, + "loss": 0.01843547821044922, + "step": 106735 + }, + { + "epoch": 0.9229492179055953, + "grad_norm": 5.76482687023903, + "learning_rate": 3.3622074421548667e-06, + "loss": 0.08892669677734374, + "step": 106740 + }, + { + "epoch": 0.9229924514271386, + "grad_norm": 2.4419691381099295, + "learning_rate": 3.3620051985237185e-06, + "loss": 0.22296562194824218, + "step": 106745 + }, + { + "epoch": 0.9230356849486818, + "grad_norm": 4.330584114574907, + "learning_rate": 3.361802953223035e-06, + "loss": 0.24581222534179686, + "step": 106750 + }, + { + "epoch": 0.923078918470225, + "grad_norm": 6.194482722148359, + "learning_rate": 3.3616007062537477e-06, + "loss": 0.20977935791015626, + "step": 106755 + }, + { + "epoch": 0.9231221519917684, + "grad_norm": 13.842407454377904, + "learning_rate": 3.3613984576167903e-06, + "loss": 0.22724876403808594, + "step": 106760 + }, + { + "epoch": 0.9231653855133116, + "grad_norm": 19.921390440019742, + "learning_rate": 3.3611962073130956e-06, + "loss": 0.233074951171875, + "step": 106765 + }, + { + "epoch": 0.9232086190348548, + "grad_norm": 0.23195181480805963, + "learning_rate": 3.3609939553435954e-06, + "loss": 0.056043243408203124, + "step": 106770 + }, + { + "epoch": 0.9232518525563981, + "grad_norm": 3.495464858245596, + "learning_rate": 3.3607917017092236e-06, + "loss": 0.07469406127929687, + "step": 106775 + }, + { + "epoch": 0.9232950860779414, + "grad_norm": 6.768063782047886, + "learning_rate": 3.360589446410912e-06, + "loss": 0.09925079345703125, + "step": 106780 + }, + { + "epoch": 0.9233383195994846, + "grad_norm": 0.08032875922730111, + "learning_rate": 3.3603871894495933e-06, + "loss": 0.12543563842773436, + "step": 106785 + }, + { + "epoch": 0.9233815531210279, + "grad_norm": 12.02141810203963, + "learning_rate": 3.360184930826201e-06, + "loss": 0.09012069702148437, + "step": 106790 + }, + { + "epoch": 0.9234247866425712, + "grad_norm": 27.239857295006775, + "learning_rate": 3.3599826705416675e-06, + "loss": 0.1930694580078125, + "step": 106795 + }, + { + "epoch": 0.9234680201641144, + "grad_norm": 1.6215733934403491, + "learning_rate": 3.359780408596926e-06, + "loss": 0.14012641906738282, + "step": 106800 + }, + { + "epoch": 0.9235112536856577, + "grad_norm": 6.6156227603359286, + "learning_rate": 3.359578144992909e-06, + "loss": 0.22992324829101562, + "step": 106805 + }, + { + "epoch": 0.923554487207201, + "grad_norm": 1.7856285255710242, + "learning_rate": 3.359375879730549e-06, + "loss": 0.17357635498046875, + "step": 106810 + }, + { + "epoch": 0.9235977207287442, + "grad_norm": 29.707391667195274, + "learning_rate": 3.359173612810779e-06, + "loss": 0.09918670654296875, + "step": 106815 + }, + { + "epoch": 0.9236409542502875, + "grad_norm": 8.691169099561888, + "learning_rate": 3.3589713442345326e-06, + "loss": 0.31416854858398435, + "step": 106820 + }, + { + "epoch": 0.9236841877718308, + "grad_norm": 1.920533238910717, + "learning_rate": 3.358769074002742e-06, + "loss": 0.347137451171875, + "step": 106825 + }, + { + "epoch": 0.923727421293374, + "grad_norm": 4.844461239061457, + "learning_rate": 3.3585668021163404e-06, + "loss": 0.18105621337890626, + "step": 106830 + }, + { + "epoch": 0.9237706548149173, + "grad_norm": 2.405542997043811, + "learning_rate": 3.35836452857626e-06, + "loss": 0.052652740478515626, + "step": 106835 + }, + { + "epoch": 0.9238138883364606, + "grad_norm": 0.08178182540572343, + "learning_rate": 3.358162253383434e-06, + "loss": 0.24133720397949218, + "step": 106840 + }, + { + "epoch": 0.9238571218580038, + "grad_norm": 21.63852150679662, + "learning_rate": 3.3579599765387955e-06, + "loss": 0.18701629638671874, + "step": 106845 + }, + { + "epoch": 0.9239003553795471, + "grad_norm": 0.8534978650417517, + "learning_rate": 3.357757698043278e-06, + "loss": 0.11537628173828125, + "step": 106850 + }, + { + "epoch": 0.9239435889010903, + "grad_norm": 0.7543188941807545, + "learning_rate": 3.3575554178978128e-06, + "loss": 0.144580078125, + "step": 106855 + }, + { + "epoch": 0.9239868224226336, + "grad_norm": 5.604336187141335, + "learning_rate": 3.3573531361033334e-06, + "loss": 0.10519676208496094, + "step": 106860 + }, + { + "epoch": 0.9240300559441769, + "grad_norm": 1.2706552218034814, + "learning_rate": 3.357150852660773e-06, + "loss": 0.06363201141357422, + "step": 106865 + }, + { + "epoch": 0.9240732894657201, + "grad_norm": 16.64006301418791, + "learning_rate": 3.356948567571064e-06, + "loss": 0.06329574584960937, + "step": 106870 + }, + { + "epoch": 0.9241165229872634, + "grad_norm": 6.277612558642778, + "learning_rate": 3.3567462808351403e-06, + "loss": 0.25362720489501955, + "step": 106875 + }, + { + "epoch": 0.9241597565088067, + "grad_norm": 66.28418221735615, + "learning_rate": 3.3565439924539346e-06, + "loss": 0.3389373779296875, + "step": 106880 + }, + { + "epoch": 0.9242029900303499, + "grad_norm": 4.214889896872108, + "learning_rate": 3.3563417024283795e-06, + "loss": 0.18184814453125, + "step": 106885 + }, + { + "epoch": 0.9242462235518932, + "grad_norm": 1.885449903109106, + "learning_rate": 3.3561394107594073e-06, + "loss": 0.2330322265625, + "step": 106890 + }, + { + "epoch": 0.9242894570734365, + "grad_norm": 0.179380265845261, + "learning_rate": 3.3559371174479527e-06, + "loss": 0.06194915771484375, + "step": 106895 + }, + { + "epoch": 0.9243326905949797, + "grad_norm": 1.792514767277828, + "learning_rate": 3.3557348224949466e-06, + "loss": 0.1193878173828125, + "step": 106900 + }, + { + "epoch": 0.924375924116523, + "grad_norm": 13.974445000769082, + "learning_rate": 3.3555325259013227e-06, + "loss": 0.21485214233398436, + "step": 106905 + }, + { + "epoch": 0.9244191576380663, + "grad_norm": 5.155107447158649, + "learning_rate": 3.355330227668015e-06, + "loss": 0.18851318359375, + "step": 106910 + }, + { + "epoch": 0.9244623911596095, + "grad_norm": 33.308976535101465, + "learning_rate": 3.3551279277959556e-06, + "loss": 0.32047595977783205, + "step": 106915 + }, + { + "epoch": 0.9245056246811528, + "grad_norm": 2.4319993281384558, + "learning_rate": 3.3549256262860767e-06, + "loss": 0.2963226318359375, + "step": 106920 + }, + { + "epoch": 0.9245488582026961, + "grad_norm": 51.29560395510108, + "learning_rate": 3.3547233231393135e-06, + "loss": 0.15996246337890624, + "step": 106925 + }, + { + "epoch": 0.9245920917242393, + "grad_norm": 4.468402822717809, + "learning_rate": 3.3545210183565962e-06, + "loss": 0.42558975219726564, + "step": 106930 + }, + { + "epoch": 0.9246353252457826, + "grad_norm": 43.98465095781009, + "learning_rate": 3.3543187119388602e-06, + "loss": 0.3604438781738281, + "step": 106935 + }, + { + "epoch": 0.9246785587673259, + "grad_norm": 2.357125544014361, + "learning_rate": 3.3541164038870376e-06, + "loss": 0.13243560791015624, + "step": 106940 + }, + { + "epoch": 0.9247217922888691, + "grad_norm": 0.4819164917963191, + "learning_rate": 3.3539140942020617e-06, + "loss": 0.15643081665039063, + "step": 106945 + }, + { + "epoch": 0.9247650258104123, + "grad_norm": 0.10372478823482031, + "learning_rate": 3.3537117828848648e-06, + "loss": 0.01753692626953125, + "step": 106950 + }, + { + "epoch": 0.9248082593319557, + "grad_norm": 14.044697846169942, + "learning_rate": 3.3535094699363794e-06, + "loss": 0.1300588607788086, + "step": 106955 + }, + { + "epoch": 0.9248514928534989, + "grad_norm": 12.804384109589712, + "learning_rate": 3.3533071553575415e-06, + "loss": 0.5838336944580078, + "step": 106960 + }, + { + "epoch": 0.9248947263750421, + "grad_norm": 18.633698043180907, + "learning_rate": 3.353104839149282e-06, + "loss": 0.10918731689453125, + "step": 106965 + }, + { + "epoch": 0.9249379598965854, + "grad_norm": 16.50016185607239, + "learning_rate": 3.3529025213125326e-06, + "loss": 0.115374755859375, + "step": 106970 + }, + { + "epoch": 0.9249811934181287, + "grad_norm": 0.40841302766595566, + "learning_rate": 3.3527002018482293e-06, + "loss": 0.04918975830078125, + "step": 106975 + }, + { + "epoch": 0.9250244269396719, + "grad_norm": 2.7014145135587984, + "learning_rate": 3.3524978807573034e-06, + "loss": 0.1014434814453125, + "step": 106980 + }, + { + "epoch": 0.9250676604612152, + "grad_norm": 4.118044754599754, + "learning_rate": 3.3522955580406883e-06, + "loss": 0.19900741577148437, + "step": 106985 + }, + { + "epoch": 0.9251108939827585, + "grad_norm": 0.6461226030833119, + "learning_rate": 3.3520932336993176e-06, + "loss": 0.07050971984863282, + "step": 106990 + }, + { + "epoch": 0.9251541275043017, + "grad_norm": 0.8841620240359543, + "learning_rate": 3.3518909077341237e-06, + "loss": 0.240875244140625, + "step": 106995 + }, + { + "epoch": 0.925197361025845, + "grad_norm": 0.04638824907526806, + "learning_rate": 3.35168858014604e-06, + "loss": 0.24391326904296876, + "step": 107000 + }, + { + "epoch": 0.9252405945473883, + "grad_norm": 6.740850849522922, + "learning_rate": 3.351486250936e-06, + "loss": 0.06783905029296874, + "step": 107005 + }, + { + "epoch": 0.9252838280689315, + "grad_norm": 44.69507197387724, + "learning_rate": 3.351283920104936e-06, + "loss": 0.11315078735351562, + "step": 107010 + }, + { + "epoch": 0.9253270615904748, + "grad_norm": 6.174894739070488, + "learning_rate": 3.3510815876537817e-06, + "loss": 0.08253021240234375, + "step": 107015 + }, + { + "epoch": 0.9253702951120181, + "grad_norm": 6.944469173189599, + "learning_rate": 3.3508792535834706e-06, + "loss": 0.15269775390625, + "step": 107020 + }, + { + "epoch": 0.9254135286335613, + "grad_norm": 20.8625279755209, + "learning_rate": 3.3506769178949356e-06, + "loss": 0.1451202392578125, + "step": 107025 + }, + { + "epoch": 0.9254567621551045, + "grad_norm": 1.0253507174987924, + "learning_rate": 3.3504745805891094e-06, + "loss": 0.19972553253173828, + "step": 107030 + }, + { + "epoch": 0.9254999956766479, + "grad_norm": 9.990904716304838, + "learning_rate": 3.350272241666925e-06, + "loss": 0.026047134399414064, + "step": 107035 + }, + { + "epoch": 0.9255432291981911, + "grad_norm": 0.6078289026517147, + "learning_rate": 3.350069901129315e-06, + "loss": 0.2991523742675781, + "step": 107040 + }, + { + "epoch": 0.9255864627197343, + "grad_norm": 2.614514264781669, + "learning_rate": 3.3498675589772153e-06, + "loss": 0.09706134796142578, + "step": 107045 + }, + { + "epoch": 0.9256296962412777, + "grad_norm": 0.6332851869289566, + "learning_rate": 3.349665215211556e-06, + "loss": 0.249639892578125, + "step": 107050 + }, + { + "epoch": 0.9256729297628209, + "grad_norm": 32.90857203000553, + "learning_rate": 3.349462869833273e-06, + "loss": 0.40755767822265626, + "step": 107055 + }, + { + "epoch": 0.9257161632843641, + "grad_norm": 4.048843541453337, + "learning_rate": 3.349260522843297e-06, + "loss": 0.2576881408691406, + "step": 107060 + }, + { + "epoch": 0.9257593968059075, + "grad_norm": 3.108495769654481, + "learning_rate": 3.3490581742425626e-06, + "loss": 0.16834259033203125, + "step": 107065 + }, + { + "epoch": 0.9258026303274507, + "grad_norm": 6.606119966013662, + "learning_rate": 3.3488558240320027e-06, + "loss": 0.1906768798828125, + "step": 107070 + }, + { + "epoch": 0.9258458638489939, + "grad_norm": 19.366142398282946, + "learning_rate": 3.3486534722125515e-06, + "loss": 0.09279918670654297, + "step": 107075 + }, + { + "epoch": 0.9258890973705373, + "grad_norm": 10.975548684563647, + "learning_rate": 3.34845111878514e-06, + "loss": 0.1636219024658203, + "step": 107080 + }, + { + "epoch": 0.9259323308920805, + "grad_norm": 0.10352620286515285, + "learning_rate": 3.3482487637507036e-06, + "loss": 0.1259521484375, + "step": 107085 + }, + { + "epoch": 0.9259755644136237, + "grad_norm": 1.6825399777330816, + "learning_rate": 3.3480464071101743e-06, + "loss": 0.6514518737792969, + "step": 107090 + }, + { + "epoch": 0.9260187979351671, + "grad_norm": 32.66415026556592, + "learning_rate": 3.3478440488644857e-06, + "loss": 0.29118804931640624, + "step": 107095 + }, + { + "epoch": 0.9260620314567103, + "grad_norm": 19.61294643359345, + "learning_rate": 3.347641689014571e-06, + "loss": 0.140771484375, + "step": 107100 + }, + { + "epoch": 0.9261052649782535, + "grad_norm": 0.7143293213258266, + "learning_rate": 3.347439327561364e-06, + "loss": 0.05213470458984375, + "step": 107105 + }, + { + "epoch": 0.9261484984997969, + "grad_norm": 22.143864922786065, + "learning_rate": 3.3472369645057966e-06, + "loss": 0.286614990234375, + "step": 107110 + }, + { + "epoch": 0.9261917320213401, + "grad_norm": 0.9237299410817452, + "learning_rate": 3.347034599848804e-06, + "loss": 0.20161857604980468, + "step": 107115 + }, + { + "epoch": 0.9262349655428833, + "grad_norm": 23.277897036916393, + "learning_rate": 3.3468322335913186e-06, + "loss": 0.10588226318359376, + "step": 107120 + }, + { + "epoch": 0.9262781990644265, + "grad_norm": 55.19941261625083, + "learning_rate": 3.346629865734273e-06, + "loss": 0.16158447265625, + "step": 107125 + }, + { + "epoch": 0.9263214325859699, + "grad_norm": 13.750224007596874, + "learning_rate": 3.3464274962786006e-06, + "loss": 0.09407272338867187, + "step": 107130 + }, + { + "epoch": 0.9263646661075131, + "grad_norm": 0.621541324775178, + "learning_rate": 3.3462251252252365e-06, + "loss": 0.31011810302734377, + "step": 107135 + }, + { + "epoch": 0.9264078996290563, + "grad_norm": 5.729424768776348, + "learning_rate": 3.346022752575112e-06, + "loss": 0.05569000244140625, + "step": 107140 + }, + { + "epoch": 0.9264511331505997, + "grad_norm": 13.331447616611085, + "learning_rate": 3.3458203783291606e-06, + "loss": 0.0966461181640625, + "step": 107145 + }, + { + "epoch": 0.9264943666721429, + "grad_norm": 5.186740766973896, + "learning_rate": 3.3456180024883175e-06, + "loss": 0.1626220703125, + "step": 107150 + }, + { + "epoch": 0.9265376001936861, + "grad_norm": 4.440223858637013, + "learning_rate": 3.345415625053513e-06, + "loss": 0.10932769775390624, + "step": 107155 + }, + { + "epoch": 0.9265808337152295, + "grad_norm": 2.8907679562009054, + "learning_rate": 3.345213246025683e-06, + "loss": 0.25048370361328126, + "step": 107160 + }, + { + "epoch": 0.9266240672367727, + "grad_norm": 6.7241409054965615, + "learning_rate": 3.3450108654057606e-06, + "loss": 0.1641571044921875, + "step": 107165 + }, + { + "epoch": 0.9266673007583159, + "grad_norm": 7.46633982859314, + "learning_rate": 3.344808483194678e-06, + "loss": 0.17026100158691407, + "step": 107170 + }, + { + "epoch": 0.9267105342798593, + "grad_norm": 15.730770936121434, + "learning_rate": 3.3446060993933692e-06, + "loss": 0.1517608642578125, + "step": 107175 + }, + { + "epoch": 0.9267537678014025, + "grad_norm": 2.0778896487878384, + "learning_rate": 3.3444037140027675e-06, + "loss": 0.12435760498046874, + "step": 107180 + }, + { + "epoch": 0.9267970013229457, + "grad_norm": 28.65655013020746, + "learning_rate": 3.3442013270238062e-06, + "loss": 0.17628021240234376, + "step": 107185 + }, + { + "epoch": 0.9268402348444891, + "grad_norm": 12.202352464319508, + "learning_rate": 3.343998938457419e-06, + "loss": 0.0860565185546875, + "step": 107190 + }, + { + "epoch": 0.9268834683660323, + "grad_norm": 31.119955492992258, + "learning_rate": 3.3437965483045395e-06, + "loss": 0.3346595764160156, + "step": 107195 + }, + { + "epoch": 0.9269267018875755, + "grad_norm": 2.7354678274838617, + "learning_rate": 3.3435941565661e-06, + "loss": 0.0618927001953125, + "step": 107200 + }, + { + "epoch": 0.9269699354091188, + "grad_norm": 32.97092777978066, + "learning_rate": 3.3433917632430344e-06, + "loss": 0.17091522216796876, + "step": 107205 + }, + { + "epoch": 0.9270131689306621, + "grad_norm": 12.267872790203432, + "learning_rate": 3.3431893683362767e-06, + "loss": 0.473175048828125, + "step": 107210 + }, + { + "epoch": 0.9270564024522053, + "grad_norm": 3.697455735557134, + "learning_rate": 3.34298697184676e-06, + "loss": 0.098431396484375, + "step": 107215 + }, + { + "epoch": 0.9270996359737486, + "grad_norm": 1.1273363488428945, + "learning_rate": 3.3427845737754187e-06, + "loss": 0.06401214599609376, + "step": 107220 + }, + { + "epoch": 0.9271428694952919, + "grad_norm": 5.627360370012989, + "learning_rate": 3.3425821741231835e-06, + "loss": 0.10627593994140624, + "step": 107225 + }, + { + "epoch": 0.9271861030168351, + "grad_norm": 49.33217965431821, + "learning_rate": 3.3423797728909904e-06, + "loss": 0.26915283203125, + "step": 107230 + }, + { + "epoch": 0.9272293365383784, + "grad_norm": 1.0687731235143598, + "learning_rate": 3.342177370079772e-06, + "loss": 0.13808250427246094, + "step": 107235 + }, + { + "epoch": 0.9272725700599217, + "grad_norm": 0.2180045559156766, + "learning_rate": 3.3419749656904615e-06, + "loss": 0.17671051025390624, + "step": 107240 + }, + { + "epoch": 0.9273158035814649, + "grad_norm": 20.159232791099843, + "learning_rate": 3.341772559723993e-06, + "loss": 0.19174423217773437, + "step": 107245 + }, + { + "epoch": 0.9273590371030082, + "grad_norm": 0.5865948372718028, + "learning_rate": 3.3415701521813e-06, + "loss": 0.108837890625, + "step": 107250 + }, + { + "epoch": 0.9274022706245515, + "grad_norm": 1.2006549095888437, + "learning_rate": 3.3413677430633147e-06, + "loss": 0.06830368041992188, + "step": 107255 + }, + { + "epoch": 0.9274455041460947, + "grad_norm": 12.3183243668497, + "learning_rate": 3.341165332370973e-06, + "loss": 0.10822200775146484, + "step": 107260 + }, + { + "epoch": 0.927488737667638, + "grad_norm": 3.2288865752679166, + "learning_rate": 3.3409629201052053e-06, + "loss": 0.1690204620361328, + "step": 107265 + }, + { + "epoch": 0.9275319711891813, + "grad_norm": 26.7580838337694, + "learning_rate": 3.3407605062669475e-06, + "loss": 0.1859161376953125, + "step": 107270 + }, + { + "epoch": 0.9275752047107245, + "grad_norm": 13.846268678364178, + "learning_rate": 3.3405580908571326e-06, + "loss": 0.11989059448242187, + "step": 107275 + }, + { + "epoch": 0.9276184382322678, + "grad_norm": 7.074468002902209, + "learning_rate": 3.3403556738766933e-06, + "loss": 0.074981689453125, + "step": 107280 + }, + { + "epoch": 0.9276616717538111, + "grad_norm": 1.2038593103378268, + "learning_rate": 3.340153255326564e-06, + "loss": 0.06987991333007812, + "step": 107285 + }, + { + "epoch": 0.9277049052753543, + "grad_norm": 0.31468983018993113, + "learning_rate": 3.339950835207678e-06, + "loss": 0.0553166389465332, + "step": 107290 + }, + { + "epoch": 0.9277481387968975, + "grad_norm": 16.552962154010572, + "learning_rate": 3.339748413520969e-06, + "loss": 0.1457611083984375, + "step": 107295 + }, + { + "epoch": 0.9277913723184408, + "grad_norm": 46.95090566402765, + "learning_rate": 3.33954599026737e-06, + "loss": 0.30403289794921873, + "step": 107300 + }, + { + "epoch": 0.9278346058399841, + "grad_norm": 1.459864107012972, + "learning_rate": 3.3393435654478148e-06, + "loss": 0.18728713989257811, + "step": 107305 + }, + { + "epoch": 0.9278778393615273, + "grad_norm": 9.126387861464542, + "learning_rate": 3.3391411390632376e-06, + "loss": 0.285308837890625, + "step": 107310 + }, + { + "epoch": 0.9279210728830706, + "grad_norm": 19.42902651489989, + "learning_rate": 3.338938711114571e-06, + "loss": 0.1845916748046875, + "step": 107315 + }, + { + "epoch": 0.9279643064046139, + "grad_norm": 11.179037412964972, + "learning_rate": 3.3387362816027488e-06, + "loss": 0.06179046630859375, + "step": 107320 + }, + { + "epoch": 0.9280075399261571, + "grad_norm": 1.3900747707997338, + "learning_rate": 3.338533850528705e-06, + "loss": 0.03587493896484375, + "step": 107325 + }, + { + "epoch": 0.9280507734477004, + "grad_norm": 0.9561671405275735, + "learning_rate": 3.338331417893373e-06, + "loss": 0.083636474609375, + "step": 107330 + }, + { + "epoch": 0.9280940069692437, + "grad_norm": 0.5595391197331241, + "learning_rate": 3.338128983697686e-06, + "loss": 0.04319343566894531, + "step": 107335 + }, + { + "epoch": 0.9281372404907869, + "grad_norm": 5.2909136801538885, + "learning_rate": 3.3379265479425792e-06, + "loss": 0.106561279296875, + "step": 107340 + }, + { + "epoch": 0.9281804740123302, + "grad_norm": 24.18366493562595, + "learning_rate": 3.337724110628985e-06, + "loss": 0.10148563385009765, + "step": 107345 + }, + { + "epoch": 0.9282237075338735, + "grad_norm": 21.21425801548835, + "learning_rate": 3.337521671757836e-06, + "loss": 0.13165740966796874, + "step": 107350 + }, + { + "epoch": 0.9282669410554167, + "grad_norm": 3.216855307502887, + "learning_rate": 3.3373192313300665e-06, + "loss": 0.18717994689941406, + "step": 107355 + }, + { + "epoch": 0.92831017457696, + "grad_norm": 7.9746890228721, + "learning_rate": 3.3371167893466114e-06, + "loss": 0.12578887939453126, + "step": 107360 + }, + { + "epoch": 0.9283534080985033, + "grad_norm": 1.7640155149821861, + "learning_rate": 3.3369143458084028e-06, + "loss": 0.07428054809570313, + "step": 107365 + }, + { + "epoch": 0.9283966416200465, + "grad_norm": 1.0050459440541988, + "learning_rate": 3.336711900716376e-06, + "loss": 0.06605682373046876, + "step": 107370 + }, + { + "epoch": 0.9284398751415898, + "grad_norm": 4.827679807629159, + "learning_rate": 3.3365094540714633e-06, + "loss": 0.13719863891601564, + "step": 107375 + }, + { + "epoch": 0.928483108663133, + "grad_norm": 14.282781836823027, + "learning_rate": 3.336307005874598e-06, + "loss": 0.2317535400390625, + "step": 107380 + }, + { + "epoch": 0.9285263421846763, + "grad_norm": 8.899520482676808, + "learning_rate": 3.336104556126715e-06, + "loss": 0.18281326293945313, + "step": 107385 + }, + { + "epoch": 0.9285695757062196, + "grad_norm": 10.207370828136385, + "learning_rate": 3.3359021048287477e-06, + "loss": 0.06132965087890625, + "step": 107390 + }, + { + "epoch": 0.9286128092277628, + "grad_norm": 3.331661494702765, + "learning_rate": 3.3356996519816305e-06, + "loss": 0.045749664306640625, + "step": 107395 + }, + { + "epoch": 0.9286560427493061, + "grad_norm": 30.072061233748034, + "learning_rate": 3.335497197586294e-06, + "loss": 0.18905181884765626, + "step": 107400 + }, + { + "epoch": 0.9286992762708494, + "grad_norm": 3.5693862082188974, + "learning_rate": 3.3352947416436747e-06, + "loss": 0.1897979736328125, + "step": 107405 + }, + { + "epoch": 0.9287425097923926, + "grad_norm": 8.506179871820118, + "learning_rate": 3.335092284154707e-06, + "loss": 0.07761383056640625, + "step": 107410 + }, + { + "epoch": 0.9287857433139359, + "grad_norm": 1.2765577797773997, + "learning_rate": 3.3348898251203217e-06, + "loss": 0.09667320251464843, + "step": 107415 + }, + { + "epoch": 0.9288289768354792, + "grad_norm": 9.650348427731746, + "learning_rate": 3.3346873645414556e-06, + "loss": 0.08423881530761719, + "step": 107420 + }, + { + "epoch": 0.9288722103570224, + "grad_norm": 33.36881623608536, + "learning_rate": 3.3344849024190404e-06, + "loss": 0.13755569458007813, + "step": 107425 + }, + { + "epoch": 0.9289154438785657, + "grad_norm": 1.5313119400671296, + "learning_rate": 3.3342824387540097e-06, + "loss": 0.09408340454101563, + "step": 107430 + }, + { + "epoch": 0.928958677400109, + "grad_norm": 10.80781101976481, + "learning_rate": 3.3340799735472985e-06, + "loss": 0.3438240051269531, + "step": 107435 + }, + { + "epoch": 0.9290019109216522, + "grad_norm": 7.547619516684864, + "learning_rate": 3.33387750679984e-06, + "loss": 0.08104286193847657, + "step": 107440 + }, + { + "epoch": 0.9290451444431955, + "grad_norm": 1.333376883875095, + "learning_rate": 3.3336750385125674e-06, + "loss": 0.12411079406738282, + "step": 107445 + }, + { + "epoch": 0.9290883779647388, + "grad_norm": 3.3826832941144094, + "learning_rate": 3.333472568686416e-06, + "loss": 0.0451568603515625, + "step": 107450 + }, + { + "epoch": 0.929131611486282, + "grad_norm": 5.4150295714152445, + "learning_rate": 3.333270097322318e-06, + "loss": 0.05589599609375, + "step": 107455 + }, + { + "epoch": 0.9291748450078252, + "grad_norm": 0.0384176167048437, + "learning_rate": 3.3330676244212077e-06, + "loss": 0.013462066650390625, + "step": 107460 + }, + { + "epoch": 0.9292180785293686, + "grad_norm": 20.45969412020234, + "learning_rate": 3.332865149984019e-06, + "loss": 0.2276470184326172, + "step": 107465 + }, + { + "epoch": 0.9292613120509118, + "grad_norm": 11.772973481124358, + "learning_rate": 3.332662674011686e-06, + "loss": 0.04655532836914063, + "step": 107470 + }, + { + "epoch": 0.929304545572455, + "grad_norm": 2.505180013878137, + "learning_rate": 3.332460196505142e-06, + "loss": 0.04183807373046875, + "step": 107475 + }, + { + "epoch": 0.9293477790939983, + "grad_norm": 6.260637713027597, + "learning_rate": 3.3322577174653206e-06, + "loss": 0.10339374542236328, + "step": 107480 + }, + { + "epoch": 0.9293910126155416, + "grad_norm": 1.690695048787204, + "learning_rate": 3.332055236893156e-06, + "loss": 0.116717529296875, + "step": 107485 + }, + { + "epoch": 0.9294342461370848, + "grad_norm": 0.49824683406755405, + "learning_rate": 3.331852754789582e-06, + "loss": 0.0900238037109375, + "step": 107490 + }, + { + "epoch": 0.9294774796586281, + "grad_norm": 6.277298549384416, + "learning_rate": 3.3316502711555317e-06, + "loss": 0.11387939453125, + "step": 107495 + }, + { + "epoch": 0.9295207131801714, + "grad_norm": 0.26526703443169614, + "learning_rate": 3.331447785991941e-06, + "loss": 0.10340023040771484, + "step": 107500 + }, + { + "epoch": 0.9295639467017146, + "grad_norm": 16.022221806012215, + "learning_rate": 3.331245299299742e-06, + "loss": 0.16922607421875, + "step": 107505 + }, + { + "epoch": 0.929607180223258, + "grad_norm": 0.6509548025203762, + "learning_rate": 3.3310428110798688e-06, + "loss": 0.06844024658203125, + "step": 107510 + }, + { + "epoch": 0.9296504137448012, + "grad_norm": 40.66902644571573, + "learning_rate": 3.3308403213332558e-06, + "loss": 0.34660682678222654, + "step": 107515 + }, + { + "epoch": 0.9296936472663444, + "grad_norm": 27.058294015513688, + "learning_rate": 3.330637830060836e-06, + "loss": 0.1381061553955078, + "step": 107520 + }, + { + "epoch": 0.9297368807878877, + "grad_norm": 1.034963412998358, + "learning_rate": 3.330435337263544e-06, + "loss": 0.08315200805664062, + "step": 107525 + }, + { + "epoch": 0.929780114309431, + "grad_norm": 1.0209829492565587, + "learning_rate": 3.3302328429423134e-06, + "loss": 0.014156723022460937, + "step": 107530 + }, + { + "epoch": 0.9298233478309742, + "grad_norm": 2.234083827190959, + "learning_rate": 3.3300303470980783e-06, + "loss": 0.0966278076171875, + "step": 107535 + }, + { + "epoch": 0.9298665813525175, + "grad_norm": 3.3437737948865487, + "learning_rate": 3.3298278497317717e-06, + "loss": 0.13304634094238282, + "step": 107540 + }, + { + "epoch": 0.9299098148740608, + "grad_norm": 48.21480110347195, + "learning_rate": 3.329625350844329e-06, + "loss": 0.5599029541015625, + "step": 107545 + }, + { + "epoch": 0.929953048395604, + "grad_norm": 40.61697081458917, + "learning_rate": 3.3294228504366827e-06, + "loss": 0.27198333740234376, + "step": 107550 + }, + { + "epoch": 0.9299962819171472, + "grad_norm": 13.88658573053556, + "learning_rate": 3.3292203485097677e-06, + "loss": 0.2141193389892578, + "step": 107555 + }, + { + "epoch": 0.9300395154386906, + "grad_norm": 40.81015183910947, + "learning_rate": 3.3290178450645164e-06, + "loss": 0.086761474609375, + "step": 107560 + }, + { + "epoch": 0.9300827489602338, + "grad_norm": 14.069808841681047, + "learning_rate": 3.3288153401018654e-06, + "loss": 0.20257568359375, + "step": 107565 + }, + { + "epoch": 0.930125982481777, + "grad_norm": 2.1296549904465683, + "learning_rate": 3.328612833622747e-06, + "loss": 0.102716064453125, + "step": 107570 + }, + { + "epoch": 0.9301692160033204, + "grad_norm": 11.985197314146001, + "learning_rate": 3.3284103256280943e-06, + "loss": 0.25630950927734375, + "step": 107575 + }, + { + "epoch": 0.9302124495248636, + "grad_norm": 11.804523774315618, + "learning_rate": 3.3282078161188427e-06, + "loss": 0.11672286987304688, + "step": 107580 + }, + { + "epoch": 0.9302556830464068, + "grad_norm": 1.5253627543944952, + "learning_rate": 3.328005305095926e-06, + "loss": 0.015930747985839842, + "step": 107585 + }, + { + "epoch": 0.9302989165679502, + "grad_norm": 8.625669125915625, + "learning_rate": 3.327802792560277e-06, + "loss": 0.07460479736328125, + "step": 107590 + }, + { + "epoch": 0.9303421500894934, + "grad_norm": 18.976236720503113, + "learning_rate": 3.3276002785128315e-06, + "loss": 0.26028289794921877, + "step": 107595 + }, + { + "epoch": 0.9303853836110366, + "grad_norm": 13.20008732080553, + "learning_rate": 3.3273977629545213e-06, + "loss": 0.08311233520507813, + "step": 107600 + }, + { + "epoch": 0.93042861713258, + "grad_norm": 0.6197653157368166, + "learning_rate": 3.327195245886282e-06, + "loss": 0.33032989501953125, + "step": 107605 + }, + { + "epoch": 0.9304718506541232, + "grad_norm": 0.8976098438500294, + "learning_rate": 3.3269927273090472e-06, + "loss": 0.1924114227294922, + "step": 107610 + }, + { + "epoch": 0.9305150841756664, + "grad_norm": 4.20026010836049, + "learning_rate": 3.3267902072237516e-06, + "loss": 0.11459808349609375, + "step": 107615 + }, + { + "epoch": 0.9305583176972098, + "grad_norm": 11.746397766158674, + "learning_rate": 3.3265876856313273e-06, + "loss": 0.0471099853515625, + "step": 107620 + }, + { + "epoch": 0.930601551218753, + "grad_norm": 18.674861898072695, + "learning_rate": 3.3263851625327107e-06, + "loss": 0.3809722900390625, + "step": 107625 + }, + { + "epoch": 0.9306447847402962, + "grad_norm": 62.039376239178665, + "learning_rate": 3.3261826379288335e-06, + "loss": 0.16687545776367188, + "step": 107630 + }, + { + "epoch": 0.9306880182618394, + "grad_norm": 1.0216085992189887, + "learning_rate": 3.325980111820631e-06, + "loss": 0.07420272827148437, + "step": 107635 + }, + { + "epoch": 0.9307312517833828, + "grad_norm": 0.5786038772455977, + "learning_rate": 3.3257775842090367e-06, + "loss": 0.008308792114257812, + "step": 107640 + }, + { + "epoch": 0.930774485304926, + "grad_norm": 2.207385811640696, + "learning_rate": 3.325575055094986e-06, + "loss": 0.04029998779296875, + "step": 107645 + }, + { + "epoch": 0.9308177188264692, + "grad_norm": 2.2812843489669676, + "learning_rate": 3.3253725244794116e-06, + "loss": 0.146734619140625, + "step": 107650 + }, + { + "epoch": 0.9308609523480126, + "grad_norm": 0.48543572211716146, + "learning_rate": 3.3251699923632466e-06, + "loss": 0.21095428466796876, + "step": 107655 + }, + { + "epoch": 0.9309041858695558, + "grad_norm": 2.092164419722221, + "learning_rate": 3.3249674587474276e-06, + "loss": 0.117315673828125, + "step": 107660 + }, + { + "epoch": 0.930947419391099, + "grad_norm": 6.271730683147224, + "learning_rate": 3.3247649236328876e-06, + "loss": 0.051921844482421875, + "step": 107665 + }, + { + "epoch": 0.9309906529126424, + "grad_norm": 30.006632818402785, + "learning_rate": 3.324562387020559e-06, + "loss": 0.23470497131347656, + "step": 107670 + }, + { + "epoch": 0.9310338864341856, + "grad_norm": 44.23732427058935, + "learning_rate": 3.324359848911379e-06, + "loss": 0.27008590698242185, + "step": 107675 + }, + { + "epoch": 0.9310771199557288, + "grad_norm": 13.6609526434562, + "learning_rate": 3.3241573093062794e-06, + "loss": 0.1140411376953125, + "step": 107680 + }, + { + "epoch": 0.9311203534772722, + "grad_norm": 1.5857712000445396, + "learning_rate": 3.3239547682061946e-06, + "loss": 0.22466583251953126, + "step": 107685 + }, + { + "epoch": 0.9311635869988154, + "grad_norm": 1.998001706728274, + "learning_rate": 3.3237522256120593e-06, + "loss": 0.047624969482421876, + "step": 107690 + }, + { + "epoch": 0.9312068205203586, + "grad_norm": 3.3626044906785286, + "learning_rate": 3.3235496815248074e-06, + "loss": 0.037420654296875, + "step": 107695 + }, + { + "epoch": 0.931250054041902, + "grad_norm": 0.4051589199426092, + "learning_rate": 3.3233471359453726e-06, + "loss": 0.201678466796875, + "step": 107700 + }, + { + "epoch": 0.9312932875634452, + "grad_norm": 11.668815769243423, + "learning_rate": 3.3231445888746902e-06, + "loss": 0.10021514892578125, + "step": 107705 + }, + { + "epoch": 0.9313365210849884, + "grad_norm": 7.472505505527645, + "learning_rate": 3.3229420403136936e-06, + "loss": 0.06809368133544921, + "step": 107710 + }, + { + "epoch": 0.9313797546065318, + "grad_norm": 5.6703524046207505, + "learning_rate": 3.3227394902633153e-06, + "loss": 0.029288482666015626, + "step": 107715 + }, + { + "epoch": 0.931422988128075, + "grad_norm": 2.0582141928755746, + "learning_rate": 3.3225369387244917e-06, + "loss": 0.13645172119140625, + "step": 107720 + }, + { + "epoch": 0.9314662216496182, + "grad_norm": 33.89103705151937, + "learning_rate": 3.322334385698157e-06, + "loss": 0.15391082763671876, + "step": 107725 + }, + { + "epoch": 0.9315094551711615, + "grad_norm": 2.085524123975029, + "learning_rate": 3.3221318311852443e-06, + "loss": 0.6345787048339844, + "step": 107730 + }, + { + "epoch": 0.9315526886927048, + "grad_norm": 1.409513240068698, + "learning_rate": 3.3219292751866872e-06, + "loss": 0.06570358276367187, + "step": 107735 + }, + { + "epoch": 0.931595922214248, + "grad_norm": 0.9741378938381416, + "learning_rate": 3.3217267177034225e-06, + "loss": 0.03124847412109375, + "step": 107740 + }, + { + "epoch": 0.9316391557357913, + "grad_norm": 0.6142708117846415, + "learning_rate": 3.321524158736381e-06, + "loss": 0.04781036376953125, + "step": 107745 + }, + { + "epoch": 0.9316823892573346, + "grad_norm": 4.576100184319282, + "learning_rate": 3.321321598286498e-06, + "loss": 0.08995094299316406, + "step": 107750 + }, + { + "epoch": 0.9317256227788778, + "grad_norm": 27.3865346663107, + "learning_rate": 3.3211190363547097e-06, + "loss": 0.1674276351928711, + "step": 107755 + }, + { + "epoch": 0.9317688563004211, + "grad_norm": 4.994218353762296, + "learning_rate": 3.3209164729419484e-06, + "loss": 0.1853759765625, + "step": 107760 + }, + { + "epoch": 0.9318120898219644, + "grad_norm": 42.3412918150883, + "learning_rate": 3.3207139080491485e-06, + "loss": 0.7682746887207031, + "step": 107765 + }, + { + "epoch": 0.9318553233435076, + "grad_norm": 32.951022967783565, + "learning_rate": 3.320511341677245e-06, + "loss": 0.0911590576171875, + "step": 107770 + }, + { + "epoch": 0.9318985568650509, + "grad_norm": 13.208870506315556, + "learning_rate": 3.32030877382717e-06, + "loss": 0.21276168823242186, + "step": 107775 + }, + { + "epoch": 0.9319417903865942, + "grad_norm": 1.1900313570795134, + "learning_rate": 3.3201062044998602e-06, + "loss": 0.13580894470214844, + "step": 107780 + }, + { + "epoch": 0.9319850239081374, + "grad_norm": 2.3180748831219304, + "learning_rate": 3.319903633696249e-06, + "loss": 0.1444915771484375, + "step": 107785 + }, + { + "epoch": 0.9320282574296807, + "grad_norm": 1.4723516459513917, + "learning_rate": 3.31970106141727e-06, + "loss": 0.04259757995605469, + "step": 107790 + }, + { + "epoch": 0.932071490951224, + "grad_norm": 0.7817009799271809, + "learning_rate": 3.3194984876638585e-06, + "loss": 0.1010650634765625, + "step": 107795 + }, + { + "epoch": 0.9321147244727672, + "grad_norm": 37.32424561356309, + "learning_rate": 3.3192959124369476e-06, + "loss": 0.2053150177001953, + "step": 107800 + }, + { + "epoch": 0.9321579579943104, + "grad_norm": 0.5039039786282057, + "learning_rate": 3.3190933357374732e-06, + "loss": 0.00887603759765625, + "step": 107805 + }, + { + "epoch": 0.9322011915158537, + "grad_norm": 0.11356349646292685, + "learning_rate": 3.3188907575663677e-06, + "loss": 0.2624725341796875, + "step": 107810 + }, + { + "epoch": 0.932244425037397, + "grad_norm": 4.328490562905253, + "learning_rate": 3.3186881779245664e-06, + "loss": 0.20720672607421875, + "step": 107815 + }, + { + "epoch": 0.9322876585589402, + "grad_norm": 1.1036415315699777, + "learning_rate": 3.3184855968130033e-06, + "loss": 0.099163818359375, + "step": 107820 + }, + { + "epoch": 0.9323308920804835, + "grad_norm": 28.37115899648527, + "learning_rate": 3.318283014232613e-06, + "loss": 0.214007568359375, + "step": 107825 + }, + { + "epoch": 0.9323741256020268, + "grad_norm": 0.9364481720929098, + "learning_rate": 3.318080430184329e-06, + "loss": 0.19007568359375, + "step": 107830 + }, + { + "epoch": 0.93241735912357, + "grad_norm": 35.17474386436487, + "learning_rate": 3.3178778446690867e-06, + "loss": 0.1078094482421875, + "step": 107835 + }, + { + "epoch": 0.9324605926451133, + "grad_norm": 26.127868308015966, + "learning_rate": 3.31767525768782e-06, + "loss": 0.27458934783935546, + "step": 107840 + }, + { + "epoch": 0.9325038261666566, + "grad_norm": 10.447783775587405, + "learning_rate": 3.3174726692414627e-06, + "loss": 0.13796844482421874, + "step": 107845 + }, + { + "epoch": 0.9325470596881998, + "grad_norm": 0.7876424034025563, + "learning_rate": 3.31727007933095e-06, + "loss": 0.06250534057617188, + "step": 107850 + }, + { + "epoch": 0.9325902932097431, + "grad_norm": 7.170962605772694, + "learning_rate": 3.3170674879572144e-06, + "loss": 0.33351898193359375, + "step": 107855 + }, + { + "epoch": 0.9326335267312864, + "grad_norm": 24.60221746343455, + "learning_rate": 3.316864895121193e-06, + "loss": 0.20001354217529296, + "step": 107860 + }, + { + "epoch": 0.9326767602528296, + "grad_norm": 23.941498159351756, + "learning_rate": 3.3166623008238183e-06, + "loss": 0.1472076416015625, + "step": 107865 + }, + { + "epoch": 0.9327199937743729, + "grad_norm": 6.974426842131093, + "learning_rate": 3.316459705066025e-06, + "loss": 0.20604019165039061, + "step": 107870 + }, + { + "epoch": 0.9327632272959162, + "grad_norm": 3.39466599180369, + "learning_rate": 3.316257107848747e-06, + "loss": 0.1761810302734375, + "step": 107875 + }, + { + "epoch": 0.9328064608174594, + "grad_norm": 4.572835879954099, + "learning_rate": 3.31605450917292e-06, + "loss": 0.042107009887695314, + "step": 107880 + }, + { + "epoch": 0.9328496943390027, + "grad_norm": 0.6996026382350539, + "learning_rate": 3.3158519090394766e-06, + "loss": 0.09512481689453126, + "step": 107885 + }, + { + "epoch": 0.932892927860546, + "grad_norm": 9.413288649867699, + "learning_rate": 3.315649307449353e-06, + "loss": 0.09565582275390624, + "step": 107890 + }, + { + "epoch": 0.9329361613820892, + "grad_norm": 0.31849116616932965, + "learning_rate": 3.315446704403482e-06, + "loss": 0.08585205078125, + "step": 107895 + }, + { + "epoch": 0.9329793949036325, + "grad_norm": 5.681796982245118, + "learning_rate": 3.3152440999027995e-06, + "loss": 0.037340545654296876, + "step": 107900 + }, + { + "epoch": 0.9330226284251757, + "grad_norm": 1.7515521175233613, + "learning_rate": 3.3150414939482386e-06, + "loss": 0.09189605712890625, + "step": 107905 + }, + { + "epoch": 0.933065861946719, + "grad_norm": 2.8233667645103306, + "learning_rate": 3.3148388865407342e-06, + "loss": 0.06548080444335938, + "step": 107910 + }, + { + "epoch": 0.9331090954682623, + "grad_norm": 7.238028764190413, + "learning_rate": 3.3146362776812205e-06, + "loss": 0.13533935546875, + "step": 107915 + }, + { + "epoch": 0.9331523289898055, + "grad_norm": 60.47207021451787, + "learning_rate": 3.314433667370632e-06, + "loss": 0.45842666625976564, + "step": 107920 + }, + { + "epoch": 0.9331955625113488, + "grad_norm": 20.1551270170607, + "learning_rate": 3.3142310556099036e-06, + "loss": 0.16402130126953124, + "step": 107925 + }, + { + "epoch": 0.9332387960328921, + "grad_norm": 5.4199768669064845, + "learning_rate": 3.3140284423999694e-06, + "loss": 0.09760017395019531, + "step": 107930 + }, + { + "epoch": 0.9332820295544353, + "grad_norm": 1.3073446810873404, + "learning_rate": 3.3138258277417642e-06, + "loss": 0.0459503173828125, + "step": 107935 + }, + { + "epoch": 0.9333252630759786, + "grad_norm": 47.16715383023159, + "learning_rate": 3.313623211636221e-06, + "loss": 0.3162109375, + "step": 107940 + }, + { + "epoch": 0.9333684965975219, + "grad_norm": 1.011171489348933, + "learning_rate": 3.313420594084275e-06, + "loss": 0.005689811706542969, + "step": 107945 + }, + { + "epoch": 0.9334117301190651, + "grad_norm": 3.6267713035459495, + "learning_rate": 3.313217975086862e-06, + "loss": 0.13712539672851562, + "step": 107950 + }, + { + "epoch": 0.9334549636406084, + "grad_norm": 3.5104967934820706, + "learning_rate": 3.3130153546449142e-06, + "loss": 0.26140480041503905, + "step": 107955 + }, + { + "epoch": 0.9334981971621517, + "grad_norm": 13.838243359758742, + "learning_rate": 3.3128127327593683e-06, + "loss": 0.058675384521484374, + "step": 107960 + }, + { + "epoch": 0.9335414306836949, + "grad_norm": 55.95670018541039, + "learning_rate": 3.312610109431158e-06, + "loss": 0.480767822265625, + "step": 107965 + }, + { + "epoch": 0.9335846642052382, + "grad_norm": 1.4940702055761856, + "learning_rate": 3.3124074846612167e-06, + "loss": 0.034799957275390626, + "step": 107970 + }, + { + "epoch": 0.9336278977267815, + "grad_norm": 12.04977071559344, + "learning_rate": 3.312204858450479e-06, + "loss": 0.1891693115234375, + "step": 107975 + }, + { + "epoch": 0.9336711312483247, + "grad_norm": 4.988232575196548, + "learning_rate": 3.3120022307998812e-06, + "loss": 0.034197235107421876, + "step": 107980 + }, + { + "epoch": 0.9337143647698679, + "grad_norm": 3.7527807889456817, + "learning_rate": 3.311799601710357e-06, + "loss": 0.1597686767578125, + "step": 107985 + }, + { + "epoch": 0.9337575982914113, + "grad_norm": 5.063052473893226, + "learning_rate": 3.311596971182839e-06, + "loss": 0.1481536865234375, + "step": 107990 + }, + { + "epoch": 0.9338008318129545, + "grad_norm": 1.2496967589728458, + "learning_rate": 3.3113943392182652e-06, + "loss": 0.06365776062011719, + "step": 107995 + }, + { + "epoch": 0.9338440653344977, + "grad_norm": 0.5723333615912455, + "learning_rate": 3.311191705817567e-06, + "loss": 0.09063034057617188, + "step": 108000 + }, + { + "epoch": 0.933887298856041, + "grad_norm": 1.7740082716998977, + "learning_rate": 3.31098907098168e-06, + "loss": 0.11746368408203126, + "step": 108005 + }, + { + "epoch": 0.9339305323775843, + "grad_norm": 50.93751186657827, + "learning_rate": 3.31078643471154e-06, + "loss": 0.1932615280151367, + "step": 108010 + }, + { + "epoch": 0.9339737658991275, + "grad_norm": 6.323320062366751, + "learning_rate": 3.3105837970080795e-06, + "loss": 0.26109619140625, + "step": 108015 + }, + { + "epoch": 0.9340169994206708, + "grad_norm": 0.9968801420269982, + "learning_rate": 3.3103811578722345e-06, + "loss": 0.03663711547851563, + "step": 108020 + }, + { + "epoch": 0.9340602329422141, + "grad_norm": 5.257444390268155, + "learning_rate": 3.310178517304938e-06, + "loss": 0.0969512939453125, + "step": 108025 + }, + { + "epoch": 0.9341034664637573, + "grad_norm": 5.500056069763115, + "learning_rate": 3.3099758753071264e-06, + "loss": 0.13796539306640626, + "step": 108030 + }, + { + "epoch": 0.9341466999853006, + "grad_norm": 15.247223148788532, + "learning_rate": 3.3097732318797335e-06, + "loss": 0.079913330078125, + "step": 108035 + }, + { + "epoch": 0.9341899335068439, + "grad_norm": 2.9110074935944685, + "learning_rate": 3.3095705870236942e-06, + "loss": 0.11741809844970703, + "step": 108040 + }, + { + "epoch": 0.9342331670283871, + "grad_norm": 10.794424388129855, + "learning_rate": 3.3093679407399428e-06, + "loss": 0.05606346130371094, + "step": 108045 + }, + { + "epoch": 0.9342764005499304, + "grad_norm": 19.03850884242857, + "learning_rate": 3.3091652930294126e-06, + "loss": 0.0921875, + "step": 108050 + }, + { + "epoch": 0.9343196340714737, + "grad_norm": 2.304452652748543, + "learning_rate": 3.3089626438930397e-06, + "loss": 0.20521888732910157, + "step": 108055 + }, + { + "epoch": 0.9343628675930169, + "grad_norm": 0.8847660571988994, + "learning_rate": 3.308759993331759e-06, + "loss": 0.25304718017578126, + "step": 108060 + }, + { + "epoch": 0.9344061011145602, + "grad_norm": 7.864529112534332, + "learning_rate": 3.308557341346504e-06, + "loss": 0.19490203857421876, + "step": 108065 + }, + { + "epoch": 0.9344493346361035, + "grad_norm": 11.269167100127705, + "learning_rate": 3.30835468793821e-06, + "loss": 0.17336673736572267, + "step": 108070 + }, + { + "epoch": 0.9344925681576467, + "grad_norm": 4.597809393836719, + "learning_rate": 3.308152033107812e-06, + "loss": 0.17975540161132814, + "step": 108075 + }, + { + "epoch": 0.9345358016791899, + "grad_norm": 1.4963319363114653, + "learning_rate": 3.3079493768562433e-06, + "loss": 0.020782470703125, + "step": 108080 + }, + { + "epoch": 0.9345790352007333, + "grad_norm": 0.9934427599520557, + "learning_rate": 3.3077467191844387e-06, + "loss": 0.2096874237060547, + "step": 108085 + }, + { + "epoch": 0.9346222687222765, + "grad_norm": 26.72299865131933, + "learning_rate": 3.307544060093335e-06, + "loss": 0.2581787109375, + "step": 108090 + }, + { + "epoch": 0.9346655022438197, + "grad_norm": 15.32789935826644, + "learning_rate": 3.3073413995838645e-06, + "loss": 0.3201740264892578, + "step": 108095 + }, + { + "epoch": 0.9347087357653631, + "grad_norm": 0.2611637339773643, + "learning_rate": 3.3071387376569624e-06, + "loss": 0.0281707763671875, + "step": 108100 + }, + { + "epoch": 0.9347519692869063, + "grad_norm": 5.173592787315436, + "learning_rate": 3.306936074313564e-06, + "loss": 0.2679147720336914, + "step": 108105 + }, + { + "epoch": 0.9347952028084495, + "grad_norm": 8.545239138237307, + "learning_rate": 3.3067334095546033e-06, + "loss": 0.4648105621337891, + "step": 108110 + }, + { + "epoch": 0.9348384363299929, + "grad_norm": 13.944616512230951, + "learning_rate": 3.3065307433810147e-06, + "loss": 0.185693359375, + "step": 108115 + }, + { + "epoch": 0.9348816698515361, + "grad_norm": 1.0581108107971335, + "learning_rate": 3.306328075793734e-06, + "loss": 0.29770050048828123, + "step": 108120 + }, + { + "epoch": 0.9349249033730793, + "grad_norm": 0.12463621737128562, + "learning_rate": 3.3061254067936953e-06, + "loss": 0.146893310546875, + "step": 108125 + }, + { + "epoch": 0.9349681368946227, + "grad_norm": 6.708344435443043, + "learning_rate": 3.305922736381833e-06, + "loss": 0.17771453857421876, + "step": 108130 + }, + { + "epoch": 0.9350113704161659, + "grad_norm": 2.778826153563948, + "learning_rate": 3.3057200645590827e-06, + "loss": 0.0524871826171875, + "step": 108135 + }, + { + "epoch": 0.9350546039377091, + "grad_norm": 8.664983263742583, + "learning_rate": 3.3055173913263775e-06, + "loss": 0.1369384765625, + "step": 108140 + }, + { + "epoch": 0.9350978374592525, + "grad_norm": 3.294377714040597, + "learning_rate": 3.3053147166846533e-06, + "loss": 0.19395484924316406, + "step": 108145 + }, + { + "epoch": 0.9351410709807957, + "grad_norm": 0.705075424377913, + "learning_rate": 3.3051120406348457e-06, + "loss": 0.10465316772460938, + "step": 108150 + }, + { + "epoch": 0.9351843045023389, + "grad_norm": 0.5570461026663464, + "learning_rate": 3.3049093631778876e-06, + "loss": 0.15038681030273438, + "step": 108155 + }, + { + "epoch": 0.9352275380238821, + "grad_norm": 17.317871184787332, + "learning_rate": 3.304706684314715e-06, + "loss": 0.08385009765625, + "step": 108160 + }, + { + "epoch": 0.9352707715454255, + "grad_norm": 7.9679093020144585, + "learning_rate": 3.304504004046261e-06, + "loss": 0.07071685791015625, + "step": 108165 + }, + { + "epoch": 0.9353140050669687, + "grad_norm": 1.694769131835112, + "learning_rate": 3.3043013223734613e-06, + "loss": 0.18937721252441406, + "step": 108170 + }, + { + "epoch": 0.9353572385885119, + "grad_norm": 3.655599663187419, + "learning_rate": 3.304098639297252e-06, + "loss": 0.04110722541809082, + "step": 108175 + }, + { + "epoch": 0.9354004721100553, + "grad_norm": 2.831483447477576, + "learning_rate": 3.3038959548185656e-06, + "loss": 0.08398895263671875, + "step": 108180 + }, + { + "epoch": 0.9354437056315985, + "grad_norm": 7.474846285306097, + "learning_rate": 3.3036932689383384e-06, + "loss": 0.06759185791015625, + "step": 108185 + }, + { + "epoch": 0.9354869391531417, + "grad_norm": 42.82938753232085, + "learning_rate": 3.3034905816575053e-06, + "loss": 0.1742889404296875, + "step": 108190 + }, + { + "epoch": 0.9355301726746851, + "grad_norm": 0.5113820736392629, + "learning_rate": 3.303287892977e-06, + "loss": 0.0941986083984375, + "step": 108195 + }, + { + "epoch": 0.9355734061962283, + "grad_norm": 15.787158429438687, + "learning_rate": 3.3030852028977563e-06, + "loss": 0.05739307403564453, + "step": 108200 + }, + { + "epoch": 0.9356166397177715, + "grad_norm": 8.998620887697292, + "learning_rate": 3.3028825114207127e-06, + "loss": 0.11643218994140625, + "step": 108205 + }, + { + "epoch": 0.9356598732393149, + "grad_norm": 34.57428317704938, + "learning_rate": 3.3026798185468e-06, + "loss": 0.130877685546875, + "step": 108210 + }, + { + "epoch": 0.9357031067608581, + "grad_norm": 0.8807705751701724, + "learning_rate": 3.3024771242769554e-06, + "loss": 0.030263900756835938, + "step": 108215 + }, + { + "epoch": 0.9357463402824013, + "grad_norm": 7.547639629800132, + "learning_rate": 3.3022744286121136e-06, + "loss": 0.065191650390625, + "step": 108220 + }, + { + "epoch": 0.9357895738039447, + "grad_norm": 0.6701594114109369, + "learning_rate": 3.3020717315532075e-06, + "loss": 0.13677902221679689, + "step": 108225 + }, + { + "epoch": 0.9358328073254879, + "grad_norm": 1.2315407537425729, + "learning_rate": 3.301869033101174e-06, + "loss": 0.1196990966796875, + "step": 108230 + }, + { + "epoch": 0.9358760408470311, + "grad_norm": 0.5274581920847593, + "learning_rate": 3.301666333256948e-06, + "loss": 0.106353759765625, + "step": 108235 + }, + { + "epoch": 0.9359192743685745, + "grad_norm": 10.789996224270071, + "learning_rate": 3.301463632021463e-06, + "loss": 0.22805099487304686, + "step": 108240 + }, + { + "epoch": 0.9359625078901177, + "grad_norm": 0.35317661592281435, + "learning_rate": 3.301260929395654e-06, + "loss": 0.12462005615234376, + "step": 108245 + }, + { + "epoch": 0.9360057414116609, + "grad_norm": 17.834625022557585, + "learning_rate": 3.301058225380456e-06, + "loss": 0.1612701416015625, + "step": 108250 + }, + { + "epoch": 0.9360489749332042, + "grad_norm": 4.979759879804711, + "learning_rate": 3.300855519976805e-06, + "loss": 0.3307014465332031, + "step": 108255 + }, + { + "epoch": 0.9360922084547475, + "grad_norm": 27.547873083641218, + "learning_rate": 3.300652813185634e-06, + "loss": 0.0641510009765625, + "step": 108260 + }, + { + "epoch": 0.9361354419762907, + "grad_norm": 55.050553887830134, + "learning_rate": 3.300450105007879e-06, + "loss": 0.42942352294921876, + "step": 108265 + }, + { + "epoch": 0.936178675497834, + "grad_norm": 49.4845558705744, + "learning_rate": 3.3002473954444753e-06, + "loss": 0.16656723022460937, + "step": 108270 + }, + { + "epoch": 0.9362219090193773, + "grad_norm": 5.653331906006453, + "learning_rate": 3.3000446844963567e-06, + "loss": 0.2739391326904297, + "step": 108275 + }, + { + "epoch": 0.9362651425409205, + "grad_norm": 28.823598690727373, + "learning_rate": 3.2998419721644577e-06, + "loss": 0.203399658203125, + "step": 108280 + }, + { + "epoch": 0.9363083760624638, + "grad_norm": 1.1965467443265239, + "learning_rate": 3.2996392584497157e-06, + "loss": 0.17697296142578126, + "step": 108285 + }, + { + "epoch": 0.9363516095840071, + "grad_norm": 12.271551728549396, + "learning_rate": 3.2994365433530626e-06, + "loss": 0.3093914031982422, + "step": 108290 + }, + { + "epoch": 0.9363948431055503, + "grad_norm": 0.8457250930192456, + "learning_rate": 3.299233826875435e-06, + "loss": 0.29746856689453127, + "step": 108295 + }, + { + "epoch": 0.9364380766270936, + "grad_norm": 60.677947188106714, + "learning_rate": 3.2990311090177675e-06, + "loss": 0.36343994140625, + "step": 108300 + }, + { + "epoch": 0.9364813101486369, + "grad_norm": 0.08638486131497797, + "learning_rate": 3.2988283897809953e-06, + "loss": 0.09333648681640624, + "step": 108305 + }, + { + "epoch": 0.9365245436701801, + "grad_norm": 47.795880403942704, + "learning_rate": 3.298625669166052e-06, + "loss": 0.22158164978027345, + "step": 108310 + }, + { + "epoch": 0.9365677771917233, + "grad_norm": 27.518224174895245, + "learning_rate": 3.2984229471738742e-06, + "loss": 0.1536590576171875, + "step": 108315 + }, + { + "epoch": 0.9366110107132667, + "grad_norm": 0.3920235082956617, + "learning_rate": 3.2982202238053956e-06, + "loss": 0.09016342163085937, + "step": 108320 + }, + { + "epoch": 0.9366542442348099, + "grad_norm": 4.61052155684753, + "learning_rate": 3.2980174990615523e-06, + "loss": 0.08088512420654297, + "step": 108325 + }, + { + "epoch": 0.9366974777563531, + "grad_norm": 1.095832884331259, + "learning_rate": 3.2978147729432794e-06, + "loss": 0.17494354248046876, + "step": 108330 + }, + { + "epoch": 0.9367407112778964, + "grad_norm": 1.5446259155556579, + "learning_rate": 3.2976120454515096e-06, + "loss": 0.1878185272216797, + "step": 108335 + }, + { + "epoch": 0.9367839447994397, + "grad_norm": 3.549313623067352, + "learning_rate": 3.2974093165871796e-06, + "loss": 0.06459312438964844, + "step": 108340 + }, + { + "epoch": 0.936827178320983, + "grad_norm": 13.515868078652838, + "learning_rate": 3.2972065863512242e-06, + "loss": 0.306353759765625, + "step": 108345 + }, + { + "epoch": 0.9368704118425262, + "grad_norm": 2.3821315491770436, + "learning_rate": 3.297003854744579e-06, + "loss": 0.15552787780761718, + "step": 108350 + }, + { + "epoch": 0.9369136453640695, + "grad_norm": 0.5515334129610938, + "learning_rate": 3.2968011217681776e-06, + "loss": 0.16162481307983398, + "step": 108355 + }, + { + "epoch": 0.9369568788856127, + "grad_norm": 1.9464571011900846, + "learning_rate": 3.2965983874229563e-06, + "loss": 0.32676849365234373, + "step": 108360 + }, + { + "epoch": 0.937000112407156, + "grad_norm": 16.043998849405938, + "learning_rate": 3.2963956517098482e-06, + "loss": 0.1293914794921875, + "step": 108365 + }, + { + "epoch": 0.9370433459286993, + "grad_norm": 11.469993097008626, + "learning_rate": 3.2961929146297904e-06, + "loss": 0.15871734619140626, + "step": 108370 + }, + { + "epoch": 0.9370865794502425, + "grad_norm": 0.3679297707597019, + "learning_rate": 3.295990176183717e-06, + "loss": 0.03469161987304688, + "step": 108375 + }, + { + "epoch": 0.9371298129717858, + "grad_norm": 30.329763762327943, + "learning_rate": 3.2957874363725632e-06, + "loss": 0.09722824096679687, + "step": 108380 + }, + { + "epoch": 0.9371730464933291, + "grad_norm": 6.3780714260111395, + "learning_rate": 3.2955846951972637e-06, + "loss": 0.15116729736328124, + "step": 108385 + }, + { + "epoch": 0.9372162800148723, + "grad_norm": 4.9099908464003015, + "learning_rate": 3.2953819526587536e-06, + "loss": 0.08918609619140624, + "step": 108390 + }, + { + "epoch": 0.9372595135364156, + "grad_norm": 7.058071670468874, + "learning_rate": 3.2951792087579676e-06, + "loss": 0.11420211791992188, + "step": 108395 + }, + { + "epoch": 0.9373027470579589, + "grad_norm": 0.702182180365068, + "learning_rate": 3.2949764634958413e-06, + "loss": 0.130645751953125, + "step": 108400 + }, + { + "epoch": 0.9373459805795021, + "grad_norm": 23.994635429881033, + "learning_rate": 3.2947737168733106e-06, + "loss": 0.296405029296875, + "step": 108405 + }, + { + "epoch": 0.9373892141010454, + "grad_norm": 42.9358944225004, + "learning_rate": 3.2945709688913094e-06, + "loss": 0.42737579345703125, + "step": 108410 + }, + { + "epoch": 0.9374324476225887, + "grad_norm": 3.588899158338891, + "learning_rate": 3.294368219550772e-06, + "loss": 0.12383651733398438, + "step": 108415 + }, + { + "epoch": 0.9374756811441319, + "grad_norm": 0.6720192717517152, + "learning_rate": 3.2941654688526346e-06, + "loss": 0.22141532897949218, + "step": 108420 + }, + { + "epoch": 0.9375189146656752, + "grad_norm": 1.8826389119475306, + "learning_rate": 3.2939627167978326e-06, + "loss": 0.04466705322265625, + "step": 108425 + }, + { + "epoch": 0.9375621481872184, + "grad_norm": 2.134529455801774, + "learning_rate": 3.2937599633873006e-06, + "loss": 0.1720245361328125, + "step": 108430 + }, + { + "epoch": 0.9376053817087617, + "grad_norm": 18.63896202503529, + "learning_rate": 3.293557208621973e-06, + "loss": 0.12093582153320312, + "step": 108435 + }, + { + "epoch": 0.937648615230305, + "grad_norm": 0.4286541920917439, + "learning_rate": 3.293354452502786e-06, + "loss": 0.07202205657958985, + "step": 108440 + }, + { + "epoch": 0.9376918487518482, + "grad_norm": 0.07841100942752642, + "learning_rate": 3.293151695030674e-06, + "loss": 0.06495857238769531, + "step": 108445 + }, + { + "epoch": 0.9377350822733915, + "grad_norm": 6.154514834117447, + "learning_rate": 3.2929489362065713e-06, + "loss": 0.16060028076171876, + "step": 108450 + }, + { + "epoch": 0.9377783157949348, + "grad_norm": 8.004165508380458, + "learning_rate": 3.2927461760314152e-06, + "loss": 0.120574951171875, + "step": 108455 + }, + { + "epoch": 0.937821549316478, + "grad_norm": 2.2037340783222366, + "learning_rate": 3.2925434145061394e-06, + "loss": 0.0384765625, + "step": 108460 + }, + { + "epoch": 0.9378647828380213, + "grad_norm": 18.39244833125044, + "learning_rate": 3.2923406516316786e-06, + "loss": 0.319573974609375, + "step": 108465 + }, + { + "epoch": 0.9379080163595646, + "grad_norm": 19.93025162609197, + "learning_rate": 3.2921378874089692e-06, + "loss": 0.1617156982421875, + "step": 108470 + }, + { + "epoch": 0.9379512498811078, + "grad_norm": 22.216444176581057, + "learning_rate": 3.291935121838946e-06, + "loss": 0.3554573059082031, + "step": 108475 + }, + { + "epoch": 0.9379944834026511, + "grad_norm": 25.61638912909868, + "learning_rate": 3.2917323549225426e-06, + "loss": 0.11292724609375, + "step": 108480 + }, + { + "epoch": 0.9380377169241944, + "grad_norm": 1.0714020014445194, + "learning_rate": 3.291529586660697e-06, + "loss": 0.07891502380371093, + "step": 108485 + }, + { + "epoch": 0.9380809504457376, + "grad_norm": 3.8487521274913443, + "learning_rate": 3.2913268170543417e-06, + "loss": 0.03824195861816406, + "step": 108490 + }, + { + "epoch": 0.9381241839672809, + "grad_norm": 7.316504371978335, + "learning_rate": 3.2911240461044124e-06, + "loss": 0.2934844970703125, + "step": 108495 + }, + { + "epoch": 0.9381674174888242, + "grad_norm": 12.27023133588527, + "learning_rate": 3.290921273811846e-06, + "loss": 0.17836227416992187, + "step": 108500 + }, + { + "epoch": 0.9382106510103674, + "grad_norm": 7.511173417981167, + "learning_rate": 3.2907185001775752e-06, + "loss": 0.1433879852294922, + "step": 108505 + }, + { + "epoch": 0.9382538845319106, + "grad_norm": 5.761957952523643, + "learning_rate": 3.290515725202537e-06, + "loss": 0.12676467895507812, + "step": 108510 + }, + { + "epoch": 0.938297118053454, + "grad_norm": 15.346110792490665, + "learning_rate": 3.2903129488876656e-06, + "loss": 0.242779541015625, + "step": 108515 + }, + { + "epoch": 0.9383403515749972, + "grad_norm": 2.587724486379231, + "learning_rate": 3.2901101712338974e-06, + "loss": 0.09796695709228516, + "step": 108520 + }, + { + "epoch": 0.9383835850965404, + "grad_norm": 16.89625249425045, + "learning_rate": 3.289907392242166e-06, + "loss": 0.1317901611328125, + "step": 108525 + }, + { + "epoch": 0.9384268186180837, + "grad_norm": 1.6478509285756167, + "learning_rate": 3.2897046119134072e-06, + "loss": 0.091351318359375, + "step": 108530 + }, + { + "epoch": 0.938470052139627, + "grad_norm": 4.474340206848763, + "learning_rate": 3.2895018302485564e-06, + "loss": 0.2682281494140625, + "step": 108535 + }, + { + "epoch": 0.9385132856611702, + "grad_norm": 23.627837338267174, + "learning_rate": 3.289299047248549e-06, + "loss": 0.0520965576171875, + "step": 108540 + }, + { + "epoch": 0.9385565191827135, + "grad_norm": 3.1423488034369544, + "learning_rate": 3.2890962629143193e-06, + "loss": 0.18656845092773439, + "step": 108545 + }, + { + "epoch": 0.9385997527042568, + "grad_norm": 2.1545513172561384, + "learning_rate": 3.288893477246804e-06, + "loss": 0.08435592651367188, + "step": 108550 + }, + { + "epoch": 0.9386429862258, + "grad_norm": 1.8743742100264729, + "learning_rate": 3.288690690246938e-06, + "loss": 0.10942192077636718, + "step": 108555 + }, + { + "epoch": 0.9386862197473433, + "grad_norm": 1.6790816510198696, + "learning_rate": 3.2884879019156545e-06, + "loss": 0.0525909423828125, + "step": 108560 + }, + { + "epoch": 0.9387294532688866, + "grad_norm": 15.103161031631497, + "learning_rate": 3.28828511225389e-06, + "loss": 0.1947662353515625, + "step": 108565 + }, + { + "epoch": 0.9387726867904298, + "grad_norm": 6.803718415309107, + "learning_rate": 3.2880823212625824e-06, + "loss": 0.06942825317382813, + "step": 108570 + }, + { + "epoch": 0.9388159203119731, + "grad_norm": 0.1763164133021101, + "learning_rate": 3.2878795289426623e-06, + "loss": 0.06349067687988282, + "step": 108575 + }, + { + "epoch": 0.9388591538335164, + "grad_norm": 9.884462122268967, + "learning_rate": 3.2876767352950693e-06, + "loss": 0.0997314453125, + "step": 108580 + }, + { + "epoch": 0.9389023873550596, + "grad_norm": 78.57347427448704, + "learning_rate": 3.287473940320735e-06, + "loss": 0.3208351135253906, + "step": 108585 + }, + { + "epoch": 0.9389456208766029, + "grad_norm": 26.290053137400296, + "learning_rate": 3.287271144020597e-06, + "loss": 0.10606651306152344, + "step": 108590 + }, + { + "epoch": 0.9389888543981462, + "grad_norm": 7.27769855448167, + "learning_rate": 3.2870683463955894e-06, + "loss": 0.06344757080078126, + "step": 108595 + }, + { + "epoch": 0.9390320879196894, + "grad_norm": 26.44151434565737, + "learning_rate": 3.2868655474466484e-06, + "loss": 0.20339736938476563, + "step": 108600 + }, + { + "epoch": 0.9390753214412326, + "grad_norm": 2.595401908710817, + "learning_rate": 3.2866627471747093e-06, + "loss": 0.054338836669921876, + "step": 108605 + }, + { + "epoch": 0.939118554962776, + "grad_norm": 0.8216222564174624, + "learning_rate": 3.286459945580706e-06, + "loss": 0.06888313293457031, + "step": 108610 + }, + { + "epoch": 0.9391617884843192, + "grad_norm": 48.631309649158034, + "learning_rate": 3.286257142665575e-06, + "loss": 0.304296875, + "step": 108615 + }, + { + "epoch": 0.9392050220058624, + "grad_norm": 19.14375483752482, + "learning_rate": 3.286054338430251e-06, + "loss": 0.14843902587890626, + "step": 108620 + }, + { + "epoch": 0.9392482555274058, + "grad_norm": 3.230345337003884, + "learning_rate": 3.28585153287567e-06, + "loss": 0.15489501953125, + "step": 108625 + }, + { + "epoch": 0.939291489048949, + "grad_norm": 6.937775413152844, + "learning_rate": 3.285648726002767e-06, + "loss": 0.03398590087890625, + "step": 108630 + }, + { + "epoch": 0.9393347225704922, + "grad_norm": 53.60096221424304, + "learning_rate": 3.285445917812478e-06, + "loss": 0.3322410583496094, + "step": 108635 + }, + { + "epoch": 0.9393779560920356, + "grad_norm": 0.7382363412903302, + "learning_rate": 3.2852431083057365e-06, + "loss": 0.57225341796875, + "step": 108640 + }, + { + "epoch": 0.9394211896135788, + "grad_norm": 0.7192252665229005, + "learning_rate": 3.2850402974834803e-06, + "loss": 0.018218994140625, + "step": 108645 + }, + { + "epoch": 0.939464423135122, + "grad_norm": 5.7258190778397795, + "learning_rate": 3.284837485346642e-06, + "loss": 0.043154144287109376, + "step": 108650 + }, + { + "epoch": 0.9395076566566654, + "grad_norm": 1.1785911617714542, + "learning_rate": 3.2846346718961587e-06, + "loss": 0.0627197265625, + "step": 108655 + }, + { + "epoch": 0.9395508901782086, + "grad_norm": 10.815635589918333, + "learning_rate": 3.2844318571329665e-06, + "loss": 0.4228202819824219, + "step": 108660 + }, + { + "epoch": 0.9395941236997518, + "grad_norm": 72.73319960352146, + "learning_rate": 3.2842290410579993e-06, + "loss": 0.267205810546875, + "step": 108665 + }, + { + "epoch": 0.9396373572212952, + "grad_norm": 7.6333293481632545, + "learning_rate": 3.284026223672192e-06, + "loss": 0.10703125, + "step": 108670 + }, + { + "epoch": 0.9396805907428384, + "grad_norm": 16.840819048274557, + "learning_rate": 3.283823404976481e-06, + "loss": 0.1094329833984375, + "step": 108675 + }, + { + "epoch": 0.9397238242643816, + "grad_norm": 3.889228917599136, + "learning_rate": 3.283620584971802e-06, + "loss": 0.0323211669921875, + "step": 108680 + }, + { + "epoch": 0.9397670577859248, + "grad_norm": 0.6307891685619619, + "learning_rate": 3.2834177636590907e-06, + "loss": 0.05038909912109375, + "step": 108685 + }, + { + "epoch": 0.9398102913074682, + "grad_norm": 0.30109165431426277, + "learning_rate": 3.28321494103928e-06, + "loss": 0.07855815887451172, + "step": 108690 + }, + { + "epoch": 0.9398535248290114, + "grad_norm": 16.19435266922549, + "learning_rate": 3.2830121171133084e-06, + "loss": 0.08151626586914062, + "step": 108695 + }, + { + "epoch": 0.9398967583505546, + "grad_norm": 4.010338186062547, + "learning_rate": 3.2828092918821095e-06, + "loss": 0.05271701812744141, + "step": 108700 + }, + { + "epoch": 0.939939991872098, + "grad_norm": 2.4069399139599668, + "learning_rate": 3.2826064653466183e-06, + "loss": 0.165771484375, + "step": 108705 + }, + { + "epoch": 0.9399832253936412, + "grad_norm": 2.51790205705175, + "learning_rate": 3.2824036375077726e-06, + "loss": 0.0401123046875, + "step": 108710 + }, + { + "epoch": 0.9400264589151844, + "grad_norm": 48.91054406051402, + "learning_rate": 3.2822008083665053e-06, + "loss": 0.12175979614257812, + "step": 108715 + }, + { + "epoch": 0.9400696924367278, + "grad_norm": 19.195496423491456, + "learning_rate": 3.2819979779237532e-06, + "loss": 0.07412109375, + "step": 108720 + }, + { + "epoch": 0.940112925958271, + "grad_norm": 10.638460191425242, + "learning_rate": 3.2817951461804515e-06, + "loss": 0.08931121826171876, + "step": 108725 + }, + { + "epoch": 0.9401561594798142, + "grad_norm": 2.8268392619330975, + "learning_rate": 3.2815923131375345e-06, + "loss": 0.12464828491210937, + "step": 108730 + }, + { + "epoch": 0.9401993930013576, + "grad_norm": 14.99403765910288, + "learning_rate": 3.2813894787959393e-06, + "loss": 0.06606369018554688, + "step": 108735 + }, + { + "epoch": 0.9402426265229008, + "grad_norm": 9.721622182368112, + "learning_rate": 3.2811866431566003e-06, + "loss": 0.03730964660644531, + "step": 108740 + }, + { + "epoch": 0.940285860044444, + "grad_norm": 30.116536038741796, + "learning_rate": 3.2809838062204546e-06, + "loss": 0.1828460693359375, + "step": 108745 + }, + { + "epoch": 0.9403290935659874, + "grad_norm": 4.379321408957302, + "learning_rate": 3.280780967988435e-06, + "loss": 0.16886444091796876, + "step": 108750 + }, + { + "epoch": 0.9403723270875306, + "grad_norm": 8.837260703582524, + "learning_rate": 3.2805781284614795e-06, + "loss": 0.28301010131835935, + "step": 108755 + }, + { + "epoch": 0.9404155606090738, + "grad_norm": 6.803803617698886, + "learning_rate": 3.280375287640521e-06, + "loss": 0.33041229248046877, + "step": 108760 + }, + { + "epoch": 0.9404587941306172, + "grad_norm": 2.134478762284606, + "learning_rate": 3.280172445526497e-06, + "loss": 0.06847763061523438, + "step": 108765 + }, + { + "epoch": 0.9405020276521604, + "grad_norm": 4.692193250550059, + "learning_rate": 3.279969602120343e-06, + "loss": 0.096685791015625, + "step": 108770 + }, + { + "epoch": 0.9405452611737036, + "grad_norm": 6.698686945208147, + "learning_rate": 3.279766757422994e-06, + "loss": 0.11608734130859374, + "step": 108775 + }, + { + "epoch": 0.9405884946952469, + "grad_norm": 2.390199928388805, + "learning_rate": 3.279563911435386e-06, + "loss": 0.13157272338867188, + "step": 108780 + }, + { + "epoch": 0.9406317282167902, + "grad_norm": 1.571607384446923, + "learning_rate": 3.2793610641584524e-06, + "loss": 0.33269615173339845, + "step": 108785 + }, + { + "epoch": 0.9406749617383334, + "grad_norm": 9.758891599050115, + "learning_rate": 3.2791582155931306e-06, + "loss": 0.08674049377441406, + "step": 108790 + }, + { + "epoch": 0.9407181952598767, + "grad_norm": 3.9773138678574798, + "learning_rate": 3.2789553657403564e-06, + "loss": 0.2538299560546875, + "step": 108795 + }, + { + "epoch": 0.94076142878142, + "grad_norm": 21.65553867911243, + "learning_rate": 3.2787525146010645e-06, + "loss": 0.44298744201660156, + "step": 108800 + }, + { + "epoch": 0.9408046623029632, + "grad_norm": 31.75428328802569, + "learning_rate": 3.278549662176191e-06, + "loss": 0.10119400024414063, + "step": 108805 + }, + { + "epoch": 0.9408478958245065, + "grad_norm": 14.00008127709605, + "learning_rate": 3.2783468084666707e-06, + "loss": 0.34119720458984376, + "step": 108810 + }, + { + "epoch": 0.9408911293460498, + "grad_norm": 0.32009906305442726, + "learning_rate": 3.278143953473439e-06, + "loss": 0.0646636962890625, + "step": 108815 + }, + { + "epoch": 0.940934362867593, + "grad_norm": 27.53843959014206, + "learning_rate": 3.277941097197432e-06, + "loss": 0.21986427307128906, + "step": 108820 + }, + { + "epoch": 0.9409775963891363, + "grad_norm": 6.865652459226501, + "learning_rate": 3.2777382396395856e-06, + "loss": 0.08681640625, + "step": 108825 + }, + { + "epoch": 0.9410208299106796, + "grad_norm": 15.112779661375763, + "learning_rate": 3.2775353808008353e-06, + "loss": 0.305841064453125, + "step": 108830 + }, + { + "epoch": 0.9410640634322228, + "grad_norm": 50.57880989588066, + "learning_rate": 3.2773325206821162e-06, + "loss": 0.650225830078125, + "step": 108835 + }, + { + "epoch": 0.941107296953766, + "grad_norm": 37.85062857875642, + "learning_rate": 3.277129659284364e-06, + "loss": 0.124749755859375, + "step": 108840 + }, + { + "epoch": 0.9411505304753094, + "grad_norm": 0.08375296159560086, + "learning_rate": 3.2769267966085137e-06, + "loss": 0.044891357421875, + "step": 108845 + }, + { + "epoch": 0.9411937639968526, + "grad_norm": 16.600814523775554, + "learning_rate": 3.2767239326555017e-06, + "loss": 0.1801055908203125, + "step": 108850 + }, + { + "epoch": 0.9412369975183958, + "grad_norm": 22.898565929631523, + "learning_rate": 3.2765210674262636e-06, + "loss": 0.339044189453125, + "step": 108855 + }, + { + "epoch": 0.9412802310399391, + "grad_norm": 3.3561411185758336, + "learning_rate": 3.2763182009217356e-06, + "loss": 0.16897544860839844, + "step": 108860 + }, + { + "epoch": 0.9413234645614824, + "grad_norm": 2.7142789066670523, + "learning_rate": 3.276115333142851e-06, + "loss": 0.1816650390625, + "step": 108865 + }, + { + "epoch": 0.9413666980830256, + "grad_norm": 0.7519366382153985, + "learning_rate": 3.275912464090547e-06, + "loss": 0.1494415283203125, + "step": 108870 + }, + { + "epoch": 0.9414099316045689, + "grad_norm": 2.383664492832254, + "learning_rate": 3.2757095937657603e-06, + "loss": 0.08977203369140625, + "step": 108875 + }, + { + "epoch": 0.9414531651261122, + "grad_norm": 9.767564311630679, + "learning_rate": 3.2755067221694242e-06, + "loss": 0.0943263053894043, + "step": 108880 + }, + { + "epoch": 0.9414963986476554, + "grad_norm": 0.3516322727391284, + "learning_rate": 3.2753038493024764e-06, + "loss": 0.2290924072265625, + "step": 108885 + }, + { + "epoch": 0.9415396321691987, + "grad_norm": 2.5381494037956824, + "learning_rate": 3.275100975165851e-06, + "loss": 0.06533203125, + "step": 108890 + }, + { + "epoch": 0.941582865690742, + "grad_norm": 13.54350378431459, + "learning_rate": 3.274898099760484e-06, + "loss": 0.0527618408203125, + "step": 108895 + }, + { + "epoch": 0.9416260992122852, + "grad_norm": 2.789176667372888, + "learning_rate": 3.2746952230873116e-06, + "loss": 0.16233978271484376, + "step": 108900 + }, + { + "epoch": 0.9416693327338285, + "grad_norm": 9.802465666538643, + "learning_rate": 3.2744923451472696e-06, + "loss": 0.06317596435546875, + "step": 108905 + }, + { + "epoch": 0.9417125662553718, + "grad_norm": 2.9594063424789105, + "learning_rate": 3.274289465941292e-06, + "loss": 0.10281143188476563, + "step": 108910 + }, + { + "epoch": 0.941755799776915, + "grad_norm": 14.220456216395991, + "learning_rate": 3.274086585470317e-06, + "loss": 0.0665771484375, + "step": 108915 + }, + { + "epoch": 0.9417990332984583, + "grad_norm": 34.78195792929898, + "learning_rate": 3.2738837037352784e-06, + "loss": 0.08472442626953125, + "step": 108920 + }, + { + "epoch": 0.9418422668200016, + "grad_norm": 4.327459118112083, + "learning_rate": 3.2736808207371113e-06, + "loss": 0.1590789794921875, + "step": 108925 + }, + { + "epoch": 0.9418855003415448, + "grad_norm": 0.714461657284222, + "learning_rate": 3.273477936476753e-06, + "loss": 0.17448654174804687, + "step": 108930 + }, + { + "epoch": 0.9419287338630881, + "grad_norm": 0.0902554106957559, + "learning_rate": 3.2732750509551393e-06, + "loss": 0.016934967041015624, + "step": 108935 + }, + { + "epoch": 0.9419719673846313, + "grad_norm": 4.583039039532312, + "learning_rate": 3.2730721641732046e-06, + "loss": 0.1401336669921875, + "step": 108940 + }, + { + "epoch": 0.9420152009061746, + "grad_norm": 0.11556753312138787, + "learning_rate": 3.272869276131885e-06, + "loss": 0.12211990356445312, + "step": 108945 + }, + { + "epoch": 0.9420584344277179, + "grad_norm": 18.33228349554326, + "learning_rate": 3.272666386832117e-06, + "loss": 0.050230979919433594, + "step": 108950 + }, + { + "epoch": 0.9421016679492611, + "grad_norm": 0.7201760271983649, + "learning_rate": 3.2724634962748352e-06, + "loss": 0.042000198364257814, + "step": 108955 + }, + { + "epoch": 0.9421449014708044, + "grad_norm": 33.8973863566848, + "learning_rate": 3.272260604460976e-06, + "loss": 0.24698638916015625, + "step": 108960 + }, + { + "epoch": 0.9421881349923477, + "grad_norm": 13.783519414745033, + "learning_rate": 3.2720577113914753e-06, + "loss": 0.18056163787841797, + "step": 108965 + }, + { + "epoch": 0.9422313685138909, + "grad_norm": 22.893256902220187, + "learning_rate": 3.2718548170672682e-06, + "loss": 0.1172454833984375, + "step": 108970 + }, + { + "epoch": 0.9422746020354342, + "grad_norm": 30.629568809142803, + "learning_rate": 3.2716519214892906e-06, + "loss": 0.1154144287109375, + "step": 108975 + }, + { + "epoch": 0.9423178355569775, + "grad_norm": 5.087485649882501, + "learning_rate": 3.271449024658479e-06, + "loss": 0.0403167724609375, + "step": 108980 + }, + { + "epoch": 0.9423610690785207, + "grad_norm": 0.44914442656898135, + "learning_rate": 3.271246126575767e-06, + "loss": 0.12222442626953126, + "step": 108985 + }, + { + "epoch": 0.942404302600064, + "grad_norm": 0.8097391437245297, + "learning_rate": 3.2710432272420925e-06, + "loss": 0.06657867431640625, + "step": 108990 + }, + { + "epoch": 0.9424475361216073, + "grad_norm": 4.4026825049705165, + "learning_rate": 3.2708403266583916e-06, + "loss": 0.02863922119140625, + "step": 108995 + }, + { + "epoch": 0.9424907696431505, + "grad_norm": 21.85803271727308, + "learning_rate": 3.2706374248255987e-06, + "loss": 0.12449798583984376, + "step": 109000 + }, + { + "epoch": 0.9425340031646938, + "grad_norm": 14.100310688304164, + "learning_rate": 3.2704345217446494e-06, + "loss": 0.07099685668945313, + "step": 109005 + }, + { + "epoch": 0.942577236686237, + "grad_norm": 27.478122375453246, + "learning_rate": 3.27023161741648e-06, + "loss": 0.19081573486328124, + "step": 109010 + }, + { + "epoch": 0.9426204702077803, + "grad_norm": 0.10071720181093366, + "learning_rate": 3.2700287118420262e-06, + "loss": 0.09294853210449219, + "step": 109015 + }, + { + "epoch": 0.9426637037293236, + "grad_norm": 4.929462199419168, + "learning_rate": 3.2698258050222235e-06, + "loss": 0.12675323486328124, + "step": 109020 + }, + { + "epoch": 0.9427069372508668, + "grad_norm": 10.759734156892979, + "learning_rate": 3.269622896958009e-06, + "loss": 0.101898193359375, + "step": 109025 + }, + { + "epoch": 0.9427501707724101, + "grad_norm": 13.95940878222466, + "learning_rate": 3.2694199876503173e-06, + "loss": 0.08809871673583984, + "step": 109030 + }, + { + "epoch": 0.9427934042939533, + "grad_norm": 21.595924804759775, + "learning_rate": 3.2692170771000845e-06, + "loss": 0.14126510620117189, + "step": 109035 + }, + { + "epoch": 0.9428366378154966, + "grad_norm": 28.317926890983145, + "learning_rate": 3.2690141653082454e-06, + "loss": 0.1075714111328125, + "step": 109040 + }, + { + "epoch": 0.9428798713370399, + "grad_norm": 0.8651465612962531, + "learning_rate": 3.2688112522757377e-06, + "loss": 0.05355377197265625, + "step": 109045 + }, + { + "epoch": 0.9429231048585831, + "grad_norm": 9.26459590147621, + "learning_rate": 3.268608338003496e-06, + "loss": 0.07080764770507812, + "step": 109050 + }, + { + "epoch": 0.9429663383801264, + "grad_norm": 3.717921027417506, + "learning_rate": 3.2684054224924555e-06, + "loss": 0.09096755981445312, + "step": 109055 + }, + { + "epoch": 0.9430095719016697, + "grad_norm": 2.9102585294034933, + "learning_rate": 3.2682025057435543e-06, + "loss": 0.1086456298828125, + "step": 109060 + }, + { + "epoch": 0.9430528054232129, + "grad_norm": 5.393056710225276, + "learning_rate": 3.267999587757727e-06, + "loss": 0.042955970764160155, + "step": 109065 + }, + { + "epoch": 0.9430960389447562, + "grad_norm": 2.8830220663563204, + "learning_rate": 3.2677966685359072e-06, + "loss": 0.12993240356445312, + "step": 109070 + }, + { + "epoch": 0.9431392724662995, + "grad_norm": 11.139677142063151, + "learning_rate": 3.267593748079034e-06, + "loss": 0.18079833984375, + "step": 109075 + }, + { + "epoch": 0.9431825059878427, + "grad_norm": 0.6949948851267269, + "learning_rate": 3.2673908263880434e-06, + "loss": 0.24044952392578126, + "step": 109080 + }, + { + "epoch": 0.943225739509386, + "grad_norm": 0.750796842277866, + "learning_rate": 3.267187903463868e-06, + "loss": 0.02779541015625, + "step": 109085 + }, + { + "epoch": 0.9432689730309293, + "grad_norm": 1.088658171910655, + "learning_rate": 3.2669849793074468e-06, + "loss": 0.168792724609375, + "step": 109090 + }, + { + "epoch": 0.9433122065524725, + "grad_norm": 2.8150405540571604, + "learning_rate": 3.2667820539197137e-06, + "loss": 0.0294281005859375, + "step": 109095 + }, + { + "epoch": 0.9433554400740158, + "grad_norm": 0.6778207540000829, + "learning_rate": 3.266579127301605e-06, + "loss": 0.019640350341796876, + "step": 109100 + }, + { + "epoch": 0.9433986735955591, + "grad_norm": 28.31040063413481, + "learning_rate": 3.266376199454058e-06, + "loss": 0.30484466552734374, + "step": 109105 + }, + { + "epoch": 0.9434419071171023, + "grad_norm": 10.425298204043962, + "learning_rate": 3.2661732703780075e-06, + "loss": 0.11174087524414063, + "step": 109110 + }, + { + "epoch": 0.9434851406386455, + "grad_norm": 3.871927382136403, + "learning_rate": 3.265970340074389e-06, + "loss": 0.06486663818359376, + "step": 109115 + }, + { + "epoch": 0.9435283741601889, + "grad_norm": 20.272590332983675, + "learning_rate": 3.2657674085441387e-06, + "loss": 0.21515045166015626, + "step": 109120 + }, + { + "epoch": 0.9435716076817321, + "grad_norm": 4.306017514687261, + "learning_rate": 3.2655644757881926e-06, + "loss": 0.248779296875, + "step": 109125 + }, + { + "epoch": 0.9436148412032753, + "grad_norm": 53.3314894883907, + "learning_rate": 3.265361541807487e-06, + "loss": 0.34777259826660156, + "step": 109130 + }, + { + "epoch": 0.9436580747248187, + "grad_norm": 1.8128506483982418, + "learning_rate": 3.2651586066029567e-06, + "loss": 0.09182510375976563, + "step": 109135 + }, + { + "epoch": 0.9437013082463619, + "grad_norm": 16.245363953188132, + "learning_rate": 3.264955670175539e-06, + "loss": 0.07557525634765624, + "step": 109140 + }, + { + "epoch": 0.9437445417679051, + "grad_norm": 4.591527047714826, + "learning_rate": 3.2647527325261694e-06, + "loss": 0.27947235107421875, + "step": 109145 + }, + { + "epoch": 0.9437877752894485, + "grad_norm": 14.389936257304113, + "learning_rate": 3.264549793655782e-06, + "loss": 0.147344970703125, + "step": 109150 + }, + { + "epoch": 0.9438310088109917, + "grad_norm": 0.6710405857432953, + "learning_rate": 3.264346853565316e-06, + "loss": 0.09954833984375, + "step": 109155 + }, + { + "epoch": 0.9438742423325349, + "grad_norm": 11.725560649661512, + "learning_rate": 3.2641439122557047e-06, + "loss": 0.17386188507080078, + "step": 109160 + }, + { + "epoch": 0.9439174758540783, + "grad_norm": 10.594091415045144, + "learning_rate": 3.263940969727885e-06, + "loss": 0.15381011962890626, + "step": 109165 + }, + { + "epoch": 0.9439607093756215, + "grad_norm": 2.9025066706247715, + "learning_rate": 3.263738025982794e-06, + "loss": 0.15944061279296876, + "step": 109170 + }, + { + "epoch": 0.9440039428971647, + "grad_norm": 19.321469896233378, + "learning_rate": 3.2635350810213663e-06, + "loss": 0.136798095703125, + "step": 109175 + }, + { + "epoch": 0.9440471764187081, + "grad_norm": 0.5420727337764474, + "learning_rate": 3.2633321348445372e-06, + "loss": 0.16829071044921876, + "step": 109180 + }, + { + "epoch": 0.9440904099402513, + "grad_norm": 10.798882652063554, + "learning_rate": 3.2631291874532433e-06, + "loss": 0.33522491455078124, + "step": 109185 + }, + { + "epoch": 0.9441336434617945, + "grad_norm": 0.9161223857792513, + "learning_rate": 3.262926238848422e-06, + "loss": 0.08954486846923829, + "step": 109190 + }, + { + "epoch": 0.9441768769833379, + "grad_norm": 14.487584354380541, + "learning_rate": 3.2627232890310075e-06, + "loss": 0.08830223083496094, + "step": 109195 + }, + { + "epoch": 0.9442201105048811, + "grad_norm": 32.00188380259772, + "learning_rate": 3.2625203380019363e-06, + "loss": 0.2317047119140625, + "step": 109200 + }, + { + "epoch": 0.9442633440264243, + "grad_norm": 2.0862029610186457, + "learning_rate": 3.262317385762145e-06, + "loss": 0.171319580078125, + "step": 109205 + }, + { + "epoch": 0.9443065775479675, + "grad_norm": 5.266939423638585, + "learning_rate": 3.262114432312569e-06, + "loss": 0.18903732299804688, + "step": 109210 + }, + { + "epoch": 0.9443498110695109, + "grad_norm": 2.3541498108445116, + "learning_rate": 3.2619114776541437e-06, + "loss": 0.049488067626953125, + "step": 109215 + }, + { + "epoch": 0.9443930445910541, + "grad_norm": 1.5555358880328765, + "learning_rate": 3.2617085217878066e-06, + "loss": 0.045246124267578125, + "step": 109220 + }, + { + "epoch": 0.9444362781125973, + "grad_norm": 11.663636683557236, + "learning_rate": 3.261505564714493e-06, + "loss": 0.10275478363037109, + "step": 109225 + }, + { + "epoch": 0.9444795116341407, + "grad_norm": 6.138216249959403, + "learning_rate": 3.261302606435138e-06, + "loss": 0.08706436157226563, + "step": 109230 + }, + { + "epoch": 0.9445227451556839, + "grad_norm": 4.274667106395314, + "learning_rate": 3.2610996469506795e-06, + "loss": 0.20127716064453124, + "step": 109235 + }, + { + "epoch": 0.9445659786772271, + "grad_norm": 1.9926796424507123, + "learning_rate": 3.2608966862620513e-06, + "loss": 0.05501556396484375, + "step": 109240 + }, + { + "epoch": 0.9446092121987705, + "grad_norm": 0.5338225455015352, + "learning_rate": 3.2606937243701907e-06, + "loss": 0.044429779052734375, + "step": 109245 + }, + { + "epoch": 0.9446524457203137, + "grad_norm": 3.1048490431339393, + "learning_rate": 3.2604907612760347e-06, + "loss": 0.07178115844726562, + "step": 109250 + }, + { + "epoch": 0.9446956792418569, + "grad_norm": 6.793095563241265, + "learning_rate": 3.2602877969805183e-06, + "loss": 0.1296356201171875, + "step": 109255 + }, + { + "epoch": 0.9447389127634003, + "grad_norm": 8.879081337837006, + "learning_rate": 3.260084831484577e-06, + "loss": 0.07178955078125, + "step": 109260 + }, + { + "epoch": 0.9447821462849435, + "grad_norm": 1.1180822050789014, + "learning_rate": 3.2598818647891478e-06, + "loss": 0.15600662231445311, + "step": 109265 + }, + { + "epoch": 0.9448253798064867, + "grad_norm": 5.76526904353671, + "learning_rate": 3.259678896895166e-06, + "loss": 0.0693511962890625, + "step": 109270 + }, + { + "epoch": 0.9448686133280301, + "grad_norm": 16.95820940592036, + "learning_rate": 3.2594759278035678e-06, + "loss": 0.395343017578125, + "step": 109275 + }, + { + "epoch": 0.9449118468495733, + "grad_norm": 1.15391527883246, + "learning_rate": 3.2592729575152907e-06, + "loss": 0.08363609313964844, + "step": 109280 + }, + { + "epoch": 0.9449550803711165, + "grad_norm": 0.40750864128706665, + "learning_rate": 3.259069986031269e-06, + "loss": 0.08163528442382813, + "step": 109285 + }, + { + "epoch": 0.9449983138926598, + "grad_norm": 53.55428196812166, + "learning_rate": 3.25886701335244e-06, + "loss": 0.1957733154296875, + "step": 109290 + }, + { + "epoch": 0.9450415474142031, + "grad_norm": 8.168941314111262, + "learning_rate": 3.2586640394797374e-06, + "loss": 0.06908435821533203, + "step": 109295 + }, + { + "epoch": 0.9450847809357463, + "grad_norm": 32.55215767073499, + "learning_rate": 3.258461064414101e-06, + "loss": 0.3577606201171875, + "step": 109300 + }, + { + "epoch": 0.9451280144572896, + "grad_norm": 15.487755168962803, + "learning_rate": 3.2582580881564646e-06, + "loss": 0.1390869140625, + "step": 109305 + }, + { + "epoch": 0.9451712479788329, + "grad_norm": 4.223817791572218, + "learning_rate": 3.2580551107077636e-06, + "loss": 0.06929550170898438, + "step": 109310 + }, + { + "epoch": 0.9452144815003761, + "grad_norm": 2.491136820790447, + "learning_rate": 3.257852132068937e-06, + "loss": 0.035962677001953124, + "step": 109315 + }, + { + "epoch": 0.9452577150219194, + "grad_norm": 1.2852269996867147, + "learning_rate": 3.2576491522409173e-06, + "loss": 0.1544586181640625, + "step": 109320 + }, + { + "epoch": 0.9453009485434627, + "grad_norm": 0.9878175848861069, + "learning_rate": 3.257446171224643e-06, + "loss": 0.13964080810546875, + "step": 109325 + }, + { + "epoch": 0.9453441820650059, + "grad_norm": 3.7846689863902934, + "learning_rate": 3.2572431890210507e-06, + "loss": 0.0574310302734375, + "step": 109330 + }, + { + "epoch": 0.9453874155865492, + "grad_norm": 17.069417042444076, + "learning_rate": 3.2570402056310746e-06, + "loss": 0.1794219970703125, + "step": 109335 + }, + { + "epoch": 0.9454306491080925, + "grad_norm": 26.34417508721416, + "learning_rate": 3.256837221055652e-06, + "loss": 0.2311279296875, + "step": 109340 + }, + { + "epoch": 0.9454738826296357, + "grad_norm": 4.060227008149443, + "learning_rate": 3.2566342352957196e-06, + "loss": 0.15367813110351564, + "step": 109345 + }, + { + "epoch": 0.945517116151179, + "grad_norm": 3.092066374690727, + "learning_rate": 3.2564312483522117e-06, + "loss": 0.0922433853149414, + "step": 109350 + }, + { + "epoch": 0.9455603496727223, + "grad_norm": 14.891592649578174, + "learning_rate": 3.2562282602260658e-06, + "loss": 0.08495254516601562, + "step": 109355 + }, + { + "epoch": 0.9456035831942655, + "grad_norm": 9.336305246790808, + "learning_rate": 3.256025270918218e-06, + "loss": 0.12125701904296875, + "step": 109360 + }, + { + "epoch": 0.9456468167158087, + "grad_norm": 2.194507327993506, + "learning_rate": 3.2558222804296047e-06, + "loss": 0.17754783630371093, + "step": 109365 + }, + { + "epoch": 0.9456900502373521, + "grad_norm": 7.411288751731758, + "learning_rate": 3.255619288761161e-06, + "loss": 0.05885009765625, + "step": 109370 + }, + { + "epoch": 0.9457332837588953, + "grad_norm": 0.9634378258822947, + "learning_rate": 3.255416295913824e-06, + "loss": 0.23260421752929689, + "step": 109375 + }, + { + "epoch": 0.9457765172804385, + "grad_norm": 14.96834606510269, + "learning_rate": 3.255213301888529e-06, + "loss": 0.3317901611328125, + "step": 109380 + }, + { + "epoch": 0.9458197508019818, + "grad_norm": 15.920223553377497, + "learning_rate": 3.2550103066862137e-06, + "loss": 0.19745635986328125, + "step": 109385 + }, + { + "epoch": 0.9458629843235251, + "grad_norm": 0.1853050178176262, + "learning_rate": 3.2548073103078133e-06, + "loss": 0.044205856323242185, + "step": 109390 + }, + { + "epoch": 0.9459062178450683, + "grad_norm": 0.48834595833687017, + "learning_rate": 3.254604312754264e-06, + "loss": 0.3782367706298828, + "step": 109395 + }, + { + "epoch": 0.9459494513666116, + "grad_norm": 8.470142501623464, + "learning_rate": 3.2544013140265024e-06, + "loss": 0.29241180419921875, + "step": 109400 + }, + { + "epoch": 0.9459926848881549, + "grad_norm": 2.5773768287380854, + "learning_rate": 3.2541983141254635e-06, + "loss": 0.12962799072265624, + "step": 109405 + }, + { + "epoch": 0.9460359184096981, + "grad_norm": 0.35531805906252695, + "learning_rate": 3.2539953130520844e-06, + "loss": 0.2996025085449219, + "step": 109410 + }, + { + "epoch": 0.9460791519312414, + "grad_norm": 4.49522593949294, + "learning_rate": 3.2537923108073024e-06, + "loss": 0.10558319091796875, + "step": 109415 + }, + { + "epoch": 0.9461223854527847, + "grad_norm": 5.111901733609597, + "learning_rate": 3.2535893073920517e-06, + "loss": 0.0503662109375, + "step": 109420 + }, + { + "epoch": 0.9461656189743279, + "grad_norm": 31.89027352216599, + "learning_rate": 3.25338630280727e-06, + "loss": 0.13416023254394532, + "step": 109425 + }, + { + "epoch": 0.9462088524958712, + "grad_norm": 16.04668941964102, + "learning_rate": 3.2531832970538932e-06, + "loss": 0.2692298889160156, + "step": 109430 + }, + { + "epoch": 0.9462520860174145, + "grad_norm": 1.6132282532097648, + "learning_rate": 3.252980290132857e-06, + "loss": 0.022370147705078124, + "step": 109435 + }, + { + "epoch": 0.9462953195389577, + "grad_norm": 7.987944350583394, + "learning_rate": 3.252777282045098e-06, + "loss": 0.2136962890625, + "step": 109440 + }, + { + "epoch": 0.946338553060501, + "grad_norm": 3.100933275790881, + "learning_rate": 3.2525742727915534e-06, + "loss": 0.21041259765625, + "step": 109445 + }, + { + "epoch": 0.9463817865820443, + "grad_norm": 0.9869188316572606, + "learning_rate": 3.2523712623731575e-06, + "loss": 0.06714973449707032, + "step": 109450 + }, + { + "epoch": 0.9464250201035875, + "grad_norm": 15.671008583604399, + "learning_rate": 3.2521682507908486e-06, + "loss": 0.2279216766357422, + "step": 109455 + }, + { + "epoch": 0.9464682536251308, + "grad_norm": 3.8106256424215506, + "learning_rate": 3.2519652380455613e-06, + "loss": 0.09029273986816407, + "step": 109460 + }, + { + "epoch": 0.946511487146674, + "grad_norm": 0.1124486108012292, + "learning_rate": 3.251762224138233e-06, + "loss": 0.11628875732421876, + "step": 109465 + }, + { + "epoch": 0.9465547206682173, + "grad_norm": 1.5629642906004533, + "learning_rate": 3.2515592090697986e-06, + "loss": 0.2423583984375, + "step": 109470 + }, + { + "epoch": 0.9465979541897606, + "grad_norm": 4.67535310929372, + "learning_rate": 3.2513561928411967e-06, + "loss": 0.18507032394409179, + "step": 109475 + }, + { + "epoch": 0.9466411877113038, + "grad_norm": 13.815980829121884, + "learning_rate": 3.2511531754533616e-06, + "loss": 0.08721857070922852, + "step": 109480 + }, + { + "epoch": 0.9466844212328471, + "grad_norm": 5.982215809129009, + "learning_rate": 3.2509501569072306e-06, + "loss": 0.1023223876953125, + "step": 109485 + }, + { + "epoch": 0.9467276547543904, + "grad_norm": 1.145577143870028, + "learning_rate": 3.2507471372037392e-06, + "loss": 0.2939914703369141, + "step": 109490 + }, + { + "epoch": 0.9467708882759336, + "grad_norm": 7.54692379007318, + "learning_rate": 3.2505441163438246e-06, + "loss": 0.2231109619140625, + "step": 109495 + }, + { + "epoch": 0.9468141217974769, + "grad_norm": 12.399094509687153, + "learning_rate": 3.250341094328422e-06, + "loss": 0.5615337371826172, + "step": 109500 + }, + { + "epoch": 0.9468573553190202, + "grad_norm": 0.4534494772724265, + "learning_rate": 3.250138071158469e-06, + "loss": 0.208172607421875, + "step": 109505 + }, + { + "epoch": 0.9469005888405634, + "grad_norm": 0.2672828576659961, + "learning_rate": 3.249935046834901e-06, + "loss": 0.1552581787109375, + "step": 109510 + }, + { + "epoch": 0.9469438223621067, + "grad_norm": 97.89722777911084, + "learning_rate": 3.2497320213586545e-06, + "loss": 0.2941093444824219, + "step": 109515 + }, + { + "epoch": 0.94698705588365, + "grad_norm": 30.313974222815503, + "learning_rate": 3.249528994730666e-06, + "loss": 0.1431243896484375, + "step": 109520 + }, + { + "epoch": 0.9470302894051932, + "grad_norm": 11.298403892148578, + "learning_rate": 3.2493259669518727e-06, + "loss": 0.06333122253417969, + "step": 109525 + }, + { + "epoch": 0.9470735229267365, + "grad_norm": 1.2939013288472359, + "learning_rate": 3.2491229380232086e-06, + "loss": 0.20065765380859374, + "step": 109530 + }, + { + "epoch": 0.9471167564482798, + "grad_norm": 24.165517080274956, + "learning_rate": 3.2489199079456134e-06, + "loss": 0.40264434814453126, + "step": 109535 + }, + { + "epoch": 0.947159989969823, + "grad_norm": 0.0987444453065504, + "learning_rate": 3.248716876720021e-06, + "loss": 0.014387893676757812, + "step": 109540 + }, + { + "epoch": 0.9472032234913663, + "grad_norm": 1.1038576165692753, + "learning_rate": 3.248513844347367e-06, + "loss": 0.18235282897949218, + "step": 109545 + }, + { + "epoch": 0.9472464570129095, + "grad_norm": 0.23842854672425953, + "learning_rate": 3.2483108108285896e-06, + "loss": 0.018881607055664062, + "step": 109550 + }, + { + "epoch": 0.9472896905344528, + "grad_norm": 7.935741691991602, + "learning_rate": 3.2481077761646256e-06, + "loss": 0.09339675903320313, + "step": 109555 + }, + { + "epoch": 0.947332924055996, + "grad_norm": 0.2049381375891559, + "learning_rate": 3.24790474035641e-06, + "loss": 0.3306861877441406, + "step": 109560 + }, + { + "epoch": 0.9473761575775393, + "grad_norm": 0.22331028315114237, + "learning_rate": 3.2477017034048794e-06, + "loss": 0.10040435791015626, + "step": 109565 + }, + { + "epoch": 0.9474193910990826, + "grad_norm": 0.24731431226193362, + "learning_rate": 3.2474986653109707e-06, + "loss": 0.09908428192138671, + "step": 109570 + }, + { + "epoch": 0.9474626246206258, + "grad_norm": 0.6602967700595981, + "learning_rate": 3.24729562607562e-06, + "loss": 0.03415336608886719, + "step": 109575 + }, + { + "epoch": 0.9475058581421691, + "grad_norm": 5.775901829874539, + "learning_rate": 3.247092585699763e-06, + "loss": 0.1358612060546875, + "step": 109580 + }, + { + "epoch": 0.9475490916637124, + "grad_norm": 0.7156432132332028, + "learning_rate": 3.246889544184338e-06, + "loss": 0.02742156982421875, + "step": 109585 + }, + { + "epoch": 0.9475923251852556, + "grad_norm": 4.085848968458943, + "learning_rate": 3.24668650153028e-06, + "loss": 0.13629074096679689, + "step": 109590 + }, + { + "epoch": 0.9476355587067989, + "grad_norm": 9.389265368869664, + "learning_rate": 3.246483457738525e-06, + "loss": 0.04922828674316406, + "step": 109595 + }, + { + "epoch": 0.9476787922283422, + "grad_norm": 3.760587984572049, + "learning_rate": 3.246280412810011e-06, + "loss": 0.16936798095703126, + "step": 109600 + }, + { + "epoch": 0.9477220257498854, + "grad_norm": 3.9360074869367843, + "learning_rate": 3.2460773667456723e-06, + "loss": 0.05266876220703125, + "step": 109605 + }, + { + "epoch": 0.9477652592714287, + "grad_norm": 20.277379714328607, + "learning_rate": 3.2458743195464475e-06, + "loss": 0.13947219848632814, + "step": 109610 + }, + { + "epoch": 0.947808492792972, + "grad_norm": 35.98385340230991, + "learning_rate": 3.2456712712132717e-06, + "loss": 0.5298690795898438, + "step": 109615 + }, + { + "epoch": 0.9478517263145152, + "grad_norm": 4.080319264411265, + "learning_rate": 3.2454682217470817e-06, + "loss": 0.05168304443359375, + "step": 109620 + }, + { + "epoch": 0.9478949598360585, + "grad_norm": 20.208083104240576, + "learning_rate": 3.245265171148814e-06, + "loss": 0.38715553283691406, + "step": 109625 + }, + { + "epoch": 0.9479381933576018, + "grad_norm": 0.19537957914805468, + "learning_rate": 3.2450621194194054e-06, + "loss": 0.22989139556884766, + "step": 109630 + }, + { + "epoch": 0.947981426879145, + "grad_norm": 0.06208411257364325, + "learning_rate": 3.244859066559791e-06, + "loss": 0.023369979858398438, + "step": 109635 + }, + { + "epoch": 0.9480246604006882, + "grad_norm": 12.739157859320612, + "learning_rate": 3.2446560125709092e-06, + "loss": 0.06660690307617187, + "step": 109640 + }, + { + "epoch": 0.9480678939222316, + "grad_norm": 0.39093157087779334, + "learning_rate": 3.244452957453695e-06, + "loss": 0.07548408508300782, + "step": 109645 + }, + { + "epoch": 0.9481111274437748, + "grad_norm": 20.26678490520062, + "learning_rate": 3.2442499012090853e-06, + "loss": 0.14707870483398439, + "step": 109650 + }, + { + "epoch": 0.948154360965318, + "grad_norm": 0.5873649980549095, + "learning_rate": 3.244046843838017e-06, + "loss": 0.1937286376953125, + "step": 109655 + }, + { + "epoch": 0.9481975944868614, + "grad_norm": 6.3064015791382575, + "learning_rate": 3.243843785341426e-06, + "loss": 0.19049301147460937, + "step": 109660 + }, + { + "epoch": 0.9482408280084046, + "grad_norm": 11.890172443645332, + "learning_rate": 3.2436407257202486e-06, + "loss": 0.20550117492675782, + "step": 109665 + }, + { + "epoch": 0.9482840615299478, + "grad_norm": 0.3170160633987867, + "learning_rate": 3.2434376649754224e-06, + "loss": 0.21839218139648436, + "step": 109670 + }, + { + "epoch": 0.9483272950514912, + "grad_norm": 1.2971501307949207, + "learning_rate": 3.2432346031078823e-06, + "loss": 0.03763561248779297, + "step": 109675 + }, + { + "epoch": 0.9483705285730344, + "grad_norm": 6.256836332367815, + "learning_rate": 3.243031540118567e-06, + "loss": 0.0385711669921875, + "step": 109680 + }, + { + "epoch": 0.9484137620945776, + "grad_norm": 0.25053353652228383, + "learning_rate": 3.242828476008411e-06, + "loss": 0.23524093627929688, + "step": 109685 + }, + { + "epoch": 0.948456995616121, + "grad_norm": 7.440168842682594, + "learning_rate": 3.2426254107783514e-06, + "loss": 0.2103038787841797, + "step": 109690 + }, + { + "epoch": 0.9485002291376642, + "grad_norm": 13.269552122158826, + "learning_rate": 3.242422344429325e-06, + "loss": 0.07860794067382812, + "step": 109695 + }, + { + "epoch": 0.9485434626592074, + "grad_norm": 47.69798029976272, + "learning_rate": 3.2422192769622685e-06, + "loss": 0.402734375, + "step": 109700 + }, + { + "epoch": 0.9485866961807508, + "grad_norm": 1.0427149924993315, + "learning_rate": 3.242016208378117e-06, + "loss": 0.13710975646972656, + "step": 109705 + }, + { + "epoch": 0.948629929702294, + "grad_norm": 0.18341253341173444, + "learning_rate": 3.2418131386778094e-06, + "loss": 0.12240447998046874, + "step": 109710 + }, + { + "epoch": 0.9486731632238372, + "grad_norm": 19.051570447952887, + "learning_rate": 3.2416100678622806e-06, + "loss": 0.08437576293945312, + "step": 109715 + }, + { + "epoch": 0.9487163967453806, + "grad_norm": 43.57528683935058, + "learning_rate": 3.241406995932467e-06, + "loss": 0.10935859680175782, + "step": 109720 + }, + { + "epoch": 0.9487596302669238, + "grad_norm": 16.540049395426337, + "learning_rate": 3.2412039228893054e-06, + "loss": 0.208984375, + "step": 109725 + }, + { + "epoch": 0.948802863788467, + "grad_norm": 2.6015769549751044, + "learning_rate": 3.2410008487337343e-06, + "loss": 0.124224853515625, + "step": 109730 + }, + { + "epoch": 0.9488460973100102, + "grad_norm": 0.12179740484598903, + "learning_rate": 3.240797773466688e-06, + "loss": 0.2775001525878906, + "step": 109735 + }, + { + "epoch": 0.9488893308315536, + "grad_norm": 3.531969671008027, + "learning_rate": 3.2405946970891024e-06, + "loss": 0.11542816162109375, + "step": 109740 + }, + { + "epoch": 0.9489325643530968, + "grad_norm": 3.141142214468508, + "learning_rate": 3.2403916196019153e-06, + "loss": 0.1281444549560547, + "step": 109745 + }, + { + "epoch": 0.94897579787464, + "grad_norm": 43.00436844022133, + "learning_rate": 3.2401885410060653e-06, + "loss": 0.3253143310546875, + "step": 109750 + }, + { + "epoch": 0.9490190313961834, + "grad_norm": 0.3461924543375924, + "learning_rate": 3.239985461302485e-06, + "loss": 0.01757946014404297, + "step": 109755 + }, + { + "epoch": 0.9490622649177266, + "grad_norm": 0.5753956945944974, + "learning_rate": 3.239782380492114e-06, + "loss": 0.022254133224487306, + "step": 109760 + }, + { + "epoch": 0.9491054984392698, + "grad_norm": 0.7845417852753359, + "learning_rate": 3.239579298575888e-06, + "loss": 0.163155460357666, + "step": 109765 + }, + { + "epoch": 0.9491487319608132, + "grad_norm": 25.107287308966576, + "learning_rate": 3.2393762155547423e-06, + "loss": 0.3566596984863281, + "step": 109770 + }, + { + "epoch": 0.9491919654823564, + "grad_norm": 4.12206449896247, + "learning_rate": 3.239173131429615e-06, + "loss": 0.17084922790527343, + "step": 109775 + }, + { + "epoch": 0.9492351990038996, + "grad_norm": 0.06359173710121534, + "learning_rate": 3.2389700462014437e-06, + "loss": 0.2806999206542969, + "step": 109780 + }, + { + "epoch": 0.949278432525443, + "grad_norm": 0.25437488844182804, + "learning_rate": 3.2387669598711623e-06, + "loss": 0.3106706619262695, + "step": 109785 + }, + { + "epoch": 0.9493216660469862, + "grad_norm": 1.2856274726168853, + "learning_rate": 3.2385638724397092e-06, + "loss": 0.05969696044921875, + "step": 109790 + }, + { + "epoch": 0.9493648995685294, + "grad_norm": 0.33128627979425496, + "learning_rate": 3.2383607839080205e-06, + "loss": 0.036985015869140624, + "step": 109795 + }, + { + "epoch": 0.9494081330900728, + "grad_norm": 16.48701160220291, + "learning_rate": 3.238157694277033e-06, + "loss": 0.07719345092773437, + "step": 109800 + }, + { + "epoch": 0.949451366611616, + "grad_norm": 17.188634612734, + "learning_rate": 3.2379546035476834e-06, + "loss": 0.2891632080078125, + "step": 109805 + }, + { + "epoch": 0.9494946001331592, + "grad_norm": 9.623998531145617, + "learning_rate": 3.237751511720908e-06, + "loss": 0.23313751220703124, + "step": 109810 + }, + { + "epoch": 0.9495378336547025, + "grad_norm": 3.3310806399561423, + "learning_rate": 3.2375484187976437e-06, + "loss": 0.060937118530273435, + "step": 109815 + }, + { + "epoch": 0.9495810671762458, + "grad_norm": 10.534844346834502, + "learning_rate": 3.237345324778827e-06, + "loss": 0.169415283203125, + "step": 109820 + }, + { + "epoch": 0.949624300697789, + "grad_norm": 16.59774711684429, + "learning_rate": 3.2371422296653953e-06, + "loss": 0.4799896240234375, + "step": 109825 + }, + { + "epoch": 0.9496675342193323, + "grad_norm": 1.335056374685933, + "learning_rate": 3.236939133458283e-06, + "loss": 0.047307586669921874, + "step": 109830 + }, + { + "epoch": 0.9497107677408756, + "grad_norm": 17.291238213461575, + "learning_rate": 3.236736036158429e-06, + "loss": 0.0501190185546875, + "step": 109835 + }, + { + "epoch": 0.9497540012624188, + "grad_norm": 43.186933749838985, + "learning_rate": 3.23653293776677e-06, + "loss": 0.372503662109375, + "step": 109840 + }, + { + "epoch": 0.949797234783962, + "grad_norm": 16.629918566815633, + "learning_rate": 3.236329838284242e-06, + "loss": 0.16514835357666016, + "step": 109845 + }, + { + "epoch": 0.9498404683055054, + "grad_norm": 3.2376849750445995, + "learning_rate": 3.2361267377117814e-06, + "loss": 0.06808662414550781, + "step": 109850 + }, + { + "epoch": 0.9498837018270486, + "grad_norm": 2.291716585372345, + "learning_rate": 3.235923636050325e-06, + "loss": 0.13303146362304688, + "step": 109855 + }, + { + "epoch": 0.9499269353485918, + "grad_norm": 2.6693654587953404, + "learning_rate": 3.23572053330081e-06, + "loss": 0.20887260437011718, + "step": 109860 + }, + { + "epoch": 0.9499701688701352, + "grad_norm": 16.10638531108141, + "learning_rate": 3.2355174294641713e-06, + "loss": 0.0498809814453125, + "step": 109865 + }, + { + "epoch": 0.9500134023916784, + "grad_norm": 1.0308851667925059, + "learning_rate": 3.2353143245413485e-06, + "loss": 0.49585800170898436, + "step": 109870 + }, + { + "epoch": 0.9500566359132216, + "grad_norm": 25.859608507094684, + "learning_rate": 3.235111218533277e-06, + "loss": 0.24895248413085938, + "step": 109875 + }, + { + "epoch": 0.950099869434765, + "grad_norm": 4.263439376928589, + "learning_rate": 3.2349081114408925e-06, + "loss": 0.5203235626220704, + "step": 109880 + }, + { + "epoch": 0.9501431029563082, + "grad_norm": 44.364467591925404, + "learning_rate": 3.234705003265133e-06, + "loss": 0.38956451416015625, + "step": 109885 + }, + { + "epoch": 0.9501863364778514, + "grad_norm": 1.7119842173036235, + "learning_rate": 3.2345018940069342e-06, + "loss": 0.03421974182128906, + "step": 109890 + }, + { + "epoch": 0.9502295699993948, + "grad_norm": 33.5485565013682, + "learning_rate": 3.2342987836672337e-06, + "loss": 0.1819488525390625, + "step": 109895 + }, + { + "epoch": 0.950272803520938, + "grad_norm": 2.337270992466396, + "learning_rate": 3.234095672246968e-06, + "loss": 0.2345062255859375, + "step": 109900 + }, + { + "epoch": 0.9503160370424812, + "grad_norm": 3.7184101084420673, + "learning_rate": 3.2338925597470745e-06, + "loss": 0.342425537109375, + "step": 109905 + }, + { + "epoch": 0.9503592705640245, + "grad_norm": 1.4279269770508887, + "learning_rate": 3.233689446168489e-06, + "loss": 0.11778717041015625, + "step": 109910 + }, + { + "epoch": 0.9504025040855678, + "grad_norm": 11.517307257835128, + "learning_rate": 3.2334863315121467e-06, + "loss": 0.15993423461914064, + "step": 109915 + }, + { + "epoch": 0.950445737607111, + "grad_norm": 6.492995612460533, + "learning_rate": 3.233283215778987e-06, + "loss": 0.20589599609375, + "step": 109920 + }, + { + "epoch": 0.9504889711286543, + "grad_norm": 37.41547148321761, + "learning_rate": 3.2330800989699463e-06, + "loss": 0.17873077392578124, + "step": 109925 + }, + { + "epoch": 0.9505322046501976, + "grad_norm": 0.6631860162155595, + "learning_rate": 3.232876981085961e-06, + "loss": 0.0647735595703125, + "step": 109930 + }, + { + "epoch": 0.9505754381717408, + "grad_norm": 5.360331664338708, + "learning_rate": 3.232673862127967e-06, + "loss": 0.0611968994140625, + "step": 109935 + }, + { + "epoch": 0.9506186716932841, + "grad_norm": 8.843968657063385, + "learning_rate": 3.2324707420969013e-06, + "loss": 0.13338088989257812, + "step": 109940 + }, + { + "epoch": 0.9506619052148274, + "grad_norm": 22.669515151268133, + "learning_rate": 3.2322676209937016e-06, + "loss": 0.26652374267578127, + "step": 109945 + }, + { + "epoch": 0.9507051387363706, + "grad_norm": 0.4608474964243104, + "learning_rate": 3.232064498819304e-06, + "loss": 0.043043327331542966, + "step": 109950 + }, + { + "epoch": 0.9507483722579139, + "grad_norm": 28.972247506650998, + "learning_rate": 3.2318613755746463e-06, + "loss": 0.08466033935546875, + "step": 109955 + }, + { + "epoch": 0.9507916057794572, + "grad_norm": 16.870191977200587, + "learning_rate": 3.231658251260663e-06, + "loss": 0.11776046752929688, + "step": 109960 + }, + { + "epoch": 0.9508348393010004, + "grad_norm": 1.2479071670305915, + "learning_rate": 3.2314551258782936e-06, + "loss": 0.049653244018554685, + "step": 109965 + }, + { + "epoch": 0.9508780728225437, + "grad_norm": 0.6362268754604369, + "learning_rate": 3.2312519994284724e-06, + "loss": 0.2732383728027344, + "step": 109970 + }, + { + "epoch": 0.950921306344087, + "grad_norm": 6.462958804970542, + "learning_rate": 3.231048871912138e-06, + "loss": 0.253729248046875, + "step": 109975 + }, + { + "epoch": 0.9509645398656302, + "grad_norm": 9.529983358598546, + "learning_rate": 3.230845743330227e-06, + "loss": 0.0425262451171875, + "step": 109980 + }, + { + "epoch": 0.9510077733871735, + "grad_norm": 18.11019590920071, + "learning_rate": 3.230642613683676e-06, + "loss": 0.1263824462890625, + "step": 109985 + }, + { + "epoch": 0.9510510069087167, + "grad_norm": 0.7253701937653955, + "learning_rate": 3.2304394829734216e-06, + "loss": 0.5975872039794922, + "step": 109990 + }, + { + "epoch": 0.95109424043026, + "grad_norm": 5.588526589595385, + "learning_rate": 3.2302363512003994e-06, + "loss": 0.0659423828125, + "step": 109995 + }, + { + "epoch": 0.9511374739518033, + "grad_norm": 0.44169665378785333, + "learning_rate": 3.2300332183655477e-06, + "loss": 0.03046112060546875, + "step": 110000 + }, + { + "epoch": 0.9511807074733465, + "grad_norm": 1.9514771696057378, + "learning_rate": 3.229830084469804e-06, + "loss": 0.079412841796875, + "step": 110005 + }, + { + "epoch": 0.9512239409948898, + "grad_norm": 8.645725823501037, + "learning_rate": 3.229626949514104e-06, + "loss": 0.16050949096679687, + "step": 110010 + }, + { + "epoch": 0.9512671745164331, + "grad_norm": 3.779479674952417, + "learning_rate": 3.2294238134993846e-06, + "loss": 0.059049224853515624, + "step": 110015 + }, + { + "epoch": 0.9513104080379763, + "grad_norm": 11.750574603406813, + "learning_rate": 3.2292206764265836e-06, + "loss": 0.0818511962890625, + "step": 110020 + }, + { + "epoch": 0.9513536415595196, + "grad_norm": 1.7578820176405634, + "learning_rate": 3.2290175382966367e-06, + "loss": 0.0465423583984375, + "step": 110025 + }, + { + "epoch": 0.9513968750810629, + "grad_norm": 5.342937774512975, + "learning_rate": 3.2288143991104803e-06, + "loss": 0.1237213134765625, + "step": 110030 + }, + { + "epoch": 0.9514401086026061, + "grad_norm": 17.450420821892514, + "learning_rate": 3.2286112588690527e-06, + "loss": 0.29945068359375, + "step": 110035 + }, + { + "epoch": 0.9514833421241494, + "grad_norm": 12.335977256150674, + "learning_rate": 3.2284081175732897e-06, + "loss": 0.21875534057617188, + "step": 110040 + }, + { + "epoch": 0.9515265756456927, + "grad_norm": 45.98451529402776, + "learning_rate": 3.22820497522413e-06, + "loss": 0.27165756225585935, + "step": 110045 + }, + { + "epoch": 0.9515698091672359, + "grad_norm": 28.98079617940257, + "learning_rate": 3.2280018318225086e-06, + "loss": 0.08105697631835937, + "step": 110050 + }, + { + "epoch": 0.9516130426887792, + "grad_norm": 0.061256098252519216, + "learning_rate": 3.227798687369362e-06, + "loss": 0.045256423950195315, + "step": 110055 + }, + { + "epoch": 0.9516562762103224, + "grad_norm": 4.502628430927189, + "learning_rate": 3.227595541865628e-06, + "loss": 0.22235183715820311, + "step": 110060 + }, + { + "epoch": 0.9516995097318657, + "grad_norm": 8.029583107285161, + "learning_rate": 3.227392395312245e-06, + "loss": 0.6532085418701172, + "step": 110065 + }, + { + "epoch": 0.951742743253409, + "grad_norm": 3.778933232104088, + "learning_rate": 3.2271892477101468e-06, + "loss": 0.14590835571289062, + "step": 110070 + }, + { + "epoch": 0.9517859767749522, + "grad_norm": 4.711727369951396, + "learning_rate": 3.226986099060273e-06, + "loss": 0.21379737854003905, + "step": 110075 + }, + { + "epoch": 0.9518292102964955, + "grad_norm": 2.39795757874312, + "learning_rate": 3.226782949363559e-06, + "loss": 0.19698867797851563, + "step": 110080 + }, + { + "epoch": 0.9518724438180387, + "grad_norm": 10.768517715994655, + "learning_rate": 3.2265797986209416e-06, + "loss": 0.09525012969970703, + "step": 110085 + }, + { + "epoch": 0.951915677339582, + "grad_norm": 4.4566406611392875, + "learning_rate": 3.226376646833358e-06, + "loss": 0.015702056884765624, + "step": 110090 + }, + { + "epoch": 0.9519589108611253, + "grad_norm": 2.051490869686838, + "learning_rate": 3.226173494001746e-06, + "loss": 0.06584014892578124, + "step": 110095 + }, + { + "epoch": 0.9520021443826685, + "grad_norm": 0.46288607963712236, + "learning_rate": 3.2259703401270423e-06, + "loss": 0.1465179443359375, + "step": 110100 + }, + { + "epoch": 0.9520453779042118, + "grad_norm": 0.06878976918280162, + "learning_rate": 3.2257671852101823e-06, + "loss": 0.20715103149414063, + "step": 110105 + }, + { + "epoch": 0.9520886114257551, + "grad_norm": 0.5928192213063294, + "learning_rate": 3.225564029252105e-06, + "loss": 0.04011516571044922, + "step": 110110 + }, + { + "epoch": 0.9521318449472983, + "grad_norm": 20.6088269586943, + "learning_rate": 3.225360872253745e-06, + "loss": 0.4241790771484375, + "step": 110115 + }, + { + "epoch": 0.9521750784688416, + "grad_norm": 22.216402601020587, + "learning_rate": 3.225157714216041e-06, + "loss": 0.0851531982421875, + "step": 110120 + }, + { + "epoch": 0.9522183119903849, + "grad_norm": 21.367026416956197, + "learning_rate": 3.2249545551399303e-06, + "loss": 0.17678985595703126, + "step": 110125 + }, + { + "epoch": 0.9522615455119281, + "grad_norm": 4.690103373706053, + "learning_rate": 3.2247513950263487e-06, + "loss": 0.139276123046875, + "step": 110130 + }, + { + "epoch": 0.9523047790334714, + "grad_norm": 4.51588073626261, + "learning_rate": 3.2245482338762333e-06, + "loss": 0.12066421508789063, + "step": 110135 + }, + { + "epoch": 0.9523480125550147, + "grad_norm": 4.369974452398656, + "learning_rate": 3.224345071690521e-06, + "loss": 0.037641143798828124, + "step": 110140 + }, + { + "epoch": 0.9523912460765579, + "grad_norm": 10.136712888634479, + "learning_rate": 3.2241419084701496e-06, + "loss": 0.3753318786621094, + "step": 110145 + }, + { + "epoch": 0.9524344795981012, + "grad_norm": 48.879054087712575, + "learning_rate": 3.223938744216055e-06, + "loss": 0.229302978515625, + "step": 110150 + }, + { + "epoch": 0.9524777131196445, + "grad_norm": 10.359041070338309, + "learning_rate": 3.2237355789291755e-06, + "loss": 0.09562835693359376, + "step": 110155 + }, + { + "epoch": 0.9525209466411877, + "grad_norm": 7.311901191576818, + "learning_rate": 3.223532412610447e-06, + "loss": 0.02512969970703125, + "step": 110160 + }, + { + "epoch": 0.9525641801627309, + "grad_norm": 6.391290325546852, + "learning_rate": 3.2233292452608063e-06, + "loss": 0.11540374755859376, + "step": 110165 + }, + { + "epoch": 0.9526074136842743, + "grad_norm": 40.801762237102345, + "learning_rate": 3.223126076881191e-06, + "loss": 0.25812835693359376, + "step": 110170 + }, + { + "epoch": 0.9526506472058175, + "grad_norm": 3.1134417683882547, + "learning_rate": 3.2229229074725384e-06, + "loss": 0.03262786865234375, + "step": 110175 + }, + { + "epoch": 0.9526938807273607, + "grad_norm": 2.8492796153928053, + "learning_rate": 3.2227197370357854e-06, + "loss": 0.07790908813476563, + "step": 110180 + }, + { + "epoch": 0.9527371142489041, + "grad_norm": 3.196097497642683, + "learning_rate": 3.222516565571867e-06, + "loss": 0.073077392578125, + "step": 110185 + }, + { + "epoch": 0.9527803477704473, + "grad_norm": 15.16412521841275, + "learning_rate": 3.222313393081724e-06, + "loss": 0.12928924560546876, + "step": 110190 + }, + { + "epoch": 0.9528235812919905, + "grad_norm": 0.09327109096715656, + "learning_rate": 3.2221102195662893e-06, + "loss": 0.01670989990234375, + "step": 110195 + }, + { + "epoch": 0.9528668148135339, + "grad_norm": 0.18506321168232365, + "learning_rate": 3.2219070450265024e-06, + "loss": 0.04234161376953125, + "step": 110200 + }, + { + "epoch": 0.9529100483350771, + "grad_norm": 0.35089300205824864, + "learning_rate": 3.2217038694633007e-06, + "loss": 0.0166015625, + "step": 110205 + }, + { + "epoch": 0.9529532818566203, + "grad_norm": 7.715860975721739, + "learning_rate": 3.22150069287762e-06, + "loss": 0.1402130126953125, + "step": 110210 + }, + { + "epoch": 0.9529965153781637, + "grad_norm": 1.4873406242100005, + "learning_rate": 3.221297515270397e-06, + "loss": 0.035076904296875, + "step": 110215 + }, + { + "epoch": 0.9530397488997069, + "grad_norm": 14.774241427571951, + "learning_rate": 3.221094336642571e-06, + "loss": 0.15551910400390626, + "step": 110220 + }, + { + "epoch": 0.9530829824212501, + "grad_norm": 0.5813948269615132, + "learning_rate": 3.2208911569950753e-06, + "loss": 0.5433135986328125, + "step": 110225 + }, + { + "epoch": 0.9531262159427935, + "grad_norm": 1.9080955314503278, + "learning_rate": 3.22068797632885e-06, + "loss": 0.2263427734375, + "step": 110230 + }, + { + "epoch": 0.9531694494643367, + "grad_norm": 1.5132869061373517, + "learning_rate": 3.2204847946448314e-06, + "loss": 0.34659881591796876, + "step": 110235 + }, + { + "epoch": 0.9532126829858799, + "grad_norm": 49.2586396832392, + "learning_rate": 3.2202816119439574e-06, + "loss": 0.2928592681884766, + "step": 110240 + }, + { + "epoch": 0.9532559165074233, + "grad_norm": 15.605034001113697, + "learning_rate": 3.2200784282271624e-06, + "loss": 0.17321929931640626, + "step": 110245 + }, + { + "epoch": 0.9532991500289665, + "grad_norm": 18.591425221551702, + "learning_rate": 3.2198752434953863e-06, + "loss": 0.11825485229492187, + "step": 110250 + }, + { + "epoch": 0.9533423835505097, + "grad_norm": 14.456200959912405, + "learning_rate": 3.2196720577495636e-06, + "loss": 0.2777107238769531, + "step": 110255 + }, + { + "epoch": 0.9533856170720529, + "grad_norm": 3.1920492389940054, + "learning_rate": 3.2194688709906345e-06, + "loss": 0.22991943359375, + "step": 110260 + }, + { + "epoch": 0.9534288505935963, + "grad_norm": 1.9599375549552063, + "learning_rate": 3.219265683219533e-06, + "loss": 0.03392333984375, + "step": 110265 + }, + { + "epoch": 0.9534720841151395, + "grad_norm": 23.46098941828903, + "learning_rate": 3.2190624944371985e-06, + "loss": 0.12360954284667969, + "step": 110270 + }, + { + "epoch": 0.9535153176366827, + "grad_norm": 22.090684000876216, + "learning_rate": 3.218859304644567e-06, + "loss": 0.1171722412109375, + "step": 110275 + }, + { + "epoch": 0.9535585511582261, + "grad_norm": 22.855146903101872, + "learning_rate": 3.218656113842575e-06, + "loss": 0.1672760009765625, + "step": 110280 + }, + { + "epoch": 0.9536017846797693, + "grad_norm": 14.746380876084762, + "learning_rate": 3.2184529220321604e-06, + "loss": 0.19913787841796876, + "step": 110285 + }, + { + "epoch": 0.9536450182013125, + "grad_norm": 14.166817385408018, + "learning_rate": 3.218249729214261e-06, + "loss": 0.1356842041015625, + "step": 110290 + }, + { + "epoch": 0.9536882517228559, + "grad_norm": 0.7223582281406666, + "learning_rate": 3.218046535389813e-06, + "loss": 0.061742973327636716, + "step": 110295 + }, + { + "epoch": 0.9537314852443991, + "grad_norm": 26.78591239232085, + "learning_rate": 3.2178433405597533e-06, + "loss": 0.24648818969726563, + "step": 110300 + }, + { + "epoch": 0.9537747187659423, + "grad_norm": 3.726820023490071, + "learning_rate": 3.21764014472502e-06, + "loss": 0.219873046875, + "step": 110305 + }, + { + "epoch": 0.9538179522874857, + "grad_norm": 4.381901436831521, + "learning_rate": 3.2174369478865485e-06, + "loss": 0.03520050048828125, + "step": 110310 + }, + { + "epoch": 0.9538611858090289, + "grad_norm": 5.218721607010756, + "learning_rate": 3.2172337500452774e-06, + "loss": 0.072711181640625, + "step": 110315 + }, + { + "epoch": 0.9539044193305721, + "grad_norm": 1.051933555134644, + "learning_rate": 3.217030551202144e-06, + "loss": 0.14996414184570311, + "step": 110320 + }, + { + "epoch": 0.9539476528521155, + "grad_norm": 17.315869087762273, + "learning_rate": 3.216827351358084e-06, + "loss": 0.08797454833984375, + "step": 110325 + }, + { + "epoch": 0.9539908863736587, + "grad_norm": 8.075477287907677, + "learning_rate": 3.2166241505140363e-06, + "loss": 0.14448490142822265, + "step": 110330 + }, + { + "epoch": 0.9540341198952019, + "grad_norm": 23.31023511892316, + "learning_rate": 3.216420948670937e-06, + "loss": 0.15665969848632813, + "step": 110335 + }, + { + "epoch": 0.9540773534167452, + "grad_norm": 0.42879532076681653, + "learning_rate": 3.2162177458297227e-06, + "loss": 0.15500640869140625, + "step": 110340 + }, + { + "epoch": 0.9541205869382885, + "grad_norm": 0.6507873113638484, + "learning_rate": 3.2160145419913313e-06, + "loss": 0.045721435546875, + "step": 110345 + }, + { + "epoch": 0.9541638204598317, + "grad_norm": 3.5177490262585565, + "learning_rate": 3.2158113371567012e-06, + "loss": 0.022784423828125, + "step": 110350 + }, + { + "epoch": 0.954207053981375, + "grad_norm": 4.500712903906002, + "learning_rate": 3.2156081313267677e-06, + "loss": 0.06653556823730469, + "step": 110355 + }, + { + "epoch": 0.9542502875029183, + "grad_norm": 1.460404789548897, + "learning_rate": 3.2154049245024674e-06, + "loss": 0.15357742309570313, + "step": 110360 + }, + { + "epoch": 0.9542935210244615, + "grad_norm": 12.762105042568907, + "learning_rate": 3.2152017166847393e-06, + "loss": 0.11629867553710938, + "step": 110365 + }, + { + "epoch": 0.9543367545460048, + "grad_norm": 40.25487353015887, + "learning_rate": 3.2149985078745203e-06, + "loss": 0.31001434326171873, + "step": 110370 + }, + { + "epoch": 0.9543799880675481, + "grad_norm": 3.535966959617858, + "learning_rate": 3.214795298072747e-06, + "loss": 0.3303955078125, + "step": 110375 + }, + { + "epoch": 0.9544232215890913, + "grad_norm": 6.721184302447873, + "learning_rate": 3.2145920872803568e-06, + "loss": 0.058243179321289064, + "step": 110380 + }, + { + "epoch": 0.9544664551106345, + "grad_norm": 9.965246684707937, + "learning_rate": 3.214388875498287e-06, + "loss": 0.05571327209472656, + "step": 110385 + }, + { + "epoch": 0.9545096886321779, + "grad_norm": 3.468048039438658, + "learning_rate": 3.2141856627274738e-06, + "loss": 0.14121856689453124, + "step": 110390 + }, + { + "epoch": 0.9545529221537211, + "grad_norm": 0.6559113429803841, + "learning_rate": 3.213982448968856e-06, + "loss": 0.09620742797851563, + "step": 110395 + }, + { + "epoch": 0.9545961556752643, + "grad_norm": 1.4526622508213647, + "learning_rate": 3.21377923422337e-06, + "loss": 0.08550567626953125, + "step": 110400 + }, + { + "epoch": 0.9546393891968077, + "grad_norm": 2.906663987160682, + "learning_rate": 3.213576018491953e-06, + "loss": 0.03834609985351563, + "step": 110405 + }, + { + "epoch": 0.9546826227183509, + "grad_norm": 1.3313284353235615, + "learning_rate": 3.2133728017755424e-06, + "loss": 0.04182586669921875, + "step": 110410 + }, + { + "epoch": 0.9547258562398941, + "grad_norm": 7.707022480106129, + "learning_rate": 3.2131695840750757e-06, + "loss": 0.08544921875, + "step": 110415 + }, + { + "epoch": 0.9547690897614374, + "grad_norm": 6.878410029850563, + "learning_rate": 3.2129663653914887e-06, + "loss": 0.09385833740234376, + "step": 110420 + }, + { + "epoch": 0.9548123232829807, + "grad_norm": 5.021018618516132, + "learning_rate": 3.2127631457257196e-06, + "loss": 0.1555255889892578, + "step": 110425 + }, + { + "epoch": 0.9548555568045239, + "grad_norm": 52.81695351642401, + "learning_rate": 3.2125599250787065e-06, + "loss": 0.3633296966552734, + "step": 110430 + }, + { + "epoch": 0.9548987903260672, + "grad_norm": 3.9534517985533717, + "learning_rate": 3.2123567034513865e-06, + "loss": 0.25837860107421873, + "step": 110435 + }, + { + "epoch": 0.9549420238476105, + "grad_norm": 65.01702032217757, + "learning_rate": 3.212153480844694e-06, + "loss": 0.2925201416015625, + "step": 110440 + }, + { + "epoch": 0.9549852573691537, + "grad_norm": 1.7390394599230723, + "learning_rate": 3.21195025725957e-06, + "loss": 0.234307861328125, + "step": 110445 + }, + { + "epoch": 0.955028490890697, + "grad_norm": 11.844205433450876, + "learning_rate": 3.2117470326969498e-06, + "loss": 0.10588130950927735, + "step": 110450 + }, + { + "epoch": 0.9550717244122403, + "grad_norm": 0.2240442159714222, + "learning_rate": 3.21154380715777e-06, + "loss": 0.26930999755859375, + "step": 110455 + }, + { + "epoch": 0.9551149579337835, + "grad_norm": 15.69975170506084, + "learning_rate": 3.21134058064297e-06, + "loss": 0.240081787109375, + "step": 110460 + }, + { + "epoch": 0.9551581914553268, + "grad_norm": 9.94005939237433, + "learning_rate": 3.2111373531534865e-06, + "loss": 0.5731460571289062, + "step": 110465 + }, + { + "epoch": 0.9552014249768701, + "grad_norm": 5.4086238185236, + "learning_rate": 3.210934124690255e-06, + "loss": 0.18485641479492188, + "step": 110470 + }, + { + "epoch": 0.9552446584984133, + "grad_norm": 2.138246561319409, + "learning_rate": 3.2107308952542145e-06, + "loss": 0.06961669921875, + "step": 110475 + }, + { + "epoch": 0.9552878920199566, + "grad_norm": 1.8894628909212658, + "learning_rate": 3.2105276648463015e-06, + "loss": 0.15765838623046874, + "step": 110480 + }, + { + "epoch": 0.9553311255414999, + "grad_norm": 26.360538663645986, + "learning_rate": 3.210324433467453e-06, + "loss": 0.11449871063232422, + "step": 110485 + }, + { + "epoch": 0.9553743590630431, + "grad_norm": 1.0306951169007479, + "learning_rate": 3.2101212011186076e-06, + "loss": 0.1854705810546875, + "step": 110490 + }, + { + "epoch": 0.9554175925845864, + "grad_norm": 7.344448768743419, + "learning_rate": 3.2099179678007015e-06, + "loss": 0.3610076904296875, + "step": 110495 + }, + { + "epoch": 0.9554608261061297, + "grad_norm": 9.657457931874909, + "learning_rate": 3.209714733514672e-06, + "loss": 0.08954620361328125, + "step": 110500 + }, + { + "epoch": 0.9555040596276729, + "grad_norm": 4.1275718522578835, + "learning_rate": 3.2095114982614574e-06, + "loss": 0.05399932861328125, + "step": 110505 + }, + { + "epoch": 0.9555472931492162, + "grad_norm": 36.41336053248058, + "learning_rate": 3.2093082620419934e-06, + "loss": 0.484735107421875, + "step": 110510 + }, + { + "epoch": 0.9555905266707594, + "grad_norm": 0.41635954942027337, + "learning_rate": 3.2091050248572193e-06, + "loss": 0.1916261672973633, + "step": 110515 + }, + { + "epoch": 0.9556337601923027, + "grad_norm": 4.051015747893547, + "learning_rate": 3.20890178670807e-06, + "loss": 0.0775360107421875, + "step": 110520 + }, + { + "epoch": 0.955676993713846, + "grad_norm": 12.294312094911314, + "learning_rate": 3.208698547595485e-06, + "loss": 0.021180343627929688, + "step": 110525 + }, + { + "epoch": 0.9557202272353892, + "grad_norm": 8.331195950738826, + "learning_rate": 3.208495307520401e-06, + "loss": 0.0668426513671875, + "step": 110530 + }, + { + "epoch": 0.9557634607569325, + "grad_norm": 0.4503530076200507, + "learning_rate": 3.2082920664837546e-06, + "loss": 0.06378326416015626, + "step": 110535 + }, + { + "epoch": 0.9558066942784758, + "grad_norm": 12.162586153215086, + "learning_rate": 3.208088824486483e-06, + "loss": 0.39974250793457033, + "step": 110540 + }, + { + "epoch": 0.955849927800019, + "grad_norm": 3.475085719778009, + "learning_rate": 3.207885581529525e-06, + "loss": 0.118096923828125, + "step": 110545 + }, + { + "epoch": 0.9558931613215623, + "grad_norm": 4.155261889385248, + "learning_rate": 3.2076823376138164e-06, + "loss": 0.07938041687011718, + "step": 110550 + }, + { + "epoch": 0.9559363948431056, + "grad_norm": 0.3162468409451337, + "learning_rate": 3.2074790927402955e-06, + "loss": 0.12900238037109374, + "step": 110555 + }, + { + "epoch": 0.9559796283646488, + "grad_norm": 0.023987885991665323, + "learning_rate": 3.2072758469098998e-06, + "loss": 0.038820648193359376, + "step": 110560 + }, + { + "epoch": 0.9560228618861921, + "grad_norm": 5.926754806407999, + "learning_rate": 3.2070726001235655e-06, + "loss": 0.241064453125, + "step": 110565 + }, + { + "epoch": 0.9560660954077354, + "grad_norm": 11.794867394240509, + "learning_rate": 3.206869352382231e-06, + "loss": 0.11514511108398437, + "step": 110570 + }, + { + "epoch": 0.9561093289292786, + "grad_norm": 6.538331809380305, + "learning_rate": 3.2066661036868337e-06, + "loss": 0.14952564239501953, + "step": 110575 + }, + { + "epoch": 0.9561525624508219, + "grad_norm": 0.6045083513083561, + "learning_rate": 3.20646285403831e-06, + "loss": 0.11164588928222656, + "step": 110580 + }, + { + "epoch": 0.9561957959723651, + "grad_norm": 1.9800750455404479, + "learning_rate": 3.206259603437598e-06, + "loss": 0.0654754638671875, + "step": 110585 + }, + { + "epoch": 0.9562390294939084, + "grad_norm": 2.848818229865349, + "learning_rate": 3.206056351885635e-06, + "loss": 0.0239654541015625, + "step": 110590 + }, + { + "epoch": 0.9562822630154516, + "grad_norm": 4.520477948144509, + "learning_rate": 3.2058530993833585e-06, + "loss": 0.038465118408203124, + "step": 110595 + }, + { + "epoch": 0.956325496536995, + "grad_norm": 17.824859552779337, + "learning_rate": 3.2056498459317055e-06, + "loss": 0.1968385696411133, + "step": 110600 + }, + { + "epoch": 0.9563687300585382, + "grad_norm": 0.40391306125539805, + "learning_rate": 3.2054465915316135e-06, + "loss": 0.14229278564453124, + "step": 110605 + }, + { + "epoch": 0.9564119635800814, + "grad_norm": 1.251627790808717, + "learning_rate": 3.2052433361840208e-06, + "loss": 0.16332473754882812, + "step": 110610 + }, + { + "epoch": 0.9564551971016247, + "grad_norm": 0.22991071326248164, + "learning_rate": 3.2050400798898624e-06, + "loss": 0.024468231201171874, + "step": 110615 + }, + { + "epoch": 0.956498430623168, + "grad_norm": 54.76405331204136, + "learning_rate": 3.204836822650078e-06, + "loss": 0.0896514892578125, + "step": 110620 + }, + { + "epoch": 0.9565416641447112, + "grad_norm": 4.991560316431429, + "learning_rate": 3.204633564465605e-06, + "loss": 0.35123491287231445, + "step": 110625 + }, + { + "epoch": 0.9565848976662545, + "grad_norm": 22.675192003734896, + "learning_rate": 3.204430305337379e-06, + "loss": 0.13383560180664061, + "step": 110630 + }, + { + "epoch": 0.9566281311877978, + "grad_norm": 0.029144001371406575, + "learning_rate": 3.2042270452663393e-06, + "loss": 0.10246391296386718, + "step": 110635 + }, + { + "epoch": 0.956671364709341, + "grad_norm": 3.3999220247769686, + "learning_rate": 3.2040237842534226e-06, + "loss": 0.16843032836914062, + "step": 110640 + }, + { + "epoch": 0.9567145982308843, + "grad_norm": 14.421248964023638, + "learning_rate": 3.2038205222995653e-06, + "loss": 0.08077163696289062, + "step": 110645 + }, + { + "epoch": 0.9567578317524276, + "grad_norm": 3.8623390664777113, + "learning_rate": 3.203617259405706e-06, + "loss": 0.1442108154296875, + "step": 110650 + }, + { + "epoch": 0.9568010652739708, + "grad_norm": 0.9323134111368125, + "learning_rate": 3.2034139955727825e-06, + "loss": 0.038275146484375, + "step": 110655 + }, + { + "epoch": 0.9568442987955141, + "grad_norm": 1.0057505018620625, + "learning_rate": 3.203210730801731e-06, + "loss": 0.2602867126464844, + "step": 110660 + }, + { + "epoch": 0.9568875323170574, + "grad_norm": 0.24721008164936312, + "learning_rate": 3.2030074650934904e-06, + "loss": 0.00967559814453125, + "step": 110665 + }, + { + "epoch": 0.9569307658386006, + "grad_norm": 0.04760444036388584, + "learning_rate": 3.2028041984489968e-06, + "loss": 0.20412216186523438, + "step": 110670 + }, + { + "epoch": 0.9569739993601439, + "grad_norm": 7.383892346294666, + "learning_rate": 3.202600930869188e-06, + "loss": 0.19112625122070312, + "step": 110675 + }, + { + "epoch": 0.9570172328816872, + "grad_norm": 0.5391383423370749, + "learning_rate": 3.2023976623550015e-06, + "loss": 0.0495880126953125, + "step": 110680 + }, + { + "epoch": 0.9570604664032304, + "grad_norm": 22.09854963007559, + "learning_rate": 3.2021943929073747e-06, + "loss": 0.29410400390625, + "step": 110685 + }, + { + "epoch": 0.9571036999247736, + "grad_norm": 0.7266625293236224, + "learning_rate": 3.2019911225272464e-06, + "loss": 0.13676300048828124, + "step": 110690 + }, + { + "epoch": 0.957146933446317, + "grad_norm": 15.471374962470291, + "learning_rate": 3.201787851215552e-06, + "loss": 0.19927215576171875, + "step": 110695 + }, + { + "epoch": 0.9571901669678602, + "grad_norm": 12.4034157593092, + "learning_rate": 3.2015845789732304e-06, + "loss": 0.036833000183105466, + "step": 110700 + }, + { + "epoch": 0.9572334004894034, + "grad_norm": 12.190889310355063, + "learning_rate": 3.2013813058012176e-06, + "loss": 0.17545318603515625, + "step": 110705 + }, + { + "epoch": 0.9572766340109468, + "grad_norm": 15.514543950972357, + "learning_rate": 3.2011780317004518e-06, + "loss": 0.3004608154296875, + "step": 110710 + }, + { + "epoch": 0.95731986753249, + "grad_norm": 25.46226157619388, + "learning_rate": 3.2009747566718723e-06, + "loss": 0.3383796691894531, + "step": 110715 + }, + { + "epoch": 0.9573631010540332, + "grad_norm": 1.0833795967628752, + "learning_rate": 3.200771480716414e-06, + "loss": 0.19077701568603517, + "step": 110720 + }, + { + "epoch": 0.9574063345755766, + "grad_norm": 40.75961435942934, + "learning_rate": 3.200568203835015e-06, + "loss": 0.1110626220703125, + "step": 110725 + }, + { + "epoch": 0.9574495680971198, + "grad_norm": 1.3166608690723152, + "learning_rate": 3.200364926028614e-06, + "loss": 0.3377994537353516, + "step": 110730 + }, + { + "epoch": 0.957492801618663, + "grad_norm": 0.2883034554323189, + "learning_rate": 3.200161647298147e-06, + "loss": 0.08128433227539063, + "step": 110735 + }, + { + "epoch": 0.9575360351402064, + "grad_norm": 15.42642194913792, + "learning_rate": 3.1999583676445524e-06, + "loss": 0.119415283203125, + "step": 110740 + }, + { + "epoch": 0.9575792686617496, + "grad_norm": 3.2130686114270666, + "learning_rate": 3.199755087068768e-06, + "loss": 0.0387176513671875, + "step": 110745 + }, + { + "epoch": 0.9576225021832928, + "grad_norm": 6.7361510518966154, + "learning_rate": 3.1995518055717305e-06, + "loss": 0.1526958465576172, + "step": 110750 + }, + { + "epoch": 0.9576657357048362, + "grad_norm": 3.6722418902212945, + "learning_rate": 3.1993485231543772e-06, + "loss": 0.11775341033935546, + "step": 110755 + }, + { + "epoch": 0.9577089692263794, + "grad_norm": 25.985616756635476, + "learning_rate": 3.1991452398176467e-06, + "loss": 0.41406097412109377, + "step": 110760 + }, + { + "epoch": 0.9577522027479226, + "grad_norm": 4.140923623539087, + "learning_rate": 3.198941955562476e-06, + "loss": 0.17923812866210936, + "step": 110765 + }, + { + "epoch": 0.9577954362694658, + "grad_norm": 0.35352854785908655, + "learning_rate": 3.198738670389802e-06, + "loss": 0.0442230224609375, + "step": 110770 + }, + { + "epoch": 0.9578386697910092, + "grad_norm": 0.12560076734647868, + "learning_rate": 3.1985353843005633e-06, + "loss": 0.2578380584716797, + "step": 110775 + }, + { + "epoch": 0.9578819033125524, + "grad_norm": 0.6365310649079301, + "learning_rate": 3.198332097295697e-06, + "loss": 0.04340972900390625, + "step": 110780 + }, + { + "epoch": 0.9579251368340956, + "grad_norm": 1.1510885089768113, + "learning_rate": 3.1981288093761395e-06, + "loss": 0.065240478515625, + "step": 110785 + }, + { + "epoch": 0.957968370355639, + "grad_norm": 4.253420767844587, + "learning_rate": 3.19792552054283e-06, + "loss": 0.08992691040039062, + "step": 110790 + }, + { + "epoch": 0.9580116038771822, + "grad_norm": 42.2141827859285, + "learning_rate": 3.1977222307967063e-06, + "loss": 0.19419021606445314, + "step": 110795 + }, + { + "epoch": 0.9580548373987254, + "grad_norm": 4.715727734027359, + "learning_rate": 3.197518940138705e-06, + "loss": 0.06250686645507812, + "step": 110800 + }, + { + "epoch": 0.9580980709202688, + "grad_norm": 0.23182099973842427, + "learning_rate": 3.197315648569763e-06, + "loss": 0.11286392211914062, + "step": 110805 + }, + { + "epoch": 0.958141304441812, + "grad_norm": 7.519147642626269, + "learning_rate": 3.1971123560908195e-06, + "loss": 0.13078155517578124, + "step": 110810 + }, + { + "epoch": 0.9581845379633552, + "grad_norm": 5.282031218584406, + "learning_rate": 3.1969090627028106e-06, + "loss": 0.10083942413330078, + "step": 110815 + }, + { + "epoch": 0.9582277714848986, + "grad_norm": 19.184799643553603, + "learning_rate": 3.196705768406674e-06, + "loss": 0.09382209777832032, + "step": 110820 + }, + { + "epoch": 0.9582710050064418, + "grad_norm": 1.1445162881905624, + "learning_rate": 3.196502473203349e-06, + "loss": 0.025796890258789062, + "step": 110825 + }, + { + "epoch": 0.958314238527985, + "grad_norm": 5.691591834547065, + "learning_rate": 3.196299177093772e-06, + "loss": 0.09666023254394532, + "step": 110830 + }, + { + "epoch": 0.9583574720495284, + "grad_norm": 1.5953238073700546, + "learning_rate": 3.1960958800788796e-06, + "loss": 0.04929161071777344, + "step": 110835 + }, + { + "epoch": 0.9584007055710716, + "grad_norm": 7.343556132395507, + "learning_rate": 3.1958925821596112e-06, + "loss": 0.10548858642578125, + "step": 110840 + }, + { + "epoch": 0.9584439390926148, + "grad_norm": 27.09397376282731, + "learning_rate": 3.195689283336902e-06, + "loss": 0.1961181640625, + "step": 110845 + }, + { + "epoch": 0.9584871726141582, + "grad_norm": 8.369421283965247, + "learning_rate": 3.1954859836116927e-06, + "loss": 0.20114288330078126, + "step": 110850 + }, + { + "epoch": 0.9585304061357014, + "grad_norm": 7.335631085732296, + "learning_rate": 3.1952826829849187e-06, + "loss": 0.11303634643554687, + "step": 110855 + }, + { + "epoch": 0.9585736396572446, + "grad_norm": 0.24355332241470556, + "learning_rate": 3.1950793814575187e-06, + "loss": 0.08656940460205079, + "step": 110860 + }, + { + "epoch": 0.9586168731787879, + "grad_norm": 8.2545387945657, + "learning_rate": 3.1948760790304295e-06, + "loss": 0.083349609375, + "step": 110865 + }, + { + "epoch": 0.9586601067003312, + "grad_norm": 2.106468175501637, + "learning_rate": 3.194672775704589e-06, + "loss": 0.1205474853515625, + "step": 110870 + }, + { + "epoch": 0.9587033402218744, + "grad_norm": 10.05780195622641, + "learning_rate": 3.1944694714809347e-06, + "loss": 0.09102020263671876, + "step": 110875 + }, + { + "epoch": 0.9587465737434177, + "grad_norm": 0.8968572181376746, + "learning_rate": 3.194266166360405e-06, + "loss": 0.2574432373046875, + "step": 110880 + }, + { + "epoch": 0.958789807264961, + "grad_norm": 9.233403144782518, + "learning_rate": 3.1940628603439356e-06, + "loss": 0.18330230712890624, + "step": 110885 + }, + { + "epoch": 0.9588330407865042, + "grad_norm": 8.098428670944088, + "learning_rate": 3.193859553432467e-06, + "loss": 0.5719165802001953, + "step": 110890 + }, + { + "epoch": 0.9588762743080474, + "grad_norm": 1.0083664419196334, + "learning_rate": 3.193656245626935e-06, + "loss": 0.44130096435546873, + "step": 110895 + }, + { + "epoch": 0.9589195078295908, + "grad_norm": 14.845040201186459, + "learning_rate": 3.1934529369282765e-06, + "loss": 0.13099136352539062, + "step": 110900 + }, + { + "epoch": 0.958962741351134, + "grad_norm": 0.4131571252685712, + "learning_rate": 3.1932496273374295e-06, + "loss": 0.2974740982055664, + "step": 110905 + }, + { + "epoch": 0.9590059748726772, + "grad_norm": 7.0348270612754185, + "learning_rate": 3.193046316855334e-06, + "loss": 0.2827880859375, + "step": 110910 + }, + { + "epoch": 0.9590492083942206, + "grad_norm": 0.678931049852465, + "learning_rate": 3.1928430054829254e-06, + "loss": 0.05395698547363281, + "step": 110915 + }, + { + "epoch": 0.9590924419157638, + "grad_norm": 4.739818073607133, + "learning_rate": 3.192639693221142e-06, + "loss": 0.29756317138671873, + "step": 110920 + }, + { + "epoch": 0.959135675437307, + "grad_norm": 4.675428821892834, + "learning_rate": 3.192436380070922e-06, + "loss": 0.414764404296875, + "step": 110925 + }, + { + "epoch": 0.9591789089588504, + "grad_norm": 1.0181001377598138, + "learning_rate": 3.192233066033201e-06, + "loss": 0.05039138793945312, + "step": 110930 + }, + { + "epoch": 0.9592221424803936, + "grad_norm": 7.671894411406306, + "learning_rate": 3.192029751108919e-06, + "loss": 0.1516876220703125, + "step": 110935 + }, + { + "epoch": 0.9592653760019368, + "grad_norm": 8.717341419024997, + "learning_rate": 3.1918264352990127e-06, + "loss": 0.09448165893554687, + "step": 110940 + }, + { + "epoch": 0.9593086095234801, + "grad_norm": 13.12701543777508, + "learning_rate": 3.1916231186044197e-06, + "loss": 0.05257720947265625, + "step": 110945 + }, + { + "epoch": 0.9593518430450234, + "grad_norm": 8.866510693065635, + "learning_rate": 3.191419801026078e-06, + "loss": 0.3640045166015625, + "step": 110950 + }, + { + "epoch": 0.9593950765665666, + "grad_norm": 7.767153316872274, + "learning_rate": 3.1912164825649247e-06, + "loss": 0.15260696411132812, + "step": 110955 + }, + { + "epoch": 0.9594383100881099, + "grad_norm": 17.611929932670183, + "learning_rate": 3.1910131632218977e-06, + "loss": 0.09300537109375, + "step": 110960 + }, + { + "epoch": 0.9594815436096532, + "grad_norm": 0.33126725980109495, + "learning_rate": 3.190809842997935e-06, + "loss": 0.07114791870117188, + "step": 110965 + }, + { + "epoch": 0.9595247771311964, + "grad_norm": 14.07325753054633, + "learning_rate": 3.1906065218939745e-06, + "loss": 0.23495101928710938, + "step": 110970 + }, + { + "epoch": 0.9595680106527397, + "grad_norm": 59.25966217769865, + "learning_rate": 3.190403199910954e-06, + "loss": 0.13221588134765624, + "step": 110975 + }, + { + "epoch": 0.959611244174283, + "grad_norm": 23.576394449403168, + "learning_rate": 3.1901998770498096e-06, + "loss": 0.13107452392578126, + "step": 110980 + }, + { + "epoch": 0.9596544776958262, + "grad_norm": 0.905929456789742, + "learning_rate": 3.1899965533114807e-06, + "loss": 0.0408538818359375, + "step": 110985 + }, + { + "epoch": 0.9596977112173695, + "grad_norm": 0.5863848574369052, + "learning_rate": 3.1897932286969048e-06, + "loss": 0.2447296142578125, + "step": 110990 + }, + { + "epoch": 0.9597409447389128, + "grad_norm": 15.721684790783678, + "learning_rate": 3.189589903207019e-06, + "loss": 0.18093376159667968, + "step": 110995 + }, + { + "epoch": 0.959784178260456, + "grad_norm": 3.0375329331538117, + "learning_rate": 3.1893865768427613e-06, + "loss": 0.02251014709472656, + "step": 111000 + }, + { + "epoch": 0.9598274117819993, + "grad_norm": 5.719992935777959, + "learning_rate": 3.18918324960507e-06, + "loss": 0.12350120544433593, + "step": 111005 + }, + { + "epoch": 0.9598706453035426, + "grad_norm": 2.6559607462468597, + "learning_rate": 3.1889799214948813e-06, + "loss": 0.05792312622070313, + "step": 111010 + }, + { + "epoch": 0.9599138788250858, + "grad_norm": 2.817106754942598, + "learning_rate": 3.1887765925131343e-06, + "loss": 0.15328445434570312, + "step": 111015 + }, + { + "epoch": 0.9599571123466291, + "grad_norm": 4.586897252781191, + "learning_rate": 3.1885732626607666e-06, + "loss": 0.1262298583984375, + "step": 111020 + }, + { + "epoch": 0.9600003458681724, + "grad_norm": 1.0015363203143504, + "learning_rate": 3.188369931938715e-06, + "loss": 0.041710281372070314, + "step": 111025 + }, + { + "epoch": 0.9600435793897156, + "grad_norm": 0.408905784563543, + "learning_rate": 3.1881666003479186e-06, + "loss": 0.1389850616455078, + "step": 111030 + }, + { + "epoch": 0.9600868129112589, + "grad_norm": 1.26911309000746, + "learning_rate": 3.1879632678893148e-06, + "loss": 0.07881927490234375, + "step": 111035 + }, + { + "epoch": 0.9601300464328021, + "grad_norm": 2.5247952416860553, + "learning_rate": 3.1877599345638405e-06, + "loss": 0.1012939453125, + "step": 111040 + }, + { + "epoch": 0.9601732799543454, + "grad_norm": 0.3054824605474061, + "learning_rate": 3.1875566003724335e-06, + "loss": 0.05379638671875, + "step": 111045 + }, + { + "epoch": 0.9602165134758887, + "grad_norm": 6.7471682952771355, + "learning_rate": 3.1873532653160326e-06, + "loss": 0.21649665832519532, + "step": 111050 + }, + { + "epoch": 0.9602597469974319, + "grad_norm": 3.725697595062789, + "learning_rate": 3.1871499293955755e-06, + "loss": 0.0986083984375, + "step": 111055 + }, + { + "epoch": 0.9603029805189752, + "grad_norm": 7.920986130654675, + "learning_rate": 3.1869465926119986e-06, + "loss": 0.0395751953125, + "step": 111060 + }, + { + "epoch": 0.9603462140405185, + "grad_norm": 0.151367328223591, + "learning_rate": 3.186743254966241e-06, + "loss": 0.08073654174804687, + "step": 111065 + }, + { + "epoch": 0.9603894475620617, + "grad_norm": 4.397903562545854, + "learning_rate": 3.1865399164592395e-06, + "loss": 0.1470672607421875, + "step": 111070 + }, + { + "epoch": 0.960432681083605, + "grad_norm": 0.8816973600962419, + "learning_rate": 3.186336577091932e-06, + "loss": 0.15635719299316406, + "step": 111075 + }, + { + "epoch": 0.9604759146051483, + "grad_norm": 2.069219387126587, + "learning_rate": 3.1861332368652583e-06, + "loss": 0.33481178283691404, + "step": 111080 + }, + { + "epoch": 0.9605191481266915, + "grad_norm": 0.5425999661865902, + "learning_rate": 3.1859298957801538e-06, + "loss": 0.1555755615234375, + "step": 111085 + }, + { + "epoch": 0.9605623816482348, + "grad_norm": 5.554360990166647, + "learning_rate": 3.1857265538375563e-06, + "loss": 0.1667572021484375, + "step": 111090 + }, + { + "epoch": 0.960605615169778, + "grad_norm": 7.923257441577377, + "learning_rate": 3.1855232110384056e-06, + "loss": 0.056482696533203126, + "step": 111095 + }, + { + "epoch": 0.9606488486913213, + "grad_norm": 58.805179457991876, + "learning_rate": 3.1853198673836373e-06, + "loss": 0.27817459106445314, + "step": 111100 + }, + { + "epoch": 0.9606920822128646, + "grad_norm": 0.23131866089227612, + "learning_rate": 3.18511652287419e-06, + "loss": 0.1246337890625, + "step": 111105 + }, + { + "epoch": 0.9607353157344078, + "grad_norm": 3.3087331662942367, + "learning_rate": 3.184913177511002e-06, + "loss": 0.06244220733642578, + "step": 111110 + }, + { + "epoch": 0.9607785492559511, + "grad_norm": 0.30175419924163527, + "learning_rate": 3.184709831295011e-06, + "loss": 0.2979743957519531, + "step": 111115 + }, + { + "epoch": 0.9608217827774943, + "grad_norm": 19.896231201753636, + "learning_rate": 3.184506484227155e-06, + "loss": 0.13078155517578124, + "step": 111120 + }, + { + "epoch": 0.9608650162990376, + "grad_norm": 21.40022401344008, + "learning_rate": 3.184303136308371e-06, + "loss": 0.42562103271484375, + "step": 111125 + }, + { + "epoch": 0.9609082498205809, + "grad_norm": 15.83263445143739, + "learning_rate": 3.1840997875395963e-06, + "loss": 0.2741851806640625, + "step": 111130 + }, + { + "epoch": 0.9609514833421241, + "grad_norm": 14.782838183509542, + "learning_rate": 3.1838964379217707e-06, + "loss": 0.37445220947265623, + "step": 111135 + }, + { + "epoch": 0.9609947168636674, + "grad_norm": 5.843124904044339, + "learning_rate": 3.18369308745583e-06, + "loss": 0.027762222290039062, + "step": 111140 + }, + { + "epoch": 0.9610379503852107, + "grad_norm": 2.8374525918244298, + "learning_rate": 3.1834897361427146e-06, + "loss": 0.0810882568359375, + "step": 111145 + }, + { + "epoch": 0.9610811839067539, + "grad_norm": 4.455081377913945, + "learning_rate": 3.18328638398336e-06, + "loss": 0.05462722778320313, + "step": 111150 + }, + { + "epoch": 0.9611244174282972, + "grad_norm": 6.55017778833958, + "learning_rate": 3.1830830309787043e-06, + "loss": 0.12686767578125, + "step": 111155 + }, + { + "epoch": 0.9611676509498405, + "grad_norm": 5.400831156806025, + "learning_rate": 3.182879677129686e-06, + "loss": 0.47566375732421873, + "step": 111160 + }, + { + "epoch": 0.9612108844713837, + "grad_norm": 32.071265511654644, + "learning_rate": 3.182676322437243e-06, + "loss": 0.19027023315429686, + "step": 111165 + }, + { + "epoch": 0.961254117992927, + "grad_norm": 0.2161086627103608, + "learning_rate": 3.1824729669023124e-06, + "loss": 0.06832275390625, + "step": 111170 + }, + { + "epoch": 0.9612973515144703, + "grad_norm": 2.9396096954563977, + "learning_rate": 3.1822696105258335e-06, + "loss": 0.05463104248046875, + "step": 111175 + }, + { + "epoch": 0.9613405850360135, + "grad_norm": 0.7599290098618248, + "learning_rate": 3.1820662533087434e-06, + "loss": 0.0306396484375, + "step": 111180 + }, + { + "epoch": 0.9613838185575568, + "grad_norm": 6.1041473199535226, + "learning_rate": 3.181862895251978e-06, + "loss": 0.06852340698242188, + "step": 111185 + }, + { + "epoch": 0.9614270520791001, + "grad_norm": 7.1942917787374006, + "learning_rate": 3.181659536356479e-06, + "loss": 0.048075103759765626, + "step": 111190 + }, + { + "epoch": 0.9614702856006433, + "grad_norm": 2.0715674930980135, + "learning_rate": 3.1814561766231814e-06, + "loss": 0.10370330810546875, + "step": 111195 + }, + { + "epoch": 0.9615135191221866, + "grad_norm": 34.82634784426655, + "learning_rate": 3.1812528160530237e-06, + "loss": 0.378656005859375, + "step": 111200 + }, + { + "epoch": 0.9615567526437299, + "grad_norm": 12.158057369427683, + "learning_rate": 3.1810494546469453e-06, + "loss": 0.13988037109375, + "step": 111205 + }, + { + "epoch": 0.9615999861652731, + "grad_norm": 11.269695328159512, + "learning_rate": 3.1808460924058817e-06, + "loss": 0.2504547119140625, + "step": 111210 + }, + { + "epoch": 0.9616432196868163, + "grad_norm": 1.5173342733682333, + "learning_rate": 3.180642729330772e-06, + "loss": 0.1090606689453125, + "step": 111215 + }, + { + "epoch": 0.9616864532083597, + "grad_norm": 0.2651337972911044, + "learning_rate": 3.180439365422554e-06, + "loss": 0.06589736938476562, + "step": 111220 + }, + { + "epoch": 0.9617296867299029, + "grad_norm": 0.7472588174405743, + "learning_rate": 3.1802360006821654e-06, + "loss": 0.0744659423828125, + "step": 111225 + }, + { + "epoch": 0.9617729202514461, + "grad_norm": 15.00565115381793, + "learning_rate": 3.1800326351105454e-06, + "loss": 0.1658233642578125, + "step": 111230 + }, + { + "epoch": 0.9618161537729895, + "grad_norm": 0.4349437496324852, + "learning_rate": 3.1798292687086298e-06, + "loss": 0.05116195678710937, + "step": 111235 + }, + { + "epoch": 0.9618593872945327, + "grad_norm": 0.7196070399393145, + "learning_rate": 3.179625901477357e-06, + "loss": 0.05856781005859375, + "step": 111240 + }, + { + "epoch": 0.9619026208160759, + "grad_norm": 26.875834165546, + "learning_rate": 3.1794225334176668e-06, + "loss": 0.3813629150390625, + "step": 111245 + }, + { + "epoch": 0.9619458543376193, + "grad_norm": 18.97087166374117, + "learning_rate": 3.179219164530495e-06, + "loss": 0.15091590881347655, + "step": 111250 + }, + { + "epoch": 0.9619890878591625, + "grad_norm": 28.28639436168117, + "learning_rate": 3.17901579481678e-06, + "loss": 0.207879638671875, + "step": 111255 + }, + { + "epoch": 0.9620323213807057, + "grad_norm": 7.079129694164568, + "learning_rate": 3.1788124242774606e-06, + "loss": 0.1738056182861328, + "step": 111260 + }, + { + "epoch": 0.962075554902249, + "grad_norm": 48.28598452414642, + "learning_rate": 3.178609052913474e-06, + "loss": 0.5263263702392578, + "step": 111265 + }, + { + "epoch": 0.9621187884237923, + "grad_norm": 2.9621012723480047, + "learning_rate": 3.1784056807257575e-06, + "loss": 0.34731330871582033, + "step": 111270 + }, + { + "epoch": 0.9621620219453355, + "grad_norm": 11.018249709642173, + "learning_rate": 3.1782023077152506e-06, + "loss": 0.08703327178955078, + "step": 111275 + }, + { + "epoch": 0.9622052554668789, + "grad_norm": 12.327930929035762, + "learning_rate": 3.1779989338828893e-06, + "loss": 0.130792236328125, + "step": 111280 + }, + { + "epoch": 0.9622484889884221, + "grad_norm": 19.4723631550267, + "learning_rate": 3.1777955592296137e-06, + "loss": 0.3306529998779297, + "step": 111285 + }, + { + "epoch": 0.9622917225099653, + "grad_norm": 0.1263856768871394, + "learning_rate": 3.177592183756361e-06, + "loss": 0.18765716552734374, + "step": 111290 + }, + { + "epoch": 0.9623349560315085, + "grad_norm": 11.384877705788062, + "learning_rate": 3.1773888074640678e-06, + "loss": 0.07459487915039062, + "step": 111295 + }, + { + "epoch": 0.9623781895530519, + "grad_norm": 5.051606130517039, + "learning_rate": 3.1771854303536732e-06, + "loss": 0.0472076416015625, + "step": 111300 + }, + { + "epoch": 0.9624214230745951, + "grad_norm": 0.6494430704843103, + "learning_rate": 3.1769820524261155e-06, + "loss": 0.02203216552734375, + "step": 111305 + }, + { + "epoch": 0.9624646565961383, + "grad_norm": 0.41608508165735325, + "learning_rate": 3.1767786736823327e-06, + "loss": 0.333599853515625, + "step": 111310 + }, + { + "epoch": 0.9625078901176817, + "grad_norm": 4.249437145215692, + "learning_rate": 3.1765752941232617e-06, + "loss": 0.06044769287109375, + "step": 111315 + }, + { + "epoch": 0.9625511236392249, + "grad_norm": 0.379765014057841, + "learning_rate": 3.176371913749841e-06, + "loss": 0.383428955078125, + "step": 111320 + }, + { + "epoch": 0.9625943571607681, + "grad_norm": 15.141389627944008, + "learning_rate": 3.1761685325630084e-06, + "loss": 0.10537071228027343, + "step": 111325 + }, + { + "epoch": 0.9626375906823115, + "grad_norm": 12.198090670400978, + "learning_rate": 3.175965150563702e-06, + "loss": 0.05383720397949219, + "step": 111330 + }, + { + "epoch": 0.9626808242038547, + "grad_norm": 7.184702659333605, + "learning_rate": 3.1757617677528607e-06, + "loss": 0.11869354248046875, + "step": 111335 + }, + { + "epoch": 0.9627240577253979, + "grad_norm": 10.669452598886181, + "learning_rate": 3.1755583841314213e-06, + "loss": 0.15491180419921874, + "step": 111340 + }, + { + "epoch": 0.9627672912469413, + "grad_norm": 9.018044218153024, + "learning_rate": 3.1753549997003214e-06, + "loss": 0.02490081787109375, + "step": 111345 + }, + { + "epoch": 0.9628105247684845, + "grad_norm": 1.6660545740661736, + "learning_rate": 3.175151614460501e-06, + "loss": 0.09627685546875, + "step": 111350 + }, + { + "epoch": 0.9628537582900277, + "grad_norm": 24.364742111570948, + "learning_rate": 3.1749482284128954e-06, + "loss": 0.13072662353515624, + "step": 111355 + }, + { + "epoch": 0.9628969918115711, + "grad_norm": 4.614681211475359, + "learning_rate": 3.1747448415584444e-06, + "loss": 0.0521026611328125, + "step": 111360 + }, + { + "epoch": 0.9629402253331143, + "grad_norm": 2.543492565029426, + "learning_rate": 3.174541453898087e-06, + "loss": 0.059150314331054686, + "step": 111365 + }, + { + "epoch": 0.9629834588546575, + "grad_norm": 31.73572017339651, + "learning_rate": 3.1743380654327586e-06, + "loss": 0.293963623046875, + "step": 111370 + }, + { + "epoch": 0.9630266923762009, + "grad_norm": 12.07482434959601, + "learning_rate": 3.174134676163399e-06, + "loss": 0.06397705078125, + "step": 111375 + }, + { + "epoch": 0.9630699258977441, + "grad_norm": 1.0554173292102345, + "learning_rate": 3.1739312860909455e-06, + "loss": 0.06105117797851563, + "step": 111380 + }, + { + "epoch": 0.9631131594192873, + "grad_norm": 0.9854917422856988, + "learning_rate": 3.173727895216336e-06, + "loss": 0.5562156677246094, + "step": 111385 + }, + { + "epoch": 0.9631563929408306, + "grad_norm": 31.16884576090797, + "learning_rate": 3.173524503540509e-06, + "loss": 0.249951171875, + "step": 111390 + }, + { + "epoch": 0.9631996264623739, + "grad_norm": 3.5319693647320145, + "learning_rate": 3.1733211110644022e-06, + "loss": 0.15518646240234374, + "step": 111395 + }, + { + "epoch": 0.9632428599839171, + "grad_norm": 1.8152464742860175, + "learning_rate": 3.1731177177889544e-06, + "loss": 0.17042083740234376, + "step": 111400 + }, + { + "epoch": 0.9632860935054604, + "grad_norm": 3.2649357932700176, + "learning_rate": 3.172914323715102e-06, + "loss": 0.4238275527954102, + "step": 111405 + }, + { + "epoch": 0.9633293270270037, + "grad_norm": 13.678252680661641, + "learning_rate": 3.1727109288437837e-06, + "loss": 0.26722412109375, + "step": 111410 + }, + { + "epoch": 0.9633725605485469, + "grad_norm": 0.9274546858448712, + "learning_rate": 3.1725075331759387e-06, + "loss": 0.054864501953125, + "step": 111415 + }, + { + "epoch": 0.9634157940700901, + "grad_norm": 5.786689173490908, + "learning_rate": 3.172304136712505e-06, + "loss": 0.08592910766601562, + "step": 111420 + }, + { + "epoch": 0.9634590275916335, + "grad_norm": 0.493783833234649, + "learning_rate": 3.1721007394544183e-06, + "loss": 0.02523345947265625, + "step": 111425 + }, + { + "epoch": 0.9635022611131767, + "grad_norm": 1.0396090758513166, + "learning_rate": 3.171897341402619e-06, + "loss": 0.048541259765625, + "step": 111430 + }, + { + "epoch": 0.96354549463472, + "grad_norm": 15.799209298036358, + "learning_rate": 3.171693942558044e-06, + "loss": 0.14490814208984376, + "step": 111435 + }, + { + "epoch": 0.9635887281562633, + "grad_norm": 3.846193580270547, + "learning_rate": 3.1714905429216314e-06, + "loss": 0.04669189453125, + "step": 111440 + }, + { + "epoch": 0.9636319616778065, + "grad_norm": 0.028528744206607313, + "learning_rate": 3.1712871424943206e-06, + "loss": 0.04774818420410156, + "step": 111445 + }, + { + "epoch": 0.9636751951993497, + "grad_norm": 29.55908379997806, + "learning_rate": 3.1710837412770486e-06, + "loss": 0.5194823265075683, + "step": 111450 + }, + { + "epoch": 0.9637184287208931, + "grad_norm": 3.03038789685753, + "learning_rate": 3.1708803392707527e-06, + "loss": 0.20155372619628906, + "step": 111455 + }, + { + "epoch": 0.9637616622424363, + "grad_norm": 60.64784073143553, + "learning_rate": 3.1706769364763724e-06, + "loss": 0.1479400634765625, + "step": 111460 + }, + { + "epoch": 0.9638048957639795, + "grad_norm": 0.0865928521377501, + "learning_rate": 3.1704735328948442e-06, + "loss": 0.10044746398925782, + "step": 111465 + }, + { + "epoch": 0.9638481292855228, + "grad_norm": 7.2349581969367875, + "learning_rate": 3.170270128527108e-06, + "loss": 0.19879608154296874, + "step": 111470 + }, + { + "epoch": 0.9638913628070661, + "grad_norm": 19.275345368682387, + "learning_rate": 3.1700667233741006e-06, + "loss": 0.0644012451171875, + "step": 111475 + }, + { + "epoch": 0.9639345963286093, + "grad_norm": 21.331717166301786, + "learning_rate": 3.1698633174367613e-06, + "loss": 0.078021240234375, + "step": 111480 + }, + { + "epoch": 0.9639778298501526, + "grad_norm": 9.67011877151677, + "learning_rate": 3.169659910716027e-06, + "loss": 0.063360595703125, + "step": 111485 + }, + { + "epoch": 0.9640210633716959, + "grad_norm": 27.70432872006361, + "learning_rate": 3.1694565032128354e-06, + "loss": 0.3993247985839844, + "step": 111490 + }, + { + "epoch": 0.9640642968932391, + "grad_norm": 47.64896043882806, + "learning_rate": 3.1692530949281255e-06, + "loss": 0.21357269287109376, + "step": 111495 + }, + { + "epoch": 0.9641075304147824, + "grad_norm": 24.836191615294613, + "learning_rate": 3.1690496858628362e-06, + "loss": 0.07510986328125, + "step": 111500 + }, + { + "epoch": 0.9641507639363257, + "grad_norm": 0.38855111997323794, + "learning_rate": 3.168846276017904e-06, + "loss": 0.10218582153320313, + "step": 111505 + }, + { + "epoch": 0.9641939974578689, + "grad_norm": 20.397617937381888, + "learning_rate": 3.1686428653942676e-06, + "loss": 0.12463035583496093, + "step": 111510 + }, + { + "epoch": 0.9642372309794122, + "grad_norm": 14.768819012817104, + "learning_rate": 3.168439453992866e-06, + "loss": 0.07274856567382812, + "step": 111515 + }, + { + "epoch": 0.9642804645009555, + "grad_norm": 5.284423578920956, + "learning_rate": 3.168236041814636e-06, + "loss": 0.09962310791015624, + "step": 111520 + }, + { + "epoch": 0.9643236980224987, + "grad_norm": 0.044729938131704484, + "learning_rate": 3.1680326288605158e-06, + "loss": 0.036957931518554685, + "step": 111525 + }, + { + "epoch": 0.964366931544042, + "grad_norm": 21.771480195915583, + "learning_rate": 3.167829215131445e-06, + "loss": 0.290472412109375, + "step": 111530 + }, + { + "epoch": 0.9644101650655853, + "grad_norm": 8.001638563621812, + "learning_rate": 3.1676258006283595e-06, + "loss": 0.045380783081054685, + "step": 111535 + }, + { + "epoch": 0.9644533985871285, + "grad_norm": 12.578705325790573, + "learning_rate": 3.1674223853521994e-06, + "loss": 0.20870437622070312, + "step": 111540 + }, + { + "epoch": 0.9644966321086718, + "grad_norm": 3.868108818105837, + "learning_rate": 3.1672189693039027e-06, + "loss": 0.02135772705078125, + "step": 111545 + }, + { + "epoch": 0.9645398656302151, + "grad_norm": 1.630618406625462, + "learning_rate": 3.1670155524844054e-06, + "loss": 0.37011566162109377, + "step": 111550 + }, + { + "epoch": 0.9645830991517583, + "grad_norm": 28.567434548703265, + "learning_rate": 3.1668121348946473e-06, + "loss": 0.399530029296875, + "step": 111555 + }, + { + "epoch": 0.9646263326733016, + "grad_norm": 0.7632349334868209, + "learning_rate": 3.1666087165355672e-06, + "loss": 0.05325164794921875, + "step": 111560 + }, + { + "epoch": 0.9646695661948448, + "grad_norm": 0.5661808924218537, + "learning_rate": 3.1664052974081025e-06, + "loss": 0.12611236572265624, + "step": 111565 + }, + { + "epoch": 0.9647127997163881, + "grad_norm": 9.733945935101103, + "learning_rate": 3.166201877513191e-06, + "loss": 0.06286087036132812, + "step": 111570 + }, + { + "epoch": 0.9647560332379314, + "grad_norm": 37.46253069564221, + "learning_rate": 3.1659984568517706e-06, + "loss": 0.37688369750976564, + "step": 111575 + }, + { + "epoch": 0.9647992667594746, + "grad_norm": 0.6409001033261402, + "learning_rate": 3.1657950354247804e-06, + "loss": 0.10157623291015624, + "step": 111580 + }, + { + "epoch": 0.9648425002810179, + "grad_norm": 5.8836765255030725, + "learning_rate": 3.1655916132331575e-06, + "loss": 0.135308837890625, + "step": 111585 + }, + { + "epoch": 0.9648857338025612, + "grad_norm": 0.08640360799576328, + "learning_rate": 3.1653881902778416e-06, + "loss": 0.0607269287109375, + "step": 111590 + }, + { + "epoch": 0.9649289673241044, + "grad_norm": 1.3159513727165817, + "learning_rate": 3.16518476655977e-06, + "loss": 0.455010986328125, + "step": 111595 + }, + { + "epoch": 0.9649722008456477, + "grad_norm": 6.161699352044521, + "learning_rate": 3.1649813420798803e-06, + "loss": 0.04412384033203125, + "step": 111600 + }, + { + "epoch": 0.965015434367191, + "grad_norm": 5.009293349064166, + "learning_rate": 3.164777916839112e-06, + "loss": 0.051715850830078125, + "step": 111605 + }, + { + "epoch": 0.9650586678887342, + "grad_norm": 1.163072494935025, + "learning_rate": 3.1645744908384014e-06, + "loss": 0.1844329833984375, + "step": 111610 + }, + { + "epoch": 0.9651019014102775, + "grad_norm": 0.5246402781236668, + "learning_rate": 3.1643710640786883e-06, + "loss": 0.015523529052734375, + "step": 111615 + }, + { + "epoch": 0.9651451349318207, + "grad_norm": 0.7563905599098358, + "learning_rate": 3.1641676365609108e-06, + "loss": 0.0524658203125, + "step": 111620 + }, + { + "epoch": 0.965188368453364, + "grad_norm": 17.509873426188815, + "learning_rate": 3.163964208286007e-06, + "loss": 0.2188426971435547, + "step": 111625 + }, + { + "epoch": 0.9652316019749073, + "grad_norm": 0.6122894937317457, + "learning_rate": 3.1637607792549135e-06, + "loss": 0.016362762451171874, + "step": 111630 + }, + { + "epoch": 0.9652748354964505, + "grad_norm": 3.05649798266547, + "learning_rate": 3.16355734946857e-06, + "loss": 0.04633827209472656, + "step": 111635 + }, + { + "epoch": 0.9653180690179938, + "grad_norm": 5.406906789133297, + "learning_rate": 3.1633539189279153e-06, + "loss": 0.14651947021484374, + "step": 111640 + }, + { + "epoch": 0.965361302539537, + "grad_norm": 0.7228387867516834, + "learning_rate": 3.1631504876338865e-06, + "loss": 0.059374237060546876, + "step": 111645 + }, + { + "epoch": 0.9654045360610803, + "grad_norm": 0.17513800096880588, + "learning_rate": 3.1629470555874215e-06, + "loss": 0.054058074951171875, + "step": 111650 + }, + { + "epoch": 0.9654477695826236, + "grad_norm": 0.8398743742913107, + "learning_rate": 3.1627436227894596e-06, + "loss": 0.42395477294921874, + "step": 111655 + }, + { + "epoch": 0.9654910031041668, + "grad_norm": 35.74326494675246, + "learning_rate": 3.162540189240938e-06, + "loss": 0.27625732421875, + "step": 111660 + }, + { + "epoch": 0.9655342366257101, + "grad_norm": 15.271654017260913, + "learning_rate": 3.1623367549427954e-06, + "loss": 0.09302520751953125, + "step": 111665 + }, + { + "epoch": 0.9655774701472534, + "grad_norm": 1.161903056728575, + "learning_rate": 3.1621333198959707e-06, + "loss": 0.05844039916992187, + "step": 111670 + }, + { + "epoch": 0.9656207036687966, + "grad_norm": 1.4687242558814013, + "learning_rate": 3.161929884101401e-06, + "loss": 0.2387847900390625, + "step": 111675 + }, + { + "epoch": 0.9656639371903399, + "grad_norm": 6.413451035046645, + "learning_rate": 3.1617264475600248e-06, + "loss": 0.062413597106933595, + "step": 111680 + }, + { + "epoch": 0.9657071707118832, + "grad_norm": 14.924321888185403, + "learning_rate": 3.1615230102727814e-06, + "loss": 0.304376220703125, + "step": 111685 + }, + { + "epoch": 0.9657504042334264, + "grad_norm": 29.807631782485675, + "learning_rate": 3.1613195722406073e-06, + "loss": 0.2135498046875, + "step": 111690 + }, + { + "epoch": 0.9657936377549697, + "grad_norm": 3.71436000365719, + "learning_rate": 3.1611161334644414e-06, + "loss": 0.05269317626953125, + "step": 111695 + }, + { + "epoch": 0.965836871276513, + "grad_norm": 2.762881446333561, + "learning_rate": 3.1609126939452226e-06, + "loss": 0.17137794494628905, + "step": 111700 + }, + { + "epoch": 0.9658801047980562, + "grad_norm": 3.9015693626532633, + "learning_rate": 3.1607092536838894e-06, + "loss": 0.05487270355224609, + "step": 111705 + }, + { + "epoch": 0.9659233383195995, + "grad_norm": 0.09198958442536617, + "learning_rate": 3.160505812681378e-06, + "loss": 0.117529296875, + "step": 111710 + }, + { + "epoch": 0.9659665718411428, + "grad_norm": 8.368820612422155, + "learning_rate": 3.1603023709386294e-06, + "loss": 0.25947036743164065, + "step": 111715 + }, + { + "epoch": 0.966009805362686, + "grad_norm": 5.875678509223494, + "learning_rate": 3.1600989284565782e-06, + "loss": 0.1608978271484375, + "step": 111720 + }, + { + "epoch": 0.9660530388842293, + "grad_norm": 3.466196052643621, + "learning_rate": 3.1598954852361664e-06, + "loss": 0.12402381896972656, + "step": 111725 + }, + { + "epoch": 0.9660962724057726, + "grad_norm": 0.22710054748477612, + "learning_rate": 3.1596920412783306e-06, + "loss": 0.29480743408203125, + "step": 111730 + }, + { + "epoch": 0.9661395059273158, + "grad_norm": 9.15433067950387, + "learning_rate": 3.15948859658401e-06, + "loss": 0.098052978515625, + "step": 111735 + }, + { + "epoch": 0.966182739448859, + "grad_norm": 1.0701932827292122, + "learning_rate": 3.1592851511541418e-06, + "loss": 0.01270294189453125, + "step": 111740 + }, + { + "epoch": 0.9662259729704024, + "grad_norm": 3.197996619138887, + "learning_rate": 3.1590817049896632e-06, + "loss": 0.0809539794921875, + "step": 111745 + }, + { + "epoch": 0.9662692064919456, + "grad_norm": 10.510328287819021, + "learning_rate": 3.1588782580915143e-06, + "loss": 0.0982086181640625, + "step": 111750 + }, + { + "epoch": 0.9663124400134888, + "grad_norm": 2.3247936832920475, + "learning_rate": 3.158674810460634e-06, + "loss": 0.4840038299560547, + "step": 111755 + }, + { + "epoch": 0.9663556735350322, + "grad_norm": 1.2570042742034138, + "learning_rate": 3.158471362097958e-06, + "loss": 0.03549041748046875, + "step": 111760 + }, + { + "epoch": 0.9663989070565754, + "grad_norm": 8.614694307519509, + "learning_rate": 3.1582679130044273e-06, + "loss": 0.1930248260498047, + "step": 111765 + }, + { + "epoch": 0.9664421405781186, + "grad_norm": 1.3179789333004905, + "learning_rate": 3.158064463180979e-06, + "loss": 0.27786941528320314, + "step": 111770 + }, + { + "epoch": 0.966485374099662, + "grad_norm": 7.614792168732223, + "learning_rate": 3.157861012628551e-06, + "loss": 0.04200439453125, + "step": 111775 + }, + { + "epoch": 0.9665286076212052, + "grad_norm": 38.21777876265045, + "learning_rate": 3.1576575613480814e-06, + "loss": 0.22525520324707032, + "step": 111780 + }, + { + "epoch": 0.9665718411427484, + "grad_norm": 6.480336945097095, + "learning_rate": 3.1574541093405095e-06, + "loss": 0.14050979614257814, + "step": 111785 + }, + { + "epoch": 0.9666150746642918, + "grad_norm": 1.1549676017752166, + "learning_rate": 3.1572506566067733e-06, + "loss": 0.08225936889648437, + "step": 111790 + }, + { + "epoch": 0.966658308185835, + "grad_norm": 0.786108670222589, + "learning_rate": 3.1570472031478113e-06, + "loss": 0.05968780517578125, + "step": 111795 + }, + { + "epoch": 0.9667015417073782, + "grad_norm": 4.694810329492065, + "learning_rate": 3.156843748964561e-06, + "loss": 0.05988807678222656, + "step": 111800 + }, + { + "epoch": 0.9667447752289215, + "grad_norm": 101.42754995960755, + "learning_rate": 3.156640294057961e-06, + "loss": 0.18579864501953125, + "step": 111805 + }, + { + "epoch": 0.9667880087504648, + "grad_norm": 0.6343330096997891, + "learning_rate": 3.1564368384289493e-06, + "loss": 0.1677093505859375, + "step": 111810 + }, + { + "epoch": 0.966831242272008, + "grad_norm": 28.172433636602744, + "learning_rate": 3.1562333820784663e-06, + "loss": 0.03244094848632813, + "step": 111815 + }, + { + "epoch": 0.9668744757935512, + "grad_norm": 2.6951427863371853, + "learning_rate": 3.1560299250074483e-06, + "loss": 0.03879051208496094, + "step": 111820 + }, + { + "epoch": 0.9669177093150946, + "grad_norm": 35.678522762489706, + "learning_rate": 3.155826467216833e-06, + "loss": 0.25325698852539064, + "step": 111825 + }, + { + "epoch": 0.9669609428366378, + "grad_norm": 22.67601290370916, + "learning_rate": 3.155623008707561e-06, + "loss": 0.140765380859375, + "step": 111830 + }, + { + "epoch": 0.967004176358181, + "grad_norm": 0.8962903540470353, + "learning_rate": 3.1554195494805688e-06, + "loss": 0.04200582504272461, + "step": 111835 + }, + { + "epoch": 0.9670474098797244, + "grad_norm": 0.8337747360970453, + "learning_rate": 3.1552160895367945e-06, + "loss": 0.0524688720703125, + "step": 111840 + }, + { + "epoch": 0.9670906434012676, + "grad_norm": 2.0147426728456383, + "learning_rate": 3.155012628877179e-06, + "loss": 0.5277637481689453, + "step": 111845 + }, + { + "epoch": 0.9671338769228108, + "grad_norm": 1.4767957889299845, + "learning_rate": 3.1548091675026587e-06, + "loss": 0.011673736572265624, + "step": 111850 + }, + { + "epoch": 0.9671771104443542, + "grad_norm": 10.284398719796128, + "learning_rate": 3.154605705414171e-06, + "loss": 0.256500244140625, + "step": 111855 + }, + { + "epoch": 0.9672203439658974, + "grad_norm": 3.902410795094992, + "learning_rate": 3.154402242612656e-06, + "loss": 0.15133819580078126, + "step": 111860 + }, + { + "epoch": 0.9672635774874406, + "grad_norm": 5.582078756307376, + "learning_rate": 3.154198779099052e-06, + "loss": 0.10440292358398437, + "step": 111865 + }, + { + "epoch": 0.967306811008984, + "grad_norm": 3.9740516406901008, + "learning_rate": 3.1539953148742963e-06, + "loss": 0.08783340454101562, + "step": 111870 + }, + { + "epoch": 0.9673500445305272, + "grad_norm": 15.717520208659689, + "learning_rate": 3.153791849939328e-06, + "loss": 0.33439178466796876, + "step": 111875 + }, + { + "epoch": 0.9673932780520704, + "grad_norm": 2.873382968962089, + "learning_rate": 3.153588384295086e-06, + "loss": 0.1401458740234375, + "step": 111880 + }, + { + "epoch": 0.9674365115736138, + "grad_norm": 114.52503476929193, + "learning_rate": 3.153384917942507e-06, + "loss": 0.24568862915039064, + "step": 111885 + }, + { + "epoch": 0.967479745095157, + "grad_norm": 0.6165893850512613, + "learning_rate": 3.15318145088253e-06, + "loss": 0.200372314453125, + "step": 111890 + }, + { + "epoch": 0.9675229786167002, + "grad_norm": 0.8960894777382598, + "learning_rate": 3.1529779831160943e-06, + "loss": 0.20200042724609374, + "step": 111895 + }, + { + "epoch": 0.9675662121382435, + "grad_norm": 0.31347291268896876, + "learning_rate": 3.1527745146441383e-06, + "loss": 0.10008392333984376, + "step": 111900 + }, + { + "epoch": 0.9676094456597868, + "grad_norm": 12.532735732444886, + "learning_rate": 3.152571045467599e-06, + "loss": 0.1606719970703125, + "step": 111905 + }, + { + "epoch": 0.96765267918133, + "grad_norm": 1.008416299381478, + "learning_rate": 3.1523675755874155e-06, + "loss": 0.3069114685058594, + "step": 111910 + }, + { + "epoch": 0.9676959127028733, + "grad_norm": 1.7594992135501966, + "learning_rate": 3.152164105004526e-06, + "loss": 0.04278106689453125, + "step": 111915 + }, + { + "epoch": 0.9677391462244166, + "grad_norm": 2.4194700470379775, + "learning_rate": 3.151960633719869e-06, + "loss": 0.07512741088867188, + "step": 111920 + }, + { + "epoch": 0.9677823797459598, + "grad_norm": 20.78353322905245, + "learning_rate": 3.151757161734384e-06, + "loss": 0.11029510498046875, + "step": 111925 + }, + { + "epoch": 0.967825613267503, + "grad_norm": 1.4851135653411534, + "learning_rate": 3.151553689049008e-06, + "loss": 0.13405380249023438, + "step": 111930 + }, + { + "epoch": 0.9678688467890464, + "grad_norm": 9.40986924296542, + "learning_rate": 3.151350215664679e-06, + "loss": 0.145941162109375, + "step": 111935 + }, + { + "epoch": 0.9679120803105896, + "grad_norm": 0.07040212937710422, + "learning_rate": 3.1511467415823375e-06, + "loss": 0.12109107971191406, + "step": 111940 + }, + { + "epoch": 0.9679553138321328, + "grad_norm": 8.233644228161554, + "learning_rate": 3.1509432668029193e-06, + "loss": 0.21241912841796876, + "step": 111945 + }, + { + "epoch": 0.9679985473536762, + "grad_norm": 8.036309806579299, + "learning_rate": 3.150739791327364e-06, + "loss": 0.24519195556640624, + "step": 111950 + }, + { + "epoch": 0.9680417808752194, + "grad_norm": 14.067143500085859, + "learning_rate": 3.150536315156611e-06, + "loss": 0.16845321655273438, + "step": 111955 + }, + { + "epoch": 0.9680850143967626, + "grad_norm": 0.7165533792329418, + "learning_rate": 3.1503328382915983e-06, + "loss": 0.013926315307617187, + "step": 111960 + }, + { + "epoch": 0.968128247918306, + "grad_norm": 11.205171007748389, + "learning_rate": 3.1501293607332624e-06, + "loss": 0.06540985107421875, + "step": 111965 + }, + { + "epoch": 0.9681714814398492, + "grad_norm": 2.4971329837111607, + "learning_rate": 3.1499258824825443e-06, + "loss": 0.16744461059570312, + "step": 111970 + }, + { + "epoch": 0.9682147149613924, + "grad_norm": 1.1618415909638184, + "learning_rate": 3.1497224035403805e-06, + "loss": 0.03551673889160156, + "step": 111975 + }, + { + "epoch": 0.9682579484829358, + "grad_norm": 26.456816682356152, + "learning_rate": 3.14951892390771e-06, + "loss": 0.219525146484375, + "step": 111980 + }, + { + "epoch": 0.968301182004479, + "grad_norm": 1.269954013170479, + "learning_rate": 3.149315443585472e-06, + "loss": 0.05353851318359375, + "step": 111985 + }, + { + "epoch": 0.9683444155260222, + "grad_norm": 0.34066284139867437, + "learning_rate": 3.1491119625746047e-06, + "loss": 0.13322601318359376, + "step": 111990 + }, + { + "epoch": 0.9683876490475655, + "grad_norm": 4.192547570405804, + "learning_rate": 3.1489084808760463e-06, + "loss": 0.0954681396484375, + "step": 111995 + }, + { + "epoch": 0.9684308825691088, + "grad_norm": 0.9722101039556812, + "learning_rate": 3.148704998490734e-06, + "loss": 0.22932510375976561, + "step": 112000 + }, + { + "epoch": 0.968474116090652, + "grad_norm": 11.054937212990527, + "learning_rate": 3.1485015154196075e-06, + "loss": 0.19906463623046874, + "step": 112005 + }, + { + "epoch": 0.9685173496121953, + "grad_norm": 8.918246366490239, + "learning_rate": 3.1482980316636064e-06, + "loss": 0.1952392578125, + "step": 112010 + }, + { + "epoch": 0.9685605831337386, + "grad_norm": 4.657074576914169, + "learning_rate": 3.1480945472236665e-06, + "loss": 0.14338455200195313, + "step": 112015 + }, + { + "epoch": 0.9686038166552818, + "grad_norm": 1.7702912805173818, + "learning_rate": 3.1478910621007283e-06, + "loss": 0.09913063049316406, + "step": 112020 + }, + { + "epoch": 0.9686470501768251, + "grad_norm": 12.263103287106578, + "learning_rate": 3.1476875762957303e-06, + "loss": 0.35574188232421877, + "step": 112025 + }, + { + "epoch": 0.9686902836983684, + "grad_norm": 2.722762508213224, + "learning_rate": 3.1474840898096095e-06, + "loss": 0.33348808288574217, + "step": 112030 + }, + { + "epoch": 0.9687335172199116, + "grad_norm": 39.33603099574927, + "learning_rate": 3.1472806026433042e-06, + "loss": 0.2561618804931641, + "step": 112035 + }, + { + "epoch": 0.9687767507414549, + "grad_norm": 9.789941024505945, + "learning_rate": 3.1470771147977552e-06, + "loss": 0.043351173400878906, + "step": 112040 + }, + { + "epoch": 0.9688199842629982, + "grad_norm": 9.442015470116965, + "learning_rate": 3.146873626273899e-06, + "loss": 0.09213714599609375, + "step": 112045 + }, + { + "epoch": 0.9688632177845414, + "grad_norm": 1.422449207073487, + "learning_rate": 3.146670137072675e-06, + "loss": 0.0313079833984375, + "step": 112050 + }, + { + "epoch": 0.9689064513060847, + "grad_norm": 10.024185212658475, + "learning_rate": 3.14646664719502e-06, + "loss": 0.0592010498046875, + "step": 112055 + }, + { + "epoch": 0.968949684827628, + "grad_norm": 0.8555661368975166, + "learning_rate": 3.146263156641875e-06, + "loss": 0.022349739074707033, + "step": 112060 + }, + { + "epoch": 0.9689929183491712, + "grad_norm": 19.236906351513422, + "learning_rate": 3.1460596654141766e-06, + "loss": 0.07822265625, + "step": 112065 + }, + { + "epoch": 0.9690361518707145, + "grad_norm": 40.07691380101301, + "learning_rate": 3.1458561735128644e-06, + "loss": 0.24430465698242188, + "step": 112070 + }, + { + "epoch": 0.9690793853922577, + "grad_norm": 45.058198085174695, + "learning_rate": 3.1456526809388772e-06, + "loss": 0.1169525146484375, + "step": 112075 + }, + { + "epoch": 0.969122618913801, + "grad_norm": 0.23117587642914517, + "learning_rate": 3.1454491876931514e-06, + "loss": 0.06745948791503906, + "step": 112080 + }, + { + "epoch": 0.9691658524353443, + "grad_norm": 1.1023450086790592, + "learning_rate": 3.1452456937766263e-06, + "loss": 0.14293365478515624, + "step": 112085 + }, + { + "epoch": 0.9692090859568875, + "grad_norm": 21.046405882459656, + "learning_rate": 3.145042199190242e-06, + "loss": 0.17830810546875, + "step": 112090 + }, + { + "epoch": 0.9692523194784308, + "grad_norm": 2.6877378916885983, + "learning_rate": 3.1448387039349357e-06, + "loss": 0.1495513916015625, + "step": 112095 + }, + { + "epoch": 0.969295552999974, + "grad_norm": 0.1485376971909444, + "learning_rate": 3.1446352080116464e-06, + "loss": 0.23016586303710937, + "step": 112100 + }, + { + "epoch": 0.9693387865215173, + "grad_norm": 0.14365361498979148, + "learning_rate": 3.1444317114213123e-06, + "loss": 0.057233428955078124, + "step": 112105 + }, + { + "epoch": 0.9693820200430606, + "grad_norm": 11.334846932635266, + "learning_rate": 3.144228214164871e-06, + "loss": 0.34395751953125, + "step": 112110 + }, + { + "epoch": 0.9694252535646039, + "grad_norm": 7.917835980876091, + "learning_rate": 3.144024716243262e-06, + "loss": 0.1351837158203125, + "step": 112115 + }, + { + "epoch": 0.9694684870861471, + "grad_norm": 1.9086693275822577, + "learning_rate": 3.143821217657425e-06, + "loss": 0.024000930786132812, + "step": 112120 + }, + { + "epoch": 0.9695117206076904, + "grad_norm": 1.364474486803534, + "learning_rate": 3.143617718408296e-06, + "loss": 0.08633060455322265, + "step": 112125 + }, + { + "epoch": 0.9695549541292336, + "grad_norm": 5.361341557245814, + "learning_rate": 3.1434142184968157e-06, + "loss": 0.18327293395996094, + "step": 112130 + }, + { + "epoch": 0.9695981876507769, + "grad_norm": 7.609562423269734, + "learning_rate": 3.143210717923922e-06, + "loss": 0.13351192474365234, + "step": 112135 + }, + { + "epoch": 0.9696414211723202, + "grad_norm": 1.1663431894155192, + "learning_rate": 3.1430072166905514e-06, + "loss": 0.1311553955078125, + "step": 112140 + }, + { + "epoch": 0.9696846546938634, + "grad_norm": 19.927920259996966, + "learning_rate": 3.1428037147976453e-06, + "loss": 0.07166824340820313, + "step": 112145 + }, + { + "epoch": 0.9697278882154067, + "grad_norm": 0.14412650615522812, + "learning_rate": 3.142600212246141e-06, + "loss": 0.007953643798828125, + "step": 112150 + }, + { + "epoch": 0.96977112173695, + "grad_norm": 5.58666718934744, + "learning_rate": 3.142396709036977e-06, + "loss": 0.18309326171875, + "step": 112155 + }, + { + "epoch": 0.9698143552584932, + "grad_norm": 5.434454445713974, + "learning_rate": 3.142193205171092e-06, + "loss": 0.09711570739746093, + "step": 112160 + }, + { + "epoch": 0.9698575887800365, + "grad_norm": 9.365522171043045, + "learning_rate": 3.141989700649425e-06, + "loss": 0.13214492797851562, + "step": 112165 + }, + { + "epoch": 0.9699008223015797, + "grad_norm": 16.404232605463406, + "learning_rate": 3.141786195472913e-06, + "loss": 0.211138916015625, + "step": 112170 + }, + { + "epoch": 0.969944055823123, + "grad_norm": 17.078469351326167, + "learning_rate": 3.1415826896424956e-06, + "loss": 0.1660064697265625, + "step": 112175 + }, + { + "epoch": 0.9699872893446663, + "grad_norm": 3.1371354784956766, + "learning_rate": 3.1413791831591127e-06, + "loss": 0.29605884552001954, + "step": 112180 + }, + { + "epoch": 0.9700305228662095, + "grad_norm": 10.058349686345233, + "learning_rate": 3.1411756760237008e-06, + "loss": 0.43728790283203123, + "step": 112185 + }, + { + "epoch": 0.9700737563877528, + "grad_norm": 1.6923955433590947, + "learning_rate": 3.140972168237199e-06, + "loss": 0.0413482666015625, + "step": 112190 + }, + { + "epoch": 0.9701169899092961, + "grad_norm": 6.30586082390097, + "learning_rate": 3.1407686598005465e-06, + "loss": 0.095794677734375, + "step": 112195 + }, + { + "epoch": 0.9701602234308393, + "grad_norm": 0.4755059212388937, + "learning_rate": 3.1405651507146803e-06, + "loss": 0.319110107421875, + "step": 112200 + }, + { + "epoch": 0.9702034569523826, + "grad_norm": 0.17770148011807452, + "learning_rate": 3.1403616409805404e-06, + "loss": 0.34000511169433595, + "step": 112205 + }, + { + "epoch": 0.9702466904739259, + "grad_norm": 0.8021051375186511, + "learning_rate": 3.1401581305990657e-06, + "loss": 0.10572280883789062, + "step": 112210 + }, + { + "epoch": 0.9702899239954691, + "grad_norm": 4.79578104870636, + "learning_rate": 3.139954619571194e-06, + "loss": 0.11162109375, + "step": 112215 + }, + { + "epoch": 0.9703331575170124, + "grad_norm": 21.211778711587037, + "learning_rate": 3.139751107897863e-06, + "loss": 0.05276813507080078, + "step": 112220 + }, + { + "epoch": 0.9703763910385557, + "grad_norm": 8.490290454926354, + "learning_rate": 3.139547595580013e-06, + "loss": 0.1382232666015625, + "step": 112225 + }, + { + "epoch": 0.9704196245600989, + "grad_norm": 8.660778051325881, + "learning_rate": 3.1393440826185816e-06, + "loss": 0.115118408203125, + "step": 112230 + }, + { + "epoch": 0.9704628580816422, + "grad_norm": 14.554669313561483, + "learning_rate": 3.139140569014508e-06, + "loss": 0.35175514221191406, + "step": 112235 + }, + { + "epoch": 0.9705060916031855, + "grad_norm": 38.97510287851103, + "learning_rate": 3.1389370547687303e-06, + "loss": 0.12252655029296874, + "step": 112240 + }, + { + "epoch": 0.9705493251247287, + "grad_norm": 9.6693055339356, + "learning_rate": 3.138733539882187e-06, + "loss": 0.06798782348632812, + "step": 112245 + }, + { + "epoch": 0.9705925586462719, + "grad_norm": 1.033794332503491, + "learning_rate": 3.138530024355817e-06, + "loss": 0.17201080322265624, + "step": 112250 + }, + { + "epoch": 0.9706357921678153, + "grad_norm": 3.1810116169875493, + "learning_rate": 3.1383265081905575e-06, + "loss": 0.05144195556640625, + "step": 112255 + }, + { + "epoch": 0.9706790256893585, + "grad_norm": 1.7999014700850495, + "learning_rate": 3.13812299138735e-06, + "loss": 0.09677734375, + "step": 112260 + }, + { + "epoch": 0.9707222592109017, + "grad_norm": 1.7664180176355955, + "learning_rate": 3.137919473947131e-06, + "loss": 0.11680440902709961, + "step": 112265 + }, + { + "epoch": 0.9707654927324451, + "grad_norm": 6.300396036822842, + "learning_rate": 3.1377159558708397e-06, + "loss": 0.10479469299316406, + "step": 112270 + }, + { + "epoch": 0.9708087262539883, + "grad_norm": 1.4352349280744574, + "learning_rate": 3.1375124371594147e-06, + "loss": 0.07217254638671874, + "step": 112275 + }, + { + "epoch": 0.9708519597755315, + "grad_norm": 19.56731760971623, + "learning_rate": 3.1373089178137937e-06, + "loss": 0.11290740966796875, + "step": 112280 + }, + { + "epoch": 0.9708951932970749, + "grad_norm": 0.4785584603469558, + "learning_rate": 3.137105397834916e-06, + "loss": 0.1267364501953125, + "step": 112285 + }, + { + "epoch": 0.9709384268186181, + "grad_norm": 17.476811482966852, + "learning_rate": 3.1369018772237216e-06, + "loss": 0.12689056396484374, + "step": 112290 + }, + { + "epoch": 0.9709816603401613, + "grad_norm": 29.09879934349467, + "learning_rate": 3.136698355981147e-06, + "loss": 0.20855941772460937, + "step": 112295 + }, + { + "epoch": 0.9710248938617047, + "grad_norm": 3.7726764518953106, + "learning_rate": 3.1364948341081323e-06, + "loss": 0.02257709503173828, + "step": 112300 + }, + { + "epoch": 0.9710681273832479, + "grad_norm": 10.500734817044664, + "learning_rate": 3.1362913116056153e-06, + "loss": 0.04398841857910156, + "step": 112305 + }, + { + "epoch": 0.9711113609047911, + "grad_norm": 9.979371167001656, + "learning_rate": 3.1360877884745338e-06, + "loss": 0.4746044158935547, + "step": 112310 + }, + { + "epoch": 0.9711545944263344, + "grad_norm": 2.4263526874420087, + "learning_rate": 3.135884264715828e-06, + "loss": 0.16455078125, + "step": 112315 + }, + { + "epoch": 0.9711978279478777, + "grad_norm": 6.138135695113758, + "learning_rate": 3.1356807403304368e-06, + "loss": 0.0545623779296875, + "step": 112320 + }, + { + "epoch": 0.9712410614694209, + "grad_norm": 0.4573485988696741, + "learning_rate": 3.135477215319297e-06, + "loss": 0.030426788330078124, + "step": 112325 + }, + { + "epoch": 0.9712842949909642, + "grad_norm": 10.029917235969409, + "learning_rate": 3.1352736896833485e-06, + "loss": 0.307861328125, + "step": 112330 + }, + { + "epoch": 0.9713275285125075, + "grad_norm": 1.583411430190494, + "learning_rate": 3.13507016342353e-06, + "loss": 0.08273773193359375, + "step": 112335 + }, + { + "epoch": 0.9713707620340507, + "grad_norm": 3.5029106105644012, + "learning_rate": 3.1348666365407792e-06, + "loss": 0.07546234130859375, + "step": 112340 + }, + { + "epoch": 0.9714139955555939, + "grad_norm": 4.607959913373419, + "learning_rate": 3.1346631090360363e-06, + "loss": 0.15836620330810547, + "step": 112345 + }, + { + "epoch": 0.9714572290771373, + "grad_norm": 1.2383417070517395, + "learning_rate": 3.1344595809102383e-06, + "loss": 0.0877777099609375, + "step": 112350 + }, + { + "epoch": 0.9715004625986805, + "grad_norm": 1.1446297632538642, + "learning_rate": 3.1342560521643253e-06, + "loss": 0.0344085693359375, + "step": 112355 + }, + { + "epoch": 0.9715436961202237, + "grad_norm": 5.731111292275337, + "learning_rate": 3.134052522799235e-06, + "loss": 0.112066650390625, + "step": 112360 + }, + { + "epoch": 0.9715869296417671, + "grad_norm": 6.667840541709808, + "learning_rate": 3.1338489928159064e-06, + "loss": 0.12794570922851561, + "step": 112365 + }, + { + "epoch": 0.9716301631633103, + "grad_norm": 15.425161700231707, + "learning_rate": 3.133645462215277e-06, + "loss": 0.22672252655029296, + "step": 112370 + }, + { + "epoch": 0.9716733966848535, + "grad_norm": 1.8016291630637076, + "learning_rate": 3.1334419309982874e-06, + "loss": 0.10675773620605469, + "step": 112375 + }, + { + "epoch": 0.9717166302063969, + "grad_norm": 0.9165758838368898, + "learning_rate": 3.133238399165875e-06, + "loss": 0.12728271484375, + "step": 112380 + }, + { + "epoch": 0.9717598637279401, + "grad_norm": 1.0605142398528387, + "learning_rate": 3.13303486671898e-06, + "loss": 0.2841835021972656, + "step": 112385 + }, + { + "epoch": 0.9718030972494833, + "grad_norm": 3.751246243348125, + "learning_rate": 3.1328313336585393e-06, + "loss": 0.26047744750976565, + "step": 112390 + }, + { + "epoch": 0.9718463307710267, + "grad_norm": 17.21059973742275, + "learning_rate": 3.132627799985492e-06, + "loss": 0.4910894393920898, + "step": 112395 + }, + { + "epoch": 0.9718895642925699, + "grad_norm": 2.649082246263759, + "learning_rate": 3.1324242657007767e-06, + "loss": 0.1866546630859375, + "step": 112400 + }, + { + "epoch": 0.9719327978141131, + "grad_norm": 0.1389796853535103, + "learning_rate": 3.1322207308053334e-06, + "loss": 0.02000885009765625, + "step": 112405 + }, + { + "epoch": 0.9719760313356565, + "grad_norm": 68.12269157682525, + "learning_rate": 3.1320171953000987e-06, + "loss": 0.29587478637695314, + "step": 112410 + }, + { + "epoch": 0.9720192648571997, + "grad_norm": 3.6785269741982543, + "learning_rate": 3.131813659186013e-06, + "loss": 0.06356925964355468, + "step": 112415 + }, + { + "epoch": 0.9720624983787429, + "grad_norm": 8.2144882237446, + "learning_rate": 3.1316101224640147e-06, + "loss": 0.1302581787109375, + "step": 112420 + }, + { + "epoch": 0.9721057319002862, + "grad_norm": 4.335059872814892, + "learning_rate": 3.131406585135041e-06, + "loss": 0.0595733642578125, + "step": 112425 + }, + { + "epoch": 0.9721489654218295, + "grad_norm": 20.78220265022481, + "learning_rate": 3.131203047200032e-06, + "loss": 0.2497316360473633, + "step": 112430 + }, + { + "epoch": 0.9721921989433727, + "grad_norm": 12.376428479030924, + "learning_rate": 3.130999508659927e-06, + "loss": 0.07894363403320312, + "step": 112435 + }, + { + "epoch": 0.972235432464916, + "grad_norm": 28.84571803929129, + "learning_rate": 3.1307959695156633e-06, + "loss": 0.13666648864746095, + "step": 112440 + }, + { + "epoch": 0.9722786659864593, + "grad_norm": 5.774999466959954, + "learning_rate": 3.1305924297681802e-06, + "loss": 0.06717529296875, + "step": 112445 + }, + { + "epoch": 0.9723218995080025, + "grad_norm": 5.4726339613058235, + "learning_rate": 3.1303888894184164e-06, + "loss": 0.1015472412109375, + "step": 112450 + }, + { + "epoch": 0.9723651330295457, + "grad_norm": 14.338801032308742, + "learning_rate": 3.1301853484673105e-06, + "loss": 0.20790367126464843, + "step": 112455 + }, + { + "epoch": 0.9724083665510891, + "grad_norm": 11.239584748438478, + "learning_rate": 3.1299818069158005e-06, + "loss": 0.16177215576171874, + "step": 112460 + }, + { + "epoch": 0.9724516000726323, + "grad_norm": 14.180611969325716, + "learning_rate": 3.1297782647648276e-06, + "loss": 0.27901554107666016, + "step": 112465 + }, + { + "epoch": 0.9724948335941755, + "grad_norm": 18.148694685107206, + "learning_rate": 3.1295747220153276e-06, + "loss": 0.1513946533203125, + "step": 112470 + }, + { + "epoch": 0.9725380671157189, + "grad_norm": 4.864213951926714, + "learning_rate": 3.1293711786682407e-06, + "loss": 0.1021942138671875, + "step": 112475 + }, + { + "epoch": 0.9725813006372621, + "grad_norm": 7.8336238308754345, + "learning_rate": 3.1291676347245047e-06, + "loss": 0.056584930419921874, + "step": 112480 + }, + { + "epoch": 0.9726245341588053, + "grad_norm": 1.7638397522101636, + "learning_rate": 3.12896409018506e-06, + "loss": 0.20341796875, + "step": 112485 + }, + { + "epoch": 0.9726677676803487, + "grad_norm": 9.474593276454353, + "learning_rate": 3.128760545050844e-06, + "loss": 0.03511505126953125, + "step": 112490 + }, + { + "epoch": 0.9727110012018919, + "grad_norm": 4.817746632550351, + "learning_rate": 3.128556999322796e-06, + "loss": 0.06170578002929687, + "step": 112495 + }, + { + "epoch": 0.9727542347234351, + "grad_norm": 2.118174927308956, + "learning_rate": 3.1283534530018546e-06, + "loss": 0.10417976379394531, + "step": 112500 + }, + { + "epoch": 0.9727974682449785, + "grad_norm": 37.24076807366444, + "learning_rate": 3.1281499060889572e-06, + "loss": 0.18379287719726561, + "step": 112505 + }, + { + "epoch": 0.9728407017665217, + "grad_norm": 0.45489247856667786, + "learning_rate": 3.1279463585850448e-06, + "loss": 0.30265274047851565, + "step": 112510 + }, + { + "epoch": 0.9728839352880649, + "grad_norm": 2.8965793489404255, + "learning_rate": 3.127742810491055e-06, + "loss": 0.20064697265625, + "step": 112515 + }, + { + "epoch": 0.9729271688096082, + "grad_norm": 7.514834004589916, + "learning_rate": 3.127539261807927e-06, + "loss": 0.039080047607421876, + "step": 112520 + }, + { + "epoch": 0.9729704023311515, + "grad_norm": 25.532656637848582, + "learning_rate": 3.127335712536598e-06, + "loss": 0.0952056884765625, + "step": 112525 + }, + { + "epoch": 0.9730136358526947, + "grad_norm": 7.21954384055485, + "learning_rate": 3.127132162678009e-06, + "loss": 0.11132888793945313, + "step": 112530 + }, + { + "epoch": 0.973056869374238, + "grad_norm": 11.53113099353969, + "learning_rate": 3.126928612233097e-06, + "loss": 0.10676651000976563, + "step": 112535 + }, + { + "epoch": 0.9731001028957813, + "grad_norm": 3.3906367155378345, + "learning_rate": 3.126725061202802e-06, + "loss": 0.07578048706054688, + "step": 112540 + }, + { + "epoch": 0.9731433364173245, + "grad_norm": 7.367010858078704, + "learning_rate": 3.126521509588062e-06, + "loss": 0.11638088226318359, + "step": 112545 + }, + { + "epoch": 0.9731865699388678, + "grad_norm": 1.0234233044931669, + "learning_rate": 3.126317957389816e-06, + "loss": 0.14570770263671876, + "step": 112550 + }, + { + "epoch": 0.9732298034604111, + "grad_norm": 5.513547819349191, + "learning_rate": 3.1261144046090025e-06, + "loss": 0.21710357666015626, + "step": 112555 + }, + { + "epoch": 0.9732730369819543, + "grad_norm": 1.2505658098271606, + "learning_rate": 3.1259108512465616e-06, + "loss": 0.36048126220703125, + "step": 112560 + }, + { + "epoch": 0.9733162705034976, + "grad_norm": 0.2714488120279901, + "learning_rate": 3.1257072973034297e-06, + "loss": 0.029947662353515626, + "step": 112565 + }, + { + "epoch": 0.9733595040250409, + "grad_norm": 13.755789369836487, + "learning_rate": 3.1255037427805466e-06, + "loss": 0.04122314453125, + "step": 112570 + }, + { + "epoch": 0.9734027375465841, + "grad_norm": 2.8855567984004433, + "learning_rate": 3.1253001876788528e-06, + "loss": 0.17257080078125, + "step": 112575 + }, + { + "epoch": 0.9734459710681274, + "grad_norm": 27.944364863567497, + "learning_rate": 3.1250966319992847e-06, + "loss": 0.12941226959228516, + "step": 112580 + }, + { + "epoch": 0.9734892045896707, + "grad_norm": 28.008865347292456, + "learning_rate": 3.124893075742782e-06, + "loss": 0.1352558135986328, + "step": 112585 + }, + { + "epoch": 0.9735324381112139, + "grad_norm": 5.1104584026127, + "learning_rate": 3.1246895189102837e-06, + "loss": 0.05038604736328125, + "step": 112590 + }, + { + "epoch": 0.9735756716327572, + "grad_norm": 10.37294968599793, + "learning_rate": 3.1244859615027277e-06, + "loss": 0.2496612548828125, + "step": 112595 + }, + { + "epoch": 0.9736189051543004, + "grad_norm": 0.6456253422131006, + "learning_rate": 3.1242824035210545e-06, + "loss": 0.08853759765625, + "step": 112600 + }, + { + "epoch": 0.9736621386758437, + "grad_norm": 22.993450090803567, + "learning_rate": 3.124078844966201e-06, + "loss": 0.09107408523559571, + "step": 112605 + }, + { + "epoch": 0.973705372197387, + "grad_norm": 16.07661114990637, + "learning_rate": 3.123875285839107e-06, + "loss": 0.2722206115722656, + "step": 112610 + }, + { + "epoch": 0.9737486057189302, + "grad_norm": 0.6405926529353335, + "learning_rate": 3.123671726140712e-06, + "loss": 0.055517578125, + "step": 112615 + }, + { + "epoch": 0.9737918392404735, + "grad_norm": 7.234211993386434, + "learning_rate": 3.123468165871953e-06, + "loss": 0.0474090576171875, + "step": 112620 + }, + { + "epoch": 0.9738350727620168, + "grad_norm": 0.18555459143090622, + "learning_rate": 3.123264605033769e-06, + "loss": 0.2745487213134766, + "step": 112625 + }, + { + "epoch": 0.97387830628356, + "grad_norm": 1.3564551838417205, + "learning_rate": 3.123061043627101e-06, + "loss": 0.030522489547729494, + "step": 112630 + }, + { + "epoch": 0.9739215398051033, + "grad_norm": 15.432300797258845, + "learning_rate": 3.1228574816528854e-06, + "loss": 0.07772636413574219, + "step": 112635 + }, + { + "epoch": 0.9739647733266465, + "grad_norm": 0.9528505595319247, + "learning_rate": 3.1226539191120633e-06, + "loss": 0.0460052490234375, + "step": 112640 + }, + { + "epoch": 0.9740080068481898, + "grad_norm": 0.325837430099371, + "learning_rate": 3.1224503560055714e-06, + "loss": 0.12710418701171874, + "step": 112645 + }, + { + "epoch": 0.9740512403697331, + "grad_norm": 59.55292965697087, + "learning_rate": 3.1222467923343483e-06, + "loss": 0.35936279296875, + "step": 112650 + }, + { + "epoch": 0.9740944738912763, + "grad_norm": 1.4511076817513449, + "learning_rate": 3.1220432280993346e-06, + "loss": 0.049249267578125, + "step": 112655 + }, + { + "epoch": 0.9741377074128196, + "grad_norm": 3.8447043643982206, + "learning_rate": 3.1218396633014687e-06, + "loss": 0.1094970703125, + "step": 112660 + }, + { + "epoch": 0.9741809409343629, + "grad_norm": 1.1450999715910828, + "learning_rate": 3.1216360979416883e-06, + "loss": 0.15178604125976564, + "step": 112665 + }, + { + "epoch": 0.9742241744559061, + "grad_norm": 28.469532436735367, + "learning_rate": 3.1214325320209333e-06, + "loss": 0.06567153930664063, + "step": 112670 + }, + { + "epoch": 0.9742674079774494, + "grad_norm": 2.787086007997781, + "learning_rate": 3.1212289655401425e-06, + "loss": 0.060797119140625, + "step": 112675 + }, + { + "epoch": 0.9743106414989927, + "grad_norm": 1.7721360155083525, + "learning_rate": 3.121025398500253e-06, + "loss": 0.1663848876953125, + "step": 112680 + }, + { + "epoch": 0.9743538750205359, + "grad_norm": 26.66909657059564, + "learning_rate": 3.1208218309022066e-06, + "loss": 0.11538238525390625, + "step": 112685 + }, + { + "epoch": 0.9743971085420792, + "grad_norm": 22.768597896623806, + "learning_rate": 3.120618262746941e-06, + "loss": 0.1206268310546875, + "step": 112690 + }, + { + "epoch": 0.9744403420636224, + "grad_norm": 4.142742444096484, + "learning_rate": 3.120414694035394e-06, + "loss": 0.2339771270751953, + "step": 112695 + }, + { + "epoch": 0.9744835755851657, + "grad_norm": 6.97260807349374, + "learning_rate": 3.1202111247685044e-06, + "loss": 0.186614990234375, + "step": 112700 + }, + { + "epoch": 0.974526809106709, + "grad_norm": 1.0702959565470365, + "learning_rate": 3.120007554947212e-06, + "loss": 0.1267253875732422, + "step": 112705 + }, + { + "epoch": 0.9745700426282522, + "grad_norm": 0.5098692083595936, + "learning_rate": 3.1198039845724556e-06, + "loss": 0.045205307006835935, + "step": 112710 + }, + { + "epoch": 0.9746132761497955, + "grad_norm": 81.22081124167059, + "learning_rate": 3.119600413645174e-06, + "loss": 0.3878509521484375, + "step": 112715 + }, + { + "epoch": 0.9746565096713388, + "grad_norm": 2.279938464623643, + "learning_rate": 3.1193968421663055e-06, + "loss": 0.08589401245117187, + "step": 112720 + }, + { + "epoch": 0.974699743192882, + "grad_norm": 6.757385644547692, + "learning_rate": 3.1191932701367895e-06, + "loss": 0.021672821044921874, + "step": 112725 + }, + { + "epoch": 0.9747429767144253, + "grad_norm": 35.01263319447369, + "learning_rate": 3.1189896975575644e-06, + "loss": 0.5070953369140625, + "step": 112730 + }, + { + "epoch": 0.9747862102359686, + "grad_norm": 0.7056123343383, + "learning_rate": 3.118786124429569e-06, + "loss": 0.018229293823242187, + "step": 112735 + }, + { + "epoch": 0.9748294437575118, + "grad_norm": 9.017606300649136, + "learning_rate": 3.118582550753744e-06, + "loss": 0.594525146484375, + "step": 112740 + }, + { + "epoch": 0.9748726772790551, + "grad_norm": 1.8409368403965862, + "learning_rate": 3.1183789765310253e-06, + "loss": 0.24191741943359374, + "step": 112745 + }, + { + "epoch": 0.9749159108005984, + "grad_norm": 0.8684279188715408, + "learning_rate": 3.1181754017623535e-06, + "loss": 0.06087303161621094, + "step": 112750 + }, + { + "epoch": 0.9749591443221416, + "grad_norm": 8.707864856765651, + "learning_rate": 3.117971826448668e-06, + "loss": 0.08420639038085938, + "step": 112755 + }, + { + "epoch": 0.9750023778436849, + "grad_norm": 0.9454906632910026, + "learning_rate": 3.117768250590906e-06, + "loss": 0.009944534301757813, + "step": 112760 + }, + { + "epoch": 0.9750456113652282, + "grad_norm": 2.7697685832728416, + "learning_rate": 3.1175646741900073e-06, + "loss": 0.0764404296875, + "step": 112765 + }, + { + "epoch": 0.9750888448867714, + "grad_norm": 2.835760869697227, + "learning_rate": 3.1173610972469108e-06, + "loss": 0.34841766357421877, + "step": 112770 + }, + { + "epoch": 0.9751320784083146, + "grad_norm": 7.070760489845002, + "learning_rate": 3.1171575197625555e-06, + "loss": 0.071087646484375, + "step": 112775 + }, + { + "epoch": 0.975175311929858, + "grad_norm": 9.152944741408303, + "learning_rate": 3.1169539417378797e-06, + "loss": 0.18740692138671874, + "step": 112780 + }, + { + "epoch": 0.9752185454514012, + "grad_norm": 6.524946099012612, + "learning_rate": 3.116750363173823e-06, + "loss": 0.06823501586914063, + "step": 112785 + }, + { + "epoch": 0.9752617789729444, + "grad_norm": 7.79963902042771, + "learning_rate": 3.116546784071324e-06, + "loss": 0.06704330444335938, + "step": 112790 + }, + { + "epoch": 0.9753050124944878, + "grad_norm": 10.152721076861141, + "learning_rate": 3.116343204431321e-06, + "loss": 0.1120870590209961, + "step": 112795 + }, + { + "epoch": 0.975348246016031, + "grad_norm": 0.9997394849310218, + "learning_rate": 3.116139624254754e-06, + "loss": 0.061277008056640624, + "step": 112800 + }, + { + "epoch": 0.9753914795375742, + "grad_norm": 0.9940034116206697, + "learning_rate": 3.115936043542561e-06, + "loss": 0.06669464111328124, + "step": 112805 + }, + { + "epoch": 0.9754347130591176, + "grad_norm": 0.061896337565242474, + "learning_rate": 3.1157324622956817e-06, + "loss": 0.23061676025390626, + "step": 112810 + }, + { + "epoch": 0.9754779465806608, + "grad_norm": 5.895125308243072, + "learning_rate": 3.1155288805150543e-06, + "loss": 0.05571060180664063, + "step": 112815 + }, + { + "epoch": 0.975521180102204, + "grad_norm": 9.28558164586237, + "learning_rate": 3.1153252982016177e-06, + "loss": 0.032142257690429686, + "step": 112820 + }, + { + "epoch": 0.9755644136237474, + "grad_norm": 18.514609874675482, + "learning_rate": 3.1151217153563105e-06, + "loss": 0.16714706420898437, + "step": 112825 + }, + { + "epoch": 0.9756076471452906, + "grad_norm": 32.88759650243853, + "learning_rate": 3.114918131980073e-06, + "loss": 0.16970062255859375, + "step": 112830 + }, + { + "epoch": 0.9756508806668338, + "grad_norm": 0.31082250274873413, + "learning_rate": 3.114714548073843e-06, + "loss": 0.35335617065429686, + "step": 112835 + }, + { + "epoch": 0.9756941141883771, + "grad_norm": 24.92721513367181, + "learning_rate": 3.114510963638559e-06, + "loss": 0.0614105224609375, + "step": 112840 + }, + { + "epoch": 0.9757373477099204, + "grad_norm": 0.262937985172027, + "learning_rate": 3.1143073786751618e-06, + "loss": 0.24828948974609374, + "step": 112845 + }, + { + "epoch": 0.9757805812314636, + "grad_norm": 1.1490904646207547, + "learning_rate": 3.114103793184588e-06, + "loss": 0.11528701782226562, + "step": 112850 + }, + { + "epoch": 0.975823814753007, + "grad_norm": 1.2324483382313287, + "learning_rate": 3.1139002071677777e-06, + "loss": 0.305902099609375, + "step": 112855 + }, + { + "epoch": 0.9758670482745502, + "grad_norm": 2.5247406903557823, + "learning_rate": 3.1136966206256706e-06, + "loss": 0.1205078125, + "step": 112860 + }, + { + "epoch": 0.9759102817960934, + "grad_norm": 0.19202359635065863, + "learning_rate": 3.113493033559204e-06, + "loss": 0.2832185745239258, + "step": 112865 + }, + { + "epoch": 0.9759535153176366, + "grad_norm": 18.515079037712812, + "learning_rate": 3.1132894459693183e-06, + "loss": 0.111376953125, + "step": 112870 + }, + { + "epoch": 0.97599674883918, + "grad_norm": 15.138205120519908, + "learning_rate": 3.11308585785695e-06, + "loss": 0.1045654296875, + "step": 112875 + }, + { + "epoch": 0.9760399823607232, + "grad_norm": 2.522069316318466, + "learning_rate": 3.112882269223041e-06, + "loss": 0.08606719970703125, + "step": 112880 + }, + { + "epoch": 0.9760832158822664, + "grad_norm": 2.9963447405898402, + "learning_rate": 3.112678680068529e-06, + "loss": 0.09769611358642578, + "step": 112885 + }, + { + "epoch": 0.9761264494038098, + "grad_norm": 6.96809370809019, + "learning_rate": 3.112475090394352e-06, + "loss": 0.05949592590332031, + "step": 112890 + }, + { + "epoch": 0.976169682925353, + "grad_norm": 0.8681953179315137, + "learning_rate": 3.1122715002014513e-06, + "loss": 0.0895355224609375, + "step": 112895 + }, + { + "epoch": 0.9762129164468962, + "grad_norm": 30.121700405223503, + "learning_rate": 3.112067909490763e-06, + "loss": 0.41564407348632815, + "step": 112900 + }, + { + "epoch": 0.9762561499684396, + "grad_norm": 8.882159662439612, + "learning_rate": 3.111864318263227e-06, + "loss": 0.08969955444335938, + "step": 112905 + }, + { + "epoch": 0.9762993834899828, + "grad_norm": 16.31794416492043, + "learning_rate": 3.1116607265197837e-06, + "loss": 0.3362846374511719, + "step": 112910 + }, + { + "epoch": 0.976342617011526, + "grad_norm": 41.77886904817739, + "learning_rate": 3.111457134261372e-06, + "loss": 0.22561111450195312, + "step": 112915 + }, + { + "epoch": 0.9763858505330694, + "grad_norm": 1.0594446619158444, + "learning_rate": 3.1112535414889274e-06, + "loss": 0.0560302734375, + "step": 112920 + }, + { + "epoch": 0.9764290840546126, + "grad_norm": 1.925274068729068, + "learning_rate": 3.1110499482033932e-06, + "loss": 0.2324737548828125, + "step": 112925 + }, + { + "epoch": 0.9764723175761558, + "grad_norm": 0.6259485786826933, + "learning_rate": 3.1108463544057056e-06, + "loss": 0.1926250457763672, + "step": 112930 + }, + { + "epoch": 0.9765155510976992, + "grad_norm": 23.537920965616802, + "learning_rate": 3.1106427600968045e-06, + "loss": 0.07804107666015625, + "step": 112935 + }, + { + "epoch": 0.9765587846192424, + "grad_norm": 10.601173181272516, + "learning_rate": 3.110439165277629e-06, + "loss": 0.14169769287109374, + "step": 112940 + }, + { + "epoch": 0.9766020181407856, + "grad_norm": 0.5369221053975533, + "learning_rate": 3.1102355699491183e-06, + "loss": 0.31604576110839844, + "step": 112945 + }, + { + "epoch": 0.9766452516623289, + "grad_norm": 2.8201834723484978, + "learning_rate": 3.1100319741122107e-06, + "loss": 0.1348876953125, + "step": 112950 + }, + { + "epoch": 0.9766884851838722, + "grad_norm": 0.9194043488602999, + "learning_rate": 3.109828377767844e-06, + "loss": 0.032213592529296876, + "step": 112955 + }, + { + "epoch": 0.9767317187054154, + "grad_norm": 1.4988146343322484, + "learning_rate": 3.1096247809169597e-06, + "loss": 0.16764068603515625, + "step": 112960 + }, + { + "epoch": 0.9767749522269586, + "grad_norm": 12.21415474427918, + "learning_rate": 3.109421183560496e-06, + "loss": 0.0635711669921875, + "step": 112965 + }, + { + "epoch": 0.976818185748502, + "grad_norm": 1.4562870654670077, + "learning_rate": 3.10921758569939e-06, + "loss": 0.0445556640625, + "step": 112970 + }, + { + "epoch": 0.9768614192700452, + "grad_norm": 25.485671348777032, + "learning_rate": 3.1090139873345833e-06, + "loss": 0.1498645782470703, + "step": 112975 + }, + { + "epoch": 0.9769046527915884, + "grad_norm": 0.6054656425495659, + "learning_rate": 3.108810388467014e-06, + "loss": 0.065850830078125, + "step": 112980 + }, + { + "epoch": 0.9769478863131318, + "grad_norm": 31.010641649534644, + "learning_rate": 3.1086067890976194e-06, + "loss": 0.19759731292724608, + "step": 112985 + }, + { + "epoch": 0.976991119834675, + "grad_norm": 9.091475682955803, + "learning_rate": 3.108403189227341e-06, + "loss": 0.16340179443359376, + "step": 112990 + }, + { + "epoch": 0.9770343533562182, + "grad_norm": 30.51277408907867, + "learning_rate": 3.1081995888571164e-06, + "loss": 0.1119964599609375, + "step": 112995 + }, + { + "epoch": 0.9770775868777616, + "grad_norm": 0.3109150716820502, + "learning_rate": 3.1079959879878845e-06, + "loss": 0.32938995361328127, + "step": 113000 + }, + { + "epoch": 0.9771208203993048, + "grad_norm": 10.746986093027354, + "learning_rate": 3.1077923866205853e-06, + "loss": 0.08715581893920898, + "step": 113005 + }, + { + "epoch": 0.977164053920848, + "grad_norm": 11.978503009122026, + "learning_rate": 3.1075887847561574e-06, + "loss": 0.09001007080078124, + "step": 113010 + }, + { + "epoch": 0.9772072874423914, + "grad_norm": 0.2830641184731976, + "learning_rate": 3.107385182395539e-06, + "loss": 0.18862457275390626, + "step": 113015 + }, + { + "epoch": 0.9772505209639346, + "grad_norm": 1.638249883840251, + "learning_rate": 3.1071815795396687e-06, + "loss": 0.3741172790527344, + "step": 113020 + }, + { + "epoch": 0.9772937544854778, + "grad_norm": 1.7704012073534505, + "learning_rate": 3.106977976189488e-06, + "loss": 0.0498016357421875, + "step": 113025 + }, + { + "epoch": 0.9773369880070212, + "grad_norm": 1.031880343576725, + "learning_rate": 3.1067743723459328e-06, + "loss": 0.09179954528808594, + "step": 113030 + }, + { + "epoch": 0.9773802215285644, + "grad_norm": 2.6202322645485197, + "learning_rate": 3.1065707680099453e-06, + "loss": 0.06650676727294921, + "step": 113035 + }, + { + "epoch": 0.9774234550501076, + "grad_norm": 2.527275017137076, + "learning_rate": 3.106367163182462e-06, + "loss": 0.081707763671875, + "step": 113040 + }, + { + "epoch": 0.9774666885716509, + "grad_norm": 0.5345838761231546, + "learning_rate": 3.1061635578644224e-06, + "loss": 0.03937606811523438, + "step": 113045 + }, + { + "epoch": 0.9775099220931942, + "grad_norm": 6.246740444301408, + "learning_rate": 3.105959952056766e-06, + "loss": 0.3409404754638672, + "step": 113050 + }, + { + "epoch": 0.9775531556147374, + "grad_norm": 9.687536389318229, + "learning_rate": 3.1057563457604318e-06, + "loss": 0.1642822265625, + "step": 113055 + }, + { + "epoch": 0.9775963891362807, + "grad_norm": 27.127996045312024, + "learning_rate": 3.105552738976359e-06, + "loss": 0.14366531372070312, + "step": 113060 + }, + { + "epoch": 0.977639622657824, + "grad_norm": 9.280745428064634, + "learning_rate": 3.105349131705486e-06, + "loss": 0.09394378662109375, + "step": 113065 + }, + { + "epoch": 0.9776828561793672, + "grad_norm": 2.866402205740052, + "learning_rate": 3.1051455239487525e-06, + "loss": 0.09051742553710937, + "step": 113070 + }, + { + "epoch": 0.9777260897009105, + "grad_norm": 0.6008258778747575, + "learning_rate": 3.1049419157070966e-06, + "loss": 0.2017059326171875, + "step": 113075 + }, + { + "epoch": 0.9777693232224538, + "grad_norm": 6.559212810442241, + "learning_rate": 3.1047383069814574e-06, + "loss": 0.290869140625, + "step": 113080 + }, + { + "epoch": 0.977812556743997, + "grad_norm": 3.527397884719405, + "learning_rate": 3.1045346977727758e-06, + "loss": 0.1387847900390625, + "step": 113085 + }, + { + "epoch": 0.9778557902655403, + "grad_norm": 1.8169907335111193, + "learning_rate": 3.1043310880819887e-06, + "loss": 0.3062774658203125, + "step": 113090 + }, + { + "epoch": 0.9778990237870836, + "grad_norm": 30.462555470707244, + "learning_rate": 3.1041274779100354e-06, + "loss": 0.28067779541015625, + "step": 113095 + }, + { + "epoch": 0.9779422573086268, + "grad_norm": 2.6083922916123505, + "learning_rate": 3.1039238672578562e-06, + "loss": 0.07238597869873047, + "step": 113100 + }, + { + "epoch": 0.9779854908301701, + "grad_norm": 5.245717667095015, + "learning_rate": 3.103720256126388e-06, + "loss": 0.054308319091796876, + "step": 113105 + }, + { + "epoch": 0.9780287243517134, + "grad_norm": 1.7933500137959308, + "learning_rate": 3.103516644516572e-06, + "loss": 0.09646759033203126, + "step": 113110 + }, + { + "epoch": 0.9780719578732566, + "grad_norm": 11.325850668263744, + "learning_rate": 3.103313032429347e-06, + "loss": 0.4029632568359375, + "step": 113115 + }, + { + "epoch": 0.9781151913947999, + "grad_norm": 2.8735390527937295, + "learning_rate": 3.1031094198656507e-06, + "loss": 0.137274169921875, + "step": 113120 + }, + { + "epoch": 0.9781584249163431, + "grad_norm": 0.7686509346836582, + "learning_rate": 3.102905806826423e-06, + "loss": 0.042218017578125, + "step": 113125 + }, + { + "epoch": 0.9782016584378864, + "grad_norm": 0.8580329283212756, + "learning_rate": 3.1027021933126024e-06, + "loss": 0.07282524108886719, + "step": 113130 + }, + { + "epoch": 0.9782448919594297, + "grad_norm": 13.195437183471672, + "learning_rate": 3.102498579325129e-06, + "loss": 0.05363540649414063, + "step": 113135 + }, + { + "epoch": 0.9782881254809729, + "grad_norm": 1.5412565239163192, + "learning_rate": 3.102294964864941e-06, + "loss": 0.17107086181640624, + "step": 113140 + }, + { + "epoch": 0.9783313590025162, + "grad_norm": 1.3261313552413865, + "learning_rate": 3.1020913499329773e-06, + "loss": 0.1138427734375, + "step": 113145 + }, + { + "epoch": 0.9783745925240594, + "grad_norm": 28.57709820153421, + "learning_rate": 3.1018877345301783e-06, + "loss": 0.2865631103515625, + "step": 113150 + }, + { + "epoch": 0.9784178260456027, + "grad_norm": 2.0323152658894585, + "learning_rate": 3.101684118657481e-06, + "loss": 0.036734390258789065, + "step": 113155 + }, + { + "epoch": 0.978461059567146, + "grad_norm": 11.858459136207857, + "learning_rate": 3.1014805023158256e-06, + "loss": 0.1346282958984375, + "step": 113160 + }, + { + "epoch": 0.9785042930886892, + "grad_norm": 1.3163207043120366, + "learning_rate": 3.101276885506152e-06, + "loss": 0.083404541015625, + "step": 113165 + }, + { + "epoch": 0.9785475266102325, + "grad_norm": 21.970424589812882, + "learning_rate": 3.101073268229398e-06, + "loss": 0.14618988037109376, + "step": 113170 + }, + { + "epoch": 0.9785907601317758, + "grad_norm": 7.470688332769162, + "learning_rate": 3.1008696504865027e-06, + "loss": 0.3125419616699219, + "step": 113175 + }, + { + "epoch": 0.978633993653319, + "grad_norm": 15.339533032007365, + "learning_rate": 3.1006660322784056e-06, + "loss": 0.0590606689453125, + "step": 113180 + }, + { + "epoch": 0.9786772271748623, + "grad_norm": 1.2656214466995745, + "learning_rate": 3.100462413606045e-06, + "loss": 0.2509613037109375, + "step": 113185 + }, + { + "epoch": 0.9787204606964056, + "grad_norm": 0.4727500339249947, + "learning_rate": 3.1002587944703612e-06, + "loss": 0.03137359619140625, + "step": 113190 + }, + { + "epoch": 0.9787636942179488, + "grad_norm": 0.6363657999715902, + "learning_rate": 3.1000551748722936e-06, + "loss": 0.050537109375, + "step": 113195 + }, + { + "epoch": 0.9788069277394921, + "grad_norm": 7.386102897410646, + "learning_rate": 3.0998515548127793e-06, + "loss": 0.14073581695556642, + "step": 113200 + }, + { + "epoch": 0.9788501612610353, + "grad_norm": 10.904101099243592, + "learning_rate": 3.0996479342927587e-06, + "loss": 0.1891021728515625, + "step": 113205 + }, + { + "epoch": 0.9788933947825786, + "grad_norm": 18.30566382775224, + "learning_rate": 3.0994443133131706e-06, + "loss": 0.05513687133789062, + "step": 113210 + }, + { + "epoch": 0.9789366283041219, + "grad_norm": 5.596948622087882, + "learning_rate": 3.099240691874954e-06, + "loss": 0.1907217025756836, + "step": 113215 + }, + { + "epoch": 0.9789798618256651, + "grad_norm": 9.385464458380266, + "learning_rate": 3.099037069979048e-06, + "loss": 0.1743255615234375, + "step": 113220 + }, + { + "epoch": 0.9790230953472084, + "grad_norm": 0.2509270189011642, + "learning_rate": 3.0988334476263916e-06, + "loss": 0.04868621826171875, + "step": 113225 + }, + { + "epoch": 0.9790663288687517, + "grad_norm": 5.23091653067733, + "learning_rate": 3.098629824817925e-06, + "loss": 0.05811004638671875, + "step": 113230 + }, + { + "epoch": 0.9791095623902949, + "grad_norm": 23.50609197416519, + "learning_rate": 3.0984262015545867e-06, + "loss": 0.08865966796875, + "step": 113235 + }, + { + "epoch": 0.9791527959118382, + "grad_norm": 0.1949738452390753, + "learning_rate": 3.0982225778373144e-06, + "loss": 0.0435699462890625, + "step": 113240 + }, + { + "epoch": 0.9791960294333815, + "grad_norm": 11.766167694007242, + "learning_rate": 3.0980189536670476e-06, + "loss": 0.21567840576171876, + "step": 113245 + }, + { + "epoch": 0.9792392629549247, + "grad_norm": 15.26127225699314, + "learning_rate": 3.0978153290447274e-06, + "loss": 0.04685134887695312, + "step": 113250 + }, + { + "epoch": 0.979282496476468, + "grad_norm": 2.94803528661919, + "learning_rate": 3.097611703971291e-06, + "loss": 0.1643035888671875, + "step": 113255 + }, + { + "epoch": 0.9793257299980113, + "grad_norm": 6.320143458628221, + "learning_rate": 3.097408078447678e-06, + "loss": 0.038860321044921875, + "step": 113260 + }, + { + "epoch": 0.9793689635195545, + "grad_norm": 41.330303139328116, + "learning_rate": 3.0972044524748283e-06, + "loss": 0.17049179077148438, + "step": 113265 + }, + { + "epoch": 0.9794121970410978, + "grad_norm": 6.839074866206, + "learning_rate": 3.097000826053679e-06, + "loss": 0.0667510986328125, + "step": 113270 + }, + { + "epoch": 0.9794554305626411, + "grad_norm": 55.579801569995205, + "learning_rate": 3.0967971991851706e-06, + "loss": 0.22034683227539062, + "step": 113275 + }, + { + "epoch": 0.9794986640841843, + "grad_norm": 14.710065536016879, + "learning_rate": 3.096593571870243e-06, + "loss": 0.18559341430664061, + "step": 113280 + }, + { + "epoch": 0.9795418976057276, + "grad_norm": 6.0514633944298035, + "learning_rate": 3.0963899441098336e-06, + "loss": 0.11884613037109375, + "step": 113285 + }, + { + "epoch": 0.9795851311272709, + "grad_norm": 11.39598254867336, + "learning_rate": 3.0961863159048833e-06, + "loss": 0.11727218627929688, + "step": 113290 + }, + { + "epoch": 0.9796283646488141, + "grad_norm": 2.1040209743336815, + "learning_rate": 3.0959826872563296e-06, + "loss": 0.085760498046875, + "step": 113295 + }, + { + "epoch": 0.9796715981703573, + "grad_norm": 20.80806912320949, + "learning_rate": 3.0957790581651114e-06, + "loss": 0.13826751708984375, + "step": 113300 + }, + { + "epoch": 0.9797148316919007, + "grad_norm": 22.286146680456444, + "learning_rate": 3.095575428632169e-06, + "loss": 0.19305267333984374, + "step": 113305 + }, + { + "epoch": 0.9797580652134439, + "grad_norm": 14.928434807197954, + "learning_rate": 3.0953717986584424e-06, + "loss": 0.046990966796875, + "step": 113310 + }, + { + "epoch": 0.9798012987349871, + "grad_norm": 9.606995373114277, + "learning_rate": 3.0951681682448685e-06, + "loss": 0.1168426513671875, + "step": 113315 + }, + { + "epoch": 0.9798445322565305, + "grad_norm": 27.1725193580534, + "learning_rate": 3.094964537392387e-06, + "loss": 0.23846435546875, + "step": 113320 + }, + { + "epoch": 0.9798877657780737, + "grad_norm": 2.632453223449269, + "learning_rate": 3.094760906101938e-06, + "loss": 0.09092864990234376, + "step": 113325 + }, + { + "epoch": 0.9799309992996169, + "grad_norm": 44.353553264582395, + "learning_rate": 3.0945572743744608e-06, + "loss": 0.4672401428222656, + "step": 113330 + }, + { + "epoch": 0.9799742328211603, + "grad_norm": 9.43193655083499, + "learning_rate": 3.0943536422108923e-06, + "loss": 0.09795761108398438, + "step": 113335 + }, + { + "epoch": 0.9800174663427035, + "grad_norm": 2.8172669810171476, + "learning_rate": 3.0941500096121746e-06, + "loss": 0.097210693359375, + "step": 113340 + }, + { + "epoch": 0.9800606998642467, + "grad_norm": 2.8612323788632517, + "learning_rate": 3.0939463765792446e-06, + "loss": 0.10818977355957031, + "step": 113345 + }, + { + "epoch": 0.98010393338579, + "grad_norm": 38.88606630044984, + "learning_rate": 3.093742743113042e-06, + "loss": 0.16036376953125, + "step": 113350 + }, + { + "epoch": 0.9801471669073333, + "grad_norm": 5.634635079591732, + "learning_rate": 3.093539109214507e-06, + "loss": 0.31761932373046875, + "step": 113355 + }, + { + "epoch": 0.9801904004288765, + "grad_norm": 0.14105519954468063, + "learning_rate": 3.093335474884577e-06, + "loss": 0.18881263732910156, + "step": 113360 + }, + { + "epoch": 0.9802336339504198, + "grad_norm": 0.36723941591885284, + "learning_rate": 3.0931318401241924e-06, + "loss": 0.10246429443359376, + "step": 113365 + }, + { + "epoch": 0.9802768674719631, + "grad_norm": 3.4231142021699195, + "learning_rate": 3.092928204934292e-06, + "loss": 0.29431304931640623, + "step": 113370 + }, + { + "epoch": 0.9803201009935063, + "grad_norm": 0.02066100580073109, + "learning_rate": 3.092724569315815e-06, + "loss": 0.055690956115722653, + "step": 113375 + }, + { + "epoch": 0.9803633345150495, + "grad_norm": 11.265925282994152, + "learning_rate": 3.0925209332697e-06, + "loss": 0.04344635009765625, + "step": 113380 + }, + { + "epoch": 0.9804065680365929, + "grad_norm": 6.09290670549124, + "learning_rate": 3.0923172967968866e-06, + "loss": 0.08572158813476563, + "step": 113385 + }, + { + "epoch": 0.9804498015581361, + "grad_norm": 1.060767153577919, + "learning_rate": 3.092113659898315e-06, + "loss": 0.03643798828125, + "step": 113390 + }, + { + "epoch": 0.9804930350796793, + "grad_norm": 41.53288694187843, + "learning_rate": 3.0919100225749233e-06, + "loss": 0.22684555053710936, + "step": 113395 + }, + { + "epoch": 0.9805362686012227, + "grad_norm": 17.6940483547782, + "learning_rate": 3.0917063848276497e-06, + "loss": 0.13233184814453125, + "step": 113400 + }, + { + "epoch": 0.9805795021227659, + "grad_norm": 2.477169401911379, + "learning_rate": 3.0915027466574348e-06, + "loss": 0.16849517822265625, + "step": 113405 + }, + { + "epoch": 0.9806227356443091, + "grad_norm": 1.2582437494497958, + "learning_rate": 3.0912991080652174e-06, + "loss": 0.134375, + "step": 113410 + }, + { + "epoch": 0.9806659691658525, + "grad_norm": 16.19294475440778, + "learning_rate": 3.091095469051936e-06, + "loss": 0.1268869400024414, + "step": 113415 + }, + { + "epoch": 0.9807092026873957, + "grad_norm": 17.255360424247275, + "learning_rate": 3.0908918296185312e-06, + "loss": 0.1181243896484375, + "step": 113420 + }, + { + "epoch": 0.9807524362089389, + "grad_norm": 6.007877327455367, + "learning_rate": 3.0906881897659416e-06, + "loss": 0.08400726318359375, + "step": 113425 + }, + { + "epoch": 0.9807956697304823, + "grad_norm": 16.197528455235776, + "learning_rate": 3.090484549495105e-06, + "loss": 0.35376739501953125, + "step": 113430 + }, + { + "epoch": 0.9808389032520255, + "grad_norm": 2.9715287040596383, + "learning_rate": 3.0902809088069624e-06, + "loss": 0.6183753967285156, + "step": 113435 + }, + { + "epoch": 0.9808821367735687, + "grad_norm": 11.121040781512296, + "learning_rate": 3.0900772677024513e-06, + "loss": 0.11452178955078125, + "step": 113440 + }, + { + "epoch": 0.9809253702951121, + "grad_norm": 1.5093972328386613, + "learning_rate": 3.0898736261825124e-06, + "loss": 0.03737564086914062, + "step": 113445 + }, + { + "epoch": 0.9809686038166553, + "grad_norm": 10.153569071086057, + "learning_rate": 3.0896699842480843e-06, + "loss": 0.08190345764160156, + "step": 113450 + }, + { + "epoch": 0.9810118373381985, + "grad_norm": 1.0152588714156556, + "learning_rate": 3.0894663419001063e-06, + "loss": 0.0566558837890625, + "step": 113455 + }, + { + "epoch": 0.9810550708597419, + "grad_norm": 26.236589222904037, + "learning_rate": 3.089262699139517e-06, + "loss": 0.16392250061035157, + "step": 113460 + }, + { + "epoch": 0.9810983043812851, + "grad_norm": 1.8275861851554585, + "learning_rate": 3.0890590559672566e-06, + "loss": 0.09498062133789062, + "step": 113465 + }, + { + "epoch": 0.9811415379028283, + "grad_norm": 63.236723759389115, + "learning_rate": 3.0888554123842635e-06, + "loss": 0.4109954833984375, + "step": 113470 + }, + { + "epoch": 0.9811847714243715, + "grad_norm": 22.99126391029818, + "learning_rate": 3.088651768391477e-06, + "loss": 0.17913036346435546, + "step": 113475 + }, + { + "epoch": 0.9812280049459149, + "grad_norm": 48.24878370043301, + "learning_rate": 3.088448123989836e-06, + "loss": 0.19270553588867187, + "step": 113480 + }, + { + "epoch": 0.9812712384674581, + "grad_norm": 0.6788680268659988, + "learning_rate": 3.0882444791802812e-06, + "loss": 0.02631988525390625, + "step": 113485 + }, + { + "epoch": 0.9813144719890013, + "grad_norm": 10.169628737774511, + "learning_rate": 3.08804083396375e-06, + "loss": 0.05835113525390625, + "step": 113490 + }, + { + "epoch": 0.9813577055105447, + "grad_norm": 9.608440058460273, + "learning_rate": 3.087837188341182e-06, + "loss": 0.09548873901367187, + "step": 113495 + }, + { + "epoch": 0.9814009390320879, + "grad_norm": 3.2176637764568516, + "learning_rate": 3.0876335423135163e-06, + "loss": 0.06082706451416016, + "step": 113500 + }, + { + "epoch": 0.9814441725536311, + "grad_norm": 8.33755125458627, + "learning_rate": 3.0874298958816935e-06, + "loss": 0.053765869140625, + "step": 113505 + }, + { + "epoch": 0.9814874060751745, + "grad_norm": 3.456105435520941, + "learning_rate": 3.087226249046651e-06, + "loss": 0.030917930603027343, + "step": 113510 + }, + { + "epoch": 0.9815306395967177, + "grad_norm": 15.556121917728841, + "learning_rate": 3.0870226018093294e-06, + "loss": 0.212591552734375, + "step": 113515 + }, + { + "epoch": 0.9815738731182609, + "grad_norm": 8.762299296438085, + "learning_rate": 3.0868189541706667e-06, + "loss": 0.23563232421875, + "step": 113520 + }, + { + "epoch": 0.9816171066398043, + "grad_norm": 18.692987667275343, + "learning_rate": 3.086615306131602e-06, + "loss": 0.0998138427734375, + "step": 113525 + }, + { + "epoch": 0.9816603401613475, + "grad_norm": 1.4861359786103008, + "learning_rate": 3.0864116576930763e-06, + "loss": 0.06045989990234375, + "step": 113530 + }, + { + "epoch": 0.9817035736828907, + "grad_norm": 6.094023965393822, + "learning_rate": 3.0862080088560276e-06, + "loss": 0.108270263671875, + "step": 113535 + }, + { + "epoch": 0.9817468072044341, + "grad_norm": 1.7799553443559484, + "learning_rate": 3.0860043596213944e-06, + "loss": 0.15811004638671874, + "step": 113540 + }, + { + "epoch": 0.9817900407259773, + "grad_norm": 0.09145428960250566, + "learning_rate": 3.0858007099901172e-06, + "loss": 0.19831619262695313, + "step": 113545 + }, + { + "epoch": 0.9818332742475205, + "grad_norm": 29.064759253006564, + "learning_rate": 3.0855970599631347e-06, + "loss": 0.24008750915527344, + "step": 113550 + }, + { + "epoch": 0.9818765077690638, + "grad_norm": 5.018414169676406, + "learning_rate": 3.0853934095413865e-06, + "loss": 0.029775238037109374, + "step": 113555 + }, + { + "epoch": 0.9819197412906071, + "grad_norm": 52.43495957386796, + "learning_rate": 3.0851897587258107e-06, + "loss": 0.0793243408203125, + "step": 113560 + }, + { + "epoch": 0.9819629748121503, + "grad_norm": 16.319719424646074, + "learning_rate": 3.0849861075173476e-06, + "loss": 0.15460433959960937, + "step": 113565 + }, + { + "epoch": 0.9820062083336936, + "grad_norm": 1.724038650286398, + "learning_rate": 3.0847824559169365e-06, + "loss": 0.085748291015625, + "step": 113570 + }, + { + "epoch": 0.9820494418552369, + "grad_norm": 3.681294385408634, + "learning_rate": 3.084578803925515e-06, + "loss": 0.08235893249511719, + "step": 113575 + }, + { + "epoch": 0.9820926753767801, + "grad_norm": 2.670186179084016, + "learning_rate": 3.084375151544024e-06, + "loss": 0.055147552490234376, + "step": 113580 + }, + { + "epoch": 0.9821359088983234, + "grad_norm": 54.25770674445339, + "learning_rate": 3.084171498773403e-06, + "loss": 0.6874923706054688, + "step": 113585 + }, + { + "epoch": 0.9821791424198667, + "grad_norm": 12.361355220147802, + "learning_rate": 3.08396784561459e-06, + "loss": 0.32918624877929686, + "step": 113590 + }, + { + "epoch": 0.9822223759414099, + "grad_norm": 3.1369324271515047, + "learning_rate": 3.0837641920685247e-06, + "loss": 0.20720367431640624, + "step": 113595 + }, + { + "epoch": 0.9822656094629532, + "grad_norm": 2.780320090839978, + "learning_rate": 3.0835605381361468e-06, + "loss": 0.12471694946289062, + "step": 113600 + }, + { + "epoch": 0.9823088429844965, + "grad_norm": 0.15712995194870383, + "learning_rate": 3.0833568838183947e-06, + "loss": 0.059468841552734374, + "step": 113605 + }, + { + "epoch": 0.9823520765060397, + "grad_norm": 5.191442377419283, + "learning_rate": 3.083153229116208e-06, + "loss": 0.09957733154296874, + "step": 113610 + }, + { + "epoch": 0.982395310027583, + "grad_norm": 3.832462405308464, + "learning_rate": 3.0829495740305256e-06, + "loss": 0.048410797119140626, + "step": 113615 + }, + { + "epoch": 0.9824385435491263, + "grad_norm": 7.303713897580179, + "learning_rate": 3.0827459185622876e-06, + "loss": 0.16457748413085938, + "step": 113620 + }, + { + "epoch": 0.9824817770706695, + "grad_norm": 0.9715444642001076, + "learning_rate": 3.0825422627124325e-06, + "loss": 0.05549774169921875, + "step": 113625 + }, + { + "epoch": 0.9825250105922128, + "grad_norm": 0.8430111643942766, + "learning_rate": 3.0823386064819005e-06, + "loss": 0.02758636474609375, + "step": 113630 + }, + { + "epoch": 0.9825682441137561, + "grad_norm": 34.237175952945435, + "learning_rate": 3.0821349498716292e-06, + "loss": 0.11532135009765625, + "step": 113635 + }, + { + "epoch": 0.9826114776352993, + "grad_norm": 0.11858171791918459, + "learning_rate": 3.0819312928825585e-06, + "loss": 0.22184677124023439, + "step": 113640 + }, + { + "epoch": 0.9826547111568426, + "grad_norm": 51.768549928466214, + "learning_rate": 3.081727635515629e-06, + "loss": 0.34546289443969724, + "step": 113645 + }, + { + "epoch": 0.9826979446783858, + "grad_norm": 10.481452917850268, + "learning_rate": 3.0815239777717784e-06, + "loss": 0.14446563720703126, + "step": 113650 + }, + { + "epoch": 0.9827411781999291, + "grad_norm": 21.094449655007978, + "learning_rate": 3.081320319651946e-06, + "loss": 0.17432327270507814, + "step": 113655 + }, + { + "epoch": 0.9827844117214724, + "grad_norm": 23.736021247316906, + "learning_rate": 3.0811166611570723e-06, + "loss": 0.15674514770507814, + "step": 113660 + }, + { + "epoch": 0.9828276452430156, + "grad_norm": 1.0824840528672321, + "learning_rate": 3.0809130022880945e-06, + "loss": 0.18484649658203126, + "step": 113665 + }, + { + "epoch": 0.9828708787645589, + "grad_norm": 1.15659694079943, + "learning_rate": 3.080709343045954e-06, + "loss": 0.11249847412109375, + "step": 113670 + }, + { + "epoch": 0.9829141122861021, + "grad_norm": 16.817150253980117, + "learning_rate": 3.080505683431589e-06, + "loss": 0.22796401977539063, + "step": 113675 + }, + { + "epoch": 0.9829573458076454, + "grad_norm": 4.435109324022531, + "learning_rate": 3.0803020234459393e-06, + "loss": 0.17384986877441405, + "step": 113680 + }, + { + "epoch": 0.9830005793291887, + "grad_norm": 13.414865411871286, + "learning_rate": 3.080098363089943e-06, + "loss": 0.418853759765625, + "step": 113685 + }, + { + "epoch": 0.983043812850732, + "grad_norm": 0.48815356462725495, + "learning_rate": 3.079894702364541e-06, + "loss": 0.0185333251953125, + "step": 113690 + }, + { + "epoch": 0.9830870463722752, + "grad_norm": 31.962948586489464, + "learning_rate": 3.079691041270671e-06, + "loss": 0.23176422119140624, + "step": 113695 + }, + { + "epoch": 0.9831302798938185, + "grad_norm": 0.7553125575324102, + "learning_rate": 3.079487379809273e-06, + "loss": 0.11001968383789062, + "step": 113700 + }, + { + "epoch": 0.9831735134153617, + "grad_norm": 3.5788990482150536, + "learning_rate": 3.079283717981286e-06, + "loss": 0.0368011474609375, + "step": 113705 + }, + { + "epoch": 0.983216746936905, + "grad_norm": 10.744719482881418, + "learning_rate": 3.0790800557876505e-06, + "loss": 0.03855438232421875, + "step": 113710 + }, + { + "epoch": 0.9832599804584483, + "grad_norm": 17.038326117622645, + "learning_rate": 3.0788763932293038e-06, + "loss": 0.20519332885742186, + "step": 113715 + }, + { + "epoch": 0.9833032139799915, + "grad_norm": 6.91143883376352, + "learning_rate": 3.078672730307187e-06, + "loss": 0.048413848876953124, + "step": 113720 + }, + { + "epoch": 0.9833464475015348, + "grad_norm": 1.9103692299677173, + "learning_rate": 3.0784690670222375e-06, + "loss": 0.037311553955078125, + "step": 113725 + }, + { + "epoch": 0.983389681023078, + "grad_norm": 6.06839173685411, + "learning_rate": 3.0782654033753963e-06, + "loss": 0.066015625, + "step": 113730 + }, + { + "epoch": 0.9834329145446213, + "grad_norm": 11.471237190589262, + "learning_rate": 3.0780617393676013e-06, + "loss": 0.086395263671875, + "step": 113735 + }, + { + "epoch": 0.9834761480661646, + "grad_norm": 4.947968967471392, + "learning_rate": 3.077858074999793e-06, + "loss": 0.22220268249511718, + "step": 113740 + }, + { + "epoch": 0.9835193815877078, + "grad_norm": 3.0635314506092186, + "learning_rate": 3.077654410272911e-06, + "loss": 0.12761917114257812, + "step": 113745 + }, + { + "epoch": 0.9835626151092511, + "grad_norm": 8.507525767809371, + "learning_rate": 3.077450745187891e-06, + "loss": 0.524871826171875, + "step": 113750 + }, + { + "epoch": 0.9836058486307944, + "grad_norm": 1.0094476002117558, + "learning_rate": 3.0772470797456776e-06, + "loss": 0.2828033447265625, + "step": 113755 + }, + { + "epoch": 0.9836490821523376, + "grad_norm": 8.607662995096565, + "learning_rate": 3.0770434139472066e-06, + "loss": 0.156573486328125, + "step": 113760 + }, + { + "epoch": 0.9836923156738809, + "grad_norm": 2.34717420889378, + "learning_rate": 3.076839747793418e-06, + "loss": 0.06650161743164062, + "step": 113765 + }, + { + "epoch": 0.9837355491954242, + "grad_norm": 1.4429545682734903, + "learning_rate": 3.0766360812852517e-06, + "loss": 0.08545455932617188, + "step": 113770 + }, + { + "epoch": 0.9837787827169674, + "grad_norm": 0.5724874820614917, + "learning_rate": 3.076432414423646e-06, + "loss": 0.34602785110473633, + "step": 113775 + }, + { + "epoch": 0.9838220162385107, + "grad_norm": 56.78857801479526, + "learning_rate": 3.076228747209541e-06, + "loss": 0.427874755859375, + "step": 113780 + }, + { + "epoch": 0.983865249760054, + "grad_norm": 0.10936671543294836, + "learning_rate": 3.0760250796438766e-06, + "loss": 0.04091625213623047, + "step": 113785 + }, + { + "epoch": 0.9839084832815972, + "grad_norm": 13.263817933827431, + "learning_rate": 3.075821411727591e-06, + "loss": 0.08172149658203125, + "step": 113790 + }, + { + "epoch": 0.9839517168031405, + "grad_norm": 13.77486573612989, + "learning_rate": 3.075617743461623e-06, + "loss": 0.036672019958496095, + "step": 113795 + }, + { + "epoch": 0.9839949503246838, + "grad_norm": 19.470087470334942, + "learning_rate": 3.075414074846913e-06, + "loss": 0.13894500732421874, + "step": 113800 + }, + { + "epoch": 0.984038183846227, + "grad_norm": 12.514994071777828, + "learning_rate": 3.0752104058844e-06, + "loss": 0.04506683349609375, + "step": 113805 + }, + { + "epoch": 0.9840814173677703, + "grad_norm": 12.843496130451173, + "learning_rate": 3.0750067365750224e-06, + "loss": 0.11171340942382812, + "step": 113810 + }, + { + "epoch": 0.9841246508893136, + "grad_norm": 2.1675427807455407, + "learning_rate": 3.074803066919721e-06, + "loss": 0.14706802368164062, + "step": 113815 + }, + { + "epoch": 0.9841678844108568, + "grad_norm": 0.7428173030059015, + "learning_rate": 3.0745993969194357e-06, + "loss": 0.08760528564453125, + "step": 113820 + }, + { + "epoch": 0.9842111179324, + "grad_norm": 1.2527847087181099, + "learning_rate": 3.0743957265751033e-06, + "loss": 0.0944814682006836, + "step": 113825 + }, + { + "epoch": 0.9842543514539434, + "grad_norm": 3.322717894893897, + "learning_rate": 3.0741920558876645e-06, + "loss": 0.0833892822265625, + "step": 113830 + }, + { + "epoch": 0.9842975849754866, + "grad_norm": 1.0863196018489765, + "learning_rate": 3.0739883848580583e-06, + "loss": 0.09117202758789063, + "step": 113835 + }, + { + "epoch": 0.9843408184970298, + "grad_norm": 3.4574508613798747, + "learning_rate": 3.0737847134872244e-06, + "loss": 0.08027191162109375, + "step": 113840 + }, + { + "epoch": 0.9843840520185732, + "grad_norm": 0.28872983359975296, + "learning_rate": 3.073581041776102e-06, + "loss": 0.12813873291015626, + "step": 113845 + }, + { + "epoch": 0.9844272855401164, + "grad_norm": 12.794143096657656, + "learning_rate": 3.0733773697256303e-06, + "loss": 0.12908859252929689, + "step": 113850 + }, + { + "epoch": 0.9844705190616596, + "grad_norm": 16.573567158481943, + "learning_rate": 3.0731736973367492e-06, + "loss": 0.09458465576171875, + "step": 113855 + }, + { + "epoch": 0.984513752583203, + "grad_norm": 0.6929948689334213, + "learning_rate": 3.0729700246103962e-06, + "loss": 0.1429351806640625, + "step": 113860 + }, + { + "epoch": 0.9845569861047462, + "grad_norm": 6.175898335640306, + "learning_rate": 3.0727663515475124e-06, + "loss": 0.3160400390625, + "step": 113865 + }, + { + "epoch": 0.9846002196262894, + "grad_norm": 0.5381793319804831, + "learning_rate": 3.0725626781490375e-06, + "loss": 0.0465240478515625, + "step": 113870 + }, + { + "epoch": 0.9846434531478327, + "grad_norm": 7.275739932053156, + "learning_rate": 3.072359004415909e-06, + "loss": 0.1289337158203125, + "step": 113875 + }, + { + "epoch": 0.984686686669376, + "grad_norm": 2.4114702080813464, + "learning_rate": 3.0721553303490677e-06, + "loss": 0.05478057861328125, + "step": 113880 + }, + { + "epoch": 0.9847299201909192, + "grad_norm": 1.4650269409978165, + "learning_rate": 3.071951655949452e-06, + "loss": 0.10737037658691406, + "step": 113885 + }, + { + "epoch": 0.9847731537124625, + "grad_norm": 6.339496317875203, + "learning_rate": 3.0717479812180018e-06, + "loss": 0.05306167602539062, + "step": 113890 + }, + { + "epoch": 0.9848163872340058, + "grad_norm": 40.43928813370783, + "learning_rate": 3.071544306155656e-06, + "loss": 0.368524169921875, + "step": 113895 + }, + { + "epoch": 0.984859620755549, + "grad_norm": 1.6880242038768896, + "learning_rate": 3.0713406307633545e-06, + "loss": 0.13928680419921874, + "step": 113900 + }, + { + "epoch": 0.9849028542770922, + "grad_norm": 0.05583760380940721, + "learning_rate": 3.0711369550420366e-06, + "loss": 0.26377410888671876, + "step": 113905 + }, + { + "epoch": 0.9849460877986356, + "grad_norm": 1.4243407602981817, + "learning_rate": 3.0709332789926404e-06, + "loss": 0.06943511962890625, + "step": 113910 + }, + { + "epoch": 0.9849893213201788, + "grad_norm": 0.8736352453068464, + "learning_rate": 3.0707296026161073e-06, + "loss": 0.043239593505859375, + "step": 113915 + }, + { + "epoch": 0.985032554841722, + "grad_norm": 7.967505607490444, + "learning_rate": 3.0705259259133743e-06, + "loss": 0.2726470947265625, + "step": 113920 + }, + { + "epoch": 0.9850757883632654, + "grad_norm": 0.5567149280768591, + "learning_rate": 3.0703222488853822e-06, + "loss": 0.1448455810546875, + "step": 113925 + }, + { + "epoch": 0.9851190218848086, + "grad_norm": 0.4960404314855453, + "learning_rate": 3.070118571533071e-06, + "loss": 0.1189422607421875, + "step": 113930 + }, + { + "epoch": 0.9851622554063518, + "grad_norm": 57.044705401173744, + "learning_rate": 3.069914893857379e-06, + "loss": 0.20055999755859374, + "step": 113935 + }, + { + "epoch": 0.9852054889278952, + "grad_norm": 16.594914592693513, + "learning_rate": 3.069711215859245e-06, + "loss": 0.2003772735595703, + "step": 113940 + }, + { + "epoch": 0.9852487224494384, + "grad_norm": 7.512209214610338, + "learning_rate": 3.06950753753961e-06, + "loss": 0.3319427490234375, + "step": 113945 + }, + { + "epoch": 0.9852919559709816, + "grad_norm": 0.4075114124634373, + "learning_rate": 3.069303858899411e-06, + "loss": 0.23476486206054686, + "step": 113950 + }, + { + "epoch": 0.985335189492525, + "grad_norm": 3.0193091271099597, + "learning_rate": 3.06910017993959e-06, + "loss": 0.347576904296875, + "step": 113955 + }, + { + "epoch": 0.9853784230140682, + "grad_norm": 3.419600212125369, + "learning_rate": 3.0688965006610846e-06, + "loss": 0.05924568176269531, + "step": 113960 + }, + { + "epoch": 0.9854216565356114, + "grad_norm": 2.0561674907294507, + "learning_rate": 3.068692821064835e-06, + "loss": 0.056681060791015626, + "step": 113965 + }, + { + "epoch": 0.9854648900571548, + "grad_norm": 0.2386544396591173, + "learning_rate": 3.0684891411517792e-06, + "loss": 0.551019287109375, + "step": 113970 + }, + { + "epoch": 0.985508123578698, + "grad_norm": 1.799381572278513, + "learning_rate": 3.0682854609228578e-06, + "loss": 0.179400634765625, + "step": 113975 + }, + { + "epoch": 0.9855513571002412, + "grad_norm": 0.6741653198003777, + "learning_rate": 3.0680817803790107e-06, + "loss": 0.2367870330810547, + "step": 113980 + }, + { + "epoch": 0.9855945906217846, + "grad_norm": 8.48559943711084, + "learning_rate": 3.0678780995211753e-06, + "loss": 0.18411102294921874, + "step": 113985 + }, + { + "epoch": 0.9856378241433278, + "grad_norm": 1.416840430577512, + "learning_rate": 3.0676744183502935e-06, + "loss": 0.19022598266601562, + "step": 113990 + }, + { + "epoch": 0.985681057664871, + "grad_norm": 1.3646488537597625, + "learning_rate": 3.0674707368673024e-06, + "loss": 0.25079193115234377, + "step": 113995 + }, + { + "epoch": 0.9857242911864142, + "grad_norm": 1.4171192955918341, + "learning_rate": 3.067267055073142e-06, + "loss": 0.131005859375, + "step": 114000 + }, + { + "epoch": 0.9857675247079576, + "grad_norm": 0.30488901211534825, + "learning_rate": 3.067063372968752e-06, + "loss": 0.31568222045898436, + "step": 114005 + }, + { + "epoch": 0.9858107582295008, + "grad_norm": 20.689574867907996, + "learning_rate": 3.066859690555072e-06, + "loss": 0.08788604736328125, + "step": 114010 + }, + { + "epoch": 0.985853991751044, + "grad_norm": 5.855714088411563, + "learning_rate": 3.0666560078330416e-06, + "loss": 0.11479415893554687, + "step": 114015 + }, + { + "epoch": 0.9858972252725874, + "grad_norm": 15.459313346563782, + "learning_rate": 3.066452324803598e-06, + "loss": 0.162646484375, + "step": 114020 + }, + { + "epoch": 0.9859404587941306, + "grad_norm": 0.7732462168723895, + "learning_rate": 3.066248641467684e-06, + "loss": 0.0559051513671875, + "step": 114025 + }, + { + "epoch": 0.9859836923156738, + "grad_norm": 6.258044487004343, + "learning_rate": 3.066044957826235e-06, + "loss": 0.26737632751464846, + "step": 114030 + }, + { + "epoch": 0.9860269258372172, + "grad_norm": 20.447061344199756, + "learning_rate": 3.065841273880194e-06, + "loss": 0.13558349609375, + "step": 114035 + }, + { + "epoch": 0.9860701593587604, + "grad_norm": 9.554373396075661, + "learning_rate": 3.0656375896304985e-06, + "loss": 0.13272514343261718, + "step": 114040 + }, + { + "epoch": 0.9861133928803036, + "grad_norm": 2.0359427691602083, + "learning_rate": 3.0654339050780888e-06, + "loss": 0.2102447509765625, + "step": 114045 + }, + { + "epoch": 0.986156626401847, + "grad_norm": 0.2923587547532228, + "learning_rate": 3.0652302202239024e-06, + "loss": 0.06790008544921874, + "step": 114050 + }, + { + "epoch": 0.9861998599233902, + "grad_norm": 0.14299412599083702, + "learning_rate": 3.0650265350688812e-06, + "loss": 0.036434173583984375, + "step": 114055 + }, + { + "epoch": 0.9862430934449334, + "grad_norm": 45.013913603684436, + "learning_rate": 3.0648228496139626e-06, + "loss": 0.26714439392089845, + "step": 114060 + }, + { + "epoch": 0.9862863269664768, + "grad_norm": 8.368446767638375, + "learning_rate": 3.0646191638600866e-06, + "loss": 0.34617252349853517, + "step": 114065 + }, + { + "epoch": 0.98632956048802, + "grad_norm": 1.7829917273034162, + "learning_rate": 3.0644154778081935e-06, + "loss": 0.172857666015625, + "step": 114070 + }, + { + "epoch": 0.9863727940095632, + "grad_norm": 0.35057754524974005, + "learning_rate": 3.0642117914592222e-06, + "loss": 0.126910400390625, + "step": 114075 + }, + { + "epoch": 0.9864160275311065, + "grad_norm": 4.040601105325127, + "learning_rate": 3.064008104814111e-06, + "loss": 0.0715057373046875, + "step": 114080 + }, + { + "epoch": 0.9864592610526498, + "grad_norm": 7.296368073857016, + "learning_rate": 3.0638044178737996e-06, + "loss": 0.171234130859375, + "step": 114085 + }, + { + "epoch": 0.986502494574193, + "grad_norm": 16.873452567317646, + "learning_rate": 3.0636007306392288e-06, + "loss": 0.1749879837036133, + "step": 114090 + }, + { + "epoch": 0.9865457280957363, + "grad_norm": 3.05657989541057, + "learning_rate": 3.063397043111337e-06, + "loss": 0.05895729064941406, + "step": 114095 + }, + { + "epoch": 0.9865889616172796, + "grad_norm": 1.919766614569061, + "learning_rate": 3.063193355291063e-06, + "loss": 0.028980255126953125, + "step": 114100 + }, + { + "epoch": 0.9866321951388228, + "grad_norm": 4.896568794833659, + "learning_rate": 3.0629896671793474e-06, + "loss": 0.224957275390625, + "step": 114105 + }, + { + "epoch": 0.9866754286603661, + "grad_norm": 20.77431277153977, + "learning_rate": 3.062785978777129e-06, + "loss": 0.14117660522460937, + "step": 114110 + }, + { + "epoch": 0.9867186621819094, + "grad_norm": 1.40894423848233, + "learning_rate": 3.062582290085347e-06, + "loss": 0.030440521240234376, + "step": 114115 + }, + { + "epoch": 0.9867618957034526, + "grad_norm": 6.325349982039012, + "learning_rate": 3.0623786011049405e-06, + "loss": 0.040603256225585936, + "step": 114120 + }, + { + "epoch": 0.9868051292249959, + "grad_norm": 1.9601073723977964, + "learning_rate": 3.06217491183685e-06, + "loss": 0.14087142944335937, + "step": 114125 + }, + { + "epoch": 0.9868483627465392, + "grad_norm": 13.55403651515942, + "learning_rate": 3.0619712222820146e-06, + "loss": 0.20276145935058593, + "step": 114130 + }, + { + "epoch": 0.9868915962680824, + "grad_norm": 14.243174175010662, + "learning_rate": 3.061767532441373e-06, + "loss": 0.22497787475585937, + "step": 114135 + }, + { + "epoch": 0.9869348297896257, + "grad_norm": 30.629523504595678, + "learning_rate": 3.0615638423158653e-06, + "loss": 0.09141464233398437, + "step": 114140 + }, + { + "epoch": 0.986978063311169, + "grad_norm": 2.8104709629838043, + "learning_rate": 3.0613601519064303e-06, + "loss": 0.05905303955078125, + "step": 114145 + }, + { + "epoch": 0.9870212968327122, + "grad_norm": 3.4547717014121826, + "learning_rate": 3.0611564612140075e-06, + "loss": 0.119134521484375, + "step": 114150 + }, + { + "epoch": 0.9870645303542555, + "grad_norm": 1.041954649446427, + "learning_rate": 3.0609527702395367e-06, + "loss": 0.17832069396972655, + "step": 114155 + }, + { + "epoch": 0.9871077638757988, + "grad_norm": 0.4478672910806166, + "learning_rate": 3.060749078983957e-06, + "loss": 0.011145782470703126, + "step": 114160 + }, + { + "epoch": 0.987150997397342, + "grad_norm": 16.076008238434444, + "learning_rate": 3.0605453874482085e-06, + "loss": 0.07681007385253906, + "step": 114165 + }, + { + "epoch": 0.9871942309188853, + "grad_norm": 0.7743099398766056, + "learning_rate": 3.0603416956332294e-06, + "loss": 0.018278121948242188, + "step": 114170 + }, + { + "epoch": 0.9872374644404285, + "grad_norm": 0.8815151957839785, + "learning_rate": 3.0601380035399602e-06, + "loss": 0.3253143310546875, + "step": 114175 + }, + { + "epoch": 0.9872806979619718, + "grad_norm": 43.04391701136089, + "learning_rate": 3.059934311169339e-06, + "loss": 0.248779296875, + "step": 114180 + }, + { + "epoch": 0.987323931483515, + "grad_norm": 16.217390113011927, + "learning_rate": 3.059730618522307e-06, + "loss": 0.06188201904296875, + "step": 114185 + }, + { + "epoch": 0.9873671650050583, + "grad_norm": 0.7611601741683853, + "learning_rate": 3.0595269255998025e-06, + "loss": 0.0753173828125, + "step": 114190 + }, + { + "epoch": 0.9874103985266016, + "grad_norm": 0.8685598868042279, + "learning_rate": 3.0593232324027647e-06, + "loss": 0.03798370361328125, + "step": 114195 + }, + { + "epoch": 0.9874536320481448, + "grad_norm": 4.3341656610049535, + "learning_rate": 3.059119538932133e-06, + "loss": 0.1008514404296875, + "step": 114200 + }, + { + "epoch": 0.9874968655696881, + "grad_norm": 11.167438974370233, + "learning_rate": 3.0589158451888488e-06, + "loss": 0.1134674072265625, + "step": 114205 + }, + { + "epoch": 0.9875400990912314, + "grad_norm": 67.17031770004996, + "learning_rate": 3.058712151173848e-06, + "loss": 0.3464225769042969, + "step": 114210 + }, + { + "epoch": 0.9875833326127746, + "grad_norm": 13.289510727215788, + "learning_rate": 3.0585084568880736e-06, + "loss": 0.0895416259765625, + "step": 114215 + }, + { + "epoch": 0.9876265661343179, + "grad_norm": 21.164398835088, + "learning_rate": 3.0583047623324627e-06, + "loss": 0.14486236572265626, + "step": 114220 + }, + { + "epoch": 0.9876697996558612, + "grad_norm": 0.1378371104738897, + "learning_rate": 3.0581010675079552e-06, + "loss": 0.33060569763183595, + "step": 114225 + }, + { + "epoch": 0.9877130331774044, + "grad_norm": 9.075469954184424, + "learning_rate": 3.0578973724154906e-06, + "loss": 0.1537200927734375, + "step": 114230 + }, + { + "epoch": 0.9877562666989477, + "grad_norm": 6.493011855063489, + "learning_rate": 3.0576936770560086e-06, + "loss": 0.17044525146484374, + "step": 114235 + }, + { + "epoch": 0.987799500220491, + "grad_norm": 2.2844516147166845, + "learning_rate": 3.0574899814304485e-06, + "loss": 0.05509796142578125, + "step": 114240 + }, + { + "epoch": 0.9878427337420342, + "grad_norm": 7.509828588760094, + "learning_rate": 3.0572862855397498e-06, + "loss": 0.085308837890625, + "step": 114245 + }, + { + "epoch": 0.9878859672635775, + "grad_norm": 14.75390250489835, + "learning_rate": 3.057082589384852e-06, + "loss": 0.09878997802734375, + "step": 114250 + }, + { + "epoch": 0.9879292007851207, + "grad_norm": 4.232166075921649, + "learning_rate": 3.056878892966694e-06, + "loss": 0.153851318359375, + "step": 114255 + }, + { + "epoch": 0.987972434306664, + "grad_norm": 0.021955011860349396, + "learning_rate": 3.056675196286215e-06, + "loss": 0.10652236938476563, + "step": 114260 + }, + { + "epoch": 0.9880156678282073, + "grad_norm": 23.098289030069978, + "learning_rate": 3.0564714993443565e-06, + "loss": 0.2815546035766602, + "step": 114265 + }, + { + "epoch": 0.9880589013497505, + "grad_norm": 23.744873693172764, + "learning_rate": 3.056267802142055e-06, + "loss": 0.10201454162597656, + "step": 114270 + }, + { + "epoch": 0.9881021348712938, + "grad_norm": 6.253237638233143, + "learning_rate": 3.056064104680252e-06, + "loss": 0.2523681640625, + "step": 114275 + }, + { + "epoch": 0.9881453683928371, + "grad_norm": 18.622246471167532, + "learning_rate": 3.055860406959887e-06, + "loss": 0.1009246826171875, + "step": 114280 + }, + { + "epoch": 0.9881886019143803, + "grad_norm": 0.0732436309691742, + "learning_rate": 3.055656708981897e-06, + "loss": 0.013238143920898438, + "step": 114285 + }, + { + "epoch": 0.9882318354359236, + "grad_norm": 0.5763096017233615, + "learning_rate": 3.055453010747224e-06, + "loss": 0.106390380859375, + "step": 114290 + }, + { + "epoch": 0.9882750689574669, + "grad_norm": 6.543370844759797, + "learning_rate": 3.0552493122568074e-06, + "loss": 0.12620086669921876, + "step": 114295 + }, + { + "epoch": 0.9883183024790101, + "grad_norm": 17.523503036419847, + "learning_rate": 3.055045613511585e-06, + "loss": 0.24175262451171875, + "step": 114300 + }, + { + "epoch": 0.9883615360005534, + "grad_norm": 1.368027179672835, + "learning_rate": 3.0548419145124975e-06, + "loss": 0.3582908630371094, + "step": 114305 + }, + { + "epoch": 0.9884047695220967, + "grad_norm": 8.769748160713544, + "learning_rate": 3.0546382152604836e-06, + "loss": 0.0613372802734375, + "step": 114310 + }, + { + "epoch": 0.9884480030436399, + "grad_norm": 18.579828892544665, + "learning_rate": 3.054434515756483e-06, + "loss": 0.08806915283203125, + "step": 114315 + }, + { + "epoch": 0.9884912365651832, + "grad_norm": 3.141795664927369, + "learning_rate": 3.054230816001435e-06, + "loss": 0.1051065444946289, + "step": 114320 + }, + { + "epoch": 0.9885344700867265, + "grad_norm": 2.8782590781351844, + "learning_rate": 3.0540271159962807e-06, + "loss": 0.1919342041015625, + "step": 114325 + }, + { + "epoch": 0.9885777036082697, + "grad_norm": 37.89202376846093, + "learning_rate": 3.053823415741957e-06, + "loss": 0.1388824462890625, + "step": 114330 + }, + { + "epoch": 0.988620937129813, + "grad_norm": 4.368315974119807, + "learning_rate": 3.0536197152394045e-06, + "loss": 0.03195953369140625, + "step": 114335 + }, + { + "epoch": 0.9886641706513563, + "grad_norm": 33.16147544038433, + "learning_rate": 3.0534160144895627e-06, + "loss": 0.24441070556640626, + "step": 114340 + }, + { + "epoch": 0.9887074041728995, + "grad_norm": 16.606745479866223, + "learning_rate": 3.0532123134933704e-06, + "loss": 0.1124908447265625, + "step": 114345 + }, + { + "epoch": 0.9887506376944427, + "grad_norm": 3.8410376149531262, + "learning_rate": 3.053008612251769e-06, + "loss": 0.19503860473632811, + "step": 114350 + }, + { + "epoch": 0.988793871215986, + "grad_norm": 0.42396056485104044, + "learning_rate": 3.052804910765695e-06, + "loss": 0.18733978271484375, + "step": 114355 + }, + { + "epoch": 0.9888371047375293, + "grad_norm": 26.891781773690138, + "learning_rate": 3.052601209036091e-06, + "loss": 0.19661598205566405, + "step": 114360 + }, + { + "epoch": 0.9888803382590725, + "grad_norm": 2.2868776359322935, + "learning_rate": 3.052397507063894e-06, + "loss": 0.065960693359375, + "step": 114365 + }, + { + "epoch": 0.9889235717806159, + "grad_norm": 11.45822610718832, + "learning_rate": 3.0521938048500435e-06, + "loss": 0.2322418212890625, + "step": 114370 + }, + { + "epoch": 0.9889668053021591, + "grad_norm": 36.54191863352815, + "learning_rate": 3.051990102395481e-06, + "loss": 0.20747528076171876, + "step": 114375 + }, + { + "epoch": 0.9890100388237023, + "grad_norm": 12.827079079613126, + "learning_rate": 3.0517863997011447e-06, + "loss": 0.15323486328125, + "step": 114380 + }, + { + "epoch": 0.9890532723452456, + "grad_norm": 0.7678260606853788, + "learning_rate": 3.051582696767973e-06, + "loss": 0.0530853271484375, + "step": 114385 + }, + { + "epoch": 0.9890965058667889, + "grad_norm": 0.06540488911907633, + "learning_rate": 3.051378993596908e-06, + "loss": 0.42976837158203124, + "step": 114390 + }, + { + "epoch": 0.9891397393883321, + "grad_norm": 5.387718328159541, + "learning_rate": 3.0511752901888867e-06, + "loss": 0.03822021484375, + "step": 114395 + }, + { + "epoch": 0.9891829729098754, + "grad_norm": 1.9234799946626877, + "learning_rate": 3.050971586544849e-06, + "loss": 0.09473495483398438, + "step": 114400 + }, + { + "epoch": 0.9892262064314187, + "grad_norm": 2.0624425604712138, + "learning_rate": 3.0507678826657355e-06, + "loss": 0.48941497802734374, + "step": 114405 + }, + { + "epoch": 0.9892694399529619, + "grad_norm": 27.11662382668905, + "learning_rate": 3.0505641785524856e-06, + "loss": 0.20417327880859376, + "step": 114410 + }, + { + "epoch": 0.9893126734745052, + "grad_norm": 39.7370806200111, + "learning_rate": 3.0503604742060374e-06, + "loss": 0.18545989990234374, + "step": 114415 + }, + { + "epoch": 0.9893559069960485, + "grad_norm": 13.10926636659338, + "learning_rate": 3.0501567696273315e-06, + "loss": 0.22604217529296874, + "step": 114420 + }, + { + "epoch": 0.9893991405175917, + "grad_norm": 6.814662934913133, + "learning_rate": 3.0499530648173068e-06, + "loss": 0.3620635986328125, + "step": 114425 + }, + { + "epoch": 0.9894423740391349, + "grad_norm": 23.3568877327269, + "learning_rate": 3.049749359776903e-06, + "loss": 0.3512939453125, + "step": 114430 + }, + { + "epoch": 0.9894856075606783, + "grad_norm": 30.777614525395126, + "learning_rate": 3.0495456545070593e-06, + "loss": 0.2811727523803711, + "step": 114435 + }, + { + "epoch": 0.9895288410822215, + "grad_norm": 42.44521831699633, + "learning_rate": 3.049341949008716e-06, + "loss": 0.207366943359375, + "step": 114440 + }, + { + "epoch": 0.9895720746037647, + "grad_norm": 33.99062717701267, + "learning_rate": 3.0491382432828125e-06, + "loss": 0.18852996826171875, + "step": 114445 + }, + { + "epoch": 0.9896153081253081, + "grad_norm": 1.4210645205656545, + "learning_rate": 3.0489345373302865e-06, + "loss": 0.41611328125, + "step": 114450 + }, + { + "epoch": 0.9896585416468513, + "grad_norm": 5.092346888235715, + "learning_rate": 3.0487308311520787e-06, + "loss": 0.1650390625, + "step": 114455 + }, + { + "epoch": 0.9897017751683945, + "grad_norm": 1.9803641870133482, + "learning_rate": 3.0485271247491293e-06, + "loss": 0.11529617309570313, + "step": 114460 + }, + { + "epoch": 0.9897450086899379, + "grad_norm": 6.696116350763504, + "learning_rate": 3.0483234181223765e-06, + "loss": 0.2191802978515625, + "step": 114465 + }, + { + "epoch": 0.9897882422114811, + "grad_norm": 2.0685546536686714, + "learning_rate": 3.0481197112727612e-06, + "loss": 0.21596755981445312, + "step": 114470 + }, + { + "epoch": 0.9898314757330243, + "grad_norm": 3.444698372917628, + "learning_rate": 3.0479160042012217e-06, + "loss": 0.037586402893066403, + "step": 114475 + }, + { + "epoch": 0.9898747092545677, + "grad_norm": 0.74799610999262, + "learning_rate": 3.0477122969086976e-06, + "loss": 0.032065963745117186, + "step": 114480 + }, + { + "epoch": 0.9899179427761109, + "grad_norm": 21.098754352693792, + "learning_rate": 3.0475085893961287e-06, + "loss": 0.10758209228515625, + "step": 114485 + }, + { + "epoch": 0.9899611762976541, + "grad_norm": 10.556595347946635, + "learning_rate": 3.047304881664454e-06, + "loss": 0.27962646484375, + "step": 114490 + }, + { + "epoch": 0.9900044098191975, + "grad_norm": 1.9720324561283702, + "learning_rate": 3.0471011737146137e-06, + "loss": 0.13972625732421876, + "step": 114495 + }, + { + "epoch": 0.9900476433407407, + "grad_norm": 17.44108654242275, + "learning_rate": 3.0468974655475475e-06, + "loss": 0.2107574462890625, + "step": 114500 + }, + { + "epoch": 0.9900908768622839, + "grad_norm": 17.962394303085144, + "learning_rate": 3.046693757164194e-06, + "loss": 0.14551849365234376, + "step": 114505 + }, + { + "epoch": 0.9901341103838273, + "grad_norm": 28.303951052655655, + "learning_rate": 3.0464900485654926e-06, + "loss": 0.2161712646484375, + "step": 114510 + }, + { + "epoch": 0.9901773439053705, + "grad_norm": 2.228641235075412, + "learning_rate": 3.0462863397523834e-06, + "loss": 0.04885292053222656, + "step": 114515 + }, + { + "epoch": 0.9902205774269137, + "grad_norm": 40.929149196158114, + "learning_rate": 3.0460826307258063e-06, + "loss": 0.2744293212890625, + "step": 114520 + }, + { + "epoch": 0.990263810948457, + "grad_norm": 0.34261437481507245, + "learning_rate": 3.0458789214866996e-06, + "loss": 0.03377685546875, + "step": 114525 + }, + { + "epoch": 0.9903070444700003, + "grad_norm": 0.18847431708604595, + "learning_rate": 3.045675212036003e-06, + "loss": 0.09259452819824218, + "step": 114530 + }, + { + "epoch": 0.9903502779915435, + "grad_norm": 6.003262572889806, + "learning_rate": 3.0454715023746576e-06, + "loss": 0.13561248779296875, + "step": 114535 + }, + { + "epoch": 0.9903935115130867, + "grad_norm": 0.6341397992014134, + "learning_rate": 3.0452677925036e-06, + "loss": 0.06952056884765626, + "step": 114540 + }, + { + "epoch": 0.9904367450346301, + "grad_norm": 18.970225331964972, + "learning_rate": 3.045064082423772e-06, + "loss": 0.172119140625, + "step": 114545 + }, + { + "epoch": 0.9904799785561733, + "grad_norm": 4.671691061663101, + "learning_rate": 3.0448603721361128e-06, + "loss": 0.05798664093017578, + "step": 114550 + }, + { + "epoch": 0.9905232120777165, + "grad_norm": 5.285772870787945, + "learning_rate": 3.044656661641562e-06, + "loss": 0.0912200927734375, + "step": 114555 + }, + { + "epoch": 0.9905664455992599, + "grad_norm": 8.990569275639801, + "learning_rate": 3.044452950941057e-06, + "loss": 0.172967529296875, + "step": 114560 + }, + { + "epoch": 0.9906096791208031, + "grad_norm": 26.157914492540772, + "learning_rate": 3.044249240035541e-06, + "loss": 0.1857147216796875, + "step": 114565 + }, + { + "epoch": 0.9906529126423463, + "grad_norm": 8.87313171853262, + "learning_rate": 3.0440455289259493e-06, + "loss": 0.13659210205078126, + "step": 114570 + }, + { + "epoch": 0.9906961461638897, + "grad_norm": 8.755216544935474, + "learning_rate": 3.0438418176132243e-06, + "loss": 0.0559600830078125, + "step": 114575 + }, + { + "epoch": 0.9907393796854329, + "grad_norm": 46.654674315716434, + "learning_rate": 3.0436381060983053e-06, + "loss": 0.31856765747070315, + "step": 114580 + }, + { + "epoch": 0.9907826132069761, + "grad_norm": 0.8780623032839789, + "learning_rate": 3.043434394382131e-06, + "loss": 0.031183624267578126, + "step": 114585 + }, + { + "epoch": 0.9908258467285195, + "grad_norm": 6.969828297401578, + "learning_rate": 3.0432306824656408e-06, + "loss": 0.2376007080078125, + "step": 114590 + }, + { + "epoch": 0.9908690802500627, + "grad_norm": 17.422587865289852, + "learning_rate": 3.0430269703497743e-06, + "loss": 0.16463623046875, + "step": 114595 + }, + { + "epoch": 0.9909123137716059, + "grad_norm": 32.10788480178292, + "learning_rate": 3.0428232580354714e-06, + "loss": 0.12865142822265624, + "step": 114600 + }, + { + "epoch": 0.9909555472931492, + "grad_norm": 0.8263729370615851, + "learning_rate": 3.042619545523672e-06, + "loss": 0.07789802551269531, + "step": 114605 + }, + { + "epoch": 0.9909987808146925, + "grad_norm": 9.018566131817694, + "learning_rate": 3.0424158328153142e-06, + "loss": 0.05547637939453125, + "step": 114610 + }, + { + "epoch": 0.9910420143362357, + "grad_norm": 48.090975573124275, + "learning_rate": 3.042212119911339e-06, + "loss": 0.3650909423828125, + "step": 114615 + }, + { + "epoch": 0.991085247857779, + "grad_norm": 7.199527501167117, + "learning_rate": 3.042008406812685e-06, + "loss": 0.1751922607421875, + "step": 114620 + }, + { + "epoch": 0.9911284813793223, + "grad_norm": 18.381965937559183, + "learning_rate": 3.041804693520291e-06, + "loss": 0.087225341796875, + "step": 114625 + }, + { + "epoch": 0.9911717149008655, + "grad_norm": 0.2784896695308265, + "learning_rate": 3.0416009800350993e-06, + "loss": 0.08599853515625, + "step": 114630 + }, + { + "epoch": 0.9912149484224088, + "grad_norm": 5.853564547394445, + "learning_rate": 3.0413972663580466e-06, + "loss": 0.04118671417236328, + "step": 114635 + }, + { + "epoch": 0.9912581819439521, + "grad_norm": 2.9764137201805174, + "learning_rate": 3.041193552490073e-06, + "loss": 0.09620399475097656, + "step": 114640 + }, + { + "epoch": 0.9913014154654953, + "grad_norm": 10.68682859023121, + "learning_rate": 3.040989838432119e-06, + "loss": 0.2029500961303711, + "step": 114645 + }, + { + "epoch": 0.9913446489870386, + "grad_norm": 2.3129856535563085, + "learning_rate": 3.040786124185123e-06, + "loss": 0.10045547485351562, + "step": 114650 + }, + { + "epoch": 0.9913878825085819, + "grad_norm": 53.5449283596958, + "learning_rate": 3.040582409750025e-06, + "loss": 0.382568359375, + "step": 114655 + }, + { + "epoch": 0.9914311160301251, + "grad_norm": 11.463612472654967, + "learning_rate": 3.0403786951277652e-06, + "loss": 0.1993408203125, + "step": 114660 + }, + { + "epoch": 0.9914743495516684, + "grad_norm": 59.68742282677593, + "learning_rate": 3.0401749803192823e-06, + "loss": 0.42737808227539065, + "step": 114665 + }, + { + "epoch": 0.9915175830732117, + "grad_norm": 17.55208920950106, + "learning_rate": 3.039971265325515e-06, + "loss": 0.17974853515625, + "step": 114670 + }, + { + "epoch": 0.9915608165947549, + "grad_norm": 0.435740577653851, + "learning_rate": 3.039767550147405e-06, + "loss": 0.026369094848632812, + "step": 114675 + }, + { + "epoch": 0.9916040501162982, + "grad_norm": 3.605063408116942, + "learning_rate": 3.0395638347858894e-06, + "loss": 0.10190811157226562, + "step": 114680 + }, + { + "epoch": 0.9916472836378414, + "grad_norm": 0.2044602266642272, + "learning_rate": 3.03936011924191e-06, + "loss": 0.2857452392578125, + "step": 114685 + }, + { + "epoch": 0.9916905171593847, + "grad_norm": 24.346164616291418, + "learning_rate": 3.0391564035164038e-06, + "loss": 0.29339599609375, + "step": 114690 + }, + { + "epoch": 0.991733750680928, + "grad_norm": 0.042948743963064716, + "learning_rate": 3.038952687610313e-06, + "loss": 0.03937454223632812, + "step": 114695 + }, + { + "epoch": 0.9917769842024712, + "grad_norm": 32.77382341267512, + "learning_rate": 3.038748971524576e-06, + "loss": 0.30222091674804685, + "step": 114700 + }, + { + "epoch": 0.9918202177240145, + "grad_norm": 15.11280166284759, + "learning_rate": 3.038545255260131e-06, + "loss": 0.4269418716430664, + "step": 114705 + }, + { + "epoch": 0.9918634512455577, + "grad_norm": 5.840367898679953, + "learning_rate": 3.038341538817919e-06, + "loss": 0.05054054260253906, + "step": 114710 + }, + { + "epoch": 0.991906684767101, + "grad_norm": 71.49738337914337, + "learning_rate": 3.03813782219888e-06, + "loss": 0.5461700439453125, + "step": 114715 + }, + { + "epoch": 0.9919499182886443, + "grad_norm": 0.08176619950669221, + "learning_rate": 3.0379341054039514e-06, + "loss": 0.053089332580566403, + "step": 114720 + }, + { + "epoch": 0.9919931518101875, + "grad_norm": 2.3487967986701395, + "learning_rate": 3.037730388434075e-06, + "loss": 0.13270034790039062, + "step": 114725 + }, + { + "epoch": 0.9920363853317308, + "grad_norm": 12.115693682718035, + "learning_rate": 3.0375266712901897e-06, + "loss": 0.15117912292480468, + "step": 114730 + }, + { + "epoch": 0.9920796188532741, + "grad_norm": 2.3999237637370614, + "learning_rate": 3.037322953973234e-06, + "loss": 0.3774444580078125, + "step": 114735 + }, + { + "epoch": 0.9921228523748173, + "grad_norm": 24.610566270901277, + "learning_rate": 3.0371192364841484e-06, + "loss": 0.115216064453125, + "step": 114740 + }, + { + "epoch": 0.9921660858963606, + "grad_norm": 0.28460478357189684, + "learning_rate": 3.036915518823872e-06, + "loss": 0.020735931396484376, + "step": 114745 + }, + { + "epoch": 0.9922093194179039, + "grad_norm": 6.393872473052746, + "learning_rate": 3.0367118009933444e-06, + "loss": 0.11069564819335938, + "step": 114750 + }, + { + "epoch": 0.9922525529394471, + "grad_norm": 1.0520884055604085, + "learning_rate": 3.036508082993506e-06, + "loss": 0.08337020874023438, + "step": 114755 + }, + { + "epoch": 0.9922957864609904, + "grad_norm": 4.326138467026946, + "learning_rate": 3.0363043648252953e-06, + "loss": 0.14603271484375, + "step": 114760 + }, + { + "epoch": 0.9923390199825337, + "grad_norm": 17.25558193159672, + "learning_rate": 3.036100646489651e-06, + "loss": 0.18170166015625, + "step": 114765 + }, + { + "epoch": 0.9923822535040769, + "grad_norm": 2.413945239356397, + "learning_rate": 3.035896927987514e-06, + "loss": 0.03250608444213867, + "step": 114770 + }, + { + "epoch": 0.9924254870256202, + "grad_norm": 12.442505643529715, + "learning_rate": 3.0356932093198246e-06, + "loss": 0.1647897720336914, + "step": 114775 + }, + { + "epoch": 0.9924687205471634, + "grad_norm": 28.078027356902066, + "learning_rate": 3.035489490487521e-06, + "loss": 0.1170989990234375, + "step": 114780 + }, + { + "epoch": 0.9925119540687067, + "grad_norm": 3.5367603348040335, + "learning_rate": 3.0352857714915427e-06, + "loss": 0.0694671630859375, + "step": 114785 + }, + { + "epoch": 0.99255518759025, + "grad_norm": 17.91033949647662, + "learning_rate": 3.03508205233283e-06, + "loss": 0.20400543212890626, + "step": 114790 + }, + { + "epoch": 0.9925984211117932, + "grad_norm": 12.841880293935192, + "learning_rate": 3.034878333012321e-06, + "loss": 0.32299118041992186, + "step": 114795 + }, + { + "epoch": 0.9926416546333365, + "grad_norm": 2.1473553362462, + "learning_rate": 3.0346746135309567e-06, + "loss": 0.12221107482910157, + "step": 114800 + }, + { + "epoch": 0.9926848881548798, + "grad_norm": 0.7923614697541463, + "learning_rate": 3.0344708938896765e-06, + "loss": 0.0651123046875, + "step": 114805 + }, + { + "epoch": 0.992728121676423, + "grad_norm": 0.5268505318122232, + "learning_rate": 3.034267174089419e-06, + "loss": 0.100128173828125, + "step": 114810 + }, + { + "epoch": 0.9927713551979663, + "grad_norm": 0.37951431187441154, + "learning_rate": 3.0340634541311246e-06, + "loss": 0.031547164916992186, + "step": 114815 + }, + { + "epoch": 0.9928145887195096, + "grad_norm": 3.9315198777722973, + "learning_rate": 3.033859734015732e-06, + "loss": 0.08168792724609375, + "step": 114820 + }, + { + "epoch": 0.9928578222410528, + "grad_norm": 28.632893131706805, + "learning_rate": 3.033656013744182e-06, + "loss": 0.228656005859375, + "step": 114825 + }, + { + "epoch": 0.9929010557625961, + "grad_norm": 19.773734874021173, + "learning_rate": 3.033452293317413e-06, + "loss": 0.16737022399902343, + "step": 114830 + }, + { + "epoch": 0.9929442892841394, + "grad_norm": 8.330404820411768, + "learning_rate": 3.0332485727363656e-06, + "loss": 0.20922393798828126, + "step": 114835 + }, + { + "epoch": 0.9929875228056826, + "grad_norm": 8.884646633612363, + "learning_rate": 3.0330448520019788e-06, + "loss": 0.08915634155273437, + "step": 114840 + }, + { + "epoch": 0.9930307563272259, + "grad_norm": 54.122605568679454, + "learning_rate": 3.0328411311151916e-06, + "loss": 0.21183700561523439, + "step": 114845 + }, + { + "epoch": 0.9930739898487692, + "grad_norm": 0.6837073465996594, + "learning_rate": 3.0326374100769436e-06, + "loss": 0.02859954833984375, + "step": 114850 + }, + { + "epoch": 0.9931172233703124, + "grad_norm": 9.032919886593515, + "learning_rate": 3.0324336888881757e-06, + "loss": 0.214990234375, + "step": 114855 + }, + { + "epoch": 0.9931604568918556, + "grad_norm": 3.927183207280165, + "learning_rate": 3.0322299675498264e-06, + "loss": 0.05830116271972656, + "step": 114860 + }, + { + "epoch": 0.993203690413399, + "grad_norm": 1.680221044439511, + "learning_rate": 3.0320262460628342e-06, + "loss": 0.14867668151855468, + "step": 114865 + }, + { + "epoch": 0.9932469239349422, + "grad_norm": 1.1591619642927782, + "learning_rate": 3.0318225244281407e-06, + "loss": 0.218304443359375, + "step": 114870 + }, + { + "epoch": 0.9932901574564854, + "grad_norm": 33.39814101671175, + "learning_rate": 3.0316188026466844e-06, + "loss": 0.45239715576171874, + "step": 114875 + }, + { + "epoch": 0.9933333909780288, + "grad_norm": 0.9111673368892224, + "learning_rate": 3.0314150807194037e-06, + "loss": 0.3207826614379883, + "step": 114880 + }, + { + "epoch": 0.993376624499572, + "grad_norm": 1.5097828877195483, + "learning_rate": 3.0312113586472414e-06, + "loss": 0.19882659912109374, + "step": 114885 + }, + { + "epoch": 0.9934198580211152, + "grad_norm": 0.7751333494573255, + "learning_rate": 3.0310076364311347e-06, + "loss": 0.07925796508789062, + "step": 114890 + }, + { + "epoch": 0.9934630915426585, + "grad_norm": 1.4780662221276042, + "learning_rate": 3.0308039140720226e-06, + "loss": 0.261834716796875, + "step": 114895 + }, + { + "epoch": 0.9935063250642018, + "grad_norm": 3.5935155580797233, + "learning_rate": 3.0306001915708466e-06, + "loss": 0.06441154479980468, + "step": 114900 + }, + { + "epoch": 0.993549558585745, + "grad_norm": 0.051323130070438615, + "learning_rate": 3.0303964689285443e-06, + "loss": 0.07768402099609376, + "step": 114905 + }, + { + "epoch": 0.9935927921072883, + "grad_norm": 2.8598949251235797, + "learning_rate": 3.0301927461460564e-06, + "loss": 0.04612555503845215, + "step": 114910 + }, + { + "epoch": 0.9936360256288316, + "grad_norm": 3.6902876350566687, + "learning_rate": 3.029989023224322e-06, + "loss": 0.2477020263671875, + "step": 114915 + }, + { + "epoch": 0.9936792591503748, + "grad_norm": 10.26599594310589, + "learning_rate": 3.0297853001642823e-06, + "loss": 0.296875, + "step": 114920 + }, + { + "epoch": 0.9937224926719181, + "grad_norm": 15.42265659595996, + "learning_rate": 3.029581576966874e-06, + "loss": 0.5378128051757812, + "step": 114925 + }, + { + "epoch": 0.9937657261934614, + "grad_norm": 18.44173107726582, + "learning_rate": 3.0293778536330387e-06, + "loss": 0.12224502563476562, + "step": 114930 + }, + { + "epoch": 0.9938089597150046, + "grad_norm": 4.309233608749839, + "learning_rate": 3.029174130163715e-06, + "loss": 0.0904541015625, + "step": 114935 + }, + { + "epoch": 0.9938521932365479, + "grad_norm": 0.5501940682197232, + "learning_rate": 3.0289704065598423e-06, + "loss": 0.26346893310546876, + "step": 114940 + }, + { + "epoch": 0.9938954267580912, + "grad_norm": 7.060493004133441, + "learning_rate": 3.0287666828223616e-06, + "loss": 0.1617645263671875, + "step": 114945 + }, + { + "epoch": 0.9939386602796344, + "grad_norm": 3.7600876377655634, + "learning_rate": 3.0285629589522118e-06, + "loss": 0.27260780334472656, + "step": 114950 + }, + { + "epoch": 0.9939818938011776, + "grad_norm": 1.629195627477132, + "learning_rate": 3.028359234950332e-06, + "loss": 0.015891265869140626, + "step": 114955 + }, + { + "epoch": 0.994025127322721, + "grad_norm": 0.08062396944489517, + "learning_rate": 3.0281555108176606e-06, + "loss": 0.12454948425292969, + "step": 114960 + }, + { + "epoch": 0.9940683608442642, + "grad_norm": 2.3025138529415017, + "learning_rate": 3.0279517865551393e-06, + "loss": 0.21549072265625, + "step": 114965 + }, + { + "epoch": 0.9941115943658074, + "grad_norm": 0.89296084826385, + "learning_rate": 3.0277480621637072e-06, + "loss": 0.05999755859375, + "step": 114970 + }, + { + "epoch": 0.9941548278873508, + "grad_norm": 2.2616825384615495, + "learning_rate": 3.0275443376443024e-06, + "loss": 0.299749755859375, + "step": 114975 + }, + { + "epoch": 0.994198061408894, + "grad_norm": 3.1411118652394503, + "learning_rate": 3.0273406129978672e-06, + "loss": 0.03578681945800781, + "step": 114980 + }, + { + "epoch": 0.9942412949304372, + "grad_norm": 17.73468222763631, + "learning_rate": 3.027136888225339e-06, + "loss": 0.13136844635009765, + "step": 114985 + }, + { + "epoch": 0.9942845284519806, + "grad_norm": 9.265366321263675, + "learning_rate": 3.0269331633276564e-06, + "loss": 0.05898361206054688, + "step": 114990 + }, + { + "epoch": 0.9943277619735238, + "grad_norm": 1.7624430998889038, + "learning_rate": 3.0267294383057616e-06, + "loss": 0.12964019775390626, + "step": 114995 + }, + { + "epoch": 0.994370995495067, + "grad_norm": 0.6805852729917452, + "learning_rate": 3.0265257131605934e-06, + "loss": 0.276007080078125, + "step": 115000 + }, + { + "epoch": 0.9944142290166104, + "grad_norm": 35.95063643153007, + "learning_rate": 3.0263219878930895e-06, + "loss": 0.7885177612304688, + "step": 115005 + }, + { + "epoch": 0.9944574625381536, + "grad_norm": 3.402580090204198, + "learning_rate": 3.0261182625041922e-06, + "loss": 0.20973358154296876, + "step": 115010 + }, + { + "epoch": 0.9945006960596968, + "grad_norm": 66.34422025040321, + "learning_rate": 3.0259145369948397e-06, + "loss": 0.4408561706542969, + "step": 115015 + }, + { + "epoch": 0.9945439295812402, + "grad_norm": 2.9439919394146665, + "learning_rate": 3.0257108113659704e-06, + "loss": 0.042205810546875, + "step": 115020 + }, + { + "epoch": 0.9945871631027834, + "grad_norm": 1.0915886463553375, + "learning_rate": 3.0255070856185266e-06, + "loss": 0.039861297607421874, + "step": 115025 + }, + { + "epoch": 0.9946303966243266, + "grad_norm": 0.21349200273131605, + "learning_rate": 3.025303359753446e-06, + "loss": 0.18830604553222657, + "step": 115030 + }, + { + "epoch": 0.9946736301458698, + "grad_norm": 2.688191102893541, + "learning_rate": 3.025099633771669e-06, + "loss": 0.0511688232421875, + "step": 115035 + }, + { + "epoch": 0.9947168636674132, + "grad_norm": 0.2770693380307538, + "learning_rate": 3.0248959076741337e-06, + "loss": 0.21676177978515626, + "step": 115040 + }, + { + "epoch": 0.9947600971889564, + "grad_norm": 0.5805662154197252, + "learning_rate": 3.0246921814617807e-06, + "loss": 0.055875396728515624, + "step": 115045 + }, + { + "epoch": 0.9948033307104996, + "grad_norm": 3.5727195721690244, + "learning_rate": 3.0244884551355504e-06, + "loss": 0.04326171875, + "step": 115050 + }, + { + "epoch": 0.994846564232043, + "grad_norm": 10.020303587681274, + "learning_rate": 3.0242847286963806e-06, + "loss": 0.1014984130859375, + "step": 115055 + }, + { + "epoch": 0.9948897977535862, + "grad_norm": 9.431497889690734, + "learning_rate": 3.0240810021452123e-06, + "loss": 0.255517578125, + "step": 115060 + }, + { + "epoch": 0.9949330312751294, + "grad_norm": 10.986449798379853, + "learning_rate": 3.0238772754829848e-06, + "loss": 0.16877288818359376, + "step": 115065 + }, + { + "epoch": 0.9949762647966728, + "grad_norm": 15.388365532653756, + "learning_rate": 3.0236735487106366e-06, + "loss": 0.046649169921875, + "step": 115070 + }, + { + "epoch": 0.995019498318216, + "grad_norm": 0.9002339845018882, + "learning_rate": 3.0234698218291085e-06, + "loss": 0.16127777099609375, + "step": 115075 + }, + { + "epoch": 0.9950627318397592, + "grad_norm": 1.3543960763434977, + "learning_rate": 3.0232660948393397e-06, + "loss": 0.16122970581054688, + "step": 115080 + }, + { + "epoch": 0.9951059653613026, + "grad_norm": 11.227162896404126, + "learning_rate": 3.0230623677422697e-06, + "loss": 0.0699615478515625, + "step": 115085 + }, + { + "epoch": 0.9951491988828458, + "grad_norm": 0.013157449353152016, + "learning_rate": 3.0228586405388385e-06, + "loss": 0.12421150207519531, + "step": 115090 + }, + { + "epoch": 0.995192432404389, + "grad_norm": 7.546160839144623, + "learning_rate": 3.0226549132299852e-06, + "loss": 0.05322418212890625, + "step": 115095 + }, + { + "epoch": 0.9952356659259324, + "grad_norm": 5.054171643859436, + "learning_rate": 3.0224511858166487e-06, + "loss": 0.055298995971679685, + "step": 115100 + }, + { + "epoch": 0.9952788994474756, + "grad_norm": 0.38605575399503184, + "learning_rate": 3.0222474582997697e-06, + "loss": 0.05624847412109375, + "step": 115105 + }, + { + "epoch": 0.9953221329690188, + "grad_norm": 5.3769445479574385, + "learning_rate": 3.0220437306802874e-06, + "loss": 0.06226024627685547, + "step": 115110 + }, + { + "epoch": 0.9953653664905622, + "grad_norm": 1.901568656131525, + "learning_rate": 3.021840002959141e-06, + "loss": 0.27166595458984377, + "step": 115115 + }, + { + "epoch": 0.9954086000121054, + "grad_norm": 31.903644328600176, + "learning_rate": 3.021636275137271e-06, + "loss": 0.1867136001586914, + "step": 115120 + }, + { + "epoch": 0.9954518335336486, + "grad_norm": 1.3266924820979356, + "learning_rate": 3.021432547215617e-06, + "loss": 0.12850570678710938, + "step": 115125 + }, + { + "epoch": 0.9954950670551919, + "grad_norm": 4.751758983715946, + "learning_rate": 3.0212288191951166e-06, + "loss": 0.12078475952148438, + "step": 115130 + }, + { + "epoch": 0.9955383005767352, + "grad_norm": 2.9829374075700614, + "learning_rate": 3.0210250910767116e-06, + "loss": 0.5406982421875, + "step": 115135 + }, + { + "epoch": 0.9955815340982784, + "grad_norm": 4.535782265654195, + "learning_rate": 3.0208213628613403e-06, + "loss": 0.13736419677734374, + "step": 115140 + }, + { + "epoch": 0.9956247676198217, + "grad_norm": 19.456202402985323, + "learning_rate": 3.0206176345499435e-06, + "loss": 0.15710678100585937, + "step": 115145 + }, + { + "epoch": 0.995668001141365, + "grad_norm": 0.8206117157481664, + "learning_rate": 3.020413906143459e-06, + "loss": 0.27533836364746095, + "step": 115150 + }, + { + "epoch": 0.9957112346629082, + "grad_norm": 2.429852990149431, + "learning_rate": 3.0202101776428284e-06, + "loss": 0.01655120849609375, + "step": 115155 + }, + { + "epoch": 0.9957544681844515, + "grad_norm": 41.12053000181403, + "learning_rate": 3.0200064490489893e-06, + "loss": 0.16640090942382812, + "step": 115160 + }, + { + "epoch": 0.9957977017059948, + "grad_norm": 2.707010442842109, + "learning_rate": 3.019802720362882e-06, + "loss": 0.15274372100830078, + "step": 115165 + }, + { + "epoch": 0.995840935227538, + "grad_norm": 9.134010426435477, + "learning_rate": 3.0195989915854473e-06, + "loss": 0.034967994689941405, + "step": 115170 + }, + { + "epoch": 0.9958841687490813, + "grad_norm": 7.669359634068302, + "learning_rate": 3.0193952627176235e-06, + "loss": 0.0812042236328125, + "step": 115175 + }, + { + "epoch": 0.9959274022706246, + "grad_norm": 2.943688314819258, + "learning_rate": 3.01919153376035e-06, + "loss": 0.055828857421875, + "step": 115180 + }, + { + "epoch": 0.9959706357921678, + "grad_norm": 1.2581361994874065, + "learning_rate": 3.0189878047145676e-06, + "loss": 0.022061920166015624, + "step": 115185 + }, + { + "epoch": 0.996013869313711, + "grad_norm": 4.5567595731976525, + "learning_rate": 3.0187840755812143e-06, + "loss": 0.07704544067382812, + "step": 115190 + }, + { + "epoch": 0.9960571028352544, + "grad_norm": 49.31638260626821, + "learning_rate": 3.018580346361231e-06, + "loss": 0.29209136962890625, + "step": 115195 + }, + { + "epoch": 0.9961003363567976, + "grad_norm": 0.9445847181681825, + "learning_rate": 3.018376617055557e-06, + "loss": 0.28375701904296874, + "step": 115200 + }, + { + "epoch": 0.9961435698783409, + "grad_norm": 7.531825303637826, + "learning_rate": 3.0181728876651315e-06, + "loss": 0.090289306640625, + "step": 115205 + }, + { + "epoch": 0.9961868033998841, + "grad_norm": 3.7701370526739515, + "learning_rate": 3.017969158190894e-06, + "loss": 0.2648448944091797, + "step": 115210 + }, + { + "epoch": 0.9962300369214274, + "grad_norm": 2.481894603131225, + "learning_rate": 3.0177654286337844e-06, + "loss": 0.18519744873046876, + "step": 115215 + }, + { + "epoch": 0.9962732704429706, + "grad_norm": 0.8894765294406981, + "learning_rate": 3.0175616989947423e-06, + "loss": 0.04310150146484375, + "step": 115220 + }, + { + "epoch": 0.9963165039645139, + "grad_norm": 1.0382701943684023, + "learning_rate": 3.017357969274707e-06, + "loss": 0.1104949951171875, + "step": 115225 + }, + { + "epoch": 0.9963597374860572, + "grad_norm": 27.76236504754322, + "learning_rate": 3.0171542394746186e-06, + "loss": 0.10373134613037109, + "step": 115230 + }, + { + "epoch": 0.9964029710076004, + "grad_norm": 6.92807964243805, + "learning_rate": 3.0169505095954163e-06, + "loss": 0.031571578979492185, + "step": 115235 + }, + { + "epoch": 0.9964462045291437, + "grad_norm": 2.028801173942645, + "learning_rate": 3.016746779638039e-06, + "loss": 0.019205474853515626, + "step": 115240 + }, + { + "epoch": 0.996489438050687, + "grad_norm": 7.723206978695485, + "learning_rate": 3.0165430496034276e-06, + "loss": 0.10289459228515625, + "step": 115245 + }, + { + "epoch": 0.9965326715722302, + "grad_norm": 0.3967979319556954, + "learning_rate": 3.0163393194925215e-06, + "loss": 0.12915477752685547, + "step": 115250 + }, + { + "epoch": 0.9965759050937735, + "grad_norm": 29.333440905089425, + "learning_rate": 3.0161355893062596e-06, + "loss": 0.15834884643554686, + "step": 115255 + }, + { + "epoch": 0.9966191386153168, + "grad_norm": 0.926412861163609, + "learning_rate": 3.0159318590455812e-06, + "loss": 0.07050933837890624, + "step": 115260 + }, + { + "epoch": 0.99666237213686, + "grad_norm": 9.119104026213295, + "learning_rate": 3.015728128711428e-06, + "loss": 0.06990165710449218, + "step": 115265 + }, + { + "epoch": 0.9967056056584033, + "grad_norm": 38.52133272407317, + "learning_rate": 3.015524398304736e-06, + "loss": 0.5751903533935547, + "step": 115270 + }, + { + "epoch": 0.9967488391799466, + "grad_norm": 10.437195205731483, + "learning_rate": 3.0153206678264478e-06, + "loss": 0.17250351905822753, + "step": 115275 + }, + { + "epoch": 0.9967920727014898, + "grad_norm": 6.315089328877276, + "learning_rate": 3.015116937277503e-06, + "loss": 0.12912874221801757, + "step": 115280 + }, + { + "epoch": 0.9968353062230331, + "grad_norm": 1.5242596079862887, + "learning_rate": 3.0149132066588396e-06, + "loss": 0.15504875183105468, + "step": 115285 + }, + { + "epoch": 0.9968785397445764, + "grad_norm": 0.13200621239156987, + "learning_rate": 3.014709475971397e-06, + "loss": 0.027336883544921874, + "step": 115290 + }, + { + "epoch": 0.9969217732661196, + "grad_norm": 25.187210403940174, + "learning_rate": 3.0145057452161167e-06, + "loss": 0.131317138671875, + "step": 115295 + }, + { + "epoch": 0.9969650067876629, + "grad_norm": 38.22743080956001, + "learning_rate": 3.014302014393936e-06, + "loss": 0.2380645751953125, + "step": 115300 + }, + { + "epoch": 0.9970082403092061, + "grad_norm": 16.30414751841742, + "learning_rate": 3.014098283505797e-06, + "loss": 0.3103424072265625, + "step": 115305 + }, + { + "epoch": 0.9970514738307494, + "grad_norm": 8.930743103486623, + "learning_rate": 3.0138945525526375e-06, + "loss": 0.251434326171875, + "step": 115310 + }, + { + "epoch": 0.9970947073522927, + "grad_norm": 17.19887681118665, + "learning_rate": 3.0136908215353973e-06, + "loss": 0.0798309326171875, + "step": 115315 + }, + { + "epoch": 0.9971379408738359, + "grad_norm": 16.29094186319908, + "learning_rate": 3.0134870904550168e-06, + "loss": 0.15538101196289061, + "step": 115320 + }, + { + "epoch": 0.9971811743953792, + "grad_norm": 13.086941253447018, + "learning_rate": 3.013283359312434e-06, + "loss": 0.09097061157226563, + "step": 115325 + }, + { + "epoch": 0.9972244079169225, + "grad_norm": 10.53459148682645, + "learning_rate": 3.0130796281085894e-06, + "loss": 0.11506729125976563, + "step": 115330 + }, + { + "epoch": 0.9972676414384657, + "grad_norm": 4.160974779344446, + "learning_rate": 3.012875896844424e-06, + "loss": 0.23243846893310546, + "step": 115335 + }, + { + "epoch": 0.997310874960009, + "grad_norm": 0.24056127778012487, + "learning_rate": 3.012672165520875e-06, + "loss": 0.07465667724609375, + "step": 115340 + }, + { + "epoch": 0.9973541084815523, + "grad_norm": 16.160848415709786, + "learning_rate": 3.012468434138884e-06, + "loss": 0.5549155235290527, + "step": 115345 + }, + { + "epoch": 0.9973973420030955, + "grad_norm": 0.43389251741674223, + "learning_rate": 3.0122647026993896e-06, + "loss": 0.1444427490234375, + "step": 115350 + }, + { + "epoch": 0.9974405755246388, + "grad_norm": 0.25729238860977277, + "learning_rate": 3.012060971203331e-06, + "loss": 0.08394126892089844, + "step": 115355 + }, + { + "epoch": 0.9974838090461821, + "grad_norm": 2.19415088836722, + "learning_rate": 3.0118572396516478e-06, + "loss": 0.2294321060180664, + "step": 115360 + }, + { + "epoch": 0.9975270425677253, + "grad_norm": 1.1829681665410832, + "learning_rate": 3.0116535080452813e-06, + "loss": 0.0911376953125, + "step": 115365 + }, + { + "epoch": 0.9975702760892686, + "grad_norm": 11.242705452905572, + "learning_rate": 3.0114497763851686e-06, + "loss": 0.12275314331054688, + "step": 115370 + }, + { + "epoch": 0.9976135096108119, + "grad_norm": 17.258774313026432, + "learning_rate": 3.0112460446722517e-06, + "loss": 0.43123817443847656, + "step": 115375 + }, + { + "epoch": 0.9976567431323551, + "grad_norm": 9.812822076022334, + "learning_rate": 3.0110423129074686e-06, + "loss": 0.172894287109375, + "step": 115380 + }, + { + "epoch": 0.9976999766538983, + "grad_norm": 13.806170626418496, + "learning_rate": 3.0108385810917588e-06, + "loss": 0.06697845458984375, + "step": 115385 + }, + { + "epoch": 0.9977432101754417, + "grad_norm": 4.082671801498656, + "learning_rate": 3.0106348492260626e-06, + "loss": 0.0717987060546875, + "step": 115390 + }, + { + "epoch": 0.9977864436969849, + "grad_norm": 13.643829730165121, + "learning_rate": 3.01043111731132e-06, + "loss": 0.1723102569580078, + "step": 115395 + }, + { + "epoch": 0.9978296772185281, + "grad_norm": 24.562995908471798, + "learning_rate": 3.01022738534847e-06, + "loss": 0.2119384765625, + "step": 115400 + }, + { + "epoch": 0.9978729107400715, + "grad_norm": 8.003656211272007, + "learning_rate": 3.0100236533384516e-06, + "loss": 0.27822170257568357, + "step": 115405 + }, + { + "epoch": 0.9979161442616147, + "grad_norm": 9.111608227024155, + "learning_rate": 3.0098199212822057e-06, + "loss": 0.09765090942382812, + "step": 115410 + }, + { + "epoch": 0.9979593777831579, + "grad_norm": 5.886479456108226, + "learning_rate": 3.0096161891806706e-06, + "loss": 0.0339752197265625, + "step": 115415 + }, + { + "epoch": 0.9980026113047012, + "grad_norm": 6.549914416463642, + "learning_rate": 3.009412457034786e-06, + "loss": 0.266973876953125, + "step": 115420 + }, + { + "epoch": 0.9980458448262445, + "grad_norm": 1.2592215520945373, + "learning_rate": 3.0092087248454933e-06, + "loss": 0.14356727600097657, + "step": 115425 + }, + { + "epoch": 0.9980890783477877, + "grad_norm": 8.96826870186894, + "learning_rate": 3.0090049926137302e-06, + "loss": 0.18947601318359375, + "step": 115430 + }, + { + "epoch": 0.998132311869331, + "grad_norm": 0.4269550302590774, + "learning_rate": 3.0088012603404363e-06, + "loss": 0.06720428466796875, + "step": 115435 + }, + { + "epoch": 0.9981755453908743, + "grad_norm": 0.504417156595841, + "learning_rate": 3.0085975280265517e-06, + "loss": 0.05635080337524414, + "step": 115440 + }, + { + "epoch": 0.9982187789124175, + "grad_norm": 3.6150698793502842, + "learning_rate": 3.008393795673017e-06, + "loss": 0.24784774780273439, + "step": 115445 + }, + { + "epoch": 0.9982620124339608, + "grad_norm": 2.600614121914898, + "learning_rate": 3.0081900632807706e-06, + "loss": 0.0707977294921875, + "step": 115450 + }, + { + "epoch": 0.9983052459555041, + "grad_norm": 14.548678233730906, + "learning_rate": 3.007986330850752e-06, + "loss": 0.13570709228515626, + "step": 115455 + }, + { + "epoch": 0.9983484794770473, + "grad_norm": 0.5794810202881597, + "learning_rate": 3.0077825983839016e-06, + "loss": 0.19480514526367188, + "step": 115460 + }, + { + "epoch": 0.9983917129985906, + "grad_norm": 0.698911511095094, + "learning_rate": 3.0075788658811583e-06, + "loss": 0.2568817138671875, + "step": 115465 + }, + { + "epoch": 0.9984349465201339, + "grad_norm": 1.104121596649478, + "learning_rate": 3.0073751333434614e-06, + "loss": 0.01998291015625, + "step": 115470 + }, + { + "epoch": 0.9984781800416771, + "grad_norm": 17.511617875656952, + "learning_rate": 3.007171400771752e-06, + "loss": 0.21571044921875, + "step": 115475 + }, + { + "epoch": 0.9985214135632203, + "grad_norm": 0.2765543659241302, + "learning_rate": 3.0069676681669686e-06, + "loss": 0.05783233642578125, + "step": 115480 + }, + { + "epoch": 0.9985646470847637, + "grad_norm": 23.691223671163463, + "learning_rate": 3.0067639355300502e-06, + "loss": 0.08287105560302735, + "step": 115485 + }, + { + "epoch": 0.9986078806063069, + "grad_norm": 1.8933403605103372, + "learning_rate": 3.0065602028619378e-06, + "loss": 0.026493072509765625, + "step": 115490 + }, + { + "epoch": 0.9986511141278501, + "grad_norm": 16.204622391528687, + "learning_rate": 3.00635647016357e-06, + "loss": 0.25109024047851564, + "step": 115495 + }, + { + "epoch": 0.9986943476493935, + "grad_norm": 14.717026795140523, + "learning_rate": 3.006152737435886e-06, + "loss": 0.15135345458984376, + "step": 115500 + }, + { + "epoch": 0.9987375811709367, + "grad_norm": 6.10623928945674, + "learning_rate": 3.0059490046798276e-06, + "loss": 0.029439544677734374, + "step": 115505 + }, + { + "epoch": 0.9987808146924799, + "grad_norm": 22.48505698947318, + "learning_rate": 3.0057452718963327e-06, + "loss": 0.13926239013671876, + "step": 115510 + }, + { + "epoch": 0.9988240482140233, + "grad_norm": 37.928910159259914, + "learning_rate": 3.00554153908634e-06, + "loss": 0.38678817749023436, + "step": 115515 + }, + { + "epoch": 0.9988672817355665, + "grad_norm": 2.728396257319506, + "learning_rate": 3.0053378062507916e-06, + "loss": 0.23865585327148436, + "step": 115520 + }, + { + "epoch": 0.9989105152571097, + "grad_norm": 18.49224917385552, + "learning_rate": 3.0051340733906246e-06, + "loss": 0.2690155029296875, + "step": 115525 + }, + { + "epoch": 0.9989537487786531, + "grad_norm": 0.0434171742627677, + "learning_rate": 3.004930340506779e-06, + "loss": 0.027436065673828124, + "step": 115530 + }, + { + "epoch": 0.9989969823001963, + "grad_norm": 13.126459912939348, + "learning_rate": 3.004726607600197e-06, + "loss": 0.353009033203125, + "step": 115535 + }, + { + "epoch": 0.9990402158217395, + "grad_norm": 4.0437709498213605, + "learning_rate": 3.0045228746718157e-06, + "loss": 0.1353546142578125, + "step": 115540 + }, + { + "epoch": 0.9990834493432829, + "grad_norm": 4.300028269021025, + "learning_rate": 3.0043191417225746e-06, + "loss": 0.13692855834960938, + "step": 115545 + }, + { + "epoch": 0.9991266828648261, + "grad_norm": 18.465061453438135, + "learning_rate": 3.004115408753415e-06, + "loss": 0.15235729217529298, + "step": 115550 + }, + { + "epoch": 0.9991699163863693, + "grad_norm": 29.624194442995368, + "learning_rate": 3.003911675765275e-06, + "loss": 0.16390609741210938, + "step": 115555 + }, + { + "epoch": 0.9992131499079125, + "grad_norm": 1.1229748842698024, + "learning_rate": 3.0037079427590948e-06, + "loss": 0.1584381103515625, + "step": 115560 + }, + { + "epoch": 0.9992563834294559, + "grad_norm": 1.6099473037110925, + "learning_rate": 3.0035042097358138e-06, + "loss": 0.16191864013671875, + "step": 115565 + }, + { + "epoch": 0.9992996169509991, + "grad_norm": 1.1297598954366623, + "learning_rate": 3.0033004766963713e-06, + "loss": 0.17295875549316406, + "step": 115570 + }, + { + "epoch": 0.9993428504725423, + "grad_norm": 0.28987300633850566, + "learning_rate": 3.003096743641708e-06, + "loss": 0.12346038818359376, + "step": 115575 + }, + { + "epoch": 0.9993860839940857, + "grad_norm": 19.271483695340113, + "learning_rate": 3.002893010572762e-06, + "loss": 0.23826560974121094, + "step": 115580 + }, + { + "epoch": 0.9994293175156289, + "grad_norm": 0.49095172749942345, + "learning_rate": 3.002689277490474e-06, + "loss": 0.06209144592285156, + "step": 115585 + }, + { + "epoch": 0.9994725510371721, + "grad_norm": 34.169843383197176, + "learning_rate": 3.0024855443957843e-06, + "loss": 0.30727996826171877, + "step": 115590 + }, + { + "epoch": 0.9995157845587155, + "grad_norm": 1.206738321311962, + "learning_rate": 3.00228181128963e-06, + "loss": 0.016227149963378908, + "step": 115595 + }, + { + "epoch": 0.9995590180802587, + "grad_norm": 9.05434553301696, + "learning_rate": 3.002078078172953e-06, + "loss": 0.21625747680664062, + "step": 115600 + }, + { + "epoch": 0.9996022516018019, + "grad_norm": 1.22118213079689, + "learning_rate": 3.0018743450466923e-06, + "loss": 0.018622970581054686, + "step": 115605 + }, + { + "epoch": 0.9996454851233453, + "grad_norm": 11.127369628515783, + "learning_rate": 3.0016706119117863e-06, + "loss": 0.03258209228515625, + "step": 115610 + }, + { + "epoch": 0.9996887186448885, + "grad_norm": 11.604177852714287, + "learning_rate": 3.001466878769176e-06, + "loss": 0.23523712158203125, + "step": 115615 + }, + { + "epoch": 0.9997319521664317, + "grad_norm": 9.142571824756553, + "learning_rate": 3.001263145619801e-06, + "loss": 0.27051239013671874, + "step": 115620 + }, + { + "epoch": 0.9997751856879751, + "grad_norm": 5.538248706996961, + "learning_rate": 3.0010594124646e-06, + "loss": 0.2069469451904297, + "step": 115625 + }, + { + "epoch": 0.9998184192095183, + "grad_norm": 39.33295621071322, + "learning_rate": 3.0008556793045136e-06, + "loss": 0.10973968505859374, + "step": 115630 + }, + { + "epoch": 0.9998616527310615, + "grad_norm": 45.94528480389972, + "learning_rate": 3.000651946140481e-06, + "loss": 0.3123149871826172, + "step": 115635 + }, + { + "epoch": 0.9999048862526049, + "grad_norm": 0.398422959465206, + "learning_rate": 3.0004482129734404e-06, + "loss": 0.23987045288085937, + "step": 115640 + }, + { + "epoch": 0.9999481197741481, + "grad_norm": 5.633166470126096, + "learning_rate": 3.0002444798043338e-06, + "loss": 0.27230300903320315, + "step": 115645 + }, + { + "epoch": 0.9999913532956913, + "grad_norm": 0.10134641944444635, + "learning_rate": 3.0000407466340997e-06, + "loss": 0.1014404296875, + "step": 115650 + }, + { + "epoch": 1.0000345868172347, + "grad_norm": 0.021524155788453306, + "learning_rate": 2.9998370134636772e-06, + "loss": 0.043461036682128903, + "step": 115655 + }, + { + "epoch": 1.000077820338778, + "grad_norm": 19.461866990790924, + "learning_rate": 2.999633280294007e-06, + "loss": 0.268853759765625, + "step": 115660 + }, + { + "epoch": 1.0001210538603211, + "grad_norm": 1.0466903794245, + "learning_rate": 2.9994295471260272e-06, + "loss": 0.09023933410644532, + "step": 115665 + }, + { + "epoch": 1.0001642873818644, + "grad_norm": 1.1920490815731268, + "learning_rate": 2.9992258139606793e-06, + "loss": 0.02504901885986328, + "step": 115670 + }, + { + "epoch": 1.0002075209034076, + "grad_norm": 2.7386179627882825, + "learning_rate": 2.9990220807989016e-06, + "loss": 0.13503570556640626, + "step": 115675 + }, + { + "epoch": 1.000250754424951, + "grad_norm": 10.53592788008918, + "learning_rate": 2.9988183476416327e-06, + "loss": 0.1442626953125, + "step": 115680 + }, + { + "epoch": 1.0002939879464943, + "grad_norm": 0.8819408167150365, + "learning_rate": 2.998614614489815e-06, + "loss": 0.018903350830078124, + "step": 115685 + }, + { + "epoch": 1.0003372214680375, + "grad_norm": 1.6848969739172155, + "learning_rate": 2.998410881344386e-06, + "loss": 0.08342409133911133, + "step": 115690 + }, + { + "epoch": 1.0003804549895807, + "grad_norm": 14.102287534861398, + "learning_rate": 2.998207148206286e-06, + "loss": 0.07015457153320312, + "step": 115695 + }, + { + "epoch": 1.000423688511124, + "grad_norm": 1.560332599777647, + "learning_rate": 2.9980034150764546e-06, + "loss": 0.12565879821777343, + "step": 115700 + }, + { + "epoch": 1.0004669220326672, + "grad_norm": 0.989098692874329, + "learning_rate": 2.9977996819558313e-06, + "loss": 0.1108001708984375, + "step": 115705 + }, + { + "epoch": 1.0005101555542104, + "grad_norm": 0.1662546602360029, + "learning_rate": 2.997595948845356e-06, + "loss": 0.08920974731445312, + "step": 115710 + }, + { + "epoch": 1.0005533890757539, + "grad_norm": 12.090530476292354, + "learning_rate": 2.9973922157459664e-06, + "loss": 0.04853744506835937, + "step": 115715 + }, + { + "epoch": 1.000596622597297, + "grad_norm": 0.7988177118288827, + "learning_rate": 2.997188482658605e-06, + "loss": 0.0112945556640625, + "step": 115720 + }, + { + "epoch": 1.0006398561188403, + "grad_norm": 2.943675303101962, + "learning_rate": 2.9969847495842095e-06, + "loss": 0.02920989990234375, + "step": 115725 + }, + { + "epoch": 1.0006830896403835, + "grad_norm": 0.9633112594563755, + "learning_rate": 2.9967810165237205e-06, + "loss": 0.0325653076171875, + "step": 115730 + }, + { + "epoch": 1.0007263231619268, + "grad_norm": 7.468308762041162, + "learning_rate": 2.9965772834780778e-06, + "loss": 0.08679580688476562, + "step": 115735 + }, + { + "epoch": 1.00076955668347, + "grad_norm": 3.743546994520049, + "learning_rate": 2.9963735504482196e-06, + "loss": 0.08910484313964843, + "step": 115740 + }, + { + "epoch": 1.0008127902050135, + "grad_norm": 2.309865475392174, + "learning_rate": 2.996169817435085e-06, + "loss": 0.088079833984375, + "step": 115745 + }, + { + "epoch": 1.0008560237265567, + "grad_norm": 1.2666002145583835, + "learning_rate": 2.995966084439617e-06, + "loss": 0.18673095703125, + "step": 115750 + }, + { + "epoch": 1.0008992572481, + "grad_norm": 2.8502381860671577, + "learning_rate": 2.9957623514627516e-06, + "loss": 0.138470458984375, + "step": 115755 + }, + { + "epoch": 1.0009424907696431, + "grad_norm": 2.704583350294412, + "learning_rate": 2.9955586185054307e-06, + "loss": 0.15582199096679689, + "step": 115760 + }, + { + "epoch": 1.0009857242911864, + "grad_norm": 10.301644504342665, + "learning_rate": 2.9953548855685928e-06, + "loss": 0.0985260009765625, + "step": 115765 + }, + { + "epoch": 1.0010289578127296, + "grad_norm": 1.592610572709028, + "learning_rate": 2.995151152653178e-06, + "loss": 0.037662506103515625, + "step": 115770 + }, + { + "epoch": 1.0010721913342728, + "grad_norm": 1.0876503604547412, + "learning_rate": 2.9949474197601257e-06, + "loss": 0.0784912109375, + "step": 115775 + }, + { + "epoch": 1.0011154248558163, + "grad_norm": 0.04973111132650111, + "learning_rate": 2.9947436868903745e-06, + "loss": 0.009651947021484374, + "step": 115780 + }, + { + "epoch": 1.0011586583773595, + "grad_norm": 5.83373274768717, + "learning_rate": 2.9945399540448654e-06, + "loss": 0.06014308929443359, + "step": 115785 + }, + { + "epoch": 1.0012018918989027, + "grad_norm": 6.518493415082956, + "learning_rate": 2.994336221224538e-06, + "loss": 0.06249370574951172, + "step": 115790 + }, + { + "epoch": 1.001245125420446, + "grad_norm": 2.8706234858478403, + "learning_rate": 2.9941324884303312e-06, + "loss": 0.03395576477050781, + "step": 115795 + }, + { + "epoch": 1.0012883589419892, + "grad_norm": 1.5437855851450282, + "learning_rate": 2.9939287556631854e-06, + "loss": 0.044690704345703124, + "step": 115800 + }, + { + "epoch": 1.0013315924635324, + "grad_norm": 3.344426834943957, + "learning_rate": 2.993725022924039e-06, + "loss": 0.07141036987304687, + "step": 115805 + }, + { + "epoch": 1.0013748259850759, + "grad_norm": 0.30109987807596733, + "learning_rate": 2.9935212902138315e-06, + "loss": 0.0099700927734375, + "step": 115810 + }, + { + "epoch": 1.001418059506619, + "grad_norm": 0.7303964488797458, + "learning_rate": 2.9933175575335044e-06, + "loss": 0.05109901428222656, + "step": 115815 + }, + { + "epoch": 1.0014612930281623, + "grad_norm": 20.49814814583312, + "learning_rate": 2.9931138248839957e-06, + "loss": 0.09351654052734375, + "step": 115820 + }, + { + "epoch": 1.0015045265497056, + "grad_norm": 3.039714695888118, + "learning_rate": 2.9929100922662457e-06, + "loss": 0.0774648666381836, + "step": 115825 + }, + { + "epoch": 1.0015477600712488, + "grad_norm": 8.649337652852251, + "learning_rate": 2.992706359681194e-06, + "loss": 0.11161956787109376, + "step": 115830 + }, + { + "epoch": 1.001590993592792, + "grad_norm": 2.5480572642951445, + "learning_rate": 2.9925026271297795e-06, + "loss": 0.2650726318359375, + "step": 115835 + }, + { + "epoch": 1.0016342271143355, + "grad_norm": 1.5549807116953716, + "learning_rate": 2.992298894612941e-06, + "loss": 0.023621177673339842, + "step": 115840 + }, + { + "epoch": 1.0016774606358787, + "grad_norm": 72.0507471987859, + "learning_rate": 2.9920951621316206e-06, + "loss": 0.16804580688476561, + "step": 115845 + }, + { + "epoch": 1.001720694157422, + "grad_norm": 0.8171247434876536, + "learning_rate": 2.991891429686757e-06, + "loss": 0.018959426879882814, + "step": 115850 + }, + { + "epoch": 1.0017639276789652, + "grad_norm": 1.4664549204028945, + "learning_rate": 2.9916876972792895e-06, + "loss": 0.0894500732421875, + "step": 115855 + }, + { + "epoch": 1.0018071612005084, + "grad_norm": 21.965026640645846, + "learning_rate": 2.9914839649101576e-06, + "loss": 0.13409652709960937, + "step": 115860 + }, + { + "epoch": 1.0018503947220516, + "grad_norm": 1.3726702920905811, + "learning_rate": 2.9912802325802997e-06, + "loss": 0.066070556640625, + "step": 115865 + }, + { + "epoch": 1.0018936282435948, + "grad_norm": 0.7624708268170617, + "learning_rate": 2.9910765002906576e-06, + "loss": 0.1666168212890625, + "step": 115870 + }, + { + "epoch": 1.0019368617651383, + "grad_norm": 0.08732174982438382, + "learning_rate": 2.9908727680421686e-06, + "loss": 0.11715011596679688, + "step": 115875 + }, + { + "epoch": 1.0019800952866815, + "grad_norm": 64.602051061337, + "learning_rate": 2.9906690358357746e-06, + "loss": 0.40835113525390626, + "step": 115880 + }, + { + "epoch": 1.0020233288082248, + "grad_norm": 2.266324449306802, + "learning_rate": 2.9904653036724146e-06, + "loss": 0.02350311279296875, + "step": 115885 + }, + { + "epoch": 1.002066562329768, + "grad_norm": 4.72509240671648, + "learning_rate": 2.990261571553027e-06, + "loss": 0.05749244689941406, + "step": 115890 + }, + { + "epoch": 1.0021097958513112, + "grad_norm": 0.0250199223035486, + "learning_rate": 2.990057839478553e-06, + "loss": 0.06188783645629883, + "step": 115895 + }, + { + "epoch": 1.0021530293728544, + "grad_norm": 1.5045140451644892, + "learning_rate": 2.9898541074499312e-06, + "loss": 0.04204864501953125, + "step": 115900 + }, + { + "epoch": 1.002196262894398, + "grad_norm": 17.22276862734249, + "learning_rate": 2.9896503754680997e-06, + "loss": 0.11080741882324219, + "step": 115905 + }, + { + "epoch": 1.0022394964159411, + "grad_norm": 33.7422114234346, + "learning_rate": 2.9894466435340014e-06, + "loss": 0.13696632385253907, + "step": 115910 + }, + { + "epoch": 1.0022827299374844, + "grad_norm": 3.5945564257672333, + "learning_rate": 2.989242911648574e-06, + "loss": 0.03310089111328125, + "step": 115915 + }, + { + "epoch": 1.0023259634590276, + "grad_norm": 0.312247265652155, + "learning_rate": 2.989039179812757e-06, + "loss": 0.028549957275390624, + "step": 115920 + }, + { + "epoch": 1.0023691969805708, + "grad_norm": 0.4071812132112675, + "learning_rate": 2.988835448027491e-06, + "loss": 0.021770668029785157, + "step": 115925 + }, + { + "epoch": 1.002412430502114, + "grad_norm": 25.78781730241644, + "learning_rate": 2.988631716293715e-06, + "loss": 0.13955192565917968, + "step": 115930 + }, + { + "epoch": 1.0024556640236575, + "grad_norm": 0.9039492283592654, + "learning_rate": 2.9884279846123666e-06, + "loss": 0.10972251892089843, + "step": 115935 + }, + { + "epoch": 1.0024988975452007, + "grad_norm": 0.5891684627458703, + "learning_rate": 2.988224252984389e-06, + "loss": 0.2364959716796875, + "step": 115940 + }, + { + "epoch": 1.002542131066744, + "grad_norm": 0.2129049045340981, + "learning_rate": 2.9880205214107206e-06, + "loss": 0.08276195526123047, + "step": 115945 + }, + { + "epoch": 1.0025853645882872, + "grad_norm": 0.6991941231412249, + "learning_rate": 2.987816789892299e-06, + "loss": 0.04011993408203125, + "step": 115950 + }, + { + "epoch": 1.0026285981098304, + "grad_norm": 15.135063300947232, + "learning_rate": 2.9876130584300667e-06, + "loss": 0.1211334228515625, + "step": 115955 + }, + { + "epoch": 1.0026718316313736, + "grad_norm": 0.34796799123516764, + "learning_rate": 2.9874093270249617e-06, + "loss": 0.013494873046875, + "step": 115960 + }, + { + "epoch": 1.0027150651529169, + "grad_norm": 2.7594876691441494, + "learning_rate": 2.9872055956779237e-06, + "loss": 0.24697265625, + "step": 115965 + }, + { + "epoch": 1.0027582986744603, + "grad_norm": 2.5656738443713825, + "learning_rate": 2.9870018643898905e-06, + "loss": 0.04183349609375, + "step": 115970 + }, + { + "epoch": 1.0028015321960035, + "grad_norm": 10.533645155854346, + "learning_rate": 2.9867981331618056e-06, + "loss": 0.07321319580078126, + "step": 115975 + }, + { + "epoch": 1.0028447657175468, + "grad_norm": 3.4995097870535896, + "learning_rate": 2.9865944019946054e-06, + "loss": 0.058935546875, + "step": 115980 + }, + { + "epoch": 1.00288799923909, + "grad_norm": 0.5218259491211613, + "learning_rate": 2.9863906708892314e-06, + "loss": 0.05956344604492188, + "step": 115985 + }, + { + "epoch": 1.0029312327606332, + "grad_norm": 14.38348946635457, + "learning_rate": 2.9861869398466226e-06, + "loss": 0.13805389404296875, + "step": 115990 + }, + { + "epoch": 1.0029744662821765, + "grad_norm": 2.577834724615932, + "learning_rate": 2.9859832088677183e-06, + "loss": 0.06510353088378906, + "step": 115995 + }, + { + "epoch": 1.00301769980372, + "grad_norm": 2.837278602858148, + "learning_rate": 2.985779477953457e-06, + "loss": 0.014610671997070312, + "step": 116000 + }, + { + "epoch": 1.0030609333252631, + "grad_norm": 7.622040727307272, + "learning_rate": 2.9855757471047806e-06, + "loss": 0.03181476593017578, + "step": 116005 + }, + { + "epoch": 1.0031041668468064, + "grad_norm": 0.3819570750638485, + "learning_rate": 2.985372016322627e-06, + "loss": 0.13770675659179688, + "step": 116010 + }, + { + "epoch": 1.0031474003683496, + "grad_norm": 3.579225991075599, + "learning_rate": 2.985168285607937e-06, + "loss": 0.05167884826660156, + "step": 116015 + }, + { + "epoch": 1.0031906338898928, + "grad_norm": 0.5189720302013707, + "learning_rate": 2.98496455496165e-06, + "loss": 0.11056747436523437, + "step": 116020 + }, + { + "epoch": 1.003233867411436, + "grad_norm": 0.07648404959843258, + "learning_rate": 2.9847608243847045e-06, + "loss": 0.10942459106445312, + "step": 116025 + }, + { + "epoch": 1.0032771009329795, + "grad_norm": 4.4977169051668024, + "learning_rate": 2.9845570938780397e-06, + "loss": 0.04085884094238281, + "step": 116030 + }, + { + "epoch": 1.0033203344545227, + "grad_norm": 0.36180015191606607, + "learning_rate": 2.9843533634425977e-06, + "loss": 0.0274810791015625, + "step": 116035 + }, + { + "epoch": 1.003363567976066, + "grad_norm": 3.151024349946543, + "learning_rate": 2.9841496330793156e-06, + "loss": 0.09783477783203125, + "step": 116040 + }, + { + "epoch": 1.0034068014976092, + "grad_norm": 4.463483989731798, + "learning_rate": 2.9839459027891346e-06, + "loss": 0.2234527587890625, + "step": 116045 + }, + { + "epoch": 1.0034500350191524, + "grad_norm": 34.88186213130705, + "learning_rate": 2.983742172572994e-06, + "loss": 0.2536022186279297, + "step": 116050 + }, + { + "epoch": 1.0034932685406956, + "grad_norm": 1.2367178001578667, + "learning_rate": 2.983538442431833e-06, + "loss": 0.11705188751220703, + "step": 116055 + }, + { + "epoch": 1.0035365020622389, + "grad_norm": 12.56899177292219, + "learning_rate": 2.9833347123665904e-06, + "loss": 0.04457550048828125, + "step": 116060 + }, + { + "epoch": 1.0035797355837823, + "grad_norm": 2.9202324949743415, + "learning_rate": 2.9831309823782064e-06, + "loss": 0.08828601837158204, + "step": 116065 + }, + { + "epoch": 1.0036229691053256, + "grad_norm": 8.26015151334338, + "learning_rate": 2.9829272524676215e-06, + "loss": 0.0694549560546875, + "step": 116070 + }, + { + "epoch": 1.0036662026268688, + "grad_norm": 2.427418903213543, + "learning_rate": 2.982723522635775e-06, + "loss": 0.0412872314453125, + "step": 116075 + }, + { + "epoch": 1.003709436148412, + "grad_norm": 0.3362649211189061, + "learning_rate": 2.982519792883606e-06, + "loss": 0.045916748046875, + "step": 116080 + }, + { + "epoch": 1.0037526696699552, + "grad_norm": 1.3354928450163444, + "learning_rate": 2.9823160632120544e-06, + "loss": 0.03929443359375, + "step": 116085 + }, + { + "epoch": 1.0037959031914985, + "grad_norm": 0.13251640440442097, + "learning_rate": 2.982112333622059e-06, + "loss": 0.1654052734375, + "step": 116090 + }, + { + "epoch": 1.003839136713042, + "grad_norm": 0.5231808313028795, + "learning_rate": 2.9819086041145587e-06, + "loss": 0.2069091796875, + "step": 116095 + }, + { + "epoch": 1.0038823702345852, + "grad_norm": 2.653592060354033, + "learning_rate": 2.981704874690496e-06, + "loss": 0.057086181640625, + "step": 116100 + }, + { + "epoch": 1.0039256037561284, + "grad_norm": 17.327117814891007, + "learning_rate": 2.981501145350809e-06, + "loss": 0.21059722900390626, + "step": 116105 + }, + { + "epoch": 1.0039688372776716, + "grad_norm": 1.8100118041192816, + "learning_rate": 2.9812974160964365e-06, + "loss": 0.055254364013671876, + "step": 116110 + }, + { + "epoch": 1.0040120707992148, + "grad_norm": 0.21898108218328702, + "learning_rate": 2.9810936869283184e-06, + "loss": 0.0807647705078125, + "step": 116115 + }, + { + "epoch": 1.004055304320758, + "grad_norm": 6.37923177931613, + "learning_rate": 2.980889957847395e-06, + "loss": 0.0358856201171875, + "step": 116120 + }, + { + "epoch": 1.0040985378423013, + "grad_norm": 16.484550936643853, + "learning_rate": 2.9806862288546055e-06, + "loss": 0.0779205322265625, + "step": 116125 + }, + { + "epoch": 1.0041417713638447, + "grad_norm": 4.838053138332329, + "learning_rate": 2.980482499950888e-06, + "loss": 0.0882904052734375, + "step": 116130 + }, + { + "epoch": 1.004185004885388, + "grad_norm": 3.1097573558736618, + "learning_rate": 2.980278771137185e-06, + "loss": 0.04494190216064453, + "step": 116135 + }, + { + "epoch": 1.0042282384069312, + "grad_norm": 17.72779269238778, + "learning_rate": 2.9800750424144345e-06, + "loss": 0.09487838745117187, + "step": 116140 + }, + { + "epoch": 1.0042714719284744, + "grad_norm": 1.5335035756905497, + "learning_rate": 2.979871313783576e-06, + "loss": 0.1076446533203125, + "step": 116145 + }, + { + "epoch": 1.0043147054500177, + "grad_norm": 16.094857589455266, + "learning_rate": 2.979667585245549e-06, + "loss": 0.08187484741210938, + "step": 116150 + }, + { + "epoch": 1.004357938971561, + "grad_norm": 1.4333383471104721, + "learning_rate": 2.9794638568012938e-06, + "loss": 0.0591278076171875, + "step": 116155 + }, + { + "epoch": 1.0044011724931043, + "grad_norm": 22.722324631862772, + "learning_rate": 2.979260128451748e-06, + "loss": 0.14056320190429689, + "step": 116160 + }, + { + "epoch": 1.0044444060146476, + "grad_norm": 10.11355904648821, + "learning_rate": 2.9790564001978536e-06, + "loss": 0.030629348754882813, + "step": 116165 + }, + { + "epoch": 1.0044876395361908, + "grad_norm": 20.142190664735097, + "learning_rate": 2.97885267204055e-06, + "loss": 0.05420951843261719, + "step": 116170 + }, + { + "epoch": 1.004530873057734, + "grad_norm": 52.78657061017837, + "learning_rate": 2.9786489439807747e-06, + "loss": 0.18044204711914064, + "step": 116175 + }, + { + "epoch": 1.0045741065792773, + "grad_norm": 0.557734666808223, + "learning_rate": 2.9784452160194697e-06, + "loss": 0.18812103271484376, + "step": 116180 + }, + { + "epoch": 1.0046173401008205, + "grad_norm": 0.2795321826793959, + "learning_rate": 2.9782414881575734e-06, + "loss": 0.17666168212890626, + "step": 116185 + }, + { + "epoch": 1.004660573622364, + "grad_norm": 10.377491696661126, + "learning_rate": 2.9780377603960238e-06, + "loss": 0.10301055908203124, + "step": 116190 + }, + { + "epoch": 1.0047038071439072, + "grad_norm": 0.744951898090005, + "learning_rate": 2.9778340327357636e-06, + "loss": 0.0565887451171875, + "step": 116195 + }, + { + "epoch": 1.0047470406654504, + "grad_norm": 28.90190726159555, + "learning_rate": 2.977630305177731e-06, + "loss": 0.27093658447265623, + "step": 116200 + }, + { + "epoch": 1.0047902741869936, + "grad_norm": 0.7663351698976256, + "learning_rate": 2.977426577722865e-06, + "loss": 0.040643310546875, + "step": 116205 + }, + { + "epoch": 1.0048335077085369, + "grad_norm": 2.0018025638469252, + "learning_rate": 2.9772228503721055e-06, + "loss": 0.07096900939941406, + "step": 116210 + }, + { + "epoch": 1.00487674123008, + "grad_norm": 0.05935245286531086, + "learning_rate": 2.977019123126393e-06, + "loss": 0.04876289367675781, + "step": 116215 + }, + { + "epoch": 1.0049199747516233, + "grad_norm": 22.710880003740144, + "learning_rate": 2.976815395986666e-06, + "loss": 0.0907440185546875, + "step": 116220 + }, + { + "epoch": 1.0049632082731668, + "grad_norm": 2.589753620776918, + "learning_rate": 2.976611668953863e-06, + "loss": 0.08659820556640625, + "step": 116225 + }, + { + "epoch": 1.00500644179471, + "grad_norm": 0.23839913566559126, + "learning_rate": 2.9764079420289264e-06, + "loss": 0.19863510131835938, + "step": 116230 + }, + { + "epoch": 1.0050496753162532, + "grad_norm": 1.8030167472083707, + "learning_rate": 2.976204215212793e-06, + "loss": 0.06304035186767579, + "step": 116235 + }, + { + "epoch": 1.0050929088377965, + "grad_norm": 12.696465427678298, + "learning_rate": 2.9760004885064046e-06, + "loss": 0.05482749938964844, + "step": 116240 + }, + { + "epoch": 1.0051361423593397, + "grad_norm": 7.332374274613357, + "learning_rate": 2.9757967619107006e-06, + "loss": 0.036590194702148436, + "step": 116245 + }, + { + "epoch": 1.005179375880883, + "grad_norm": 1.5565570396917001, + "learning_rate": 2.975593035426619e-06, + "loss": 0.009068679809570313, + "step": 116250 + }, + { + "epoch": 1.0052226094024264, + "grad_norm": 0.041881416408826845, + "learning_rate": 2.9753893090550986e-06, + "loss": 0.08192672729492187, + "step": 116255 + }, + { + "epoch": 1.0052658429239696, + "grad_norm": 0.05251134764635327, + "learning_rate": 2.9751855827970825e-06, + "loss": 0.449506950378418, + "step": 116260 + }, + { + "epoch": 1.0053090764455128, + "grad_norm": 4.866138401982241, + "learning_rate": 2.9749818566535077e-06, + "loss": 0.024011611938476562, + "step": 116265 + }, + { + "epoch": 1.005352309967056, + "grad_norm": 5.700103317969009, + "learning_rate": 2.974778130625315e-06, + "loss": 0.050807952880859375, + "step": 116270 + }, + { + "epoch": 1.0053955434885993, + "grad_norm": 0.07698260356174515, + "learning_rate": 2.9745744047134425e-06, + "loss": 0.013832855224609374, + "step": 116275 + }, + { + "epoch": 1.0054387770101425, + "grad_norm": 0.9182876660209952, + "learning_rate": 2.9743706789188313e-06, + "loss": 0.010070037841796876, + "step": 116280 + }, + { + "epoch": 1.005482010531686, + "grad_norm": 0.6193266740393127, + "learning_rate": 2.974166953242419e-06, + "loss": 0.05880584716796875, + "step": 116285 + }, + { + "epoch": 1.0055252440532292, + "grad_norm": 3.5765931042949024, + "learning_rate": 2.9739632276851467e-06, + "loss": 0.058786773681640626, + "step": 116290 + }, + { + "epoch": 1.0055684775747724, + "grad_norm": 1.8354333804625949, + "learning_rate": 2.973759502247955e-06, + "loss": 0.07520027160644531, + "step": 116295 + }, + { + "epoch": 1.0056117110963156, + "grad_norm": 2.63750749959228, + "learning_rate": 2.9735557769317814e-06, + "loss": 0.04063262939453125, + "step": 116300 + }, + { + "epoch": 1.0056549446178589, + "grad_norm": 9.433644589089093, + "learning_rate": 2.9733520517375667e-06, + "loss": 0.0309356689453125, + "step": 116305 + }, + { + "epoch": 1.005698178139402, + "grad_norm": 9.046656300680894, + "learning_rate": 2.973148326666249e-06, + "loss": 0.11426162719726562, + "step": 116310 + }, + { + "epoch": 1.0057414116609453, + "grad_norm": 10.628303336948147, + "learning_rate": 2.9729446017187696e-06, + "loss": 0.12649192810058593, + "step": 116315 + }, + { + "epoch": 1.0057846451824888, + "grad_norm": 0.021563611890095393, + "learning_rate": 2.972740876896066e-06, + "loss": 0.010289573669433593, + "step": 116320 + }, + { + "epoch": 1.005827878704032, + "grad_norm": 1.6291767439443978, + "learning_rate": 2.9725371521990807e-06, + "loss": 0.0126678466796875, + "step": 116325 + }, + { + "epoch": 1.0058711122255752, + "grad_norm": 1.2989787331684222, + "learning_rate": 2.972333427628751e-06, + "loss": 0.035892486572265625, + "step": 116330 + }, + { + "epoch": 1.0059143457471185, + "grad_norm": 47.877476618653255, + "learning_rate": 2.9721297031860175e-06, + "loss": 0.5021856307983399, + "step": 116335 + }, + { + "epoch": 1.0059575792686617, + "grad_norm": 18.065116330274446, + "learning_rate": 2.971925978871819e-06, + "loss": 0.0685638427734375, + "step": 116340 + }, + { + "epoch": 1.006000812790205, + "grad_norm": 1.5686074041075702, + "learning_rate": 2.9717222546870957e-06, + "loss": 0.02311553955078125, + "step": 116345 + }, + { + "epoch": 1.0060440463117484, + "grad_norm": 13.797673957204942, + "learning_rate": 2.9715185306327856e-06, + "loss": 0.12580490112304688, + "step": 116350 + }, + { + "epoch": 1.0060872798332916, + "grad_norm": 0.9662930063030888, + "learning_rate": 2.971314806709831e-06, + "loss": 0.02650623321533203, + "step": 116355 + }, + { + "epoch": 1.0061305133548348, + "grad_norm": 0.32989550053354216, + "learning_rate": 2.9711110829191692e-06, + "loss": 0.06669960021972657, + "step": 116360 + }, + { + "epoch": 1.006173746876378, + "grad_norm": 1.1179906430801099, + "learning_rate": 2.970907359261741e-06, + "loss": 0.03580551147460938, + "step": 116365 + }, + { + "epoch": 1.0062169803979213, + "grad_norm": 0.37949321833453753, + "learning_rate": 2.970703635738485e-06, + "loss": 0.05305747985839844, + "step": 116370 + }, + { + "epoch": 1.0062602139194645, + "grad_norm": 40.24656485386251, + "learning_rate": 2.970499912350342e-06, + "loss": 0.08321580886840821, + "step": 116375 + }, + { + "epoch": 1.006303447441008, + "grad_norm": 44.391886469377056, + "learning_rate": 2.9702961890982497e-06, + "loss": 0.09463424682617187, + "step": 116380 + }, + { + "epoch": 1.0063466809625512, + "grad_norm": 4.028853862329872, + "learning_rate": 2.9700924659831493e-06, + "loss": 0.21804122924804686, + "step": 116385 + }, + { + "epoch": 1.0063899144840944, + "grad_norm": 21.175091340734138, + "learning_rate": 2.9698887430059805e-06, + "loss": 0.17335739135742187, + "step": 116390 + }, + { + "epoch": 1.0064331480056377, + "grad_norm": 0.6615107780749208, + "learning_rate": 2.969685020167682e-06, + "loss": 0.033087158203125, + "step": 116395 + }, + { + "epoch": 1.0064763815271809, + "grad_norm": 7.53159035645949, + "learning_rate": 2.9694812974691922e-06, + "loss": 0.0549072265625, + "step": 116400 + }, + { + "epoch": 1.0065196150487241, + "grad_norm": 1.0881594232378347, + "learning_rate": 2.9692775749114535e-06, + "loss": 0.04758148193359375, + "step": 116405 + }, + { + "epoch": 1.0065628485702673, + "grad_norm": 3.563715966979372, + "learning_rate": 2.9690738524954034e-06, + "loss": 0.068603515625, + "step": 116410 + }, + { + "epoch": 1.0066060820918108, + "grad_norm": 5.34033837797447, + "learning_rate": 2.968870130221981e-06, + "loss": 0.041876220703125, + "step": 116415 + }, + { + "epoch": 1.006649315613354, + "grad_norm": 3.991914355276025, + "learning_rate": 2.9686664080921277e-06, + "loss": 0.251080322265625, + "step": 116420 + }, + { + "epoch": 1.0066925491348973, + "grad_norm": 2.4141974105450923, + "learning_rate": 2.9684626861067825e-06, + "loss": 0.07589874267578126, + "step": 116425 + }, + { + "epoch": 1.0067357826564405, + "grad_norm": 13.654643098380333, + "learning_rate": 2.968258964266884e-06, + "loss": 0.06368904113769532, + "step": 116430 + }, + { + "epoch": 1.0067790161779837, + "grad_norm": 1.6983342075488628, + "learning_rate": 2.9680552425733723e-06, + "loss": 0.07442054748535157, + "step": 116435 + }, + { + "epoch": 1.006822249699527, + "grad_norm": 16.169478361056235, + "learning_rate": 2.9678515210271874e-06, + "loss": 0.171148681640625, + "step": 116440 + }, + { + "epoch": 1.0068654832210704, + "grad_norm": 6.706389373309928, + "learning_rate": 2.967647799629267e-06, + "loss": 0.05168418884277344, + "step": 116445 + }, + { + "epoch": 1.0069087167426136, + "grad_norm": 0.055005908870877, + "learning_rate": 2.9674440783805543e-06, + "loss": 0.07310981750488281, + "step": 116450 + }, + { + "epoch": 1.0069519502641568, + "grad_norm": 2.2405850260927807, + "learning_rate": 2.9672403572819856e-06, + "loss": 0.05557727813720703, + "step": 116455 + }, + { + "epoch": 1.0069951837857, + "grad_norm": 0.18755745602577367, + "learning_rate": 2.967036636334501e-06, + "loss": 0.04015274047851562, + "step": 116460 + }, + { + "epoch": 1.0070384173072433, + "grad_norm": 11.470105549065789, + "learning_rate": 2.966832915539042e-06, + "loss": 0.09633255004882812, + "step": 116465 + }, + { + "epoch": 1.0070816508287865, + "grad_norm": 4.54739766701475, + "learning_rate": 2.9666291948965458e-06, + "loss": 0.1150115966796875, + "step": 116470 + }, + { + "epoch": 1.0071248843503298, + "grad_norm": 1.1670297291575782, + "learning_rate": 2.966425474407953e-06, + "loss": 0.018301010131835938, + "step": 116475 + }, + { + "epoch": 1.0071681178718732, + "grad_norm": 1.5061299658738907, + "learning_rate": 2.9662217540742014e-06, + "loss": 0.051019287109375, + "step": 116480 + }, + { + "epoch": 1.0072113513934164, + "grad_norm": 1.8655731312368709, + "learning_rate": 2.966018033896234e-06, + "loss": 0.052965736389160155, + "step": 116485 + }, + { + "epoch": 1.0072545849149597, + "grad_norm": 0.3124183543675354, + "learning_rate": 2.9658143138749873e-06, + "loss": 0.04898757934570312, + "step": 116490 + }, + { + "epoch": 1.007297818436503, + "grad_norm": 1.3234212244543517, + "learning_rate": 2.9656105940114025e-06, + "loss": 0.04154281616210938, + "step": 116495 + }, + { + "epoch": 1.0073410519580461, + "grad_norm": 4.686273105980424, + "learning_rate": 2.9654068743064194e-06, + "loss": 0.08613815307617187, + "step": 116500 + }, + { + "epoch": 1.0073842854795894, + "grad_norm": 13.505086602471996, + "learning_rate": 2.965203154760976e-06, + "loss": 0.24654083251953124, + "step": 116505 + }, + { + "epoch": 1.0074275190011328, + "grad_norm": 0.5873963692511353, + "learning_rate": 2.9649994353760112e-06, + "loss": 0.11980743408203125, + "step": 116510 + }, + { + "epoch": 1.007470752522676, + "grad_norm": 9.81292522333559, + "learning_rate": 2.964795716152467e-06, + "loss": 0.05661468505859375, + "step": 116515 + }, + { + "epoch": 1.0075139860442193, + "grad_norm": 42.574720542220064, + "learning_rate": 2.9645919970912822e-06, + "loss": 0.2002655029296875, + "step": 116520 + }, + { + "epoch": 1.0075572195657625, + "grad_norm": 35.567047755038764, + "learning_rate": 2.9643882781933962e-06, + "loss": 0.1076568603515625, + "step": 116525 + }, + { + "epoch": 1.0076004530873057, + "grad_norm": 0.7150870987214312, + "learning_rate": 2.9641845594597482e-06, + "loss": 0.05126953125, + "step": 116530 + }, + { + "epoch": 1.007643686608849, + "grad_norm": 4.674135499501888, + "learning_rate": 2.9639808408912776e-06, + "loss": 0.021785545349121093, + "step": 116535 + }, + { + "epoch": 1.0076869201303924, + "grad_norm": 0.35651644108440067, + "learning_rate": 2.9637771224889236e-06, + "loss": 0.040309906005859375, + "step": 116540 + }, + { + "epoch": 1.0077301536519356, + "grad_norm": 1.7145574065546345, + "learning_rate": 2.9635734042536266e-06, + "loss": 0.04893341064453125, + "step": 116545 + }, + { + "epoch": 1.0077733871734789, + "grad_norm": 15.354955738215619, + "learning_rate": 2.9633696861863263e-06, + "loss": 0.08307533264160157, + "step": 116550 + }, + { + "epoch": 1.007816620695022, + "grad_norm": 0.7406790809981499, + "learning_rate": 2.9631659682879616e-06, + "loss": 0.018896484375, + "step": 116555 + }, + { + "epoch": 1.0078598542165653, + "grad_norm": 3.4037876904238047, + "learning_rate": 2.962962250559473e-06, + "loss": 0.2476776123046875, + "step": 116560 + }, + { + "epoch": 1.0079030877381085, + "grad_norm": 1.5699663732521676, + "learning_rate": 2.9627585330017975e-06, + "loss": 0.050641632080078124, + "step": 116565 + }, + { + "epoch": 1.0079463212596518, + "grad_norm": 2.970276233359923, + "learning_rate": 2.962554815615878e-06, + "loss": 0.24008331298828126, + "step": 116570 + }, + { + "epoch": 1.0079895547811952, + "grad_norm": 0.36694087138163306, + "learning_rate": 2.9623510984026503e-06, + "loss": 0.058746719360351564, + "step": 116575 + }, + { + "epoch": 1.0080327883027385, + "grad_norm": 0.8641757962323804, + "learning_rate": 2.962147381363058e-06, + "loss": 0.049483871459960936, + "step": 116580 + }, + { + "epoch": 1.0080760218242817, + "grad_norm": 0.8733309365392177, + "learning_rate": 2.9619436644980384e-06, + "loss": 0.00997467041015625, + "step": 116585 + }, + { + "epoch": 1.008119255345825, + "grad_norm": 29.372239429716917, + "learning_rate": 2.9617399478085308e-06, + "loss": 0.177972412109375, + "step": 116590 + }, + { + "epoch": 1.0081624888673681, + "grad_norm": 10.161065576487324, + "learning_rate": 2.961536231295475e-06, + "loss": 0.12945976257324218, + "step": 116595 + }, + { + "epoch": 1.0082057223889114, + "grad_norm": 0.8737051130809057, + "learning_rate": 2.961332514959811e-06, + "loss": 0.0114013671875, + "step": 116600 + }, + { + "epoch": 1.0082489559104548, + "grad_norm": 11.073203915940233, + "learning_rate": 2.9611287988024773e-06, + "loss": 0.08313674926757812, + "step": 116605 + }, + { + "epoch": 1.008292189431998, + "grad_norm": 7.716982930551853, + "learning_rate": 2.960925082824415e-06, + "loss": 0.15647239685058595, + "step": 116610 + }, + { + "epoch": 1.0083354229535413, + "grad_norm": 5.202992826004712, + "learning_rate": 2.960721367026563e-06, + "loss": 0.11178016662597656, + "step": 116615 + }, + { + "epoch": 1.0083786564750845, + "grad_norm": 2.4288288769323403, + "learning_rate": 2.9605176514098602e-06, + "loss": 0.03397293090820312, + "step": 116620 + }, + { + "epoch": 1.0084218899966277, + "grad_norm": 36.71794047917899, + "learning_rate": 2.960313935975246e-06, + "loss": 0.10783157348632813, + "step": 116625 + }, + { + "epoch": 1.008465123518171, + "grad_norm": 0.10892641292607774, + "learning_rate": 2.9601102207236614e-06, + "loss": 0.0228912353515625, + "step": 116630 + }, + { + "epoch": 1.0085083570397144, + "grad_norm": 57.158582805032324, + "learning_rate": 2.959906505656043e-06, + "loss": 0.4934326171875, + "step": 116635 + }, + { + "epoch": 1.0085515905612576, + "grad_norm": 0.1248471639507894, + "learning_rate": 2.9597027907733343e-06, + "loss": 0.12647857666015624, + "step": 116640 + }, + { + "epoch": 1.0085948240828009, + "grad_norm": 0.23201212221230852, + "learning_rate": 2.959499076076472e-06, + "loss": 0.036480712890625, + "step": 116645 + }, + { + "epoch": 1.008638057604344, + "grad_norm": 11.776091940100663, + "learning_rate": 2.959295361566397e-06, + "loss": 0.08408012390136718, + "step": 116650 + }, + { + "epoch": 1.0086812911258873, + "grad_norm": 21.85966441828884, + "learning_rate": 2.959091647244047e-06, + "loss": 0.2761016845703125, + "step": 116655 + }, + { + "epoch": 1.0087245246474306, + "grad_norm": 0.7570169162098419, + "learning_rate": 2.958887933110364e-06, + "loss": 0.057489013671875, + "step": 116660 + }, + { + "epoch": 1.0087677581689738, + "grad_norm": 1.6644575659571617, + "learning_rate": 2.9586842191662863e-06, + "loss": 0.009735870361328124, + "step": 116665 + }, + { + "epoch": 1.0088109916905172, + "grad_norm": 57.899149841044085, + "learning_rate": 2.9584805054127516e-06, + "loss": 0.220501708984375, + "step": 116670 + }, + { + "epoch": 1.0088542252120605, + "grad_norm": 0.6771884315426554, + "learning_rate": 2.958276791850702e-06, + "loss": 0.0167694091796875, + "step": 116675 + }, + { + "epoch": 1.0088974587336037, + "grad_norm": 14.07345416312808, + "learning_rate": 2.958073078481077e-06, + "loss": 0.05524749755859375, + "step": 116680 + }, + { + "epoch": 1.008940692255147, + "grad_norm": 3.648545094570554, + "learning_rate": 2.957869365304814e-06, + "loss": 0.03166275024414063, + "step": 116685 + }, + { + "epoch": 1.0089839257766902, + "grad_norm": 24.358072344808683, + "learning_rate": 2.957665652322855e-06, + "loss": 0.05702667236328125, + "step": 116690 + }, + { + "epoch": 1.0090271592982334, + "grad_norm": 0.12272836890238868, + "learning_rate": 2.9574619395361377e-06, + "loss": 0.07873611450195313, + "step": 116695 + }, + { + "epoch": 1.0090703928197768, + "grad_norm": 27.18197705673981, + "learning_rate": 2.9572582269456012e-06, + "loss": 0.07347602844238281, + "step": 116700 + }, + { + "epoch": 1.00911362634132, + "grad_norm": 0.7159468103682354, + "learning_rate": 2.957054514552187e-06, + "loss": 0.04883918762207031, + "step": 116705 + }, + { + "epoch": 1.0091568598628633, + "grad_norm": 1.668003779065155, + "learning_rate": 2.956850802356833e-06, + "loss": 0.058442878723144534, + "step": 116710 + }, + { + "epoch": 1.0092000933844065, + "grad_norm": 2.805114519244401, + "learning_rate": 2.95664709036048e-06, + "loss": 0.05497875213623047, + "step": 116715 + }, + { + "epoch": 1.0092433269059498, + "grad_norm": 15.226094173633491, + "learning_rate": 2.9564433785640674e-06, + "loss": 0.1788330078125, + "step": 116720 + }, + { + "epoch": 1.009286560427493, + "grad_norm": 3.659103041881882, + "learning_rate": 2.9562396669685333e-06, + "loss": 0.14372711181640624, + "step": 116725 + }, + { + "epoch": 1.0093297939490364, + "grad_norm": 3.0440927403505036, + "learning_rate": 2.956035955574817e-06, + "loss": 0.0168060302734375, + "step": 116730 + }, + { + "epoch": 1.0093730274705797, + "grad_norm": 2.207826393336075, + "learning_rate": 2.9558322443838603e-06, + "loss": 0.0381744384765625, + "step": 116735 + }, + { + "epoch": 1.009416260992123, + "grad_norm": 0.9614131189412718, + "learning_rate": 2.9556285333966005e-06, + "loss": 0.02564849853515625, + "step": 116740 + }, + { + "epoch": 1.0094594945136661, + "grad_norm": 1.1727606358889915, + "learning_rate": 2.9554248226139788e-06, + "loss": 0.024547576904296875, + "step": 116745 + }, + { + "epoch": 1.0095027280352094, + "grad_norm": 10.450089467839904, + "learning_rate": 2.955221112036934e-06, + "loss": 0.096929931640625, + "step": 116750 + }, + { + "epoch": 1.0095459615567526, + "grad_norm": 0.30371686480138627, + "learning_rate": 2.9550174016664053e-06, + "loss": 0.1375812530517578, + "step": 116755 + }, + { + "epoch": 1.0095891950782958, + "grad_norm": 5.150224523105562, + "learning_rate": 2.954813691503332e-06, + "loss": 0.03782958984375, + "step": 116760 + }, + { + "epoch": 1.0096324285998393, + "grad_norm": 1.7283270753003763, + "learning_rate": 2.9546099815486535e-06, + "loss": 0.08493118286132813, + "step": 116765 + }, + { + "epoch": 1.0096756621213825, + "grad_norm": 25.32067313205988, + "learning_rate": 2.95440627180331e-06, + "loss": 0.07294387817382812, + "step": 116770 + }, + { + "epoch": 1.0097188956429257, + "grad_norm": 41.075687516638666, + "learning_rate": 2.9542025622682416e-06, + "loss": 0.1873138427734375, + "step": 116775 + }, + { + "epoch": 1.009762129164469, + "grad_norm": 14.318183005583878, + "learning_rate": 2.9539988529443863e-06, + "loss": 0.091668701171875, + "step": 116780 + }, + { + "epoch": 1.0098053626860122, + "grad_norm": 0.06482727544447933, + "learning_rate": 2.953795143832685e-06, + "loss": 0.06528282165527344, + "step": 116785 + }, + { + "epoch": 1.0098485962075554, + "grad_norm": 3.2444564646492093, + "learning_rate": 2.953591434934075e-06, + "loss": 0.1132232666015625, + "step": 116790 + }, + { + "epoch": 1.0098918297290989, + "grad_norm": 6.497141327082717, + "learning_rate": 2.9533877262494975e-06, + "loss": 0.1073638916015625, + "step": 116795 + }, + { + "epoch": 1.009935063250642, + "grad_norm": 4.898238092939053, + "learning_rate": 2.953184017779892e-06, + "loss": 0.142425537109375, + "step": 116800 + }, + { + "epoch": 1.0099782967721853, + "grad_norm": 9.852822919842648, + "learning_rate": 2.952980309526198e-06, + "loss": 0.2302276611328125, + "step": 116805 + }, + { + "epoch": 1.0100215302937285, + "grad_norm": 6.554684111422783, + "learning_rate": 2.9527766014893547e-06, + "loss": 0.11498565673828125, + "step": 116810 + }, + { + "epoch": 1.0100647638152718, + "grad_norm": 1.4396298876410347, + "learning_rate": 2.9525728936703018e-06, + "loss": 0.039743804931640626, + "step": 116815 + }, + { + "epoch": 1.010107997336815, + "grad_norm": 7.727744728173892, + "learning_rate": 2.9523691860699778e-06, + "loss": 0.076953125, + "step": 116820 + }, + { + "epoch": 1.0101512308583582, + "grad_norm": 7.493652058964627, + "learning_rate": 2.9521654786893233e-06, + "loss": 0.0579437255859375, + "step": 116825 + }, + { + "epoch": 1.0101944643799017, + "grad_norm": 1.9628666812832773, + "learning_rate": 2.9519617715292764e-06, + "loss": 0.08707275390625, + "step": 116830 + }, + { + "epoch": 1.010237697901445, + "grad_norm": 0.7157064545745913, + "learning_rate": 2.9517580645907786e-06, + "loss": 0.03520774841308594, + "step": 116835 + }, + { + "epoch": 1.0102809314229881, + "grad_norm": 1.4355851046217944, + "learning_rate": 2.9515543578747684e-06, + "loss": 0.12050018310546876, + "step": 116840 + }, + { + "epoch": 1.0103241649445314, + "grad_norm": 19.34227780778662, + "learning_rate": 2.951350651382185e-06, + "loss": 0.22248382568359376, + "step": 116845 + }, + { + "epoch": 1.0103673984660746, + "grad_norm": 1.9387398809140077, + "learning_rate": 2.951146945113968e-06, + "loss": 0.18592529296875, + "step": 116850 + }, + { + "epoch": 1.0104106319876178, + "grad_norm": 0.2444086166664178, + "learning_rate": 2.9509432390710574e-06, + "loss": 0.10992622375488281, + "step": 116855 + }, + { + "epoch": 1.0104538655091613, + "grad_norm": 2.066469447509932, + "learning_rate": 2.950739533254391e-06, + "loss": 0.03960113525390625, + "step": 116860 + }, + { + "epoch": 1.0104970990307045, + "grad_norm": 5.424806970772371, + "learning_rate": 2.9505358276649103e-06, + "loss": 0.17045822143554687, + "step": 116865 + }, + { + "epoch": 1.0105403325522477, + "grad_norm": 1.481317902148848, + "learning_rate": 2.950332122303555e-06, + "loss": 0.013251495361328126, + "step": 116870 + }, + { + "epoch": 1.010583566073791, + "grad_norm": 38.52569825942822, + "learning_rate": 2.9501284171712624e-06, + "loss": 0.29287109375, + "step": 116875 + }, + { + "epoch": 1.0106267995953342, + "grad_norm": 9.307125943406676, + "learning_rate": 2.949924712268973e-06, + "loss": 0.1706695556640625, + "step": 116880 + }, + { + "epoch": 1.0106700331168774, + "grad_norm": 12.010189900788523, + "learning_rate": 2.949721007597627e-06, + "loss": 0.043126678466796874, + "step": 116885 + }, + { + "epoch": 1.0107132666384209, + "grad_norm": 3.3763697634491825, + "learning_rate": 2.9495173031581623e-06, + "loss": 0.03952484130859375, + "step": 116890 + }, + { + "epoch": 1.010756500159964, + "grad_norm": 1.9572522200734923, + "learning_rate": 2.9493135989515202e-06, + "loss": 0.0144622802734375, + "step": 116895 + }, + { + "epoch": 1.0107997336815073, + "grad_norm": 0.3784810427718519, + "learning_rate": 2.9491098949786394e-06, + "loss": 0.06790618896484375, + "step": 116900 + }, + { + "epoch": 1.0108429672030506, + "grad_norm": 3.134114704728762, + "learning_rate": 2.9489061912404595e-06, + "loss": 0.117926025390625, + "step": 116905 + }, + { + "epoch": 1.0108862007245938, + "grad_norm": 1.9511410124626827, + "learning_rate": 2.948702487737919e-06, + "loss": 0.08135299682617188, + "step": 116910 + }, + { + "epoch": 1.010929434246137, + "grad_norm": 0.8994493191162415, + "learning_rate": 2.9484987844719588e-06, + "loss": 0.043573760986328126, + "step": 116915 + }, + { + "epoch": 1.0109726677676802, + "grad_norm": 2.2203745217678548, + "learning_rate": 2.9482950814435175e-06, + "loss": 0.017817306518554687, + "step": 116920 + }, + { + "epoch": 1.0110159012892237, + "grad_norm": 2.1853083743778052, + "learning_rate": 2.9480913786535334e-06, + "loss": 0.03726119995117187, + "step": 116925 + }, + { + "epoch": 1.011059134810767, + "grad_norm": 7.552555505680898, + "learning_rate": 2.9478876761029488e-06, + "loss": 0.04262542724609375, + "step": 116930 + }, + { + "epoch": 1.0111023683323102, + "grad_norm": 57.50095747231273, + "learning_rate": 2.947683973792701e-06, + "loss": 0.16315555572509766, + "step": 116935 + }, + { + "epoch": 1.0111456018538534, + "grad_norm": 1.7443929214567742, + "learning_rate": 2.94748027172373e-06, + "loss": 0.03074951171875, + "step": 116940 + }, + { + "epoch": 1.0111888353753966, + "grad_norm": 0.48009124574350304, + "learning_rate": 2.9472765698969763e-06, + "loss": 0.05545883178710938, + "step": 116945 + }, + { + "epoch": 1.0112320688969398, + "grad_norm": 15.503237509861703, + "learning_rate": 2.947072868313378e-06, + "loss": 0.06712074279785156, + "step": 116950 + }, + { + "epoch": 1.0112753024184833, + "grad_norm": 0.07967919106135282, + "learning_rate": 2.9468691669738734e-06, + "loss": 0.0197052001953125, + "step": 116955 + }, + { + "epoch": 1.0113185359400265, + "grad_norm": 5.582011815908133, + "learning_rate": 2.946665465879405e-06, + "loss": 0.3736598968505859, + "step": 116960 + }, + { + "epoch": 1.0113617694615697, + "grad_norm": 2.105869592477887, + "learning_rate": 2.94646176503091e-06, + "loss": 0.06526145935058594, + "step": 116965 + }, + { + "epoch": 1.011405002983113, + "grad_norm": 11.81349796884726, + "learning_rate": 2.9462580644293296e-06, + "loss": 0.02543182373046875, + "step": 116970 + }, + { + "epoch": 1.0114482365046562, + "grad_norm": 2.6014098175495395, + "learning_rate": 2.9460543640756024e-06, + "loss": 0.03479843139648438, + "step": 116975 + }, + { + "epoch": 1.0114914700261994, + "grad_norm": 18.50695315196252, + "learning_rate": 2.945850663970667e-06, + "loss": 0.06836509704589844, + "step": 116980 + }, + { + "epoch": 1.0115347035477429, + "grad_norm": 13.815864686104195, + "learning_rate": 2.9456469641154628e-06, + "loss": 0.09502182006835938, + "step": 116985 + }, + { + "epoch": 1.0115779370692861, + "grad_norm": 2.35153621107573, + "learning_rate": 2.945443264510931e-06, + "loss": 0.021221923828125, + "step": 116990 + }, + { + "epoch": 1.0116211705908293, + "grad_norm": 1.0779343442713656, + "learning_rate": 2.94523956515801e-06, + "loss": 0.07369384765625, + "step": 116995 + }, + { + "epoch": 1.0116644041123726, + "grad_norm": 1.4001585337771751, + "learning_rate": 2.9450358660576394e-06, + "loss": 0.0472442626953125, + "step": 117000 + }, + { + "epoch": 1.0117076376339158, + "grad_norm": 0.068374751062234, + "learning_rate": 2.9448321672107584e-06, + "loss": 0.04162788391113281, + "step": 117005 + }, + { + "epoch": 1.011750871155459, + "grad_norm": 26.015496052310468, + "learning_rate": 2.944628468618307e-06, + "loss": 0.1500629425048828, + "step": 117010 + }, + { + "epoch": 1.0117941046770023, + "grad_norm": 23.201306514971847, + "learning_rate": 2.944424770281223e-06, + "loss": 0.27908859252929685, + "step": 117015 + }, + { + "epoch": 1.0118373381985457, + "grad_norm": 1.3824058577733978, + "learning_rate": 2.9442210722004473e-06, + "loss": 0.153277587890625, + "step": 117020 + }, + { + "epoch": 1.011880571720089, + "grad_norm": 0.9409709395152027, + "learning_rate": 2.9440173743769193e-06, + "loss": 0.0295806884765625, + "step": 117025 + }, + { + "epoch": 1.0119238052416322, + "grad_norm": 14.248320407817648, + "learning_rate": 2.943813676811579e-06, + "loss": 0.05604686737060547, + "step": 117030 + }, + { + "epoch": 1.0119670387631754, + "grad_norm": 0.7807585256687817, + "learning_rate": 2.943609979505365e-06, + "loss": 0.10803604125976562, + "step": 117035 + }, + { + "epoch": 1.0120102722847186, + "grad_norm": 1.3550817398421302, + "learning_rate": 2.943406282459216e-06, + "loss": 0.0801666259765625, + "step": 117040 + }, + { + "epoch": 1.0120535058062619, + "grad_norm": 4.69359699449695, + "learning_rate": 2.9432025856740726e-06, + "loss": 0.04352645874023438, + "step": 117045 + }, + { + "epoch": 1.0120967393278053, + "grad_norm": 53.91201546657365, + "learning_rate": 2.9429988891508732e-06, + "loss": 0.0841094970703125, + "step": 117050 + }, + { + "epoch": 1.0121399728493485, + "grad_norm": 14.653745903476409, + "learning_rate": 2.942795192890558e-06, + "loss": 0.02848987579345703, + "step": 117055 + }, + { + "epoch": 1.0121832063708918, + "grad_norm": 0.6796511621761424, + "learning_rate": 2.9425914968940675e-06, + "loss": 0.028271484375, + "step": 117060 + }, + { + "epoch": 1.012226439892435, + "grad_norm": 5.985568975900421, + "learning_rate": 2.942387801162339e-06, + "loss": 0.11530075073242188, + "step": 117065 + }, + { + "epoch": 1.0122696734139782, + "grad_norm": 0.2273678965519945, + "learning_rate": 2.9421841056963134e-06, + "loss": 0.055255126953125, + "step": 117070 + }, + { + "epoch": 1.0123129069355215, + "grad_norm": 11.746316552079758, + "learning_rate": 2.941980410496929e-06, + "loss": 0.07594642639160157, + "step": 117075 + }, + { + "epoch": 1.0123561404570647, + "grad_norm": 3.6713575138728554, + "learning_rate": 2.9417767155651267e-06, + "loss": 0.07500572204589843, + "step": 117080 + }, + { + "epoch": 1.0123993739786081, + "grad_norm": 0.3205983877144843, + "learning_rate": 2.9415730209018435e-06, + "loss": 0.00888671875, + "step": 117085 + }, + { + "epoch": 1.0124426075001514, + "grad_norm": 10.864556798789353, + "learning_rate": 2.9413693265080214e-06, + "loss": 0.06828460693359376, + "step": 117090 + }, + { + "epoch": 1.0124858410216946, + "grad_norm": 25.206770533517947, + "learning_rate": 2.941165632384599e-06, + "loss": 0.16336669921875, + "step": 117095 + }, + { + "epoch": 1.0125290745432378, + "grad_norm": 4.741787418790096, + "learning_rate": 2.9409619385325153e-06, + "loss": 0.04256591796875, + "step": 117100 + }, + { + "epoch": 1.012572308064781, + "grad_norm": 49.159984019968995, + "learning_rate": 2.9407582449527096e-06, + "loss": 0.205438232421875, + "step": 117105 + }, + { + "epoch": 1.0126155415863243, + "grad_norm": 5.633818885600818, + "learning_rate": 2.9405545516461225e-06, + "loss": 0.04683990478515625, + "step": 117110 + }, + { + "epoch": 1.0126587751078677, + "grad_norm": 3.381617009550765, + "learning_rate": 2.9403508586136907e-06, + "loss": 0.02870330810546875, + "step": 117115 + }, + { + "epoch": 1.012702008629411, + "grad_norm": 4.041810837172605, + "learning_rate": 2.940147165856357e-06, + "loss": 0.06615200042724609, + "step": 117120 + }, + { + "epoch": 1.0127452421509542, + "grad_norm": 6.09216414263765, + "learning_rate": 2.939943473375059e-06, + "loss": 0.021963882446289062, + "step": 117125 + }, + { + "epoch": 1.0127884756724974, + "grad_norm": 1.1905154814633996, + "learning_rate": 2.939739781170737e-06, + "loss": 0.042280960083007815, + "step": 117130 + }, + { + "epoch": 1.0128317091940406, + "grad_norm": 0.9176233658420674, + "learning_rate": 2.9395360892443287e-06, + "loss": 0.089666748046875, + "step": 117135 + }, + { + "epoch": 1.0128749427155839, + "grad_norm": 8.306448715351165, + "learning_rate": 2.9393323975967754e-06, + "loss": 0.18817977905273436, + "step": 117140 + }, + { + "epoch": 1.0129181762371273, + "grad_norm": 5.942586405425876, + "learning_rate": 2.939128706229014e-06, + "loss": 0.09654426574707031, + "step": 117145 + }, + { + "epoch": 1.0129614097586706, + "grad_norm": 3.2624346899055023, + "learning_rate": 2.9389250151419874e-06, + "loss": 0.03614349365234375, + "step": 117150 + }, + { + "epoch": 1.0130046432802138, + "grad_norm": 0.8042800742514968, + "learning_rate": 2.9387213243366335e-06, + "loss": 0.09604339599609375, + "step": 117155 + }, + { + "epoch": 1.013047876801757, + "grad_norm": 7.5010173520831716, + "learning_rate": 2.9385176338138905e-06, + "loss": 0.0560821533203125, + "step": 117160 + }, + { + "epoch": 1.0130911103233002, + "grad_norm": 1.3206615462044804, + "learning_rate": 2.9383139435746993e-06, + "loss": 0.033831024169921876, + "step": 117165 + }, + { + "epoch": 1.0131343438448435, + "grad_norm": 21.131336115083524, + "learning_rate": 2.938110253619999e-06, + "loss": 0.09880714416503907, + "step": 117170 + }, + { + "epoch": 1.0131775773663867, + "grad_norm": 25.3894169883442, + "learning_rate": 2.9379065639507285e-06, + "loss": 0.176568603515625, + "step": 117175 + }, + { + "epoch": 1.0132208108879301, + "grad_norm": 7.372742281421916, + "learning_rate": 2.9377028745678262e-06, + "loss": 0.04917449951171875, + "step": 117180 + }, + { + "epoch": 1.0132640444094734, + "grad_norm": 8.302978830262353, + "learning_rate": 2.9374991854722337e-06, + "loss": 0.05699462890625, + "step": 117185 + }, + { + "epoch": 1.0133072779310166, + "grad_norm": 1.201909588105385, + "learning_rate": 2.937295496664889e-06, + "loss": 0.0350738525390625, + "step": 117190 + }, + { + "epoch": 1.0133505114525598, + "grad_norm": 1.3416618386441204, + "learning_rate": 2.937091808146733e-06, + "loss": 0.03017005920410156, + "step": 117195 + }, + { + "epoch": 1.013393744974103, + "grad_norm": 9.359299229126162, + "learning_rate": 2.9368881199187038e-06, + "loss": 0.08176116943359375, + "step": 117200 + }, + { + "epoch": 1.0134369784956463, + "grad_norm": 8.891061709777045, + "learning_rate": 2.936684431981741e-06, + "loss": 0.057520294189453126, + "step": 117205 + }, + { + "epoch": 1.0134802120171897, + "grad_norm": 1.2164186845816751, + "learning_rate": 2.9364807443367826e-06, + "loss": 0.048282623291015625, + "step": 117210 + }, + { + "epoch": 1.013523445538733, + "grad_norm": 4.209026714530648, + "learning_rate": 2.9362770569847706e-06, + "loss": 0.17558059692382813, + "step": 117215 + }, + { + "epoch": 1.0135666790602762, + "grad_norm": 0.6406712090786546, + "learning_rate": 2.936073369926642e-06, + "loss": 0.06455078125, + "step": 117220 + }, + { + "epoch": 1.0136099125818194, + "grad_norm": 15.825731147910945, + "learning_rate": 2.9358696831633392e-06, + "loss": 0.0315643310546875, + "step": 117225 + }, + { + "epoch": 1.0136531461033627, + "grad_norm": 6.481927264280169, + "learning_rate": 2.935665996695799e-06, + "loss": 0.04553680419921875, + "step": 117230 + }, + { + "epoch": 1.0136963796249059, + "grad_norm": 9.199125870942787, + "learning_rate": 2.9354623105249618e-06, + "loss": 0.037241363525390626, + "step": 117235 + }, + { + "epoch": 1.0137396131464493, + "grad_norm": 1.1663647533134398, + "learning_rate": 2.9352586246517655e-06, + "loss": 0.05847320556640625, + "step": 117240 + }, + { + "epoch": 1.0137828466679926, + "grad_norm": 1.449464141248972, + "learning_rate": 2.9350549390771518e-06, + "loss": 0.01313323974609375, + "step": 117245 + }, + { + "epoch": 1.0138260801895358, + "grad_norm": 10.38907725874295, + "learning_rate": 2.9348512538020586e-06, + "loss": 0.039966392517089847, + "step": 117250 + }, + { + "epoch": 1.013869313711079, + "grad_norm": 4.972187762270251, + "learning_rate": 2.934647568827426e-06, + "loss": 0.19647483825683593, + "step": 117255 + }, + { + "epoch": 1.0139125472326223, + "grad_norm": 5.397998003180304, + "learning_rate": 2.934443884154193e-06, + "loss": 0.041500091552734375, + "step": 117260 + }, + { + "epoch": 1.0139557807541655, + "grad_norm": 1.1463515146096281, + "learning_rate": 2.9342401997832997e-06, + "loss": 0.09981918334960938, + "step": 117265 + }, + { + "epoch": 1.0139990142757087, + "grad_norm": 0.4829597604632896, + "learning_rate": 2.9340365157156833e-06, + "loss": 0.02554779052734375, + "step": 117270 + }, + { + "epoch": 1.0140422477972522, + "grad_norm": 13.100187020175317, + "learning_rate": 2.933832831952285e-06, + "loss": 0.16149139404296875, + "step": 117275 + }, + { + "epoch": 1.0140854813187954, + "grad_norm": 4.735204687177825, + "learning_rate": 2.933629148494044e-06, + "loss": 0.05134315490722656, + "step": 117280 + }, + { + "epoch": 1.0141287148403386, + "grad_norm": 1.2876729400236138, + "learning_rate": 2.9334254653418994e-06, + "loss": 0.16944217681884766, + "step": 117285 + }, + { + "epoch": 1.0141719483618818, + "grad_norm": 1.391148196840027, + "learning_rate": 2.9332217824967913e-06, + "loss": 0.043304443359375, + "step": 117290 + }, + { + "epoch": 1.014215181883425, + "grad_norm": 7.064102992661983, + "learning_rate": 2.9330180999596585e-06, + "loss": 0.04003143310546875, + "step": 117295 + }, + { + "epoch": 1.0142584154049683, + "grad_norm": 3.0927570566654756, + "learning_rate": 2.9328144177314394e-06, + "loss": 0.0220428466796875, + "step": 117300 + }, + { + "epoch": 1.0143016489265118, + "grad_norm": 2.7030319139740766, + "learning_rate": 2.9326107358130747e-06, + "loss": 0.012656784057617188, + "step": 117305 + }, + { + "epoch": 1.014344882448055, + "grad_norm": 0.19509907850770283, + "learning_rate": 2.932407054205503e-06, + "loss": 0.043661880493164065, + "step": 117310 + }, + { + "epoch": 1.0143881159695982, + "grad_norm": 22.866740396823396, + "learning_rate": 2.9322033729096643e-06, + "loss": 0.14637451171875, + "step": 117315 + }, + { + "epoch": 1.0144313494911414, + "grad_norm": 5.670650204579513, + "learning_rate": 2.931999691926498e-06, + "loss": 0.220989990234375, + "step": 117320 + }, + { + "epoch": 1.0144745830126847, + "grad_norm": 31.10067750005961, + "learning_rate": 2.9317960112569433e-06, + "loss": 0.14428977966308593, + "step": 117325 + }, + { + "epoch": 1.014517816534228, + "grad_norm": 10.320673084107593, + "learning_rate": 2.931592330901939e-06, + "loss": 0.049468994140625, + "step": 117330 + }, + { + "epoch": 1.0145610500557714, + "grad_norm": 5.395286232558037, + "learning_rate": 2.9313886508624232e-06, + "loss": 0.13231048583984376, + "step": 117335 + }, + { + "epoch": 1.0146042835773146, + "grad_norm": 31.321894304470693, + "learning_rate": 2.931184971139339e-06, + "loss": 0.51077880859375, + "step": 117340 + }, + { + "epoch": 1.0146475170988578, + "grad_norm": 7.45864635749628, + "learning_rate": 2.9309812917336233e-06, + "loss": 0.09716796875, + "step": 117345 + }, + { + "epoch": 1.014690750620401, + "grad_norm": 31.722973490670846, + "learning_rate": 2.9307776126462155e-06, + "loss": 0.09980239868164062, + "step": 117350 + }, + { + "epoch": 1.0147339841419443, + "grad_norm": 0.9778740705608813, + "learning_rate": 2.9305739338780557e-06, + "loss": 0.049176025390625, + "step": 117355 + }, + { + "epoch": 1.0147772176634875, + "grad_norm": 0.17543897192602642, + "learning_rate": 2.930370255430082e-06, + "loss": 0.19262924194335937, + "step": 117360 + }, + { + "epoch": 1.0148204511850307, + "grad_norm": 2.432222848937098, + "learning_rate": 2.9301665773032354e-06, + "loss": 0.11133575439453125, + "step": 117365 + }, + { + "epoch": 1.0148636847065742, + "grad_norm": 1.6398622842260724, + "learning_rate": 2.9299628994984527e-06, + "loss": 0.06904678344726563, + "step": 117370 + }, + { + "epoch": 1.0149069182281174, + "grad_norm": 1.586862411551288, + "learning_rate": 2.9297592220166764e-06, + "loss": 0.07065505981445312, + "step": 117375 + }, + { + "epoch": 1.0149501517496606, + "grad_norm": 9.265680569765445, + "learning_rate": 2.9295555448588444e-06, + "loss": 0.14852981567382811, + "step": 117380 + }, + { + "epoch": 1.0149933852712039, + "grad_norm": 45.14712801889202, + "learning_rate": 2.9293518680258952e-06, + "loss": 0.17751312255859375, + "step": 117385 + }, + { + "epoch": 1.015036618792747, + "grad_norm": 30.90143632221495, + "learning_rate": 2.9291481915187696e-06, + "loss": 0.2030271530151367, + "step": 117390 + }, + { + "epoch": 1.0150798523142903, + "grad_norm": 4.059584129512504, + "learning_rate": 2.9289445153384064e-06, + "loss": 0.023672103881835938, + "step": 117395 + }, + { + "epoch": 1.0151230858358338, + "grad_norm": 0.8957987895590261, + "learning_rate": 2.9287408394857434e-06, + "loss": 0.126153564453125, + "step": 117400 + }, + { + "epoch": 1.015166319357377, + "grad_norm": 0.4225660344493804, + "learning_rate": 2.9285371639617227e-06, + "loss": 0.17802734375, + "step": 117405 + }, + { + "epoch": 1.0152095528789202, + "grad_norm": 0.28266104665436453, + "learning_rate": 2.928333488767282e-06, + "loss": 0.23063583374023439, + "step": 117410 + }, + { + "epoch": 1.0152527864004635, + "grad_norm": 0.8433699600268125, + "learning_rate": 2.9281298139033607e-06, + "loss": 0.13344879150390626, + "step": 117415 + }, + { + "epoch": 1.0152960199220067, + "grad_norm": 15.007160900181326, + "learning_rate": 2.9279261393708992e-06, + "loss": 0.10827713012695313, + "step": 117420 + }, + { + "epoch": 1.01533925344355, + "grad_norm": 1.2943376889698146, + "learning_rate": 2.927722465170835e-06, + "loss": 0.07393360137939453, + "step": 117425 + }, + { + "epoch": 1.0153824869650931, + "grad_norm": 1.6251954169797844, + "learning_rate": 2.9275187913041094e-06, + "loss": 0.07552947998046874, + "step": 117430 + }, + { + "epoch": 1.0154257204866366, + "grad_norm": 10.275561288896728, + "learning_rate": 2.9273151177716593e-06, + "loss": 0.0530487060546875, + "step": 117435 + }, + { + "epoch": 1.0154689540081798, + "grad_norm": 0.657243852495276, + "learning_rate": 2.9271114445744263e-06, + "loss": 0.05505428314208984, + "step": 117440 + }, + { + "epoch": 1.015512187529723, + "grad_norm": 1.7815188912289401, + "learning_rate": 2.9269077717133485e-06, + "loss": 0.13605308532714844, + "step": 117445 + }, + { + "epoch": 1.0155554210512663, + "grad_norm": 0.8998896748853573, + "learning_rate": 2.926704099189366e-06, + "loss": 0.01068286895751953, + "step": 117450 + }, + { + "epoch": 1.0155986545728095, + "grad_norm": 4.159241792709889, + "learning_rate": 2.926500427003418e-06, + "loss": 0.018198013305664062, + "step": 117455 + }, + { + "epoch": 1.0156418880943527, + "grad_norm": 3.6302196140971965, + "learning_rate": 2.9262967551564436e-06, + "loss": 0.12531585693359376, + "step": 117460 + }, + { + "epoch": 1.0156851216158962, + "grad_norm": 0.15900587444337203, + "learning_rate": 2.9260930836493806e-06, + "loss": 0.09980049133300781, + "step": 117465 + }, + { + "epoch": 1.0157283551374394, + "grad_norm": 12.565394346225707, + "learning_rate": 2.925889412483171e-06, + "loss": 0.1224151611328125, + "step": 117470 + }, + { + "epoch": 1.0157715886589826, + "grad_norm": 0.9314274549149436, + "learning_rate": 2.9256857416587524e-06, + "loss": 0.0151123046875, + "step": 117475 + }, + { + "epoch": 1.0158148221805259, + "grad_norm": 3.471183834380915, + "learning_rate": 2.9254820711770655e-06, + "loss": 0.15643348693847656, + "step": 117480 + }, + { + "epoch": 1.015858055702069, + "grad_norm": 5.884857288497223, + "learning_rate": 2.9252784010390483e-06, + "loss": 0.12173633575439453, + "step": 117485 + }, + { + "epoch": 1.0159012892236123, + "grad_norm": 2.5026938428985397, + "learning_rate": 2.9250747312456405e-06, + "loss": 0.04225234985351563, + "step": 117490 + }, + { + "epoch": 1.0159445227451558, + "grad_norm": 11.492502999322593, + "learning_rate": 2.9248710617977798e-06, + "loss": 0.09744224548339844, + "step": 117495 + }, + { + "epoch": 1.015987756266699, + "grad_norm": 0.4005520790267352, + "learning_rate": 2.9246673926964093e-06, + "loss": 0.0918182373046875, + "step": 117500 + }, + { + "epoch": 1.0160309897882422, + "grad_norm": 10.641389554553092, + "learning_rate": 2.9244637239424647e-06, + "loss": 0.04349822998046875, + "step": 117505 + }, + { + "epoch": 1.0160742233097855, + "grad_norm": 3.4707336460722553, + "learning_rate": 2.9242600555368878e-06, + "loss": 0.09453125, + "step": 117510 + }, + { + "epoch": 1.0161174568313287, + "grad_norm": 1.455467432064112, + "learning_rate": 2.9240563874806166e-06, + "loss": 0.0409576416015625, + "step": 117515 + }, + { + "epoch": 1.016160690352872, + "grad_norm": 12.908896965082041, + "learning_rate": 2.9238527197745906e-06, + "loss": 0.036163711547851564, + "step": 117520 + }, + { + "epoch": 1.0162039238744152, + "grad_norm": 1.0975768351094408, + "learning_rate": 2.9236490524197483e-06, + "loss": 0.019582366943359374, + "step": 117525 + }, + { + "epoch": 1.0162471573959586, + "grad_norm": 6.228619643070607, + "learning_rate": 2.9234453854170303e-06, + "loss": 0.03578338623046875, + "step": 117530 + }, + { + "epoch": 1.0162903909175018, + "grad_norm": 5.81387734070316, + "learning_rate": 2.9232417187673756e-06, + "loss": 0.21192703247070313, + "step": 117535 + }, + { + "epoch": 1.016333624439045, + "grad_norm": 2.5958791487688146, + "learning_rate": 2.9230380524717234e-06, + "loss": 0.062123870849609374, + "step": 117540 + }, + { + "epoch": 1.0163768579605883, + "grad_norm": 3.3730353755801823, + "learning_rate": 2.9228343865310132e-06, + "loss": 0.0797119140625, + "step": 117545 + }, + { + "epoch": 1.0164200914821315, + "grad_norm": 0.233976045125356, + "learning_rate": 2.922630720946184e-06, + "loss": 0.02472076416015625, + "step": 117550 + }, + { + "epoch": 1.0164633250036748, + "grad_norm": 0.17253057890576126, + "learning_rate": 2.922427055718174e-06, + "loss": 0.042285919189453125, + "step": 117555 + }, + { + "epoch": 1.0165065585252182, + "grad_norm": 0.3705962920157486, + "learning_rate": 2.9222233908479237e-06, + "loss": 0.0095428466796875, + "step": 117560 + }, + { + "epoch": 1.0165497920467614, + "grad_norm": 1.8534097462370904, + "learning_rate": 2.9220197263363732e-06, + "loss": 0.0683523178100586, + "step": 117565 + }, + { + "epoch": 1.0165930255683047, + "grad_norm": 4.798573409005018, + "learning_rate": 2.9218160621844606e-06, + "loss": 0.13751449584960937, + "step": 117570 + }, + { + "epoch": 1.016636259089848, + "grad_norm": 4.926360777587923, + "learning_rate": 2.9216123983931253e-06, + "loss": 0.1166748046875, + "step": 117575 + }, + { + "epoch": 1.0166794926113911, + "grad_norm": 5.2183583917652525, + "learning_rate": 2.9214087349633062e-06, + "loss": 0.16982040405273438, + "step": 117580 + }, + { + "epoch": 1.0167227261329344, + "grad_norm": 3.323504645610243, + "learning_rate": 2.921205071895944e-06, + "loss": 0.0314453125, + "step": 117585 + }, + { + "epoch": 1.0167659596544778, + "grad_norm": 0.03660560010776717, + "learning_rate": 2.9210014091919754e-06, + "loss": 0.02288055419921875, + "step": 117590 + }, + { + "epoch": 1.016809193176021, + "grad_norm": 24.32610747235957, + "learning_rate": 2.920797746852343e-06, + "loss": 0.0942047119140625, + "step": 117595 + }, + { + "epoch": 1.0168524266975643, + "grad_norm": 13.842278284625586, + "learning_rate": 2.920594084877984e-06, + "loss": 0.0753509521484375, + "step": 117600 + }, + { + "epoch": 1.0168956602191075, + "grad_norm": 0.15022866587952569, + "learning_rate": 2.920390423269838e-06, + "loss": 0.018710803985595704, + "step": 117605 + }, + { + "epoch": 1.0169388937406507, + "grad_norm": 0.020579087099957705, + "learning_rate": 2.920186762028844e-06, + "loss": 0.026161861419677735, + "step": 117610 + }, + { + "epoch": 1.016982127262194, + "grad_norm": 7.020226387981915, + "learning_rate": 2.9199831011559426e-06, + "loss": 0.15358467102050782, + "step": 117615 + }, + { + "epoch": 1.0170253607837372, + "grad_norm": 0.5324882070276759, + "learning_rate": 2.9197794406520713e-06, + "loss": 0.037229156494140624, + "step": 117620 + }, + { + "epoch": 1.0170685943052806, + "grad_norm": 3.4208595229082386, + "learning_rate": 2.9195757805181692e-06, + "loss": 0.047800445556640626, + "step": 117625 + }, + { + "epoch": 1.0171118278268239, + "grad_norm": 0.8306815209310743, + "learning_rate": 2.9193721207551778e-06, + "loss": 0.025225448608398437, + "step": 117630 + }, + { + "epoch": 1.017155061348367, + "grad_norm": 1.144316643706696, + "learning_rate": 2.919168461364035e-06, + "loss": 0.027169036865234374, + "step": 117635 + }, + { + "epoch": 1.0171982948699103, + "grad_norm": 1.318601387331567, + "learning_rate": 2.9189648023456796e-06, + "loss": 0.16134490966796874, + "step": 117640 + }, + { + "epoch": 1.0172415283914535, + "grad_norm": 42.914219781135515, + "learning_rate": 2.9187611437010516e-06, + "loss": 0.1712982177734375, + "step": 117645 + }, + { + "epoch": 1.0172847619129968, + "grad_norm": 8.345926570271994, + "learning_rate": 2.9185574854310904e-06, + "loss": 0.02987060546875, + "step": 117650 + }, + { + "epoch": 1.0173279954345402, + "grad_norm": 2.118206382855611, + "learning_rate": 2.9183538275367336e-06, + "loss": 0.11483173370361328, + "step": 117655 + }, + { + "epoch": 1.0173712289560835, + "grad_norm": 0.42258891539346966, + "learning_rate": 2.918150170018923e-06, + "loss": 0.1159027099609375, + "step": 117660 + }, + { + "epoch": 1.0174144624776267, + "grad_norm": 11.075336297735525, + "learning_rate": 2.9179465128785966e-06, + "loss": 0.03592529296875, + "step": 117665 + }, + { + "epoch": 1.01745769599917, + "grad_norm": 0.5633858620275904, + "learning_rate": 2.9177428561166924e-06, + "loss": 0.0317474365234375, + "step": 117670 + }, + { + "epoch": 1.0175009295207131, + "grad_norm": 1.596155096383217, + "learning_rate": 2.9175391997341527e-06, + "loss": 0.09450321197509766, + "step": 117675 + }, + { + "epoch": 1.0175441630422564, + "grad_norm": 3.1827107347901538, + "learning_rate": 2.917335543731914e-06, + "loss": 0.05376129150390625, + "step": 117680 + }, + { + "epoch": 1.0175873965637998, + "grad_norm": 14.158179026004998, + "learning_rate": 2.917131888110915e-06, + "loss": 0.098614501953125, + "step": 117685 + }, + { + "epoch": 1.017630630085343, + "grad_norm": 0.6946217537114565, + "learning_rate": 2.9169282328720982e-06, + "loss": 0.0256195068359375, + "step": 117690 + }, + { + "epoch": 1.0176738636068863, + "grad_norm": 2.206198063114488, + "learning_rate": 2.9167245780164015e-06, + "loss": 0.130322265625, + "step": 117695 + }, + { + "epoch": 1.0177170971284295, + "grad_norm": 1.8614321238220841, + "learning_rate": 2.9165209235447626e-06, + "loss": 0.011487960815429688, + "step": 117700 + }, + { + "epoch": 1.0177603306499727, + "grad_norm": 0.22601868052796703, + "learning_rate": 2.9163172694581222e-06, + "loss": 0.084698486328125, + "step": 117705 + }, + { + "epoch": 1.017803564171516, + "grad_norm": 2.4398947467694714, + "learning_rate": 2.9161136157574193e-06, + "loss": 0.113232421875, + "step": 117710 + }, + { + "epoch": 1.0178467976930592, + "grad_norm": 3.3189885017380787, + "learning_rate": 2.915909962443593e-06, + "loss": 0.07395248413085938, + "step": 117715 + }, + { + "epoch": 1.0178900312146026, + "grad_norm": 34.2466960123655, + "learning_rate": 2.915706309517581e-06, + "loss": 0.15290184020996095, + "step": 117720 + }, + { + "epoch": 1.0179332647361459, + "grad_norm": 9.667855661932236, + "learning_rate": 2.9155026569803256e-06, + "loss": 0.1432464599609375, + "step": 117725 + }, + { + "epoch": 1.017976498257689, + "grad_norm": 1.234346237167428, + "learning_rate": 2.9152990048327643e-06, + "loss": 0.05232658386230469, + "step": 117730 + }, + { + "epoch": 1.0180197317792323, + "grad_norm": 0.6480848614636127, + "learning_rate": 2.915095353075837e-06, + "loss": 0.04434814453125, + "step": 117735 + }, + { + "epoch": 1.0180629653007756, + "grad_norm": 5.35337176271989, + "learning_rate": 2.9148917017104816e-06, + "loss": 0.0258941650390625, + "step": 117740 + }, + { + "epoch": 1.0181061988223188, + "grad_norm": 14.994432982552228, + "learning_rate": 2.914688050737639e-06, + "loss": 0.21996231079101564, + "step": 117745 + }, + { + "epoch": 1.0181494323438622, + "grad_norm": 20.56969916396546, + "learning_rate": 2.9144844001582456e-06, + "loss": 0.10575599670410156, + "step": 117750 + }, + { + "epoch": 1.0181926658654055, + "grad_norm": 2.5043649070261296, + "learning_rate": 2.9142807499732444e-06, + "loss": 0.18071632385253905, + "step": 117755 + }, + { + "epoch": 1.0182358993869487, + "grad_norm": 1.099499821961463, + "learning_rate": 2.9140771001835716e-06, + "loss": 0.04076461791992188, + "step": 117760 + }, + { + "epoch": 1.018279132908492, + "grad_norm": 1.376201604101016, + "learning_rate": 2.913873450790169e-06, + "loss": 0.080706787109375, + "step": 117765 + }, + { + "epoch": 1.0183223664300352, + "grad_norm": 6.6588577642932, + "learning_rate": 2.913669801793974e-06, + "loss": 0.05032196044921875, + "step": 117770 + }, + { + "epoch": 1.0183655999515784, + "grad_norm": 8.838396820967555, + "learning_rate": 2.9134661531959264e-06, + "loss": 0.04436836242675781, + "step": 117775 + }, + { + "epoch": 1.0184088334731216, + "grad_norm": 0.2466728977886456, + "learning_rate": 2.9132625049969646e-06, + "loss": 0.16189956665039062, + "step": 117780 + }, + { + "epoch": 1.018452066994665, + "grad_norm": 1.6178634073902403, + "learning_rate": 2.9130588571980277e-06, + "loss": 0.03035430908203125, + "step": 117785 + }, + { + "epoch": 1.0184953005162083, + "grad_norm": 1.238847824725897, + "learning_rate": 2.912855209800057e-06, + "loss": 0.17347183227539062, + "step": 117790 + }, + { + "epoch": 1.0185385340377515, + "grad_norm": 4.9606749650208855, + "learning_rate": 2.9126515628039905e-06, + "loss": 0.08657608032226563, + "step": 117795 + }, + { + "epoch": 1.0185817675592947, + "grad_norm": 4.417937076545983, + "learning_rate": 2.9124479162107672e-06, + "loss": 0.07184829711914062, + "step": 117800 + }, + { + "epoch": 1.018625001080838, + "grad_norm": 1.4338596606266285, + "learning_rate": 2.912244270021326e-06, + "loss": 0.06177978515625, + "step": 117805 + }, + { + "epoch": 1.0186682346023812, + "grad_norm": 0.039985796072188065, + "learning_rate": 2.9120406242366066e-06, + "loss": 0.046286392211914065, + "step": 117810 + }, + { + "epoch": 1.0187114681239247, + "grad_norm": 2.2913921651740203, + "learning_rate": 2.9118369788575466e-06, + "loss": 0.11238555908203125, + "step": 117815 + }, + { + "epoch": 1.0187547016454679, + "grad_norm": 0.47084907361820666, + "learning_rate": 2.9116333338850888e-06, + "loss": 0.018607330322265626, + "step": 117820 + }, + { + "epoch": 1.0187979351670111, + "grad_norm": 1.0008307645184256, + "learning_rate": 2.9114296893201698e-06, + "loss": 0.01417388916015625, + "step": 117825 + }, + { + "epoch": 1.0188411686885543, + "grad_norm": 0.6867240959275326, + "learning_rate": 2.9112260451637295e-06, + "loss": 0.03268890380859375, + "step": 117830 + }, + { + "epoch": 1.0188844022100976, + "grad_norm": 1.9798093497277978, + "learning_rate": 2.911022401416706e-06, + "loss": 0.0648345947265625, + "step": 117835 + }, + { + "epoch": 1.0189276357316408, + "grad_norm": 0.4309702025220633, + "learning_rate": 2.9108187580800393e-06, + "loss": 0.16945152282714843, + "step": 117840 + }, + { + "epoch": 1.0189708692531843, + "grad_norm": 4.117830096143878, + "learning_rate": 2.9106151151546682e-06, + "loss": 0.021507644653320314, + "step": 117845 + }, + { + "epoch": 1.0190141027747275, + "grad_norm": 0.15293581345435767, + "learning_rate": 2.9104114726415332e-06, + "loss": 0.24282989501953126, + "step": 117850 + }, + { + "epoch": 1.0190573362962707, + "grad_norm": 1.3154019258207237, + "learning_rate": 2.910207830541573e-06, + "loss": 0.042414093017578126, + "step": 117855 + }, + { + "epoch": 1.019100569817814, + "grad_norm": 0.31024406718083836, + "learning_rate": 2.910004188855726e-06, + "loss": 0.018463516235351564, + "step": 117860 + }, + { + "epoch": 1.0191438033393572, + "grad_norm": 4.302993955468386, + "learning_rate": 2.909800547584931e-06, + "loss": 0.04596786499023438, + "step": 117865 + }, + { + "epoch": 1.0191870368609004, + "grad_norm": 0.48814705023921995, + "learning_rate": 2.909596906730129e-06, + "loss": 0.1184234619140625, + "step": 117870 + }, + { + "epoch": 1.0192302703824436, + "grad_norm": 4.644899627505498, + "learning_rate": 2.9093932662922577e-06, + "loss": 0.0337188720703125, + "step": 117875 + }, + { + "epoch": 1.019273503903987, + "grad_norm": 2.3808217089985733, + "learning_rate": 2.9091896262722553e-06, + "loss": 0.033720779418945315, + "step": 117880 + }, + { + "epoch": 1.0193167374255303, + "grad_norm": 1.6297678709638581, + "learning_rate": 2.9089859866710633e-06, + "loss": 0.011034774780273437, + "step": 117885 + }, + { + "epoch": 1.0193599709470735, + "grad_norm": 2.0528534925442483, + "learning_rate": 2.9087823474896206e-06, + "loss": 0.1894744873046875, + "step": 117890 + }, + { + "epoch": 1.0194032044686168, + "grad_norm": 1.6067104779376642, + "learning_rate": 2.9085787087288645e-06, + "loss": 0.032386398315429686, + "step": 117895 + }, + { + "epoch": 1.01944643799016, + "grad_norm": 12.223389699157917, + "learning_rate": 2.9083750703897363e-06, + "loss": 0.08810195922851563, + "step": 117900 + }, + { + "epoch": 1.0194896715117032, + "grad_norm": 0.5298127638183324, + "learning_rate": 2.9081714324731743e-06, + "loss": 0.012253570556640624, + "step": 117905 + }, + { + "epoch": 1.0195329050332467, + "grad_norm": 0.10151481666190877, + "learning_rate": 2.907967794980116e-06, + "loss": 0.053174591064453124, + "step": 117910 + }, + { + "epoch": 1.01957613855479, + "grad_norm": 5.692460452256522, + "learning_rate": 2.9077641579115034e-06, + "loss": 0.07441902160644531, + "step": 117915 + }, + { + "epoch": 1.0196193720763331, + "grad_norm": 1.0567885927764804, + "learning_rate": 2.907560521268274e-06, + "loss": 0.007845687866210937, + "step": 117920 + }, + { + "epoch": 1.0196626055978764, + "grad_norm": 0.2883919443177799, + "learning_rate": 2.907356885051367e-06, + "loss": 0.05661163330078125, + "step": 117925 + }, + { + "epoch": 1.0197058391194196, + "grad_norm": 21.40075358802187, + "learning_rate": 2.907153249261723e-06, + "loss": 0.09761199951171876, + "step": 117930 + }, + { + "epoch": 1.0197490726409628, + "grad_norm": 0.8360813657432029, + "learning_rate": 2.9069496139002795e-06, + "loss": 0.025748443603515626, + "step": 117935 + }, + { + "epoch": 1.0197923061625063, + "grad_norm": 0.5190871224602954, + "learning_rate": 2.9067459789679746e-06, + "loss": 0.04426994323730469, + "step": 117940 + }, + { + "epoch": 1.0198355396840495, + "grad_norm": 47.48108181904906, + "learning_rate": 2.9065423444657507e-06, + "loss": 0.22343940734863282, + "step": 117945 + }, + { + "epoch": 1.0198787732055927, + "grad_norm": 0.5074863847268387, + "learning_rate": 2.906338710394545e-06, + "loss": 0.01412506103515625, + "step": 117950 + }, + { + "epoch": 1.019922006727136, + "grad_norm": 19.658462958149375, + "learning_rate": 2.906135076755297e-06, + "loss": 0.09089851379394531, + "step": 117955 + }, + { + "epoch": 1.0199652402486792, + "grad_norm": 16.30770861393759, + "learning_rate": 2.9059314435489453e-06, + "loss": 0.180853271484375, + "step": 117960 + }, + { + "epoch": 1.0200084737702224, + "grad_norm": 0.6850748197649362, + "learning_rate": 2.90572781077643e-06, + "loss": 0.02017059326171875, + "step": 117965 + }, + { + "epoch": 1.0200517072917656, + "grad_norm": 12.67891851657025, + "learning_rate": 2.9055241784386896e-06, + "loss": 0.11047210693359374, + "step": 117970 + }, + { + "epoch": 1.020094940813309, + "grad_norm": 2.350991718850555, + "learning_rate": 2.9053205465366624e-06, + "loss": 0.019297027587890626, + "step": 117975 + }, + { + "epoch": 1.0201381743348523, + "grad_norm": 37.034920348608445, + "learning_rate": 2.9051169150712884e-06, + "loss": 0.10691909790039063, + "step": 117980 + }, + { + "epoch": 1.0201814078563956, + "grad_norm": 3.2236020691158336, + "learning_rate": 2.904913284043508e-06, + "loss": 0.0471527099609375, + "step": 117985 + }, + { + "epoch": 1.0202246413779388, + "grad_norm": 0.21917237675973586, + "learning_rate": 2.904709653454259e-06, + "loss": 0.0745086669921875, + "step": 117990 + }, + { + "epoch": 1.020267874899482, + "grad_norm": 0.4414293121591306, + "learning_rate": 2.9045060233044806e-06, + "loss": 0.05181694030761719, + "step": 117995 + }, + { + "epoch": 1.0203111084210252, + "grad_norm": 0.6034399081963215, + "learning_rate": 2.904302393595112e-06, + "loss": 0.02685203552246094, + "step": 118000 + }, + { + "epoch": 1.0203543419425687, + "grad_norm": 5.98862387287559, + "learning_rate": 2.904098764327091e-06, + "loss": 0.030597496032714843, + "step": 118005 + }, + { + "epoch": 1.020397575464112, + "grad_norm": 9.455004183171564, + "learning_rate": 2.903895135501359e-06, + "loss": 0.07909622192382812, + "step": 118010 + }, + { + "epoch": 1.0204408089856551, + "grad_norm": 0.24367406078262907, + "learning_rate": 2.9036915071188544e-06, + "loss": 0.14105377197265626, + "step": 118015 + }, + { + "epoch": 1.0204840425071984, + "grad_norm": 0.4721205694228394, + "learning_rate": 2.9034878791805163e-06, + "loss": 0.01915740966796875, + "step": 118020 + }, + { + "epoch": 1.0205272760287416, + "grad_norm": 2.108844523669083, + "learning_rate": 2.9032842516872833e-06, + "loss": 0.07241554260253906, + "step": 118025 + }, + { + "epoch": 1.0205705095502848, + "grad_norm": 0.3711258576747477, + "learning_rate": 2.903080624640094e-06, + "loss": 0.017380523681640624, + "step": 118030 + }, + { + "epoch": 1.020613743071828, + "grad_norm": 0.6151819264748981, + "learning_rate": 2.902876998039889e-06, + "loss": 0.08110847473144531, + "step": 118035 + }, + { + "epoch": 1.0206569765933715, + "grad_norm": 2.29989772104614, + "learning_rate": 2.902673371887606e-06, + "loss": 0.0744145393371582, + "step": 118040 + }, + { + "epoch": 1.0207002101149147, + "grad_norm": 4.60183482481541, + "learning_rate": 2.902469746184186e-06, + "loss": 0.020235633850097655, + "step": 118045 + }, + { + "epoch": 1.020743443636458, + "grad_norm": 1.7097653252973741, + "learning_rate": 2.9022661209305666e-06, + "loss": 0.14069061279296874, + "step": 118050 + }, + { + "epoch": 1.0207866771580012, + "grad_norm": 17.744199399846483, + "learning_rate": 2.9020624961276873e-06, + "loss": 0.042580795288085935, + "step": 118055 + }, + { + "epoch": 1.0208299106795444, + "grad_norm": 0.17988222898349351, + "learning_rate": 2.9018588717764862e-06, + "loss": 0.011717987060546876, + "step": 118060 + }, + { + "epoch": 1.0208731442010877, + "grad_norm": 1.7759120786895026, + "learning_rate": 2.9016552478779044e-06, + "loss": 0.015641021728515624, + "step": 118065 + }, + { + "epoch": 1.020916377722631, + "grad_norm": 2.655814154926081, + "learning_rate": 2.9014516244328787e-06, + "loss": 0.06906242370605468, + "step": 118070 + }, + { + "epoch": 1.0209596112441743, + "grad_norm": 0.7146700131398449, + "learning_rate": 2.90124800144235e-06, + "loss": 0.04450798034667969, + "step": 118075 + }, + { + "epoch": 1.0210028447657176, + "grad_norm": 0.1774948564014243, + "learning_rate": 2.9010443789072575e-06, + "loss": 0.08213157653808593, + "step": 118080 + }, + { + "epoch": 1.0210460782872608, + "grad_norm": 9.77451512501906, + "learning_rate": 2.900840756828539e-06, + "loss": 0.08940162658691406, + "step": 118085 + }, + { + "epoch": 1.021089311808804, + "grad_norm": 19.731360680623073, + "learning_rate": 2.900637135207134e-06, + "loss": 0.06483688354492187, + "step": 118090 + }, + { + "epoch": 1.0211325453303473, + "grad_norm": 58.98565498463383, + "learning_rate": 2.9004335140439822e-06, + "loss": 0.10820693969726562, + "step": 118095 + }, + { + "epoch": 1.0211757788518907, + "grad_norm": 2.2749565681143817, + "learning_rate": 2.9002298933400207e-06, + "loss": 0.10159492492675781, + "step": 118100 + }, + { + "epoch": 1.021219012373434, + "grad_norm": 6.01478378083886, + "learning_rate": 2.9000262730961917e-06, + "loss": 0.0779296875, + "step": 118105 + }, + { + "epoch": 1.0212622458949772, + "grad_norm": 4.024552635214321, + "learning_rate": 2.8998226533134323e-06, + "loss": 0.14236068725585938, + "step": 118110 + }, + { + "epoch": 1.0213054794165204, + "grad_norm": 12.82254005073662, + "learning_rate": 2.8996190339926824e-06, + "loss": 0.0999237060546875, + "step": 118115 + }, + { + "epoch": 1.0213487129380636, + "grad_norm": 1.2102626205952982, + "learning_rate": 2.89941541513488e-06, + "loss": 0.07875518798828125, + "step": 118120 + }, + { + "epoch": 1.0213919464596068, + "grad_norm": 0.2423048768869311, + "learning_rate": 2.8992117967409655e-06, + "loss": 0.013674545288085937, + "step": 118125 + }, + { + "epoch": 1.02143517998115, + "grad_norm": 0.18508527054984694, + "learning_rate": 2.899008178811877e-06, + "loss": 0.02939453125, + "step": 118130 + }, + { + "epoch": 1.0214784135026935, + "grad_norm": 0.6344433047408362, + "learning_rate": 2.898804561348553e-06, + "loss": 0.09530982971191407, + "step": 118135 + }, + { + "epoch": 1.0215216470242368, + "grad_norm": 2.355753543693819, + "learning_rate": 2.8986009443519343e-06, + "loss": 0.09966850280761719, + "step": 118140 + }, + { + "epoch": 1.02156488054578, + "grad_norm": 4.239111303460469, + "learning_rate": 2.8983973278229592e-06, + "loss": 0.46545753479003904, + "step": 118145 + }, + { + "epoch": 1.0216081140673232, + "grad_norm": 44.824133340985505, + "learning_rate": 2.8981937117625658e-06, + "loss": 0.19422607421875, + "step": 118150 + }, + { + "epoch": 1.0216513475888664, + "grad_norm": 3.947074649655793, + "learning_rate": 2.8979900961716954e-06, + "loss": 0.09903411865234375, + "step": 118155 + }, + { + "epoch": 1.0216945811104097, + "grad_norm": 1.5085270313908632, + "learning_rate": 2.897786481051285e-06, + "loss": 0.025005340576171875, + "step": 118160 + }, + { + "epoch": 1.0217378146319531, + "grad_norm": 2.6488499145914677, + "learning_rate": 2.897582866402273e-06, + "loss": 0.04154014587402344, + "step": 118165 + }, + { + "epoch": 1.0217810481534964, + "grad_norm": 16.634866207185837, + "learning_rate": 2.897379252225601e-06, + "loss": 0.142169189453125, + "step": 118170 + }, + { + "epoch": 1.0218242816750396, + "grad_norm": 0.4049143704652665, + "learning_rate": 2.897175638522207e-06, + "loss": 0.03597412109375, + "step": 118175 + }, + { + "epoch": 1.0218675151965828, + "grad_norm": 19.755761328936885, + "learning_rate": 2.8969720252930294e-06, + "loss": 0.08374290466308594, + "step": 118180 + }, + { + "epoch": 1.021910748718126, + "grad_norm": 0.12343199841800302, + "learning_rate": 2.8967684125390083e-06, + "loss": 0.03341827392578125, + "step": 118185 + }, + { + "epoch": 1.0219539822396693, + "grad_norm": 1.8388957245418334, + "learning_rate": 2.896564800261082e-06, + "loss": 0.01886749267578125, + "step": 118190 + }, + { + "epoch": 1.0219972157612127, + "grad_norm": 9.13339023324196, + "learning_rate": 2.8963611884601885e-06, + "loss": 0.11385078430175781, + "step": 118195 + }, + { + "epoch": 1.022040449282756, + "grad_norm": 0.3398033150228563, + "learning_rate": 2.8961575771372694e-06, + "loss": 0.27869415283203125, + "step": 118200 + }, + { + "epoch": 1.0220836828042992, + "grad_norm": 0.9091735777095383, + "learning_rate": 2.895953966293262e-06, + "loss": 0.03797073364257812, + "step": 118205 + }, + { + "epoch": 1.0221269163258424, + "grad_norm": 2.2746239313149474, + "learning_rate": 2.8957503559291057e-06, + "loss": 0.04001922607421875, + "step": 118210 + }, + { + "epoch": 1.0221701498473856, + "grad_norm": 0.22275738005022147, + "learning_rate": 2.89554674604574e-06, + "loss": 0.0557769775390625, + "step": 118215 + }, + { + "epoch": 1.0222133833689289, + "grad_norm": 4.767443841294456, + "learning_rate": 2.895343136644103e-06, + "loss": 0.017737960815429686, + "step": 118220 + }, + { + "epoch": 1.022256616890472, + "grad_norm": 23.186656183094197, + "learning_rate": 2.8951395277251347e-06, + "loss": 0.08695297241210938, + "step": 118225 + }, + { + "epoch": 1.0222998504120155, + "grad_norm": 5.313085544322233, + "learning_rate": 2.894935919289772e-06, + "loss": 0.04078521728515625, + "step": 118230 + }, + { + "epoch": 1.0223430839335588, + "grad_norm": 5.314895911974183, + "learning_rate": 2.8947323113389568e-06, + "loss": 0.0849599838256836, + "step": 118235 + }, + { + "epoch": 1.022386317455102, + "grad_norm": 0.7337017423858295, + "learning_rate": 2.894528703873627e-06, + "loss": 0.019725799560546875, + "step": 118240 + }, + { + "epoch": 1.0224295509766452, + "grad_norm": 3.2530501019187525, + "learning_rate": 2.8943250968947213e-06, + "loss": 0.08958816528320312, + "step": 118245 + }, + { + "epoch": 1.0224727844981885, + "grad_norm": 10.348772309028725, + "learning_rate": 2.8941214904031794e-06, + "loss": 0.0510223388671875, + "step": 118250 + }, + { + "epoch": 1.0225160180197317, + "grad_norm": 4.0306764212428305, + "learning_rate": 2.893917884399939e-06, + "loss": 0.12448883056640625, + "step": 118255 + }, + { + "epoch": 1.0225592515412751, + "grad_norm": 1.526514109617251, + "learning_rate": 2.89371427888594e-06, + "loss": 0.121136474609375, + "step": 118260 + }, + { + "epoch": 1.0226024850628184, + "grad_norm": 2.338858296637515, + "learning_rate": 2.893510673862121e-06, + "loss": 0.07419052124023437, + "step": 118265 + }, + { + "epoch": 1.0226457185843616, + "grad_norm": 18.578926830735, + "learning_rate": 2.8933070693294226e-06, + "loss": 0.08587188720703125, + "step": 118270 + }, + { + "epoch": 1.0226889521059048, + "grad_norm": 0.5549482602720781, + "learning_rate": 2.8931034652887824e-06, + "loss": 0.05772247314453125, + "step": 118275 + }, + { + "epoch": 1.022732185627448, + "grad_norm": 0.22205134547941596, + "learning_rate": 2.8928998617411395e-06, + "loss": 0.13330230712890626, + "step": 118280 + }, + { + "epoch": 1.0227754191489913, + "grad_norm": 1.295086359628341, + "learning_rate": 2.892696258687432e-06, + "loss": 0.04810905456542969, + "step": 118285 + }, + { + "epoch": 1.0228186526705347, + "grad_norm": 2.2316589579619457, + "learning_rate": 2.8924926561286007e-06, + "loss": 0.14313583374023436, + "step": 118290 + }, + { + "epoch": 1.022861886192078, + "grad_norm": 78.08296155223657, + "learning_rate": 2.892289054065583e-06, + "loss": 0.4064910888671875, + "step": 118295 + }, + { + "epoch": 1.0229051197136212, + "grad_norm": 18.80729483289491, + "learning_rate": 2.8920854524993195e-06, + "loss": 0.09138679504394531, + "step": 118300 + }, + { + "epoch": 1.0229483532351644, + "grad_norm": 17.607750171512077, + "learning_rate": 2.8918818514307486e-06, + "loss": 0.13957061767578124, + "step": 118305 + }, + { + "epoch": 1.0229915867567076, + "grad_norm": 0.9191791573981667, + "learning_rate": 2.8916782508608085e-06, + "loss": 0.2535442352294922, + "step": 118310 + }, + { + "epoch": 1.0230348202782509, + "grad_norm": 2.027575111968772, + "learning_rate": 2.8914746507904388e-06, + "loss": 0.07071914672851562, + "step": 118315 + }, + { + "epoch": 1.023078053799794, + "grad_norm": 4.103685623606341, + "learning_rate": 2.8912710512205783e-06, + "loss": 0.036070632934570315, + "step": 118320 + }, + { + "epoch": 1.0231212873213376, + "grad_norm": 5.503021178093942, + "learning_rate": 2.8910674521521655e-06, + "loss": 0.030493927001953126, + "step": 118325 + }, + { + "epoch": 1.0231645208428808, + "grad_norm": 16.72596307491948, + "learning_rate": 2.890863853586141e-06, + "loss": 0.025823211669921874, + "step": 118330 + }, + { + "epoch": 1.023207754364424, + "grad_norm": 0.7945472337244944, + "learning_rate": 2.890660255523443e-06, + "loss": 0.04716796875, + "step": 118335 + }, + { + "epoch": 1.0232509878859672, + "grad_norm": 5.931228400452664, + "learning_rate": 2.8904566579650097e-06, + "loss": 0.11248359680175782, + "step": 118340 + }, + { + "epoch": 1.0232942214075105, + "grad_norm": 0.8699347881186297, + "learning_rate": 2.8902530609117802e-06, + "loss": 0.1290203094482422, + "step": 118345 + }, + { + "epoch": 1.0233374549290537, + "grad_norm": 4.222167701657491, + "learning_rate": 2.890049464364695e-06, + "loss": 0.030031967163085937, + "step": 118350 + }, + { + "epoch": 1.0233806884505972, + "grad_norm": 0.6960002877811399, + "learning_rate": 2.88984586832469e-06, + "loss": 0.04913616180419922, + "step": 118355 + }, + { + "epoch": 1.0234239219721404, + "grad_norm": 1.7495646388188881, + "learning_rate": 2.8896422727927075e-06, + "loss": 0.12948217391967773, + "step": 118360 + }, + { + "epoch": 1.0234671554936836, + "grad_norm": 10.357432435475037, + "learning_rate": 2.8894386777696855e-06, + "loss": 0.14026565551757814, + "step": 118365 + }, + { + "epoch": 1.0235103890152268, + "grad_norm": 3.1937581795468937, + "learning_rate": 2.889235083256562e-06, + "loss": 0.06802253723144532, + "step": 118370 + }, + { + "epoch": 1.02355362253677, + "grad_norm": 0.41438731747166224, + "learning_rate": 2.8890314892542763e-06, + "loss": 0.04621429443359375, + "step": 118375 + }, + { + "epoch": 1.0235968560583133, + "grad_norm": 0.12184724693492806, + "learning_rate": 2.888827895763768e-06, + "loss": 0.01084747314453125, + "step": 118380 + }, + { + "epoch": 1.0236400895798567, + "grad_norm": 0.18327296659670228, + "learning_rate": 2.888624302785976e-06, + "loss": 0.055635833740234376, + "step": 118385 + }, + { + "epoch": 1.0236833231014, + "grad_norm": 26.831295470134133, + "learning_rate": 2.888420710321837e-06, + "loss": 0.11569328308105468, + "step": 118390 + }, + { + "epoch": 1.0237265566229432, + "grad_norm": 2.9979231483885487, + "learning_rate": 2.8882171183722936e-06, + "loss": 0.02012004852294922, + "step": 118395 + }, + { + "epoch": 1.0237697901444864, + "grad_norm": 0.46154545964346044, + "learning_rate": 2.888013526938283e-06, + "loss": 0.252435302734375, + "step": 118400 + }, + { + "epoch": 1.0238130236660297, + "grad_norm": 0.2882047030103955, + "learning_rate": 2.887809936020743e-06, + "loss": 0.028698253631591796, + "step": 118405 + }, + { + "epoch": 1.023856257187573, + "grad_norm": 0.9169277613045156, + "learning_rate": 2.887606345620615e-06, + "loss": 0.037064743041992185, + "step": 118410 + }, + { + "epoch": 1.0238994907091161, + "grad_norm": 9.163151338413222, + "learning_rate": 2.887402755738836e-06, + "loss": 0.3288606643676758, + "step": 118415 + }, + { + "epoch": 1.0239427242306596, + "grad_norm": 1.1383611451156028, + "learning_rate": 2.8871991663763446e-06, + "loss": 0.049737548828125, + "step": 118420 + }, + { + "epoch": 1.0239859577522028, + "grad_norm": 0.7567039129329886, + "learning_rate": 2.8869955775340814e-06, + "loss": 0.02797088623046875, + "step": 118425 + }, + { + "epoch": 1.024029191273746, + "grad_norm": 0.9364826090502525, + "learning_rate": 2.8867919892129844e-06, + "loss": 0.1171661376953125, + "step": 118430 + }, + { + "epoch": 1.0240724247952893, + "grad_norm": 1.8649170024328854, + "learning_rate": 2.8865884014139935e-06, + "loss": 0.086199951171875, + "step": 118435 + }, + { + "epoch": 1.0241156583168325, + "grad_norm": 18.567352449338784, + "learning_rate": 2.8863848141380467e-06, + "loss": 0.09097976684570312, + "step": 118440 + }, + { + "epoch": 1.0241588918383757, + "grad_norm": 0.3149033600892351, + "learning_rate": 2.886181227386083e-06, + "loss": 0.04916839599609375, + "step": 118445 + }, + { + "epoch": 1.0242021253599192, + "grad_norm": 1.159291987700837, + "learning_rate": 2.8859776411590403e-06, + "loss": 0.02050437927246094, + "step": 118450 + }, + { + "epoch": 1.0242453588814624, + "grad_norm": 0.6510479932210906, + "learning_rate": 2.88577405545786e-06, + "loss": 0.08084945678710938, + "step": 118455 + }, + { + "epoch": 1.0242885924030056, + "grad_norm": 0.7348990901895509, + "learning_rate": 2.885570470283479e-06, + "loss": 0.029439544677734374, + "step": 118460 + }, + { + "epoch": 1.0243318259245489, + "grad_norm": 1.518275780180984, + "learning_rate": 2.885366885636837e-06, + "loss": 0.02183837890625, + "step": 118465 + }, + { + "epoch": 1.024375059446092, + "grad_norm": 2.0215365290520824, + "learning_rate": 2.8851633015188734e-06, + "loss": 0.06649932861328126, + "step": 118470 + }, + { + "epoch": 1.0244182929676353, + "grad_norm": 0.35777835951852854, + "learning_rate": 2.8849597179305265e-06, + "loss": 0.022010231018066408, + "step": 118475 + }, + { + "epoch": 1.0244615264891785, + "grad_norm": 44.845772826124104, + "learning_rate": 2.8847561348727343e-06, + "loss": 0.08117828369140626, + "step": 118480 + }, + { + "epoch": 1.024504760010722, + "grad_norm": 2.8726693591610393, + "learning_rate": 2.8845525523464366e-06, + "loss": 0.020784759521484376, + "step": 118485 + }, + { + "epoch": 1.0245479935322652, + "grad_norm": 24.384508285160443, + "learning_rate": 2.8843489703525724e-06, + "loss": 0.05374164581298828, + "step": 118490 + }, + { + "epoch": 1.0245912270538085, + "grad_norm": 1.1065817497133175, + "learning_rate": 2.884145388892081e-06, + "loss": 0.011380767822265625, + "step": 118495 + }, + { + "epoch": 1.0246344605753517, + "grad_norm": 1.7971509853629246, + "learning_rate": 2.883941807965901e-06, + "loss": 0.074481201171875, + "step": 118500 + }, + { + "epoch": 1.024677694096895, + "grad_norm": 6.485751100763561, + "learning_rate": 2.8837382275749714e-06, + "loss": 0.09288272857666016, + "step": 118505 + }, + { + "epoch": 1.0247209276184381, + "grad_norm": 16.564654673307423, + "learning_rate": 2.8835346477202296e-06, + "loss": 0.04707527160644531, + "step": 118510 + }, + { + "epoch": 1.0247641611399816, + "grad_norm": 0.6404840539309775, + "learning_rate": 2.8833310684026164e-06, + "loss": 0.007149887084960937, + "step": 118515 + }, + { + "epoch": 1.0248073946615248, + "grad_norm": 0.6266969661881904, + "learning_rate": 2.8831274896230694e-06, + "loss": 0.029188919067382812, + "step": 118520 + }, + { + "epoch": 1.024850628183068, + "grad_norm": 0.8180514804797628, + "learning_rate": 2.8829239113825294e-06, + "loss": 0.0678253173828125, + "step": 118525 + }, + { + "epoch": 1.0248938617046113, + "grad_norm": 1.4906024128795425, + "learning_rate": 2.8827203336819334e-06, + "loss": 0.040306472778320314, + "step": 118530 + }, + { + "epoch": 1.0249370952261545, + "grad_norm": 13.51926731852998, + "learning_rate": 2.882516756522221e-06, + "loss": 0.39319610595703125, + "step": 118535 + }, + { + "epoch": 1.0249803287476977, + "grad_norm": 0.10743188785174722, + "learning_rate": 2.8823131799043305e-06, + "loss": 0.19924049377441405, + "step": 118540 + }, + { + "epoch": 1.0250235622692412, + "grad_norm": 0.6278384866175383, + "learning_rate": 2.882109603829201e-06, + "loss": 0.21573276519775392, + "step": 118545 + }, + { + "epoch": 1.0250667957907844, + "grad_norm": 8.644488430534468, + "learning_rate": 2.881906028297771e-06, + "loss": 0.20342655181884767, + "step": 118550 + }, + { + "epoch": 1.0251100293123276, + "grad_norm": 283.4166738092973, + "learning_rate": 2.881702453310982e-06, + "loss": 0.15704345703125, + "step": 118555 + }, + { + "epoch": 1.0251532628338709, + "grad_norm": 0.4308771039432176, + "learning_rate": 2.8814988788697697e-06, + "loss": 0.026395225524902345, + "step": 118560 + }, + { + "epoch": 1.025196496355414, + "grad_norm": 0.8405008261352337, + "learning_rate": 2.881295304975075e-06, + "loss": 0.0864959716796875, + "step": 118565 + }, + { + "epoch": 1.0252397298769573, + "grad_norm": 51.20055365297629, + "learning_rate": 2.8810917316278344e-06, + "loss": 0.19922943115234376, + "step": 118570 + }, + { + "epoch": 1.0252829633985006, + "grad_norm": 0.08742332257950539, + "learning_rate": 2.8808881588289893e-06, + "loss": 0.187725830078125, + "step": 118575 + }, + { + "epoch": 1.025326196920044, + "grad_norm": 11.0457129036427, + "learning_rate": 2.880684586579476e-06, + "loss": 0.05589561462402344, + "step": 118580 + }, + { + "epoch": 1.0253694304415872, + "grad_norm": 1.4321256969521658, + "learning_rate": 2.880481014880236e-06, + "loss": 0.1340972900390625, + "step": 118585 + }, + { + "epoch": 1.0254126639631305, + "grad_norm": 3.730501489841813, + "learning_rate": 2.8802774437322073e-06, + "loss": 0.2018524169921875, + "step": 118590 + }, + { + "epoch": 1.0254558974846737, + "grad_norm": 0.8201740080688038, + "learning_rate": 2.8800738731363284e-06, + "loss": 0.15805740356445314, + "step": 118595 + }, + { + "epoch": 1.025499131006217, + "grad_norm": 0.4261661094473598, + "learning_rate": 2.8798703030935375e-06, + "loss": 0.064532470703125, + "step": 118600 + }, + { + "epoch": 1.0255423645277602, + "grad_norm": 10.341774001975121, + "learning_rate": 2.879666733604775e-06, + "loss": 0.0926666259765625, + "step": 118605 + }, + { + "epoch": 1.0255855980493036, + "grad_norm": 0.2484742849327139, + "learning_rate": 2.879463164670977e-06, + "loss": 0.2316295623779297, + "step": 118610 + }, + { + "epoch": 1.0256288315708468, + "grad_norm": 4.234486625394792, + "learning_rate": 2.8792595962930858e-06, + "loss": 0.017763900756835937, + "step": 118615 + }, + { + "epoch": 1.02567206509239, + "grad_norm": 3.349013043519319, + "learning_rate": 2.879056028472039e-06, + "loss": 0.10980377197265626, + "step": 118620 + }, + { + "epoch": 1.0257152986139333, + "grad_norm": 12.169611941255459, + "learning_rate": 2.878852461208774e-06, + "loss": 0.0618072509765625, + "step": 118625 + }, + { + "epoch": 1.0257585321354765, + "grad_norm": 2.396267381885031, + "learning_rate": 2.878648894504232e-06, + "loss": 0.02004547119140625, + "step": 118630 + }, + { + "epoch": 1.0258017656570197, + "grad_norm": 30.39492362464878, + "learning_rate": 2.8784453283593503e-06, + "loss": 0.10733642578125, + "step": 118635 + }, + { + "epoch": 1.0258449991785632, + "grad_norm": 25.242296954886122, + "learning_rate": 2.8782417627750664e-06, + "loss": 0.159930419921875, + "step": 118640 + }, + { + "epoch": 1.0258882327001064, + "grad_norm": 3.5939682246912343, + "learning_rate": 2.878038197752322e-06, + "loss": 0.03683929443359375, + "step": 118645 + }, + { + "epoch": 1.0259314662216497, + "grad_norm": 1.8366940230042972, + "learning_rate": 2.8778346332920552e-06, + "loss": 0.08180561065673828, + "step": 118650 + }, + { + "epoch": 1.0259746997431929, + "grad_norm": 4.989220503178507, + "learning_rate": 2.8776310693952033e-06, + "loss": 0.23936767578125, + "step": 118655 + }, + { + "epoch": 1.0260179332647361, + "grad_norm": 0.10459987927097841, + "learning_rate": 2.877427506062707e-06, + "loss": 0.010961151123046875, + "step": 118660 + }, + { + "epoch": 1.0260611667862793, + "grad_norm": 4.56053686332246, + "learning_rate": 2.877223943295504e-06, + "loss": 0.12559967041015624, + "step": 118665 + }, + { + "epoch": 1.0261044003078226, + "grad_norm": 0.31543698796384373, + "learning_rate": 2.877020381094533e-06, + "loss": 0.04300537109375, + "step": 118670 + }, + { + "epoch": 1.026147633829366, + "grad_norm": 10.503562291533665, + "learning_rate": 2.876816819460732e-06, + "loss": 0.081964111328125, + "step": 118675 + }, + { + "epoch": 1.0261908673509093, + "grad_norm": 0.9037444124786468, + "learning_rate": 2.8766132583950423e-06, + "loss": 0.014642333984375, + "step": 118680 + }, + { + "epoch": 1.0262341008724525, + "grad_norm": 0.3551076169546684, + "learning_rate": 2.876409697898401e-06, + "loss": 0.05400772094726562, + "step": 118685 + }, + { + "epoch": 1.0262773343939957, + "grad_norm": 2.306434534824252, + "learning_rate": 2.8762061379717478e-06, + "loss": 0.04825897216796875, + "step": 118690 + }, + { + "epoch": 1.026320567915539, + "grad_norm": 2.150183058140882, + "learning_rate": 2.8760025786160207e-06, + "loss": 0.26274261474609373, + "step": 118695 + }, + { + "epoch": 1.0263638014370822, + "grad_norm": 0.1951903078801619, + "learning_rate": 2.875799019832159e-06, + "loss": 0.04435806274414063, + "step": 118700 + }, + { + "epoch": 1.0264070349586256, + "grad_norm": 3.4721863796951333, + "learning_rate": 2.8755954616211e-06, + "loss": 0.35904617309570314, + "step": 118705 + }, + { + "epoch": 1.0264502684801688, + "grad_norm": 1.0396023765201723, + "learning_rate": 2.875391903983785e-06, + "loss": 0.009356689453125, + "step": 118710 + }, + { + "epoch": 1.026493502001712, + "grad_norm": 2.248816163453983, + "learning_rate": 2.8751883469211508e-06, + "loss": 0.02265625, + "step": 118715 + }, + { + "epoch": 1.0265367355232553, + "grad_norm": 10.796700075673334, + "learning_rate": 2.8749847904341375e-06, + "loss": 0.13453292846679688, + "step": 118720 + }, + { + "epoch": 1.0265799690447985, + "grad_norm": 1.3738949327476495, + "learning_rate": 2.8747812345236835e-06, + "loss": 0.0236083984375, + "step": 118725 + }, + { + "epoch": 1.0266232025663418, + "grad_norm": 1.3083374598871123, + "learning_rate": 2.874577679190727e-06, + "loss": 0.04244537353515625, + "step": 118730 + }, + { + "epoch": 1.026666436087885, + "grad_norm": 2.3216200758656873, + "learning_rate": 2.8743741244362064e-06, + "loss": 0.04149360656738281, + "step": 118735 + }, + { + "epoch": 1.0267096696094284, + "grad_norm": 16.737034859027496, + "learning_rate": 2.8741705702610616e-06, + "loss": 0.24290390014648439, + "step": 118740 + }, + { + "epoch": 1.0267529031309717, + "grad_norm": 19.7770957696238, + "learning_rate": 2.873967016666231e-06, + "loss": 0.0478302001953125, + "step": 118745 + }, + { + "epoch": 1.026796136652515, + "grad_norm": 7.408693338752638, + "learning_rate": 2.873763463652654e-06, + "loss": 0.07339630126953126, + "step": 118750 + }, + { + "epoch": 1.0268393701740581, + "grad_norm": 0.21752066759440636, + "learning_rate": 2.873559911221269e-06, + "loss": 0.0130340576171875, + "step": 118755 + }, + { + "epoch": 1.0268826036956014, + "grad_norm": 2.1442784216436426, + "learning_rate": 2.8733563593730143e-06, + "loss": 0.2686370849609375, + "step": 118760 + }, + { + "epoch": 1.0269258372171446, + "grad_norm": 0.8760511701572828, + "learning_rate": 2.8731528081088283e-06, + "loss": 0.015016937255859375, + "step": 118765 + }, + { + "epoch": 1.026969070738688, + "grad_norm": 3.5569004828030137, + "learning_rate": 2.87294925742965e-06, + "loss": 0.024869155883789063, + "step": 118770 + }, + { + "epoch": 1.0270123042602313, + "grad_norm": 1.2938679057292581, + "learning_rate": 2.872745707336419e-06, + "loss": 0.056987762451171875, + "step": 118775 + }, + { + "epoch": 1.0270555377817745, + "grad_norm": 51.1786273446478, + "learning_rate": 2.8725421578300736e-06, + "loss": 0.1475006103515625, + "step": 118780 + }, + { + "epoch": 1.0270987713033177, + "grad_norm": 1.5515669980917728, + "learning_rate": 2.872338608911553e-06, + "loss": 0.0904876708984375, + "step": 118785 + }, + { + "epoch": 1.027142004824861, + "grad_norm": 0.6898842086183081, + "learning_rate": 2.8721350605817953e-06, + "loss": 0.031858062744140624, + "step": 118790 + }, + { + "epoch": 1.0271852383464042, + "grad_norm": 0.1033451997116634, + "learning_rate": 2.871931512841739e-06, + "loss": 0.06553573608398437, + "step": 118795 + }, + { + "epoch": 1.0272284718679476, + "grad_norm": 1.5384942547692306, + "learning_rate": 2.871727965692323e-06, + "loss": 0.04366302490234375, + "step": 118800 + }, + { + "epoch": 1.0272717053894909, + "grad_norm": 0.5222366593702747, + "learning_rate": 2.871524419134486e-06, + "loss": 0.18972206115722656, + "step": 118805 + }, + { + "epoch": 1.027314938911034, + "grad_norm": 0.15302405570070415, + "learning_rate": 2.871320873169168e-06, + "loss": 0.06417388916015625, + "step": 118810 + }, + { + "epoch": 1.0273581724325773, + "grad_norm": 30.092435821719107, + "learning_rate": 2.871117327797307e-06, + "loss": 0.04268951416015625, + "step": 118815 + }, + { + "epoch": 1.0274014059541206, + "grad_norm": 0.882285995960861, + "learning_rate": 2.870913783019841e-06, + "loss": 0.011556243896484375, + "step": 118820 + }, + { + "epoch": 1.0274446394756638, + "grad_norm": 1.1598587546643462, + "learning_rate": 2.870710238837709e-06, + "loss": 0.05168609619140625, + "step": 118825 + }, + { + "epoch": 1.027487872997207, + "grad_norm": 2.0089275518708187, + "learning_rate": 2.87050669525185e-06, + "loss": 0.07132339477539062, + "step": 118830 + }, + { + "epoch": 1.0275311065187505, + "grad_norm": 3.0848637871083535, + "learning_rate": 2.8703031522632022e-06, + "loss": 0.09642333984375, + "step": 118835 + }, + { + "epoch": 1.0275743400402937, + "grad_norm": 1.9634401305074147, + "learning_rate": 2.8700996098727057e-06, + "loss": 0.01524810791015625, + "step": 118840 + }, + { + "epoch": 1.027617573561837, + "grad_norm": 1.8353653380908315, + "learning_rate": 2.869896068081298e-06, + "loss": 0.02647705078125, + "step": 118845 + }, + { + "epoch": 1.0276608070833801, + "grad_norm": 7.020728048042744, + "learning_rate": 2.869692526889918e-06, + "loss": 0.022329044342041016, + "step": 118850 + }, + { + "epoch": 1.0277040406049234, + "grad_norm": 0.43852356828345584, + "learning_rate": 2.869488986299505e-06, + "loss": 0.021438217163085936, + "step": 118855 + }, + { + "epoch": 1.0277472741264666, + "grad_norm": 21.697044912199075, + "learning_rate": 2.869285446310997e-06, + "loss": 0.04671821594238281, + "step": 118860 + }, + { + "epoch": 1.02779050764801, + "grad_norm": 1.6291573812480806, + "learning_rate": 2.8690819069253322e-06, + "loss": 0.057933807373046875, + "step": 118865 + }, + { + "epoch": 1.0278337411695533, + "grad_norm": 1.0829447420554077, + "learning_rate": 2.868878368143451e-06, + "loss": 0.024326324462890625, + "step": 118870 + }, + { + "epoch": 1.0278769746910965, + "grad_norm": 20.360598441264784, + "learning_rate": 2.868674829966291e-06, + "loss": 0.191973876953125, + "step": 118875 + }, + { + "epoch": 1.0279202082126397, + "grad_norm": 0.1949237812208489, + "learning_rate": 2.8684712923947912e-06, + "loss": 0.17477951049804688, + "step": 118880 + }, + { + "epoch": 1.027963441734183, + "grad_norm": 5.332233675942719, + "learning_rate": 2.86826775542989e-06, + "loss": 0.04615936279296875, + "step": 118885 + }, + { + "epoch": 1.0280066752557262, + "grad_norm": 1.5599240452240104, + "learning_rate": 2.8680642190725272e-06, + "loss": 0.0456268310546875, + "step": 118890 + }, + { + "epoch": 1.0280499087772696, + "grad_norm": 12.379983347994829, + "learning_rate": 2.8678606833236385e-06, + "loss": 0.07251472473144531, + "step": 118895 + }, + { + "epoch": 1.0280931422988129, + "grad_norm": 0.21287508506687172, + "learning_rate": 2.867657148184166e-06, + "loss": 0.12752532958984375, + "step": 118900 + }, + { + "epoch": 1.028136375820356, + "grad_norm": 0.38269678022377146, + "learning_rate": 2.8674536136550473e-06, + "loss": 0.09058990478515624, + "step": 118905 + }, + { + "epoch": 1.0281796093418993, + "grad_norm": 35.513493459146105, + "learning_rate": 2.8672500797372197e-06, + "loss": 0.296014404296875, + "step": 118910 + }, + { + "epoch": 1.0282228428634426, + "grad_norm": 1.0120043031923822, + "learning_rate": 2.8670465464316246e-06, + "loss": 0.041240692138671875, + "step": 118915 + }, + { + "epoch": 1.0282660763849858, + "grad_norm": 4.693305201725161, + "learning_rate": 2.8668430137391983e-06, + "loss": 0.02293243408203125, + "step": 118920 + }, + { + "epoch": 1.028309309906529, + "grad_norm": 19.902412622976886, + "learning_rate": 2.8666394816608803e-06, + "loss": 0.1098541259765625, + "step": 118925 + }, + { + "epoch": 1.0283525434280725, + "grad_norm": 19.56057743722512, + "learning_rate": 2.866435950197608e-06, + "loss": 0.2589263916015625, + "step": 118930 + }, + { + "epoch": 1.0283957769496157, + "grad_norm": 12.890772925002752, + "learning_rate": 2.866232419350323e-06, + "loss": 0.09311027526855468, + "step": 118935 + }, + { + "epoch": 1.028439010471159, + "grad_norm": 1.2856066046087267, + "learning_rate": 2.866028889119961e-06, + "loss": 0.027777099609375, + "step": 118940 + }, + { + "epoch": 1.0284822439927022, + "grad_norm": 18.86061193355405, + "learning_rate": 2.865825359507463e-06, + "loss": 0.043218231201171874, + "step": 118945 + }, + { + "epoch": 1.0285254775142454, + "grad_norm": 0.20617089925359192, + "learning_rate": 2.865621830513766e-06, + "loss": 0.09528923034667969, + "step": 118950 + }, + { + "epoch": 1.0285687110357886, + "grad_norm": 6.270641940007336, + "learning_rate": 2.86541830213981e-06, + "loss": 0.05498695373535156, + "step": 118955 + }, + { + "epoch": 1.028611944557332, + "grad_norm": 0.6561324110328595, + "learning_rate": 2.865214774386531e-06, + "loss": 0.018817138671875, + "step": 118960 + }, + { + "epoch": 1.0286551780788753, + "grad_norm": 12.55717104633765, + "learning_rate": 2.8650112472548713e-06, + "loss": 0.01985769271850586, + "step": 118965 + }, + { + "epoch": 1.0286984116004185, + "grad_norm": 1.0026969161879544, + "learning_rate": 2.864807720745767e-06, + "loss": 0.03362541198730469, + "step": 118970 + }, + { + "epoch": 1.0287416451219618, + "grad_norm": 6.276338652616599, + "learning_rate": 2.8646041948601584e-06, + "loss": 0.23741092681884765, + "step": 118975 + }, + { + "epoch": 1.028784878643505, + "grad_norm": 0.15286431844088216, + "learning_rate": 2.864400669598983e-06, + "loss": 0.07157573699951172, + "step": 118980 + }, + { + "epoch": 1.0288281121650482, + "grad_norm": 7.15538724684152, + "learning_rate": 2.8641971449631796e-06, + "loss": 0.4642311096191406, + "step": 118985 + }, + { + "epoch": 1.0288713456865914, + "grad_norm": 8.783398349507216, + "learning_rate": 2.863993620953686e-06, + "loss": 0.0890655517578125, + "step": 118990 + }, + { + "epoch": 1.028914579208135, + "grad_norm": 0.21795743959155203, + "learning_rate": 2.863790097571443e-06, + "loss": 0.04901466369628906, + "step": 118995 + }, + { + "epoch": 1.0289578127296781, + "grad_norm": 14.404285272486709, + "learning_rate": 2.863586574817387e-06, + "loss": 0.0515625, + "step": 119000 + }, + { + "epoch": 1.0290010462512214, + "grad_norm": 0.8107231060390545, + "learning_rate": 2.8633830526924587e-06, + "loss": 0.02555694580078125, + "step": 119005 + }, + { + "epoch": 1.0290442797727646, + "grad_norm": 1.2366304047743353, + "learning_rate": 2.8631795311975958e-06, + "loss": 0.085565185546875, + "step": 119010 + }, + { + "epoch": 1.0290875132943078, + "grad_norm": 0.2644608145647145, + "learning_rate": 2.862976010333737e-06, + "loss": 0.17909011840820313, + "step": 119015 + }, + { + "epoch": 1.029130746815851, + "grad_norm": 1.751071430303892, + "learning_rate": 2.8627724901018196e-06, + "loss": 0.01273040771484375, + "step": 119020 + }, + { + "epoch": 1.0291739803373945, + "grad_norm": 6.788508895293408, + "learning_rate": 2.862568970502783e-06, + "loss": 0.02238006591796875, + "step": 119025 + }, + { + "epoch": 1.0292172138589377, + "grad_norm": 11.681894282023489, + "learning_rate": 2.862365451537567e-06, + "loss": 0.16726608276367189, + "step": 119030 + }, + { + "epoch": 1.029260447380481, + "grad_norm": 5.922128052660597, + "learning_rate": 2.86216193320711e-06, + "loss": 0.07249221801757813, + "step": 119035 + }, + { + "epoch": 1.0293036809020242, + "grad_norm": 5.931722285746054, + "learning_rate": 2.8619584155123494e-06, + "loss": 0.036930084228515625, + "step": 119040 + }, + { + "epoch": 1.0293469144235674, + "grad_norm": 8.18224472950247, + "learning_rate": 2.8617548984542247e-06, + "loss": 0.07419033050537109, + "step": 119045 + }, + { + "epoch": 1.0293901479451106, + "grad_norm": 7.691100556983292, + "learning_rate": 2.8615513820336737e-06, + "loss": 0.027386474609375, + "step": 119050 + }, + { + "epoch": 1.029433381466654, + "grad_norm": 1.0762435222488607, + "learning_rate": 2.8613478662516342e-06, + "loss": 0.0718597412109375, + "step": 119055 + }, + { + "epoch": 1.0294766149881973, + "grad_norm": 3.7894314727901364, + "learning_rate": 2.861144351109048e-06, + "loss": 0.1178060531616211, + "step": 119060 + }, + { + "epoch": 1.0295198485097405, + "grad_norm": 3.744052995653834, + "learning_rate": 2.860940836606852e-06, + "loss": 0.2808204650878906, + "step": 119065 + }, + { + "epoch": 1.0295630820312838, + "grad_norm": 0.31006205377980073, + "learning_rate": 2.8607373227459837e-06, + "loss": 0.024840545654296876, + "step": 119070 + }, + { + "epoch": 1.029606315552827, + "grad_norm": 0.7764349938760562, + "learning_rate": 2.8605338095273827e-06, + "loss": 0.0944366455078125, + "step": 119075 + }, + { + "epoch": 1.0296495490743702, + "grad_norm": 6.861111421018443, + "learning_rate": 2.8603302969519877e-06, + "loss": 0.04891624450683594, + "step": 119080 + }, + { + "epoch": 1.0296927825959135, + "grad_norm": 1.758048628344776, + "learning_rate": 2.860126785020737e-06, + "loss": 0.07290477752685547, + "step": 119085 + }, + { + "epoch": 1.029736016117457, + "grad_norm": 5.8155503347261, + "learning_rate": 2.859923273734568e-06, + "loss": 0.23464508056640626, + "step": 119090 + }, + { + "epoch": 1.0297792496390001, + "grad_norm": 8.650096401588097, + "learning_rate": 2.859719763094422e-06, + "loss": 0.10625, + "step": 119095 + }, + { + "epoch": 1.0298224831605434, + "grad_norm": 1.3804633927934373, + "learning_rate": 2.8595162531012356e-06, + "loss": 0.14155426025390624, + "step": 119100 + }, + { + "epoch": 1.0298657166820866, + "grad_norm": 0.6817101558614064, + "learning_rate": 2.8593127437559477e-06, + "loss": 0.04538421630859375, + "step": 119105 + }, + { + "epoch": 1.0299089502036298, + "grad_norm": 0.5209962049067447, + "learning_rate": 2.8591092350594973e-06, + "loss": 0.06905364990234375, + "step": 119110 + }, + { + "epoch": 1.029952183725173, + "grad_norm": 0.848842430558386, + "learning_rate": 2.8589057270128228e-06, + "loss": 0.05180816650390625, + "step": 119115 + }, + { + "epoch": 1.0299954172467165, + "grad_norm": 3.8109140032696485, + "learning_rate": 2.8587022196168614e-06, + "loss": 0.01893768310546875, + "step": 119120 + }, + { + "epoch": 1.0300386507682597, + "grad_norm": 1.9713545458793265, + "learning_rate": 2.858498712872554e-06, + "loss": 0.1171478271484375, + "step": 119125 + }, + { + "epoch": 1.030081884289803, + "grad_norm": 0.162927976479731, + "learning_rate": 2.858295206780838e-06, + "loss": 0.07679290771484375, + "step": 119130 + }, + { + "epoch": 1.0301251178113462, + "grad_norm": 0.976274709149819, + "learning_rate": 2.8580917013426516e-06, + "loss": 0.0988494873046875, + "step": 119135 + }, + { + "epoch": 1.0301683513328894, + "grad_norm": 8.081667322044728, + "learning_rate": 2.8578881965589343e-06, + "loss": 0.06128387451171875, + "step": 119140 + }, + { + "epoch": 1.0302115848544326, + "grad_norm": 1.600454357483341, + "learning_rate": 2.857684692430624e-06, + "loss": 0.02454833984375, + "step": 119145 + }, + { + "epoch": 1.030254818375976, + "grad_norm": 1.074067686782253, + "learning_rate": 2.857481188958658e-06, + "loss": 0.17228641510009765, + "step": 119150 + }, + { + "epoch": 1.0302980518975193, + "grad_norm": 0.30212969561703806, + "learning_rate": 2.8572776861439773e-06, + "loss": 0.014742660522460937, + "step": 119155 + }, + { + "epoch": 1.0303412854190626, + "grad_norm": 3.425475553886548, + "learning_rate": 2.8570741839875195e-06, + "loss": 0.08054580688476562, + "step": 119160 + }, + { + "epoch": 1.0303845189406058, + "grad_norm": 0.778218723561663, + "learning_rate": 2.8568706824902223e-06, + "loss": 0.06915626525878907, + "step": 119165 + }, + { + "epoch": 1.030427752462149, + "grad_norm": 0.37194016109075173, + "learning_rate": 2.8566671816530255e-06, + "loss": 0.08839168548583984, + "step": 119170 + }, + { + "epoch": 1.0304709859836922, + "grad_norm": 14.786577596897798, + "learning_rate": 2.856463681476867e-06, + "loss": 0.1020538330078125, + "step": 119175 + }, + { + "epoch": 1.0305142195052355, + "grad_norm": 0.34371357616131787, + "learning_rate": 2.856260181962685e-06, + "loss": 0.030590057373046875, + "step": 119180 + }, + { + "epoch": 1.030557453026779, + "grad_norm": 0.2055545309830728, + "learning_rate": 2.8560566831114176e-06, + "loss": 0.03123779296875, + "step": 119185 + }, + { + "epoch": 1.0306006865483222, + "grad_norm": 1.8567419802908252, + "learning_rate": 2.855853184924005e-06, + "loss": 0.303924560546875, + "step": 119190 + }, + { + "epoch": 1.0306439200698654, + "grad_norm": 4.027179694273992, + "learning_rate": 2.8556496874013845e-06, + "loss": 0.017855453491210937, + "step": 119195 + }, + { + "epoch": 1.0306871535914086, + "grad_norm": 26.99796648380989, + "learning_rate": 2.855446190544495e-06, + "loss": 0.15106430053710937, + "step": 119200 + }, + { + "epoch": 1.0307303871129518, + "grad_norm": 0.5177151594375787, + "learning_rate": 2.855242694354275e-06, + "loss": 0.08251266479492188, + "step": 119205 + }, + { + "epoch": 1.030773620634495, + "grad_norm": 0.22198758238350566, + "learning_rate": 2.8550391988316634e-06, + "loss": 0.037616729736328125, + "step": 119210 + }, + { + "epoch": 1.0308168541560385, + "grad_norm": 4.276802417984546, + "learning_rate": 2.8548357039775965e-06, + "loss": 0.13341064453125, + "step": 119215 + }, + { + "epoch": 1.0308600876775817, + "grad_norm": 17.41113265001022, + "learning_rate": 2.854632209793016e-06, + "loss": 0.05182342529296875, + "step": 119220 + }, + { + "epoch": 1.030903321199125, + "grad_norm": 0.8092755039908869, + "learning_rate": 2.8544287162788578e-06, + "loss": 0.016747283935546874, + "step": 119225 + }, + { + "epoch": 1.0309465547206682, + "grad_norm": 3.51335086614111, + "learning_rate": 2.8542252234360626e-06, + "loss": 0.06305122375488281, + "step": 119230 + }, + { + "epoch": 1.0309897882422114, + "grad_norm": 17.48137670779924, + "learning_rate": 2.8540217312655676e-06, + "loss": 0.17762908935546876, + "step": 119235 + }, + { + "epoch": 1.0310330217637547, + "grad_norm": 33.04310664792844, + "learning_rate": 2.8538182397683117e-06, + "loss": 0.1668771743774414, + "step": 119240 + }, + { + "epoch": 1.0310762552852981, + "grad_norm": 1.3343412061194164, + "learning_rate": 2.8536147489452314e-06, + "loss": 0.06096038818359375, + "step": 119245 + }, + { + "epoch": 1.0311194888068413, + "grad_norm": 25.458201032121735, + "learning_rate": 2.853411258797268e-06, + "loss": 0.106890869140625, + "step": 119250 + }, + { + "epoch": 1.0311627223283846, + "grad_norm": 15.805199014301188, + "learning_rate": 2.8532077693253595e-06, + "loss": 0.37169570922851564, + "step": 119255 + }, + { + "epoch": 1.0312059558499278, + "grad_norm": 0.6112457632024576, + "learning_rate": 2.8530042805304437e-06, + "loss": 0.04325790405273437, + "step": 119260 + }, + { + "epoch": 1.031249189371471, + "grad_norm": 22.402752122205193, + "learning_rate": 2.852800792413459e-06, + "loss": 0.17934541702270507, + "step": 119265 + }, + { + "epoch": 1.0312924228930143, + "grad_norm": 8.980361367275083, + "learning_rate": 2.8525973049753444e-06, + "loss": 0.18927001953125, + "step": 119270 + }, + { + "epoch": 1.0313356564145575, + "grad_norm": 6.018967731451287, + "learning_rate": 2.852393818217037e-06, + "loss": 0.020330047607421874, + "step": 119275 + }, + { + "epoch": 1.031378889936101, + "grad_norm": 0.14685685359750933, + "learning_rate": 2.8521903321394763e-06, + "loss": 0.05870819091796875, + "step": 119280 + }, + { + "epoch": 1.0314221234576442, + "grad_norm": 13.810445630471449, + "learning_rate": 2.8519868467436012e-06, + "loss": 0.08213882446289063, + "step": 119285 + }, + { + "epoch": 1.0314653569791874, + "grad_norm": 0.4518902610515359, + "learning_rate": 2.85178336203035e-06, + "loss": 0.16375656127929689, + "step": 119290 + }, + { + "epoch": 1.0315085905007306, + "grad_norm": 1.994168032107817, + "learning_rate": 2.851579878000661e-06, + "loss": 0.0485651969909668, + "step": 119295 + }, + { + "epoch": 1.0315518240222739, + "grad_norm": 27.379142629825026, + "learning_rate": 2.851376394655472e-06, + "loss": 0.09967918395996093, + "step": 119300 + }, + { + "epoch": 1.031595057543817, + "grad_norm": 5.38103676973704, + "learning_rate": 2.851172911995722e-06, + "loss": 0.120172119140625, + "step": 119305 + }, + { + "epoch": 1.0316382910653605, + "grad_norm": 11.646792628294438, + "learning_rate": 2.8509694300223487e-06, + "loss": 0.05761871337890625, + "step": 119310 + }, + { + "epoch": 1.0316815245869038, + "grad_norm": 0.2611548248651016, + "learning_rate": 2.850765948736292e-06, + "loss": 0.046749114990234375, + "step": 119315 + }, + { + "epoch": 1.031724758108447, + "grad_norm": 30.87405303979513, + "learning_rate": 2.85056246813849e-06, + "loss": 0.04118156433105469, + "step": 119320 + }, + { + "epoch": 1.0317679916299902, + "grad_norm": 16.996880105525637, + "learning_rate": 2.8503589882298802e-06, + "loss": 0.17641687393188477, + "step": 119325 + }, + { + "epoch": 1.0318112251515335, + "grad_norm": 4.987022925577934, + "learning_rate": 2.850155509011401e-06, + "loss": 0.11578121185302734, + "step": 119330 + }, + { + "epoch": 1.0318544586730767, + "grad_norm": 0.5167151498831505, + "learning_rate": 2.849952030483992e-06, + "loss": 0.1004460334777832, + "step": 119335 + }, + { + "epoch": 1.0318976921946201, + "grad_norm": 5.491297449938482, + "learning_rate": 2.84974855264859e-06, + "loss": 0.08605575561523438, + "step": 119340 + }, + { + "epoch": 1.0319409257161634, + "grad_norm": 7.295534698853531, + "learning_rate": 2.8495450755061358e-06, + "loss": 0.0805419921875, + "step": 119345 + }, + { + "epoch": 1.0319841592377066, + "grad_norm": 6.412643125697239, + "learning_rate": 2.849341599057566e-06, + "loss": 0.17242698669433593, + "step": 119350 + }, + { + "epoch": 1.0320273927592498, + "grad_norm": 39.477690887652216, + "learning_rate": 2.8491381233038196e-06, + "loss": 0.14710960388183594, + "step": 119355 + }, + { + "epoch": 1.032070626280793, + "grad_norm": 1.4263575054811184, + "learning_rate": 2.848934648245834e-06, + "loss": 0.06143951416015625, + "step": 119360 + }, + { + "epoch": 1.0321138598023363, + "grad_norm": 1.904086479429563, + "learning_rate": 2.848731173884549e-06, + "loss": 0.11890830993652343, + "step": 119365 + }, + { + "epoch": 1.0321570933238795, + "grad_norm": 3.5911632818890626, + "learning_rate": 2.848527700220903e-06, + "loss": 0.04347305297851563, + "step": 119370 + }, + { + "epoch": 1.032200326845423, + "grad_norm": 12.525251355166898, + "learning_rate": 2.848324227255832e-06, + "loss": 0.037908935546875, + "step": 119375 + }, + { + "epoch": 1.0322435603669662, + "grad_norm": 2.660374614308046, + "learning_rate": 2.848120754990278e-06, + "loss": 0.02239990234375, + "step": 119380 + }, + { + "epoch": 1.0322867938885094, + "grad_norm": 21.336927404358637, + "learning_rate": 2.8479172834251775e-06, + "loss": 0.0843536376953125, + "step": 119385 + }, + { + "epoch": 1.0323300274100526, + "grad_norm": 18.03143138901201, + "learning_rate": 2.8477138125614685e-06, + "loss": 0.07656097412109375, + "step": 119390 + }, + { + "epoch": 1.0323732609315959, + "grad_norm": 1.1483535237321714, + "learning_rate": 2.8475103424000904e-06, + "loss": 0.01583099365234375, + "step": 119395 + }, + { + "epoch": 1.032416494453139, + "grad_norm": 0.5166944563358518, + "learning_rate": 2.8473068729419813e-06, + "loss": 0.17331161499023437, + "step": 119400 + }, + { + "epoch": 1.0324597279746826, + "grad_norm": 0.1175392618399237, + "learning_rate": 2.847103404188078e-06, + "loss": 0.05411605834960938, + "step": 119405 + }, + { + "epoch": 1.0325029614962258, + "grad_norm": 4.957907855581335, + "learning_rate": 2.8468999361393217e-06, + "loss": 0.05000152587890625, + "step": 119410 + }, + { + "epoch": 1.032546195017769, + "grad_norm": 34.28682628614384, + "learning_rate": 2.8466964687966494e-06, + "loss": 0.22723007202148438, + "step": 119415 + }, + { + "epoch": 1.0325894285393122, + "grad_norm": 0.6110770588354804, + "learning_rate": 2.8464930021609985e-06, + "loss": 0.051563262939453125, + "step": 119420 + }, + { + "epoch": 1.0326326620608555, + "grad_norm": 14.094632133355638, + "learning_rate": 2.846289536233309e-06, + "loss": 0.02727088928222656, + "step": 119425 + }, + { + "epoch": 1.0326758955823987, + "grad_norm": 66.00080712739909, + "learning_rate": 2.8460860710145187e-06, + "loss": 0.16310653686523438, + "step": 119430 + }, + { + "epoch": 1.032719129103942, + "grad_norm": 7.48969099035314, + "learning_rate": 2.845882606505566e-06, + "loss": 0.077685546875, + "step": 119435 + }, + { + "epoch": 1.0327623626254854, + "grad_norm": 0.3184500491369431, + "learning_rate": 2.8456791427073873e-06, + "loss": 0.1517925262451172, + "step": 119440 + }, + { + "epoch": 1.0328055961470286, + "grad_norm": 1.4806333604464115, + "learning_rate": 2.845475679620925e-06, + "loss": 0.0323516845703125, + "step": 119445 + }, + { + "epoch": 1.0328488296685718, + "grad_norm": 7.396185241667817, + "learning_rate": 2.8452722172471134e-06, + "loss": 0.020670700073242187, + "step": 119450 + }, + { + "epoch": 1.032892063190115, + "grad_norm": 7.447080815697372, + "learning_rate": 2.8450687555868942e-06, + "loss": 0.03499736785888672, + "step": 119455 + }, + { + "epoch": 1.0329352967116583, + "grad_norm": 1.317164010994759, + "learning_rate": 2.8448652946412037e-06, + "loss": 0.008171653747558594, + "step": 119460 + }, + { + "epoch": 1.0329785302332015, + "grad_norm": 1.6402077007739777, + "learning_rate": 2.844661834410981e-06, + "loss": 0.014856338500976562, + "step": 119465 + }, + { + "epoch": 1.033021763754745, + "grad_norm": 14.710347994606398, + "learning_rate": 2.844458374897163e-06, + "loss": 0.0647369384765625, + "step": 119470 + }, + { + "epoch": 1.0330649972762882, + "grad_norm": 9.259958505051438, + "learning_rate": 2.8442549161006893e-06, + "loss": 0.03760223388671875, + "step": 119475 + }, + { + "epoch": 1.0331082307978314, + "grad_norm": 0.4240482971303485, + "learning_rate": 2.8440514580224996e-06, + "loss": 0.11399612426757813, + "step": 119480 + }, + { + "epoch": 1.0331514643193747, + "grad_norm": 17.6042756970362, + "learning_rate": 2.8438480006635303e-06, + "loss": 0.19606266021728516, + "step": 119485 + }, + { + "epoch": 1.0331946978409179, + "grad_norm": 2.1648118916554693, + "learning_rate": 2.8436445440247202e-06, + "loss": 0.0190277099609375, + "step": 119490 + }, + { + "epoch": 1.0332379313624611, + "grad_norm": 0.3143373981287058, + "learning_rate": 2.8434410881070075e-06, + "loss": 0.09723129272460937, + "step": 119495 + }, + { + "epoch": 1.0332811648840046, + "grad_norm": 0.5798662070308163, + "learning_rate": 2.8432376329113302e-06, + "loss": 0.32178955078125, + "step": 119500 + }, + { + "epoch": 1.0333243984055478, + "grad_norm": 1.835154360441221, + "learning_rate": 2.843034178438627e-06, + "loss": 0.2151094436645508, + "step": 119505 + }, + { + "epoch": 1.033367631927091, + "grad_norm": 6.9089999450213195, + "learning_rate": 2.8428307246898374e-06, + "loss": 0.0295013427734375, + "step": 119510 + }, + { + "epoch": 1.0334108654486343, + "grad_norm": 0.1659368851873912, + "learning_rate": 2.8426272716658984e-06, + "loss": 0.02087249755859375, + "step": 119515 + }, + { + "epoch": 1.0334540989701775, + "grad_norm": 21.75641571221585, + "learning_rate": 2.8424238193677485e-06, + "loss": 0.2013641357421875, + "step": 119520 + }, + { + "epoch": 1.0334973324917207, + "grad_norm": 0.6175000552984874, + "learning_rate": 2.8422203677963253e-06, + "loss": 0.07024993896484374, + "step": 119525 + }, + { + "epoch": 1.033540566013264, + "grad_norm": 0.647575188160506, + "learning_rate": 2.842016916952568e-06, + "loss": 0.23873214721679686, + "step": 119530 + }, + { + "epoch": 1.0335837995348074, + "grad_norm": 3.9140504596009253, + "learning_rate": 2.8418134668374144e-06, + "loss": 0.14077835083007811, + "step": 119535 + }, + { + "epoch": 1.0336270330563506, + "grad_norm": 0.15497655992117865, + "learning_rate": 2.841610017451804e-06, + "loss": 0.06441192626953125, + "step": 119540 + }, + { + "epoch": 1.0336702665778938, + "grad_norm": 26.084915147128857, + "learning_rate": 2.841406568796674e-06, + "loss": 0.04519157409667969, + "step": 119545 + }, + { + "epoch": 1.033713500099437, + "grad_norm": 8.275228769481298, + "learning_rate": 2.841203120872963e-06, + "loss": 0.03850059509277344, + "step": 119550 + }, + { + "epoch": 1.0337567336209803, + "grad_norm": 3.4377478738361784, + "learning_rate": 2.8409996736816087e-06, + "loss": 0.028343963623046874, + "step": 119555 + }, + { + "epoch": 1.0337999671425235, + "grad_norm": 2.569666749350704, + "learning_rate": 2.8407962272235506e-06, + "loss": 0.0372406005859375, + "step": 119560 + }, + { + "epoch": 1.033843200664067, + "grad_norm": 0.4463286804865636, + "learning_rate": 2.8405927814997244e-06, + "loss": 0.015648269653320314, + "step": 119565 + }, + { + "epoch": 1.0338864341856102, + "grad_norm": 1.5558826893256366, + "learning_rate": 2.840389336511072e-06, + "loss": 0.054825973510742185, + "step": 119570 + }, + { + "epoch": 1.0339296677071534, + "grad_norm": 41.36748709346167, + "learning_rate": 2.8401858922585297e-06, + "loss": 0.31460723876953123, + "step": 119575 + }, + { + "epoch": 1.0339729012286967, + "grad_norm": 0.1991718443149384, + "learning_rate": 2.839982448743036e-06, + "loss": 0.08665084838867188, + "step": 119580 + }, + { + "epoch": 1.03401613475024, + "grad_norm": 12.35554300940368, + "learning_rate": 2.839779005965528e-06, + "loss": 0.05077056884765625, + "step": 119585 + }, + { + "epoch": 1.0340593682717831, + "grad_norm": 2.1413255728231815, + "learning_rate": 2.8395755639269466e-06, + "loss": 0.0542572021484375, + "step": 119590 + }, + { + "epoch": 1.0341026017933266, + "grad_norm": 0.4402558666389517, + "learning_rate": 2.8393721226282263e-06, + "loss": 0.01697845458984375, + "step": 119595 + }, + { + "epoch": 1.0341458353148698, + "grad_norm": 1.2931121942313317, + "learning_rate": 2.8391686820703096e-06, + "loss": 0.0113128662109375, + "step": 119600 + }, + { + "epoch": 1.034189068836413, + "grad_norm": 11.349635858212496, + "learning_rate": 2.8389652422541325e-06, + "loss": 0.15849151611328124, + "step": 119605 + }, + { + "epoch": 1.0342323023579563, + "grad_norm": 27.951566488165216, + "learning_rate": 2.838761803180633e-06, + "loss": 0.907720947265625, + "step": 119610 + }, + { + "epoch": 1.0342755358794995, + "grad_norm": 3.31584289135432, + "learning_rate": 2.8385583648507497e-06, + "loss": 0.05674057006835938, + "step": 119615 + }, + { + "epoch": 1.0343187694010427, + "grad_norm": 10.259129171704052, + "learning_rate": 2.8383549272654215e-06, + "loss": 0.127227783203125, + "step": 119620 + }, + { + "epoch": 1.034362002922586, + "grad_norm": 71.60658993509328, + "learning_rate": 2.8381514904255864e-06, + "loss": 0.22966995239257812, + "step": 119625 + }, + { + "epoch": 1.0344052364441294, + "grad_norm": 5.98420796778199, + "learning_rate": 2.8379480543321805e-06, + "loss": 0.05713081359863281, + "step": 119630 + }, + { + "epoch": 1.0344484699656726, + "grad_norm": 5.6992524955805655, + "learning_rate": 2.8377446189861452e-06, + "loss": 0.16788825988769532, + "step": 119635 + }, + { + "epoch": 1.0344917034872159, + "grad_norm": 7.463332422925007, + "learning_rate": 2.8375411843884174e-06, + "loss": 0.11971397399902343, + "step": 119640 + }, + { + "epoch": 1.034534937008759, + "grad_norm": 0.26959654789865445, + "learning_rate": 2.837337750539935e-06, + "loss": 0.1742401123046875, + "step": 119645 + }, + { + "epoch": 1.0345781705303023, + "grad_norm": 2.8176000934004164, + "learning_rate": 2.8371343174416372e-06, + "loss": 0.0637237548828125, + "step": 119650 + }, + { + "epoch": 1.0346214040518456, + "grad_norm": 76.46029404078175, + "learning_rate": 2.836930885094461e-06, + "loss": 0.2378384590148926, + "step": 119655 + }, + { + "epoch": 1.034664637573389, + "grad_norm": 7.162038450570753, + "learning_rate": 2.8367274534993444e-06, + "loss": 0.02151031494140625, + "step": 119660 + }, + { + "epoch": 1.0347078710949322, + "grad_norm": 1.4831739097689827, + "learning_rate": 2.8365240226572272e-06, + "loss": 0.06006011962890625, + "step": 119665 + }, + { + "epoch": 1.0347511046164755, + "grad_norm": 14.390321911035713, + "learning_rate": 2.836320592569047e-06, + "loss": 0.14542884826660157, + "step": 119670 + }, + { + "epoch": 1.0347943381380187, + "grad_norm": 1.371195184364631, + "learning_rate": 2.836117163235741e-06, + "loss": 0.12047805786132812, + "step": 119675 + }, + { + "epoch": 1.034837571659562, + "grad_norm": 1.3403656902553056, + "learning_rate": 2.835913734658249e-06, + "loss": 0.07450084686279297, + "step": 119680 + }, + { + "epoch": 1.0348808051811051, + "grad_norm": 4.556140883309691, + "learning_rate": 2.835710306837508e-06, + "loss": 0.10288848876953124, + "step": 119685 + }, + { + "epoch": 1.0349240387026484, + "grad_norm": 7.350670132487691, + "learning_rate": 2.835506879774457e-06, + "loss": 0.02337226867675781, + "step": 119690 + }, + { + "epoch": 1.0349672722241918, + "grad_norm": 0.881497113961311, + "learning_rate": 2.835303453470032e-06, + "loss": 0.11637229919433593, + "step": 119695 + }, + { + "epoch": 1.035010505745735, + "grad_norm": 0.9923456496845375, + "learning_rate": 2.8351000279251742e-06, + "loss": 0.07671585083007812, + "step": 119700 + }, + { + "epoch": 1.0350537392672783, + "grad_norm": 0.5679470217997827, + "learning_rate": 2.8348966031408207e-06, + "loss": 0.253936767578125, + "step": 119705 + }, + { + "epoch": 1.0350969727888215, + "grad_norm": 14.365196468259695, + "learning_rate": 2.8346931791179096e-06, + "loss": 0.08885650634765625, + "step": 119710 + }, + { + "epoch": 1.0351402063103647, + "grad_norm": 5.194564153566207, + "learning_rate": 2.834489755857379e-06, + "loss": 0.16737060546875, + "step": 119715 + }, + { + "epoch": 1.035183439831908, + "grad_norm": 35.572155946157565, + "learning_rate": 2.834286333360166e-06, + "loss": 0.1863189697265625, + "step": 119720 + }, + { + "epoch": 1.0352266733534514, + "grad_norm": 10.452984086354567, + "learning_rate": 2.8340829116272097e-06, + "loss": 0.04895095825195313, + "step": 119725 + }, + { + "epoch": 1.0352699068749946, + "grad_norm": 33.4520429004592, + "learning_rate": 2.833879490659449e-06, + "loss": 0.13789710998535157, + "step": 119730 + }, + { + "epoch": 1.0353131403965379, + "grad_norm": 1.2477579809219592, + "learning_rate": 2.8336760704578216e-06, + "loss": 0.0116119384765625, + "step": 119735 + }, + { + "epoch": 1.035356373918081, + "grad_norm": 0.18881401809109646, + "learning_rate": 2.833472651023266e-06, + "loss": 0.06068572998046875, + "step": 119740 + }, + { + "epoch": 1.0353996074396243, + "grad_norm": 0.14203057738568875, + "learning_rate": 2.8332692323567193e-06, + "loss": 0.13760147094726563, + "step": 119745 + }, + { + "epoch": 1.0354428409611676, + "grad_norm": 2.5103483592994764, + "learning_rate": 2.8330658144591197e-06, + "loss": 0.013512420654296874, + "step": 119750 + }, + { + "epoch": 1.035486074482711, + "grad_norm": 0.1974131318749026, + "learning_rate": 2.8328623973314058e-06, + "loss": 0.013460540771484375, + "step": 119755 + }, + { + "epoch": 1.0355293080042542, + "grad_norm": 37.74567111960162, + "learning_rate": 2.8326589809745155e-06, + "loss": 0.06392669677734375, + "step": 119760 + }, + { + "epoch": 1.0355725415257975, + "grad_norm": 1.7231422375456042, + "learning_rate": 2.8324555653893883e-06, + "loss": 0.030286407470703124, + "step": 119765 + }, + { + "epoch": 1.0356157750473407, + "grad_norm": 30.13400322571714, + "learning_rate": 2.8322521505769606e-06, + "loss": 0.38556709289550783, + "step": 119770 + }, + { + "epoch": 1.035659008568884, + "grad_norm": 6.317234955566911, + "learning_rate": 2.8320487365381716e-06, + "loss": 0.04625396728515625, + "step": 119775 + }, + { + "epoch": 1.0357022420904272, + "grad_norm": 0.5541468708268735, + "learning_rate": 2.8318453232739583e-06, + "loss": 0.008762359619140625, + "step": 119780 + }, + { + "epoch": 1.0357454756119706, + "grad_norm": 0.9672081046678923, + "learning_rate": 2.8316419107852604e-06, + "loss": 0.09492645263671876, + "step": 119785 + }, + { + "epoch": 1.0357887091335138, + "grad_norm": 1.005807922491998, + "learning_rate": 2.8314384990730135e-06, + "loss": 0.19024810791015626, + "step": 119790 + }, + { + "epoch": 1.035831942655057, + "grad_norm": 4.459613411392652, + "learning_rate": 2.8312350881381586e-06, + "loss": 0.04963226318359375, + "step": 119795 + }, + { + "epoch": 1.0358751761766003, + "grad_norm": 17.918939355062328, + "learning_rate": 2.831031677981633e-06, + "loss": 0.05235157012939453, + "step": 119800 + }, + { + "epoch": 1.0359184096981435, + "grad_norm": 7.717488438746545, + "learning_rate": 2.8308282686043742e-06, + "loss": 0.018918228149414063, + "step": 119805 + }, + { + "epoch": 1.0359616432196868, + "grad_norm": 2.1423613717644785, + "learning_rate": 2.8306248600073197e-06, + "loss": 0.121795654296875, + "step": 119810 + }, + { + "epoch": 1.03600487674123, + "grad_norm": 21.683739322779278, + "learning_rate": 2.8304214521914086e-06, + "loss": 0.14729537963867187, + "step": 119815 + }, + { + "epoch": 1.0360481102627734, + "grad_norm": 3.247651786352078, + "learning_rate": 2.830218045157578e-06, + "loss": 0.1324676513671875, + "step": 119820 + }, + { + "epoch": 1.0360913437843167, + "grad_norm": 1.0356702584534268, + "learning_rate": 2.8300146389067678e-06, + "loss": 0.122332763671875, + "step": 119825 + }, + { + "epoch": 1.03613457730586, + "grad_norm": 7.995031548870503, + "learning_rate": 2.8298112334399154e-06, + "loss": 0.26513671875, + "step": 119830 + }, + { + "epoch": 1.0361778108274031, + "grad_norm": 6.597150670999425, + "learning_rate": 2.8296078287579585e-06, + "loss": 0.084283447265625, + "step": 119835 + }, + { + "epoch": 1.0362210443489464, + "grad_norm": 8.16150537501664, + "learning_rate": 2.8294044248618343e-06, + "loss": 0.031789398193359374, + "step": 119840 + }, + { + "epoch": 1.0362642778704896, + "grad_norm": 1.507359154066073, + "learning_rate": 2.829201021752483e-06, + "loss": 0.03361663818359375, + "step": 119845 + }, + { + "epoch": 1.036307511392033, + "grad_norm": 7.1946771611965055, + "learning_rate": 2.8289976194308394e-06, + "loss": 0.0366485595703125, + "step": 119850 + }, + { + "epoch": 1.0363507449135763, + "grad_norm": 0.5861078228075655, + "learning_rate": 2.8287942178978456e-06, + "loss": 0.08806686401367188, + "step": 119855 + }, + { + "epoch": 1.0363939784351195, + "grad_norm": 4.250799142401954, + "learning_rate": 2.8285908171544378e-06, + "loss": 0.08134002685546875, + "step": 119860 + }, + { + "epoch": 1.0364372119566627, + "grad_norm": 1.054433207538985, + "learning_rate": 2.8283874172015535e-06, + "loss": 0.016709136962890624, + "step": 119865 + }, + { + "epoch": 1.036480445478206, + "grad_norm": 33.15625846549751, + "learning_rate": 2.828184018040131e-06, + "loss": 0.1451631546020508, + "step": 119870 + }, + { + "epoch": 1.0365236789997492, + "grad_norm": 5.323500689052748, + "learning_rate": 2.827980619671109e-06, + "loss": 0.025202178955078126, + "step": 119875 + }, + { + "epoch": 1.0365669125212924, + "grad_norm": 2.1215147387724826, + "learning_rate": 2.827777222095425e-06, + "loss": 0.019424057006835936, + "step": 119880 + }, + { + "epoch": 1.0366101460428359, + "grad_norm": 14.926710792831374, + "learning_rate": 2.8275738253140164e-06, + "loss": 0.11218719482421875, + "step": 119885 + }, + { + "epoch": 1.036653379564379, + "grad_norm": 1.3205975899933733, + "learning_rate": 2.8273704293278234e-06, + "loss": 0.16005439758300782, + "step": 119890 + }, + { + "epoch": 1.0366966130859223, + "grad_norm": 4.319395109207865, + "learning_rate": 2.827167034137782e-06, + "loss": 0.21625595092773436, + "step": 119895 + }, + { + "epoch": 1.0367398466074655, + "grad_norm": 0.5836880538267022, + "learning_rate": 2.8269636397448314e-06, + "loss": 0.132586669921875, + "step": 119900 + }, + { + "epoch": 1.0367830801290088, + "grad_norm": 14.466036354658659, + "learning_rate": 2.8267602461499094e-06, + "loss": 0.3130840301513672, + "step": 119905 + }, + { + "epoch": 1.036826313650552, + "grad_norm": 6.763824847356797, + "learning_rate": 2.8265568533539537e-06, + "loss": 0.057447052001953124, + "step": 119910 + }, + { + "epoch": 1.0368695471720955, + "grad_norm": 10.938549324179869, + "learning_rate": 2.8263534613579017e-06, + "loss": 0.046506500244140624, + "step": 119915 + }, + { + "epoch": 1.0369127806936387, + "grad_norm": 0.788851508297601, + "learning_rate": 2.826150070162693e-06, + "loss": 0.07568817138671875, + "step": 119920 + }, + { + "epoch": 1.036956014215182, + "grad_norm": 22.239330392936548, + "learning_rate": 2.825946679769264e-06, + "loss": 0.097015380859375, + "step": 119925 + }, + { + "epoch": 1.0369992477367251, + "grad_norm": 2.119836099397344, + "learning_rate": 2.8257432901785546e-06, + "loss": 0.01478729248046875, + "step": 119930 + }, + { + "epoch": 1.0370424812582684, + "grad_norm": 0.20016225792056905, + "learning_rate": 2.8255399013915013e-06, + "loss": 0.06297779083251953, + "step": 119935 + }, + { + "epoch": 1.0370857147798116, + "grad_norm": 0.11532698513942673, + "learning_rate": 2.8253365134090425e-06, + "loss": 0.10739822387695312, + "step": 119940 + }, + { + "epoch": 1.0371289483013548, + "grad_norm": 0.2700624605753366, + "learning_rate": 2.8251331262321154e-06, + "loss": 0.05680694580078125, + "step": 119945 + }, + { + "epoch": 1.0371721818228983, + "grad_norm": 52.08390400859261, + "learning_rate": 2.82492973986166e-06, + "loss": 0.24676666259765626, + "step": 119950 + }, + { + "epoch": 1.0372154153444415, + "grad_norm": 0.03626703019692749, + "learning_rate": 2.824726354298613e-06, + "loss": 0.041334915161132815, + "step": 119955 + }, + { + "epoch": 1.0372586488659847, + "grad_norm": 4.238793344607509, + "learning_rate": 2.8245229695439125e-06, + "loss": 0.1048248291015625, + "step": 119960 + }, + { + "epoch": 1.037301882387528, + "grad_norm": 6.825421086623924, + "learning_rate": 2.824319585598497e-06, + "loss": 0.032666015625, + "step": 119965 + }, + { + "epoch": 1.0373451159090712, + "grad_norm": 10.01951323034521, + "learning_rate": 2.8241162024633036e-06, + "loss": 0.0763519287109375, + "step": 119970 + }, + { + "epoch": 1.0373883494306144, + "grad_norm": 0.15749509435999487, + "learning_rate": 2.8239128201392705e-06, + "loss": 0.007746315002441407, + "step": 119975 + }, + { + "epoch": 1.0374315829521579, + "grad_norm": 19.324680403150346, + "learning_rate": 2.8237094386273358e-06, + "loss": 0.1105712890625, + "step": 119980 + }, + { + "epoch": 1.037474816473701, + "grad_norm": 0.12520757059489537, + "learning_rate": 2.8235060579284375e-06, + "loss": 0.05906982421875, + "step": 119985 + }, + { + "epoch": 1.0375180499952443, + "grad_norm": 0.442626200791598, + "learning_rate": 2.8233026780435145e-06, + "loss": 0.13734893798828124, + "step": 119990 + }, + { + "epoch": 1.0375612835167876, + "grad_norm": 34.9137846647494, + "learning_rate": 2.8230992989735038e-06, + "loss": 0.16889495849609376, + "step": 119995 + }, + { + "epoch": 1.0376045170383308, + "grad_norm": 3.0958689613198365, + "learning_rate": 2.8228959207193433e-06, + "loss": 0.23533592224121094, + "step": 120000 + }, + { + "epoch": 1.0376045170383308, + "eval_loss": 0.13517992198467255, + "eval_margin": 0.14652018249034882, + "eval_mean_neg": -0.0009239530190825462, + "eval_mean_pos": 0.7222235202789307, + "eval_runtime": 20.7958, + "eval_samples_per_second": 11.108, + "eval_steps_per_second": 5.578, + "step": 120000 + }, + { + "epoch": 1.037647750559874, + "grad_norm": 3.758484622228887, + "learning_rate": 2.8226925432819707e-06, + "loss": 0.07839126586914062, + "step": 120005 + }, + { + "epoch": 1.0376909840814175, + "grad_norm": 30.62842515193429, + "learning_rate": 2.8224891666623247e-06, + "loss": 0.08335914611816406, + "step": 120010 + }, + { + "epoch": 1.0377342176029607, + "grad_norm": 0.7712561087308862, + "learning_rate": 2.8222857908613427e-06, + "loss": 0.019338607788085938, + "step": 120015 + }, + { + "epoch": 1.037777451124504, + "grad_norm": 2.6492082502965557, + "learning_rate": 2.822082415879964e-06, + "loss": 0.07061767578125, + "step": 120020 + }, + { + "epoch": 1.0378206846460472, + "grad_norm": 12.189825808058579, + "learning_rate": 2.8218790417191248e-06, + "loss": 0.04939422607421875, + "step": 120025 + }, + { + "epoch": 1.0378639181675904, + "grad_norm": 3.819783992389764, + "learning_rate": 2.8216756683797637e-06, + "loss": 0.1797271728515625, + "step": 120030 + }, + { + "epoch": 1.0379071516891336, + "grad_norm": 1.4449478452973585, + "learning_rate": 2.8214722958628182e-06, + "loss": 0.011708450317382813, + "step": 120035 + }, + { + "epoch": 1.037950385210677, + "grad_norm": 21.04180485374587, + "learning_rate": 2.8212689241692278e-06, + "loss": 0.06831703186035157, + "step": 120040 + }, + { + "epoch": 1.0379936187322203, + "grad_norm": 1.7262940806761988, + "learning_rate": 2.821065553299928e-06, + "loss": 0.030876922607421874, + "step": 120045 + }, + { + "epoch": 1.0380368522537635, + "grad_norm": 0.05260869081677538, + "learning_rate": 2.8208621832558587e-06, + "loss": 0.060344696044921875, + "step": 120050 + }, + { + "epoch": 1.0380800857753067, + "grad_norm": 0.5109183829803138, + "learning_rate": 2.8206588140379575e-06, + "loss": 0.09465484619140625, + "step": 120055 + }, + { + "epoch": 1.03812331929685, + "grad_norm": 3.4125485450154085, + "learning_rate": 2.820455445647162e-06, + "loss": 0.04867076873779297, + "step": 120060 + }, + { + "epoch": 1.0381665528183932, + "grad_norm": 1.7574238906136899, + "learning_rate": 2.8202520780844093e-06, + "loss": 0.058118438720703124, + "step": 120065 + }, + { + "epoch": 1.0382097863399364, + "grad_norm": 5.763715008530023, + "learning_rate": 2.8200487113506393e-06, + "loss": 0.26209259033203125, + "step": 120070 + }, + { + "epoch": 1.0382530198614799, + "grad_norm": 6.708953958642827, + "learning_rate": 2.819845345446787e-06, + "loss": 0.02850513458251953, + "step": 120075 + }, + { + "epoch": 1.0382962533830231, + "grad_norm": 2.541157374689812, + "learning_rate": 2.8196419803737934e-06, + "loss": 0.1476531982421875, + "step": 120080 + }, + { + "epoch": 1.0383394869045663, + "grad_norm": 0.6896935481442719, + "learning_rate": 2.8194386161325953e-06, + "loss": 0.063323974609375, + "step": 120085 + }, + { + "epoch": 1.0383827204261096, + "grad_norm": 4.786876222459523, + "learning_rate": 2.81923525272413e-06, + "loss": 0.029394149780273438, + "step": 120090 + }, + { + "epoch": 1.0384259539476528, + "grad_norm": 7.776525683251715, + "learning_rate": 2.8190318901493356e-06, + "loss": 0.04182586669921875, + "step": 120095 + }, + { + "epoch": 1.038469187469196, + "grad_norm": 9.138849454064582, + "learning_rate": 2.8188285284091505e-06, + "loss": 0.0567901611328125, + "step": 120100 + }, + { + "epoch": 1.0385124209907395, + "grad_norm": 1.5071196249450578, + "learning_rate": 2.818625167504511e-06, + "loss": 0.009704208374023438, + "step": 120105 + }, + { + "epoch": 1.0385556545122827, + "grad_norm": 14.980071243290846, + "learning_rate": 2.818421807436357e-06, + "loss": 0.0542266845703125, + "step": 120110 + }, + { + "epoch": 1.038598888033826, + "grad_norm": 11.265086641228041, + "learning_rate": 2.818218448205626e-06, + "loss": 0.42752609252929685, + "step": 120115 + }, + { + "epoch": 1.0386421215553692, + "grad_norm": 0.19776568444833764, + "learning_rate": 2.818015089813255e-06, + "loss": 0.10343475341796875, + "step": 120120 + }, + { + "epoch": 1.0386853550769124, + "grad_norm": 17.7399608797649, + "learning_rate": 2.8178117322601824e-06, + "loss": 0.03897781372070312, + "step": 120125 + }, + { + "epoch": 1.0387285885984556, + "grad_norm": 4.201532088372679, + "learning_rate": 2.8176083755473467e-06, + "loss": 0.04563827514648437, + "step": 120130 + }, + { + "epoch": 1.0387718221199989, + "grad_norm": 9.071426104301533, + "learning_rate": 2.817405019675684e-06, + "loss": 0.104937744140625, + "step": 120135 + }, + { + "epoch": 1.0388150556415423, + "grad_norm": 3.0574548731275177, + "learning_rate": 2.8172016646461333e-06, + "loss": 0.041461944580078125, + "step": 120140 + }, + { + "epoch": 1.0388582891630855, + "grad_norm": 8.888470748169595, + "learning_rate": 2.8169983104596326e-06, + "loss": 0.115423583984375, + "step": 120145 + }, + { + "epoch": 1.0389015226846288, + "grad_norm": 18.605204883928753, + "learning_rate": 2.8167949571171193e-06, + "loss": 0.34874114990234373, + "step": 120150 + }, + { + "epoch": 1.038944756206172, + "grad_norm": 4.881662269878321, + "learning_rate": 2.816591604619532e-06, + "loss": 0.13801193237304688, + "step": 120155 + }, + { + "epoch": 1.0389879897277152, + "grad_norm": 3.708220062697582, + "learning_rate": 2.8163882529678084e-06, + "loss": 0.028558349609375, + "step": 120160 + }, + { + "epoch": 1.0390312232492585, + "grad_norm": 17.2315615951137, + "learning_rate": 2.8161849021628856e-06, + "loss": 0.106353759765625, + "step": 120165 + }, + { + "epoch": 1.039074456770802, + "grad_norm": 16.792578409539406, + "learning_rate": 2.8159815522057e-06, + "loss": 0.10392837524414063, + "step": 120170 + }, + { + "epoch": 1.0391176902923451, + "grad_norm": 11.279758960124346, + "learning_rate": 2.8157782030971932e-06, + "loss": 0.10406723022460937, + "step": 120175 + }, + { + "epoch": 1.0391609238138884, + "grad_norm": 6.531287723824039, + "learning_rate": 2.8155748548383003e-06, + "loss": 0.11389923095703125, + "step": 120180 + }, + { + "epoch": 1.0392041573354316, + "grad_norm": 0.6141124283530944, + "learning_rate": 2.8153715074299605e-06, + "loss": 0.09961090087890626, + "step": 120185 + }, + { + "epoch": 1.0392473908569748, + "grad_norm": 0.7038608574326773, + "learning_rate": 2.8151681608731113e-06, + "loss": 0.043691253662109374, + "step": 120190 + }, + { + "epoch": 1.039290624378518, + "grad_norm": 0.33067150362586745, + "learning_rate": 2.8149648151686897e-06, + "loss": 0.027199172973632814, + "step": 120195 + }, + { + "epoch": 1.0393338579000615, + "grad_norm": 7.254804250684105, + "learning_rate": 2.814761470317633e-06, + "loss": 0.084442138671875, + "step": 120200 + }, + { + "epoch": 1.0393770914216047, + "grad_norm": 9.484367839950298, + "learning_rate": 2.814558126320881e-06, + "loss": 0.11338424682617188, + "step": 120205 + }, + { + "epoch": 1.039420324943148, + "grad_norm": 10.792695541609406, + "learning_rate": 2.81435478317937e-06, + "loss": 0.0564666748046875, + "step": 120210 + }, + { + "epoch": 1.0394635584646912, + "grad_norm": 1.1038108649965663, + "learning_rate": 2.814151440894039e-06, + "loss": 0.044864654541015625, + "step": 120215 + }, + { + "epoch": 1.0395067919862344, + "grad_norm": 10.114578609463411, + "learning_rate": 2.8139480994658253e-06, + "loss": 0.10394287109375, + "step": 120220 + }, + { + "epoch": 1.0395500255077776, + "grad_norm": 7.0260545160764725, + "learning_rate": 2.8137447588956664e-06, + "loss": 0.14940376281738282, + "step": 120225 + }, + { + "epoch": 1.0395932590293209, + "grad_norm": 0.4287624136810076, + "learning_rate": 2.8135414191844997e-06, + "loss": 0.0070781707763671875, + "step": 120230 + }, + { + "epoch": 1.0396364925508643, + "grad_norm": 2.897600156973523, + "learning_rate": 2.8133380803332633e-06, + "loss": 0.0357574462890625, + "step": 120235 + }, + { + "epoch": 1.0396797260724076, + "grad_norm": 0.8983161887604718, + "learning_rate": 2.8131347423428953e-06, + "loss": 0.021704483032226562, + "step": 120240 + }, + { + "epoch": 1.0397229595939508, + "grad_norm": 4.037839867864967, + "learning_rate": 2.812931405214334e-06, + "loss": 0.08314285278320313, + "step": 120245 + }, + { + "epoch": 1.039766193115494, + "grad_norm": 3.02111409157195, + "learning_rate": 2.8127280689485163e-06, + "loss": 0.02332725524902344, + "step": 120250 + }, + { + "epoch": 1.0398094266370372, + "grad_norm": 1.4424908803595071, + "learning_rate": 2.8125247335463807e-06, + "loss": 0.047149276733398436, + "step": 120255 + }, + { + "epoch": 1.0398526601585805, + "grad_norm": 17.61855469256781, + "learning_rate": 2.8123213990088637e-06, + "loss": 0.20599365234375, + "step": 120260 + }, + { + "epoch": 1.039895893680124, + "grad_norm": 35.359831114575954, + "learning_rate": 2.8121180653369035e-06, + "loss": 0.07044792175292969, + "step": 120265 + }, + { + "epoch": 1.0399391272016671, + "grad_norm": 50.246636221215276, + "learning_rate": 2.8119147325314384e-06, + "loss": 0.37118377685546877, + "step": 120270 + }, + { + "epoch": 1.0399823607232104, + "grad_norm": 11.28551819549463, + "learning_rate": 2.8117114005934067e-06, + "loss": 0.3004852294921875, + "step": 120275 + }, + { + "epoch": 1.0400255942447536, + "grad_norm": 0.521380576439282, + "learning_rate": 2.811508069523745e-06, + "loss": 0.0198089599609375, + "step": 120280 + }, + { + "epoch": 1.0400688277662968, + "grad_norm": 5.268236463743778, + "learning_rate": 2.8113047393233917e-06, + "loss": 0.015381622314453124, + "step": 120285 + }, + { + "epoch": 1.04011206128784, + "grad_norm": 0.29482170647799105, + "learning_rate": 2.8111014099932838e-06, + "loss": 0.071954345703125, + "step": 120290 + }, + { + "epoch": 1.0401552948093835, + "grad_norm": 9.26917689820199, + "learning_rate": 2.810898081534358e-06, + "loss": 0.0831155776977539, + "step": 120295 + }, + { + "epoch": 1.0401985283309267, + "grad_norm": 0.41300393687299375, + "learning_rate": 2.810694753947556e-06, + "loss": 0.0759185791015625, + "step": 120300 + }, + { + "epoch": 1.04024176185247, + "grad_norm": 0.030536335557624868, + "learning_rate": 2.8104914272338124e-06, + "loss": 0.10467567443847656, + "step": 120305 + }, + { + "epoch": 1.0402849953740132, + "grad_norm": 0.8015512789355186, + "learning_rate": 2.810288101394066e-06, + "loss": 0.05371017456054687, + "step": 120310 + }, + { + "epoch": 1.0403282288955564, + "grad_norm": 20.41545183128068, + "learning_rate": 2.8100847764292533e-06, + "loss": 0.15804595947265626, + "step": 120315 + }, + { + "epoch": 1.0403714624170997, + "grad_norm": 0.5489452423174428, + "learning_rate": 2.809881452340313e-06, + "loss": 0.026070213317871092, + "step": 120320 + }, + { + "epoch": 1.0404146959386429, + "grad_norm": 2.518359489821419, + "learning_rate": 2.809678129128183e-06, + "loss": 0.18414688110351562, + "step": 120325 + }, + { + "epoch": 1.0404579294601863, + "grad_norm": 25.644408762689068, + "learning_rate": 2.8094748067937993e-06, + "loss": 0.23718719482421874, + "step": 120330 + }, + { + "epoch": 1.0405011629817296, + "grad_norm": 52.805031491407945, + "learning_rate": 2.8092714853381022e-06, + "loss": 0.1817047119140625, + "step": 120335 + }, + { + "epoch": 1.0405443965032728, + "grad_norm": 17.484443903133048, + "learning_rate": 2.809068164762028e-06, + "loss": 0.06626739501953124, + "step": 120340 + }, + { + "epoch": 1.040587630024816, + "grad_norm": 4.830895215752722, + "learning_rate": 2.8088648450665143e-06, + "loss": 0.02755107879638672, + "step": 120345 + }, + { + "epoch": 1.0406308635463593, + "grad_norm": 3.5069610919082304, + "learning_rate": 2.8086615262524996e-06, + "loss": 0.14202880859375, + "step": 120350 + }, + { + "epoch": 1.0406740970679025, + "grad_norm": 28.784879880689576, + "learning_rate": 2.808458208320921e-06, + "loss": 0.13556747436523436, + "step": 120355 + }, + { + "epoch": 1.040717330589446, + "grad_norm": 19.88586412371449, + "learning_rate": 2.808254891272715e-06, + "loss": 0.211944580078125, + "step": 120360 + }, + { + "epoch": 1.0407605641109892, + "grad_norm": 26.23128144329791, + "learning_rate": 2.8080515751088218e-06, + "loss": 0.3067008972167969, + "step": 120365 + }, + { + "epoch": 1.0408037976325324, + "grad_norm": 0.08168046735269052, + "learning_rate": 2.8078482598301777e-06, + "loss": 0.02377777099609375, + "step": 120370 + }, + { + "epoch": 1.0408470311540756, + "grad_norm": 0.7933656345925004, + "learning_rate": 2.8076449454377194e-06, + "loss": 0.01101226806640625, + "step": 120375 + }, + { + "epoch": 1.0408902646756188, + "grad_norm": 2.686393039797254, + "learning_rate": 2.8074416319323867e-06, + "loss": 0.0163970947265625, + "step": 120380 + }, + { + "epoch": 1.040933498197162, + "grad_norm": 13.618379277017302, + "learning_rate": 2.8072383193151163e-06, + "loss": 0.110198974609375, + "step": 120385 + }, + { + "epoch": 1.0409767317187053, + "grad_norm": 1.0118533009053647, + "learning_rate": 2.807035007586845e-06, + "loss": 0.02161684036254883, + "step": 120390 + }, + { + "epoch": 1.0410199652402488, + "grad_norm": 7.134027872562755, + "learning_rate": 2.806831696748511e-06, + "loss": 0.05074920654296875, + "step": 120395 + }, + { + "epoch": 1.041063198761792, + "grad_norm": 12.744187628919823, + "learning_rate": 2.806628386801053e-06, + "loss": 0.035144805908203125, + "step": 120400 + }, + { + "epoch": 1.0411064322833352, + "grad_norm": 0.23082522009672551, + "learning_rate": 2.806425077745407e-06, + "loss": 0.194866943359375, + "step": 120405 + }, + { + "epoch": 1.0411496658048784, + "grad_norm": 0.2506277040464577, + "learning_rate": 2.806221769582512e-06, + "loss": 0.12799835205078125, + "step": 120410 + }, + { + "epoch": 1.0411928993264217, + "grad_norm": 0.5109492980077516, + "learning_rate": 2.8060184623133054e-06, + "loss": 0.10032501220703124, + "step": 120415 + }, + { + "epoch": 1.041236132847965, + "grad_norm": 25.47637385686419, + "learning_rate": 2.805815155938724e-06, + "loss": 0.10277862548828125, + "step": 120420 + }, + { + "epoch": 1.0412793663695084, + "grad_norm": 10.271611059501144, + "learning_rate": 2.805611850459705e-06, + "loss": 0.10009765625, + "step": 120425 + }, + { + "epoch": 1.0413225998910516, + "grad_norm": 0.6357537016474378, + "learning_rate": 2.8054085458771884e-06, + "loss": 0.017653656005859376, + "step": 120430 + }, + { + "epoch": 1.0413658334125948, + "grad_norm": 0.20784637179020046, + "learning_rate": 2.8052052421921096e-06, + "loss": 0.025694656372070312, + "step": 120435 + }, + { + "epoch": 1.041409066934138, + "grad_norm": 10.869474422812088, + "learning_rate": 2.8050019394054075e-06, + "loss": 0.03155670166015625, + "step": 120440 + }, + { + "epoch": 1.0414523004556813, + "grad_norm": 4.575555520245143, + "learning_rate": 2.8047986375180194e-06, + "loss": 0.0467132568359375, + "step": 120445 + }, + { + "epoch": 1.0414955339772245, + "grad_norm": 3.496778567969805, + "learning_rate": 2.804595336530883e-06, + "loss": 0.2555084228515625, + "step": 120450 + }, + { + "epoch": 1.041538767498768, + "grad_norm": 0.6920685758561366, + "learning_rate": 2.804392036444934e-06, + "loss": 0.035082244873046876, + "step": 120455 + }, + { + "epoch": 1.0415820010203112, + "grad_norm": 3.446183624804741, + "learning_rate": 2.8041887372611126e-06, + "loss": 0.12112884521484375, + "step": 120460 + }, + { + "epoch": 1.0416252345418544, + "grad_norm": 9.109781634060456, + "learning_rate": 2.803985438980355e-06, + "loss": 0.11476593017578125, + "step": 120465 + }, + { + "epoch": 1.0416684680633976, + "grad_norm": 2.749880606321942, + "learning_rate": 2.8037821416036004e-06, + "loss": 0.02244415283203125, + "step": 120470 + }, + { + "epoch": 1.0417117015849409, + "grad_norm": 15.034649659070917, + "learning_rate": 2.8035788451317847e-06, + "loss": 0.060894775390625, + "step": 120475 + }, + { + "epoch": 1.041754935106484, + "grad_norm": 3.2004763669813885, + "learning_rate": 2.8033755495658462e-06, + "loss": 0.032117462158203124, + "step": 120480 + }, + { + "epoch": 1.0417981686280273, + "grad_norm": 2.658322262492098, + "learning_rate": 2.8031722549067217e-06, + "loss": 0.02196807861328125, + "step": 120485 + }, + { + "epoch": 1.0418414021495708, + "grad_norm": 0.748798811645675, + "learning_rate": 2.8029689611553493e-06, + "loss": 0.04915313720703125, + "step": 120490 + }, + { + "epoch": 1.041884635671114, + "grad_norm": 1.7604789965988998, + "learning_rate": 2.8027656683126666e-06, + "loss": 0.055425262451171874, + "step": 120495 + }, + { + "epoch": 1.0419278691926572, + "grad_norm": 10.130991724432286, + "learning_rate": 2.802562376379612e-06, + "loss": 0.1452606201171875, + "step": 120500 + }, + { + "epoch": 1.0419711027142005, + "grad_norm": 0.7043085748872987, + "learning_rate": 2.802359085357122e-06, + "loss": 0.19626312255859374, + "step": 120505 + }, + { + "epoch": 1.0420143362357437, + "grad_norm": 0.5191248429809104, + "learning_rate": 2.802155795246135e-06, + "loss": 0.07331619262695313, + "step": 120510 + }, + { + "epoch": 1.042057569757287, + "grad_norm": 1.0254852238134438, + "learning_rate": 2.801952506047587e-06, + "loss": 0.026317596435546875, + "step": 120515 + }, + { + "epoch": 1.0421008032788304, + "grad_norm": 0.14396063662823497, + "learning_rate": 2.8017492177624155e-06, + "loss": 0.1277761459350586, + "step": 120520 + }, + { + "epoch": 1.0421440368003736, + "grad_norm": 13.91207124220127, + "learning_rate": 2.801545930391561e-06, + "loss": 0.10845947265625, + "step": 120525 + }, + { + "epoch": 1.0421872703219168, + "grad_norm": 9.299986109980539, + "learning_rate": 2.8013426439359587e-06, + "loss": 0.035947418212890624, + "step": 120530 + }, + { + "epoch": 1.04223050384346, + "grad_norm": 0.6116603822844622, + "learning_rate": 2.801139358396547e-06, + "loss": 0.02295379638671875, + "step": 120535 + }, + { + "epoch": 1.0422737373650033, + "grad_norm": 1.2210768700388996, + "learning_rate": 2.8009360737742623e-06, + "loss": 0.10462207794189453, + "step": 120540 + }, + { + "epoch": 1.0423169708865465, + "grad_norm": 23.306485416042555, + "learning_rate": 2.8007327900700427e-06, + "loss": 0.21067352294921876, + "step": 120545 + }, + { + "epoch": 1.04236020440809, + "grad_norm": 0.26535459667287536, + "learning_rate": 2.800529507284825e-06, + "loss": 0.009725189208984375, + "step": 120550 + }, + { + "epoch": 1.0424034379296332, + "grad_norm": 1.1935631702444207, + "learning_rate": 2.8003262254195492e-06, + "loss": 0.03560981750488281, + "step": 120555 + }, + { + "epoch": 1.0424466714511764, + "grad_norm": 1.2471524959297398, + "learning_rate": 2.800122944475151e-06, + "loss": 0.02021942138671875, + "step": 120560 + }, + { + "epoch": 1.0424899049727196, + "grad_norm": 7.104347748073964, + "learning_rate": 2.7999196644525683e-06, + "loss": 0.0303192138671875, + "step": 120565 + }, + { + "epoch": 1.0425331384942629, + "grad_norm": 0.09245761621789161, + "learning_rate": 2.7997163853527377e-06, + "loss": 0.004821014404296875, + "step": 120570 + }, + { + "epoch": 1.042576372015806, + "grad_norm": 5.420196355417348, + "learning_rate": 2.7995131071765977e-06, + "loss": 0.03329010009765625, + "step": 120575 + }, + { + "epoch": 1.0426196055373493, + "grad_norm": 1.781882844674845, + "learning_rate": 2.799309829925086e-06, + "loss": 0.0775421142578125, + "step": 120580 + }, + { + "epoch": 1.0426628390588928, + "grad_norm": 9.856657489437321, + "learning_rate": 2.799106553599138e-06, + "loss": 0.055384063720703126, + "step": 120585 + }, + { + "epoch": 1.042706072580436, + "grad_norm": 16.699555403695708, + "learning_rate": 2.7989032781996946e-06, + "loss": 0.12867279052734376, + "step": 120590 + }, + { + "epoch": 1.0427493061019792, + "grad_norm": 19.73177297984558, + "learning_rate": 2.7987000037276916e-06, + "loss": 0.06372795104980469, + "step": 120595 + }, + { + "epoch": 1.0427925396235225, + "grad_norm": 8.953385191807483, + "learning_rate": 2.798496730184065e-06, + "loss": 0.0632354736328125, + "step": 120600 + }, + { + "epoch": 1.0428357731450657, + "grad_norm": 0.8624121369998723, + "learning_rate": 2.798293457569755e-06, + "loss": 0.05311737060546875, + "step": 120605 + }, + { + "epoch": 1.042879006666609, + "grad_norm": 8.242435254663846, + "learning_rate": 2.7980901858856973e-06, + "loss": 0.08459854125976562, + "step": 120610 + }, + { + "epoch": 1.0429222401881524, + "grad_norm": 1.0443547987578363, + "learning_rate": 2.7978869151328287e-06, + "loss": 0.0792144775390625, + "step": 120615 + }, + { + "epoch": 1.0429654737096956, + "grad_norm": 0.33597974307470957, + "learning_rate": 2.797683645312089e-06, + "loss": 0.065667724609375, + "step": 120620 + }, + { + "epoch": 1.0430087072312388, + "grad_norm": 36.58477891615889, + "learning_rate": 2.7974803764244146e-06, + "loss": 0.10228195190429687, + "step": 120625 + }, + { + "epoch": 1.043051940752782, + "grad_norm": 4.085754427395597, + "learning_rate": 2.797277108470742e-06, + "loss": 0.04037685394287109, + "step": 120630 + }, + { + "epoch": 1.0430951742743253, + "grad_norm": 2.108598694632069, + "learning_rate": 2.79707384145201e-06, + "loss": 0.0162841796875, + "step": 120635 + }, + { + "epoch": 1.0431384077958685, + "grad_norm": 6.668574893622657, + "learning_rate": 2.796870575369156e-06, + "loss": 0.041534805297851564, + "step": 120640 + }, + { + "epoch": 1.0431816413174118, + "grad_norm": 17.578255727231607, + "learning_rate": 2.796667310223115e-06, + "loss": 0.050357818603515625, + "step": 120645 + }, + { + "epoch": 1.0432248748389552, + "grad_norm": 1.0270773277691356, + "learning_rate": 2.7964640460148282e-06, + "loss": 0.13077392578125, + "step": 120650 + }, + { + "epoch": 1.0432681083604984, + "grad_norm": 14.510220289867819, + "learning_rate": 2.796260782745231e-06, + "loss": 0.09631576538085937, + "step": 120655 + }, + { + "epoch": 1.0433113418820417, + "grad_norm": 4.457561987811152, + "learning_rate": 2.7960575204152607e-06, + "loss": 0.02949371337890625, + "step": 120660 + }, + { + "epoch": 1.043354575403585, + "grad_norm": 5.0478177205125805, + "learning_rate": 2.7958542590258557e-06, + "loss": 0.053897857666015625, + "step": 120665 + }, + { + "epoch": 1.0433978089251281, + "grad_norm": 41.21426301420299, + "learning_rate": 2.7956509985779527e-06, + "loss": 0.16044921875, + "step": 120670 + }, + { + "epoch": 1.0434410424466714, + "grad_norm": 0.13406945865662317, + "learning_rate": 2.795447739072489e-06, + "loss": 0.08356285095214844, + "step": 120675 + }, + { + "epoch": 1.0434842759682148, + "grad_norm": 45.24316582482047, + "learning_rate": 2.795244480510401e-06, + "loss": 0.2128599166870117, + "step": 120680 + }, + { + "epoch": 1.043527509489758, + "grad_norm": 2.2687774372293457, + "learning_rate": 2.795041222892629e-06, + "loss": 0.08019218444824219, + "step": 120685 + }, + { + "epoch": 1.0435707430113013, + "grad_norm": 17.3003272957541, + "learning_rate": 2.7948379662201076e-06, + "loss": 0.0991851806640625, + "step": 120690 + }, + { + "epoch": 1.0436139765328445, + "grad_norm": 10.81778750027934, + "learning_rate": 2.794634710493777e-06, + "loss": 0.08257293701171875, + "step": 120695 + }, + { + "epoch": 1.0436572100543877, + "grad_norm": 0.6081967630781027, + "learning_rate": 2.794431455714572e-06, + "loss": 0.2112152099609375, + "step": 120700 + }, + { + "epoch": 1.043700443575931, + "grad_norm": 0.9162973062384497, + "learning_rate": 2.794228201883431e-06, + "loss": 0.032520103454589847, + "step": 120705 + }, + { + "epoch": 1.0437436770974744, + "grad_norm": 21.852360905958097, + "learning_rate": 2.7940249490012905e-06, + "loss": 0.09497909545898438, + "step": 120710 + }, + { + "epoch": 1.0437869106190176, + "grad_norm": 1.7071467089461778, + "learning_rate": 2.79382169706909e-06, + "loss": 0.030229568481445312, + "step": 120715 + }, + { + "epoch": 1.0438301441405609, + "grad_norm": 22.05605673480899, + "learning_rate": 2.793618446087765e-06, + "loss": 0.09074573516845703, + "step": 120720 + }, + { + "epoch": 1.043873377662104, + "grad_norm": 47.76296761207495, + "learning_rate": 2.7934151960582537e-06, + "loss": 0.13166465759277343, + "step": 120725 + }, + { + "epoch": 1.0439166111836473, + "grad_norm": 3.9329565404573774, + "learning_rate": 2.7932119469814936e-06, + "loss": 0.07058181762695312, + "step": 120730 + }, + { + "epoch": 1.0439598447051905, + "grad_norm": 0.25015971105152346, + "learning_rate": 2.793008698858422e-06, + "loss": 0.0824249267578125, + "step": 120735 + }, + { + "epoch": 1.044003078226734, + "grad_norm": 5.532501437579403, + "learning_rate": 2.7928054516899746e-06, + "loss": 0.03624420166015625, + "step": 120740 + }, + { + "epoch": 1.0440463117482772, + "grad_norm": 15.883618586966978, + "learning_rate": 2.79260220547709e-06, + "loss": 0.2999855041503906, + "step": 120745 + }, + { + "epoch": 1.0440895452698205, + "grad_norm": 0.16087549953975483, + "learning_rate": 2.792398960220707e-06, + "loss": 0.07947006225585937, + "step": 120750 + }, + { + "epoch": 1.0441327787913637, + "grad_norm": 5.096733167235445, + "learning_rate": 2.7921957159217615e-06, + "loss": 0.0816162109375, + "step": 120755 + }, + { + "epoch": 1.044176012312907, + "grad_norm": 2.7234386765018845, + "learning_rate": 2.791992472581191e-06, + "loss": 0.07242507934570312, + "step": 120760 + }, + { + "epoch": 1.0442192458344501, + "grad_norm": 0.30578735083781156, + "learning_rate": 2.791789230199932e-06, + "loss": 0.12719573974609374, + "step": 120765 + }, + { + "epoch": 1.0442624793559934, + "grad_norm": 0.3592785167073694, + "learning_rate": 2.7915859887789235e-06, + "loss": 0.0163604736328125, + "step": 120770 + }, + { + "epoch": 1.0443057128775368, + "grad_norm": 0.8221113162883924, + "learning_rate": 2.791382748319101e-06, + "loss": 0.01563568115234375, + "step": 120775 + }, + { + "epoch": 1.04434894639908, + "grad_norm": 14.681976383424166, + "learning_rate": 2.7911795088214037e-06, + "loss": 0.041796875, + "step": 120780 + }, + { + "epoch": 1.0443921799206233, + "grad_norm": 3.952496777314378, + "learning_rate": 2.7909762702867687e-06, + "loss": 0.039928436279296875, + "step": 120785 + }, + { + "epoch": 1.0444354134421665, + "grad_norm": 63.83011205719171, + "learning_rate": 2.790773032716132e-06, + "loss": 0.3253168106079102, + "step": 120790 + }, + { + "epoch": 1.0444786469637097, + "grad_norm": 5.141132367089252, + "learning_rate": 2.7905697961104312e-06, + "loss": 0.03634910583496094, + "step": 120795 + }, + { + "epoch": 1.044521880485253, + "grad_norm": 0.12723564188597353, + "learning_rate": 2.7903665604706046e-06, + "loss": 0.23084564208984376, + "step": 120800 + }, + { + "epoch": 1.0445651140067964, + "grad_norm": 15.051230109448076, + "learning_rate": 2.7901633257975876e-06, + "loss": 0.10924530029296875, + "step": 120805 + }, + { + "epoch": 1.0446083475283396, + "grad_norm": 0.1066907075142546, + "learning_rate": 2.78996009209232e-06, + "loss": 0.03660392761230469, + "step": 120810 + }, + { + "epoch": 1.0446515810498829, + "grad_norm": 1.507381779862129, + "learning_rate": 2.7897568593557384e-06, + "loss": 0.08274078369140625, + "step": 120815 + }, + { + "epoch": 1.044694814571426, + "grad_norm": 0.82493004604685, + "learning_rate": 2.789553627588779e-06, + "loss": 0.09429855346679687, + "step": 120820 + }, + { + "epoch": 1.0447380480929693, + "grad_norm": 0.8376281061195896, + "learning_rate": 2.7893503967923794e-06, + "loss": 0.0595428466796875, + "step": 120825 + }, + { + "epoch": 1.0447812816145126, + "grad_norm": 22.5858523275165, + "learning_rate": 2.7891471669674775e-06, + "loss": 0.07495689392089844, + "step": 120830 + }, + { + "epoch": 1.0448245151360558, + "grad_norm": 7.603630012732212, + "learning_rate": 2.78894393811501e-06, + "loss": 0.021375656127929688, + "step": 120835 + }, + { + "epoch": 1.0448677486575992, + "grad_norm": 23.475558126397175, + "learning_rate": 2.7887407102359137e-06, + "loss": 0.0947052001953125, + "step": 120840 + }, + { + "epoch": 1.0449109821791425, + "grad_norm": 1.1772017217473343, + "learning_rate": 2.788537483331128e-06, + "loss": 0.045875930786132814, + "step": 120845 + }, + { + "epoch": 1.0449542157006857, + "grad_norm": 2.028873451877049, + "learning_rate": 2.788334257401588e-06, + "loss": 0.05172996520996094, + "step": 120850 + }, + { + "epoch": 1.044997449222229, + "grad_norm": 7.694078832620013, + "learning_rate": 2.7881310324482316e-06, + "loss": 0.02698516845703125, + "step": 120855 + }, + { + "epoch": 1.0450406827437722, + "grad_norm": 0.9666041243093221, + "learning_rate": 2.7879278084719965e-06, + "loss": 0.06391258239746093, + "step": 120860 + }, + { + "epoch": 1.0450839162653154, + "grad_norm": 0.1544000725666927, + "learning_rate": 2.78772458547382e-06, + "loss": 0.07435302734375, + "step": 120865 + }, + { + "epoch": 1.0451271497868588, + "grad_norm": 8.130969749101766, + "learning_rate": 2.787521363454637e-06, + "loss": 0.09649524688720704, + "step": 120870 + }, + { + "epoch": 1.045170383308402, + "grad_norm": 8.4467091408711, + "learning_rate": 2.7873181424153887e-06, + "loss": 0.1283050537109375, + "step": 120875 + }, + { + "epoch": 1.0452136168299453, + "grad_norm": 0.9068443739443299, + "learning_rate": 2.78711492235701e-06, + "loss": 0.03456802368164062, + "step": 120880 + }, + { + "epoch": 1.0452568503514885, + "grad_norm": 0.1685421489803384, + "learning_rate": 2.786911703280438e-06, + "loss": 0.013570404052734375, + "step": 120885 + }, + { + "epoch": 1.0453000838730317, + "grad_norm": 0.02967547281679752, + "learning_rate": 2.786708485186611e-06, + "loss": 0.14961185455322265, + "step": 120890 + }, + { + "epoch": 1.045343317394575, + "grad_norm": 0.33358912691158976, + "learning_rate": 2.7865052680764654e-06, + "loss": 0.03623046875, + "step": 120895 + }, + { + "epoch": 1.0453865509161184, + "grad_norm": 0.36301366567648646, + "learning_rate": 2.7863020519509375e-06, + "loss": 0.005894088745117187, + "step": 120900 + }, + { + "epoch": 1.0454297844376617, + "grad_norm": 1.4902965709412517, + "learning_rate": 2.7860988368109666e-06, + "loss": 0.047498321533203124, + "step": 120905 + }, + { + "epoch": 1.0454730179592049, + "grad_norm": 0.5369610402018761, + "learning_rate": 2.7858956226574897e-06, + "loss": 0.16210670471191407, + "step": 120910 + }, + { + "epoch": 1.0455162514807481, + "grad_norm": 4.557785169704988, + "learning_rate": 2.785692409491442e-06, + "loss": 0.035498046875, + "step": 120915 + }, + { + "epoch": 1.0455594850022913, + "grad_norm": 6.098858674626224, + "learning_rate": 2.785489197313763e-06, + "loss": 0.1164306640625, + "step": 120920 + }, + { + "epoch": 1.0456027185238346, + "grad_norm": 0.18748602088278615, + "learning_rate": 2.7852859861253884e-06, + "loss": 0.2350902557373047, + "step": 120925 + }, + { + "epoch": 1.0456459520453778, + "grad_norm": 0.7115797628412397, + "learning_rate": 2.7850827759272564e-06, + "loss": 0.0419403076171875, + "step": 120930 + }, + { + "epoch": 1.0456891855669213, + "grad_norm": 0.1184794174620711, + "learning_rate": 2.784879566720302e-06, + "loss": 0.01658973693847656, + "step": 120935 + }, + { + "epoch": 1.0457324190884645, + "grad_norm": 0.7056141821788505, + "learning_rate": 2.7846763585054654e-06, + "loss": 0.06658248901367188, + "step": 120940 + }, + { + "epoch": 1.0457756526100077, + "grad_norm": 10.25429910718487, + "learning_rate": 2.784473151283682e-06, + "loss": 0.17139778137207032, + "step": 120945 + }, + { + "epoch": 1.045818886131551, + "grad_norm": 2.9761503195405825, + "learning_rate": 2.7842699450558892e-06, + "loss": 0.0830352783203125, + "step": 120950 + }, + { + "epoch": 1.0458621196530942, + "grad_norm": 0.7887486973391529, + "learning_rate": 2.7840667398230254e-06, + "loss": 0.031966018676757815, + "step": 120955 + }, + { + "epoch": 1.0459053531746374, + "grad_norm": 0.5118853195194972, + "learning_rate": 2.783863535586026e-06, + "loss": 0.02886505126953125, + "step": 120960 + }, + { + "epoch": 1.0459485866961808, + "grad_norm": 3.921842304251346, + "learning_rate": 2.7836603323458277e-06, + "loss": 0.03918266296386719, + "step": 120965 + }, + { + "epoch": 1.045991820217724, + "grad_norm": 10.339469543145393, + "learning_rate": 2.78345713010337e-06, + "loss": 0.031402206420898436, + "step": 120970 + }, + { + "epoch": 1.0460350537392673, + "grad_norm": 0.167221867609818, + "learning_rate": 2.7832539288595888e-06, + "loss": 0.02039680480957031, + "step": 120975 + }, + { + "epoch": 1.0460782872608105, + "grad_norm": 0.17886527604707575, + "learning_rate": 2.783050728615422e-06, + "loss": 0.08262653350830078, + "step": 120980 + }, + { + "epoch": 1.0461215207823538, + "grad_norm": 3.9792994613775057, + "learning_rate": 2.7828475293718057e-06, + "loss": 0.037933731079101564, + "step": 120985 + }, + { + "epoch": 1.046164754303897, + "grad_norm": 1.3392878365437095, + "learning_rate": 2.7826443311296767e-06, + "loss": 0.13212051391601562, + "step": 120990 + }, + { + "epoch": 1.0462079878254404, + "grad_norm": 5.365086935030813, + "learning_rate": 2.782441133889973e-06, + "loss": 0.025521469116210938, + "step": 120995 + }, + { + "epoch": 1.0462512213469837, + "grad_norm": 16.51227814370667, + "learning_rate": 2.7822379376536317e-06, + "loss": 0.148065185546875, + "step": 121000 + }, + { + "epoch": 1.046294454868527, + "grad_norm": 1.4744540617774349, + "learning_rate": 2.7820347424215906e-06, + "loss": 0.019735336303710938, + "step": 121005 + }, + { + "epoch": 1.0463376883900701, + "grad_norm": 35.43819548578619, + "learning_rate": 2.7818315481947853e-06, + "loss": 0.13125, + "step": 121010 + }, + { + "epoch": 1.0463809219116134, + "grad_norm": 1.8127312182520263, + "learning_rate": 2.7816283549741543e-06, + "loss": 0.03150634765625, + "step": 121015 + }, + { + "epoch": 1.0464241554331566, + "grad_norm": 5.964536567904252, + "learning_rate": 2.781425162760633e-06, + "loss": 0.04188098907470703, + "step": 121020 + }, + { + "epoch": 1.0464673889546998, + "grad_norm": 1.3761678478496198, + "learning_rate": 2.78122197155516e-06, + "loss": 0.13218994140625, + "step": 121025 + }, + { + "epoch": 1.0465106224762433, + "grad_norm": 0.5551370804350141, + "learning_rate": 2.781018781358671e-06, + "loss": 0.03425445556640625, + "step": 121030 + }, + { + "epoch": 1.0465538559977865, + "grad_norm": 0.9789320277372743, + "learning_rate": 2.780815592172105e-06, + "loss": 0.07256240844726562, + "step": 121035 + }, + { + "epoch": 1.0465970895193297, + "grad_norm": 1.1676335298974514, + "learning_rate": 2.7806124039963986e-06, + "loss": 0.10583438873291015, + "step": 121040 + }, + { + "epoch": 1.046640323040873, + "grad_norm": 2.264955210541038, + "learning_rate": 2.780409216832488e-06, + "loss": 0.021148681640625, + "step": 121045 + }, + { + "epoch": 1.0466835565624162, + "grad_norm": 0.6378679910893195, + "learning_rate": 2.7802060306813102e-06, + "loss": 0.0203765869140625, + "step": 121050 + }, + { + "epoch": 1.0467267900839594, + "grad_norm": 2.662862669703623, + "learning_rate": 2.7800028455438037e-06, + "loss": 0.027323150634765626, + "step": 121055 + }, + { + "epoch": 1.0467700236055029, + "grad_norm": 7.529433725647035, + "learning_rate": 2.779799661420903e-06, + "loss": 0.0999847412109375, + "step": 121060 + }, + { + "epoch": 1.046813257127046, + "grad_norm": 3.971389351341134, + "learning_rate": 2.779596478313548e-06, + "loss": 0.12717323303222655, + "step": 121065 + }, + { + "epoch": 1.0468564906485893, + "grad_norm": 42.159814092466895, + "learning_rate": 2.7793932962226746e-06, + "loss": 0.1356487274169922, + "step": 121070 + }, + { + "epoch": 1.0468997241701326, + "grad_norm": 3.219000315030826, + "learning_rate": 2.7791901151492204e-06, + "loss": 0.08759346008300781, + "step": 121075 + }, + { + "epoch": 1.0469429576916758, + "grad_norm": 0.7257988423865692, + "learning_rate": 2.7789869350941207e-06, + "loss": 0.058485221862792966, + "step": 121080 + }, + { + "epoch": 1.046986191213219, + "grad_norm": 14.076996290188632, + "learning_rate": 2.7787837560583143e-06, + "loss": 0.0363861083984375, + "step": 121085 + }, + { + "epoch": 1.0470294247347622, + "grad_norm": 8.06022246123059, + "learning_rate": 2.7785805780427375e-06, + "loss": 0.17227325439453126, + "step": 121090 + }, + { + "epoch": 1.0470726582563057, + "grad_norm": 6.110549832355016, + "learning_rate": 2.7783774010483266e-06, + "loss": 0.15015945434570313, + "step": 121095 + }, + { + "epoch": 1.047115891777849, + "grad_norm": 5.071772004751737, + "learning_rate": 2.778174225076021e-06, + "loss": 0.022369384765625, + "step": 121100 + }, + { + "epoch": 1.0471591252993921, + "grad_norm": 36.32385329424598, + "learning_rate": 2.777971050126756e-06, + "loss": 0.1632232666015625, + "step": 121105 + }, + { + "epoch": 1.0472023588209354, + "grad_norm": 3.4635799910910716, + "learning_rate": 2.7777678762014683e-06, + "loss": 0.12350378036499024, + "step": 121110 + }, + { + "epoch": 1.0472455923424786, + "grad_norm": 22.0624691324216, + "learning_rate": 2.7775647033010963e-06, + "loss": 0.1558521270751953, + "step": 121115 + }, + { + "epoch": 1.0472888258640218, + "grad_norm": 0.2705462930726047, + "learning_rate": 2.7773615314265757e-06, + "loss": 0.019640636444091798, + "step": 121120 + }, + { + "epoch": 1.0473320593855653, + "grad_norm": 0.974044085145553, + "learning_rate": 2.7771583605788437e-06, + "loss": 0.101739501953125, + "step": 121125 + }, + { + "epoch": 1.0473752929071085, + "grad_norm": 1.3569415989796973, + "learning_rate": 2.776955190758838e-06, + "loss": 0.03993492126464844, + "step": 121130 + }, + { + "epoch": 1.0474185264286517, + "grad_norm": 4.521204836694981, + "learning_rate": 2.7767520219674956e-06, + "loss": 0.04512100219726563, + "step": 121135 + }, + { + "epoch": 1.047461759950195, + "grad_norm": 0.0641288721860863, + "learning_rate": 2.7765488542057528e-06, + "loss": 0.2152374267578125, + "step": 121140 + }, + { + "epoch": 1.0475049934717382, + "grad_norm": 0.7206393144693776, + "learning_rate": 2.7763456874745475e-06, + "loss": 0.35561599731445315, + "step": 121145 + }, + { + "epoch": 1.0475482269932814, + "grad_norm": 1.8234716499034251, + "learning_rate": 2.7761425217748157e-06, + "loss": 0.186053466796875, + "step": 121150 + }, + { + "epoch": 1.0475914605148249, + "grad_norm": 0.7088164410038771, + "learning_rate": 2.775939357107494e-06, + "loss": 0.025053024291992188, + "step": 121155 + }, + { + "epoch": 1.047634694036368, + "grad_norm": 0.9769499513385781, + "learning_rate": 2.775736193473521e-06, + "loss": 0.06481246948242188, + "step": 121160 + }, + { + "epoch": 1.0476779275579113, + "grad_norm": 3.0637001632194893, + "learning_rate": 2.7755330308738328e-06, + "loss": 0.05831069946289062, + "step": 121165 + }, + { + "epoch": 1.0477211610794546, + "grad_norm": 4.666206873545955, + "learning_rate": 2.7753298693093662e-06, + "loss": 0.05121192932128906, + "step": 121170 + }, + { + "epoch": 1.0477643946009978, + "grad_norm": 0.43525564691486507, + "learning_rate": 2.7751267087810595e-06, + "loss": 0.11072425842285157, + "step": 121175 + }, + { + "epoch": 1.047807628122541, + "grad_norm": 1.4362459261846963, + "learning_rate": 2.7749235492898476e-06, + "loss": 0.19336090087890626, + "step": 121180 + }, + { + "epoch": 1.0478508616440843, + "grad_norm": 6.5667264743385, + "learning_rate": 2.7747203908366686e-06, + "loss": 0.1521759033203125, + "step": 121185 + }, + { + "epoch": 1.0478940951656277, + "grad_norm": 0.3212198220206892, + "learning_rate": 2.7745172334224585e-06, + "loss": 0.044655990600585935, + "step": 121190 + }, + { + "epoch": 1.047937328687171, + "grad_norm": 1.1491578575113284, + "learning_rate": 2.774314077048155e-06, + "loss": 0.07917633056640624, + "step": 121195 + }, + { + "epoch": 1.0479805622087142, + "grad_norm": 35.076730547818386, + "learning_rate": 2.7741109217146963e-06, + "loss": 0.20327911376953126, + "step": 121200 + }, + { + "epoch": 1.0480237957302574, + "grad_norm": 15.897944966352723, + "learning_rate": 2.7739077674230177e-06, + "loss": 0.044034576416015624, + "step": 121205 + }, + { + "epoch": 1.0480670292518006, + "grad_norm": 2.2258339724977216, + "learning_rate": 2.7737046141740566e-06, + "loss": 0.017680549621582033, + "step": 121210 + }, + { + "epoch": 1.0481102627733438, + "grad_norm": 0.8925221818209963, + "learning_rate": 2.773501461968749e-06, + "loss": 0.2598846435546875, + "step": 121215 + }, + { + "epoch": 1.0481534962948873, + "grad_norm": 27.342065158181253, + "learning_rate": 2.773298310808033e-06, + "loss": 0.19834136962890625, + "step": 121220 + }, + { + "epoch": 1.0481967298164305, + "grad_norm": 2.9090575282398543, + "learning_rate": 2.7730951606928446e-06, + "loss": 0.06285400390625, + "step": 121225 + }, + { + "epoch": 1.0482399633379738, + "grad_norm": 25.773412515103512, + "learning_rate": 2.7728920116241223e-06, + "loss": 0.10070648193359374, + "step": 121230 + }, + { + "epoch": 1.048283196859517, + "grad_norm": 1.4092448342822368, + "learning_rate": 2.7726888636028017e-06, + "loss": 0.0177764892578125, + "step": 121235 + }, + { + "epoch": 1.0483264303810602, + "grad_norm": 5.474231695460934, + "learning_rate": 2.7724857166298202e-06, + "loss": 0.07541923522949219, + "step": 121240 + }, + { + "epoch": 1.0483696639026034, + "grad_norm": 15.762600069086078, + "learning_rate": 2.772282570706114e-06, + "loss": 0.0575958251953125, + "step": 121245 + }, + { + "epoch": 1.048412897424147, + "grad_norm": 0.4516091966937639, + "learning_rate": 2.77207942583262e-06, + "loss": 0.030315399169921875, + "step": 121250 + }, + { + "epoch": 1.0484561309456901, + "grad_norm": 1.7388039188373825, + "learning_rate": 2.771876282010276e-06, + "loss": 0.0565521240234375, + "step": 121255 + }, + { + "epoch": 1.0484993644672334, + "grad_norm": 1.9090660246312845, + "learning_rate": 2.771673139240019e-06, + "loss": 0.08451404571533203, + "step": 121260 + }, + { + "epoch": 1.0485425979887766, + "grad_norm": 4.190831217502002, + "learning_rate": 2.771469997522785e-06, + "loss": 0.026194000244140626, + "step": 121265 + }, + { + "epoch": 1.0485858315103198, + "grad_norm": 0.6662211170605541, + "learning_rate": 2.7712668568595115e-06, + "loss": 0.12489471435546876, + "step": 121270 + }, + { + "epoch": 1.048629065031863, + "grad_norm": 4.136739275295244, + "learning_rate": 2.7710637172511343e-06, + "loss": 0.07104110717773438, + "step": 121275 + }, + { + "epoch": 1.0486722985534063, + "grad_norm": 22.216921941476166, + "learning_rate": 2.7708605786985916e-06, + "loss": 0.044596290588378905, + "step": 121280 + }, + { + "epoch": 1.0487155320749497, + "grad_norm": 51.667457443109974, + "learning_rate": 2.7706574412028183e-06, + "loss": 0.137322998046875, + "step": 121285 + }, + { + "epoch": 1.048758765596493, + "grad_norm": 2.111173418596708, + "learning_rate": 2.770454304764754e-06, + "loss": 0.10713958740234375, + "step": 121290 + }, + { + "epoch": 1.0488019991180362, + "grad_norm": 2.825615623667788, + "learning_rate": 2.770251169385334e-06, + "loss": 0.29588470458984373, + "step": 121295 + }, + { + "epoch": 1.0488452326395794, + "grad_norm": 0.15800416116671076, + "learning_rate": 2.7700480350654956e-06, + "loss": 0.28194141387939453, + "step": 121300 + }, + { + "epoch": 1.0488884661611226, + "grad_norm": 12.425994414714488, + "learning_rate": 2.7698449018061745e-06, + "loss": 0.12440032958984375, + "step": 121305 + }, + { + "epoch": 1.0489316996826659, + "grad_norm": 38.661808984882306, + "learning_rate": 2.7696417696083093e-06, + "loss": 0.14321022033691405, + "step": 121310 + }, + { + "epoch": 1.0489749332042093, + "grad_norm": 1.9307187037984908, + "learning_rate": 2.7694386384728344e-06, + "loss": 0.14753055572509766, + "step": 121315 + }, + { + "epoch": 1.0490181667257525, + "grad_norm": 6.674162995266469, + "learning_rate": 2.769235508400689e-06, + "loss": 0.2595935821533203, + "step": 121320 + }, + { + "epoch": 1.0490614002472958, + "grad_norm": 0.7535593595669393, + "learning_rate": 2.7690323793928097e-06, + "loss": 0.02971038818359375, + "step": 121325 + }, + { + "epoch": 1.049104633768839, + "grad_norm": 4.150866003587794, + "learning_rate": 2.7688292514501328e-06, + "loss": 0.11920909881591797, + "step": 121330 + }, + { + "epoch": 1.0491478672903822, + "grad_norm": 1.0826171800666273, + "learning_rate": 2.7686261245735937e-06, + "loss": 0.129400634765625, + "step": 121335 + }, + { + "epoch": 1.0491911008119255, + "grad_norm": 18.48444940623243, + "learning_rate": 2.7684229987641315e-06, + "loss": 0.09312286376953124, + "step": 121340 + }, + { + "epoch": 1.0492343343334687, + "grad_norm": 4.042591252171545, + "learning_rate": 2.7682198740226818e-06, + "loss": 0.027654266357421874, + "step": 121345 + }, + { + "epoch": 1.0492775678550121, + "grad_norm": 1.0718313621890931, + "learning_rate": 2.7680167503501804e-06, + "loss": 0.05238037109375, + "step": 121350 + }, + { + "epoch": 1.0493208013765554, + "grad_norm": 1.525953668161483, + "learning_rate": 2.7678136277475663e-06, + "loss": 0.12240524291992187, + "step": 121355 + }, + { + "epoch": 1.0493640348980986, + "grad_norm": 5.906304283321127, + "learning_rate": 2.7676105062157755e-06, + "loss": 0.11350555419921875, + "step": 121360 + }, + { + "epoch": 1.0494072684196418, + "grad_norm": 0.23722208058067945, + "learning_rate": 2.7674073857557435e-06, + "loss": 0.07317123413085938, + "step": 121365 + }, + { + "epoch": 1.049450501941185, + "grad_norm": 11.385631875484291, + "learning_rate": 2.767204266368409e-06, + "loss": 0.318658447265625, + "step": 121370 + }, + { + "epoch": 1.0494937354627283, + "grad_norm": 1.4432568698338866, + "learning_rate": 2.767001148054708e-06, + "loss": 0.21139945983886718, + "step": 121375 + }, + { + "epoch": 1.0495369689842717, + "grad_norm": 2.54603432247725, + "learning_rate": 2.7667980308155756e-06, + "loss": 0.055165863037109374, + "step": 121380 + }, + { + "epoch": 1.049580202505815, + "grad_norm": 3.4826723749708175, + "learning_rate": 2.7665949146519514e-06, + "loss": 0.07172317504882812, + "step": 121385 + }, + { + "epoch": 1.0496234360273582, + "grad_norm": 2.371513586157829, + "learning_rate": 2.76639179956477e-06, + "loss": 0.04789390563964844, + "step": 121390 + }, + { + "epoch": 1.0496666695489014, + "grad_norm": 8.710406979372143, + "learning_rate": 2.76618868555497e-06, + "loss": 0.02769622802734375, + "step": 121395 + }, + { + "epoch": 1.0497099030704446, + "grad_norm": 0.18545063518531407, + "learning_rate": 2.765985572623487e-06, + "loss": 0.25098609924316406, + "step": 121400 + }, + { + "epoch": 1.0497531365919879, + "grad_norm": 9.354552819295181, + "learning_rate": 2.765782460771258e-06, + "loss": 0.0890899658203125, + "step": 121405 + }, + { + "epoch": 1.0497963701135313, + "grad_norm": 1.378732263613071, + "learning_rate": 2.765579349999218e-06, + "loss": 0.059766387939453124, + "step": 121410 + }, + { + "epoch": 1.0498396036350746, + "grad_norm": 29.581163158657848, + "learning_rate": 2.7653762403083065e-06, + "loss": 0.10867233276367187, + "step": 121415 + }, + { + "epoch": 1.0498828371566178, + "grad_norm": 0.23822027303897772, + "learning_rate": 2.765173131699459e-06, + "loss": 0.03264312744140625, + "step": 121420 + }, + { + "epoch": 1.049926070678161, + "grad_norm": 3.757425250424092, + "learning_rate": 2.7649700241736124e-06, + "loss": 0.1615751266479492, + "step": 121425 + }, + { + "epoch": 1.0499693041997042, + "grad_norm": 7.261038630486752, + "learning_rate": 2.764766917731703e-06, + "loss": 0.0263824462890625, + "step": 121430 + }, + { + "epoch": 1.0500125377212475, + "grad_norm": 3.9963923354058495, + "learning_rate": 2.7645638123746683e-06, + "loss": 0.1080780029296875, + "step": 121435 + }, + { + "epoch": 1.0500557712427907, + "grad_norm": 1.0677233598010614, + "learning_rate": 2.764360708103444e-06, + "loss": 0.023789215087890624, + "step": 121440 + }, + { + "epoch": 1.0500990047643342, + "grad_norm": 1.9877044765513772, + "learning_rate": 2.764157604918967e-06, + "loss": 0.14512214660644532, + "step": 121445 + }, + { + "epoch": 1.0501422382858774, + "grad_norm": 0.5320078586437692, + "learning_rate": 2.7639545028221743e-06, + "loss": 0.3650714874267578, + "step": 121450 + }, + { + "epoch": 1.0501854718074206, + "grad_norm": 0.96514156502, + "learning_rate": 2.7637514018140032e-06, + "loss": 0.021312713623046875, + "step": 121455 + }, + { + "epoch": 1.0502287053289638, + "grad_norm": 2.5373690262053326, + "learning_rate": 2.7635483018953897e-06, + "loss": 0.12713756561279296, + "step": 121460 + }, + { + "epoch": 1.050271938850507, + "grad_norm": 5.85318912791547, + "learning_rate": 2.763345203067271e-06, + "loss": 0.019557952880859375, + "step": 121465 + }, + { + "epoch": 1.0503151723720503, + "grad_norm": 7.32414446054597, + "learning_rate": 2.763142105330582e-06, + "loss": 0.02004852294921875, + "step": 121470 + }, + { + "epoch": 1.0503584058935937, + "grad_norm": 8.188701710873433, + "learning_rate": 2.7629390086862613e-06, + "loss": 0.16009292602539063, + "step": 121475 + }, + { + "epoch": 1.050401639415137, + "grad_norm": 10.673826419886774, + "learning_rate": 2.7627359131352445e-06, + "loss": 0.20696239471435546, + "step": 121480 + }, + { + "epoch": 1.0504448729366802, + "grad_norm": 24.329032550576706, + "learning_rate": 2.7625328186784696e-06, + "loss": 0.087701416015625, + "step": 121485 + }, + { + "epoch": 1.0504881064582234, + "grad_norm": 2.929748277339329, + "learning_rate": 2.7623297253168724e-06, + "loss": 0.0575678825378418, + "step": 121490 + }, + { + "epoch": 1.0505313399797667, + "grad_norm": 2.1060473648722717, + "learning_rate": 2.762126633051389e-06, + "loss": 0.076812744140625, + "step": 121495 + }, + { + "epoch": 1.05057457350131, + "grad_norm": 0.7020465166904523, + "learning_rate": 2.7619235418829564e-06, + "loss": 0.00691070556640625, + "step": 121500 + }, + { + "epoch": 1.0506178070228533, + "grad_norm": 2.0819749227049935, + "learning_rate": 2.761720451812511e-06, + "loss": 0.03101692199707031, + "step": 121505 + }, + { + "epoch": 1.0506610405443966, + "grad_norm": 1.4684846179165778, + "learning_rate": 2.7615173628409905e-06, + "loss": 0.0153533935546875, + "step": 121510 + }, + { + "epoch": 1.0507042740659398, + "grad_norm": 6.219005476486279, + "learning_rate": 2.761314274969331e-06, + "loss": 0.05100517272949219, + "step": 121515 + }, + { + "epoch": 1.050747507587483, + "grad_norm": 3.6736139926464157, + "learning_rate": 2.761111188198469e-06, + "loss": 0.06764755249023438, + "step": 121520 + }, + { + "epoch": 1.0507907411090263, + "grad_norm": 9.48439233101324, + "learning_rate": 2.760908102529341e-06, + "loss": 0.1626190185546875, + "step": 121525 + }, + { + "epoch": 1.0508339746305695, + "grad_norm": 18.446012552774373, + "learning_rate": 2.7607050179628833e-06, + "loss": 0.1353790283203125, + "step": 121530 + }, + { + "epoch": 1.0508772081521127, + "grad_norm": 0.2826440434231839, + "learning_rate": 2.7605019345000337e-06, + "loss": 0.3096527099609375, + "step": 121535 + }, + { + "epoch": 1.0509204416736562, + "grad_norm": 4.110610488582549, + "learning_rate": 2.7602988521417265e-06, + "loss": 0.34839324951171874, + "step": 121540 + }, + { + "epoch": 1.0509636751951994, + "grad_norm": 0.6630512505244163, + "learning_rate": 2.7600957708889007e-06, + "loss": 0.03751220703125, + "step": 121545 + }, + { + "epoch": 1.0510069087167426, + "grad_norm": 0.0854969418005236, + "learning_rate": 2.7598926907424926e-06, + "loss": 0.01775703430175781, + "step": 121550 + }, + { + "epoch": 1.0510501422382859, + "grad_norm": 13.153661258216786, + "learning_rate": 2.759689611703438e-06, + "loss": 0.0705169677734375, + "step": 121555 + }, + { + "epoch": 1.051093375759829, + "grad_norm": 0.05623863118338361, + "learning_rate": 2.759486533772673e-06, + "loss": 0.22607097625732422, + "step": 121560 + }, + { + "epoch": 1.0511366092813723, + "grad_norm": 15.043419422260564, + "learning_rate": 2.7592834569511355e-06, + "loss": 0.06429100036621094, + "step": 121565 + }, + { + "epoch": 1.0511798428029158, + "grad_norm": 1.4984693185073124, + "learning_rate": 2.75908038123976e-06, + "loss": 0.056536865234375, + "step": 121570 + }, + { + "epoch": 1.051223076324459, + "grad_norm": 9.877725925124244, + "learning_rate": 2.758877306639486e-06, + "loss": 0.31786575317382815, + "step": 121575 + }, + { + "epoch": 1.0512663098460022, + "grad_norm": 2.048864875890377, + "learning_rate": 2.7586742331512487e-06, + "loss": 0.06171350479125977, + "step": 121580 + }, + { + "epoch": 1.0513095433675455, + "grad_norm": 0.38731577429060826, + "learning_rate": 2.758471160775984e-06, + "loss": 0.2542816162109375, + "step": 121585 + }, + { + "epoch": 1.0513527768890887, + "grad_norm": 10.54105078188751, + "learning_rate": 2.758268089514629e-06, + "loss": 0.05145339965820313, + "step": 121590 + }, + { + "epoch": 1.051396010410632, + "grad_norm": 21.348870531358386, + "learning_rate": 2.7580650193681203e-06, + "loss": 0.104034423828125, + "step": 121595 + }, + { + "epoch": 1.0514392439321751, + "grad_norm": 2.0764647592789722, + "learning_rate": 2.7578619503373933e-06, + "loss": 0.036554908752441405, + "step": 121600 + }, + { + "epoch": 1.0514824774537186, + "grad_norm": 0.6060900239032465, + "learning_rate": 2.757658882423387e-06, + "loss": 0.12188072204589843, + "step": 121605 + }, + { + "epoch": 1.0515257109752618, + "grad_norm": 0.9860445569500871, + "learning_rate": 2.7574558156270362e-06, + "loss": 0.305181884765625, + "step": 121610 + }, + { + "epoch": 1.051568944496805, + "grad_norm": 28.98918745922747, + "learning_rate": 2.7572527499492773e-06, + "loss": 0.05396881103515625, + "step": 121615 + }, + { + "epoch": 1.0516121780183483, + "grad_norm": 0.9880388290797764, + "learning_rate": 2.7570496853910477e-06, + "loss": 0.06775779724121093, + "step": 121620 + }, + { + "epoch": 1.0516554115398915, + "grad_norm": 2.321025763070068, + "learning_rate": 2.7568466219532835e-06, + "loss": 0.07181396484375, + "step": 121625 + }, + { + "epoch": 1.0516986450614347, + "grad_norm": 0.4862537088592047, + "learning_rate": 2.756643559636922e-06, + "loss": 0.011284637451171874, + "step": 121630 + }, + { + "epoch": 1.0517418785829782, + "grad_norm": 0.3132764101164704, + "learning_rate": 2.7564404984428963e-06, + "loss": 0.04292869567871094, + "step": 121635 + }, + { + "epoch": 1.0517851121045214, + "grad_norm": 4.705683000707841, + "learning_rate": 2.7562374383721478e-06, + "loss": 0.09886894226074219, + "step": 121640 + }, + { + "epoch": 1.0518283456260646, + "grad_norm": 1.473139471841382, + "learning_rate": 2.7560343794256093e-06, + "loss": 0.0934661865234375, + "step": 121645 + }, + { + "epoch": 1.0518715791476079, + "grad_norm": 5.4488387693407025, + "learning_rate": 2.75583132160422e-06, + "loss": 0.02523193359375, + "step": 121650 + }, + { + "epoch": 1.051914812669151, + "grad_norm": 21.27919381023064, + "learning_rate": 2.7556282649089146e-06, + "loss": 0.07660980224609375, + "step": 121655 + }, + { + "epoch": 1.0519580461906943, + "grad_norm": 3.9908476016981442, + "learning_rate": 2.7554252093406303e-06, + "loss": 0.01937713623046875, + "step": 121660 + }, + { + "epoch": 1.0520012797122378, + "grad_norm": 2.4733113336869033, + "learning_rate": 2.755222154900302e-06, + "loss": 0.0553497314453125, + "step": 121665 + }, + { + "epoch": 1.052044513233781, + "grad_norm": 0.7726368292346915, + "learning_rate": 2.7550191015888688e-06, + "loss": 0.0231201171875, + "step": 121670 + }, + { + "epoch": 1.0520877467553242, + "grad_norm": 17.501020049498365, + "learning_rate": 2.754816049407265e-06, + "loss": 0.191705322265625, + "step": 121675 + }, + { + "epoch": 1.0521309802768675, + "grad_norm": 30.48213326475131, + "learning_rate": 2.754612998356429e-06, + "loss": 0.34440765380859373, + "step": 121680 + }, + { + "epoch": 1.0521742137984107, + "grad_norm": 7.280419276906502, + "learning_rate": 2.754409948437296e-06, + "loss": 0.08818588256835938, + "step": 121685 + }, + { + "epoch": 1.052217447319954, + "grad_norm": 1.4238515951505446, + "learning_rate": 2.754206899650802e-06, + "loss": 0.031243896484375, + "step": 121690 + }, + { + "epoch": 1.0522606808414974, + "grad_norm": 17.16768817399536, + "learning_rate": 2.7540038519978843e-06, + "loss": 0.13302154541015626, + "step": 121695 + }, + { + "epoch": 1.0523039143630406, + "grad_norm": 0.13266805965308015, + "learning_rate": 2.7538008054794787e-06, + "loss": 0.06813507080078125, + "step": 121700 + }, + { + "epoch": 1.0523471478845838, + "grad_norm": 0.9364234160549211, + "learning_rate": 2.753597760096522e-06, + "loss": 0.10017738342285157, + "step": 121705 + }, + { + "epoch": 1.052390381406127, + "grad_norm": 4.808569374915447, + "learning_rate": 2.7533947158499516e-06, + "loss": 0.08443069458007812, + "step": 121710 + }, + { + "epoch": 1.0524336149276703, + "grad_norm": 16.684819443240567, + "learning_rate": 2.7531916727407024e-06, + "loss": 0.04661712646484375, + "step": 121715 + }, + { + "epoch": 1.0524768484492135, + "grad_norm": 10.331818740388867, + "learning_rate": 2.7529886307697117e-06, + "loss": 0.06614723205566406, + "step": 121720 + }, + { + "epoch": 1.0525200819707567, + "grad_norm": 5.964266069158523, + "learning_rate": 2.752785589937915e-06, + "loss": 0.06505966186523438, + "step": 121725 + }, + { + "epoch": 1.0525633154923002, + "grad_norm": 43.54979960428201, + "learning_rate": 2.7525825502462496e-06, + "loss": 0.130859375, + "step": 121730 + }, + { + "epoch": 1.0526065490138434, + "grad_norm": 8.389078429994013, + "learning_rate": 2.752379511695651e-06, + "loss": 0.064544677734375, + "step": 121735 + }, + { + "epoch": 1.0526497825353867, + "grad_norm": 6.271229804603006, + "learning_rate": 2.7521764742870574e-06, + "loss": 0.06295967102050781, + "step": 121740 + }, + { + "epoch": 1.0526930160569299, + "grad_norm": 3.044431230694903, + "learning_rate": 2.7519734380214038e-06, + "loss": 0.019899368286132812, + "step": 121745 + }, + { + "epoch": 1.0527362495784731, + "grad_norm": 3.689088034434479, + "learning_rate": 2.7517704028996266e-06, + "loss": 0.037677574157714847, + "step": 121750 + }, + { + "epoch": 1.0527794831000163, + "grad_norm": 1.0574373967377615, + "learning_rate": 2.751567368922662e-06, + "loss": 0.02972564697265625, + "step": 121755 + }, + { + "epoch": 1.0528227166215598, + "grad_norm": 0.5446987225663545, + "learning_rate": 2.7513643360914464e-06, + "loss": 0.17610702514648438, + "step": 121760 + }, + { + "epoch": 1.052865950143103, + "grad_norm": 0.9292733962439526, + "learning_rate": 2.751161304406917e-06, + "loss": 0.13117599487304688, + "step": 121765 + }, + { + "epoch": 1.0529091836646463, + "grad_norm": 0.12057465222336582, + "learning_rate": 2.75095827387001e-06, + "loss": 0.013474273681640624, + "step": 121770 + }, + { + "epoch": 1.0529524171861895, + "grad_norm": 0.3006060997437879, + "learning_rate": 2.7507552444816613e-06, + "loss": 0.00894012451171875, + "step": 121775 + }, + { + "epoch": 1.0529956507077327, + "grad_norm": 6.860813917218658, + "learning_rate": 2.7505522162428078e-06, + "loss": 0.09908294677734375, + "step": 121780 + }, + { + "epoch": 1.053038884229276, + "grad_norm": 44.30312570278749, + "learning_rate": 2.7503491891543846e-06, + "loss": 0.17351360321044923, + "step": 121785 + }, + { + "epoch": 1.0530821177508192, + "grad_norm": 69.84677673124584, + "learning_rate": 2.7501461632173296e-06, + "loss": 0.03961601257324219, + "step": 121790 + }, + { + "epoch": 1.0531253512723626, + "grad_norm": 3.1326063305628864, + "learning_rate": 2.749943138432577e-06, + "loss": 0.086651611328125, + "step": 121795 + }, + { + "epoch": 1.0531685847939058, + "grad_norm": 4.564565728665453, + "learning_rate": 2.749740114801066e-06, + "loss": 0.036937713623046875, + "step": 121800 + }, + { + "epoch": 1.053211818315449, + "grad_norm": 2.0954964130626217, + "learning_rate": 2.7495370923237313e-06, + "loss": 0.04446601867675781, + "step": 121805 + }, + { + "epoch": 1.0532550518369923, + "grad_norm": 4.279638109241103, + "learning_rate": 2.7493340710015095e-06, + "loss": 0.16011199951171876, + "step": 121810 + }, + { + "epoch": 1.0532982853585355, + "grad_norm": 0.7077281166516967, + "learning_rate": 2.749131050835336e-06, + "loss": 0.10121231079101563, + "step": 121815 + }, + { + "epoch": 1.0533415188800788, + "grad_norm": 0.5014626049041535, + "learning_rate": 2.7489280318261487e-06, + "loss": 0.049901580810546874, + "step": 121820 + }, + { + "epoch": 1.0533847524016222, + "grad_norm": 0.7735633821917118, + "learning_rate": 2.748725013974882e-06, + "loss": 0.03986663818359375, + "step": 121825 + }, + { + "epoch": 1.0534279859231654, + "grad_norm": 9.079642355083635, + "learning_rate": 2.748521997282474e-06, + "loss": 0.05578536987304687, + "step": 121830 + }, + { + "epoch": 1.0534712194447087, + "grad_norm": 0.8543502373235474, + "learning_rate": 2.748318981749861e-06, + "loss": 0.032034111022949216, + "step": 121835 + }, + { + "epoch": 1.053514452966252, + "grad_norm": 2.4618022908135844, + "learning_rate": 2.748115967377978e-06, + "loss": 0.13162155151367189, + "step": 121840 + }, + { + "epoch": 1.0535576864877951, + "grad_norm": 2.4611331858715197, + "learning_rate": 2.7479129541677622e-06, + "loss": 0.09395828247070312, + "step": 121845 + }, + { + "epoch": 1.0536009200093384, + "grad_norm": 34.70155173939953, + "learning_rate": 2.7477099421201498e-06, + "loss": 0.09673185348510742, + "step": 121850 + }, + { + "epoch": 1.0536441535308818, + "grad_norm": 1.8527494361742176, + "learning_rate": 2.7475069312360755e-06, + "loss": 0.01475982666015625, + "step": 121855 + }, + { + "epoch": 1.053687387052425, + "grad_norm": 6.86591749408609, + "learning_rate": 2.747303921516478e-06, + "loss": 0.112664794921875, + "step": 121860 + }, + { + "epoch": 1.0537306205739683, + "grad_norm": 6.051930526695494, + "learning_rate": 2.747100912962293e-06, + "loss": 0.17846527099609374, + "step": 121865 + }, + { + "epoch": 1.0537738540955115, + "grad_norm": 12.839678636261477, + "learning_rate": 2.746897905574455e-06, + "loss": 0.03082122802734375, + "step": 121870 + }, + { + "epoch": 1.0538170876170547, + "grad_norm": 0.8144094250053191, + "learning_rate": 2.7466948993539024e-06, + "loss": 0.021181869506835937, + "step": 121875 + }, + { + "epoch": 1.053860321138598, + "grad_norm": 7.089768327062298, + "learning_rate": 2.7464918943015707e-06, + "loss": 0.042282485961914064, + "step": 121880 + }, + { + "epoch": 1.0539035546601412, + "grad_norm": 1.1055377268920006, + "learning_rate": 2.7462888904183957e-06, + "loss": 0.017000579833984376, + "step": 121885 + }, + { + "epoch": 1.0539467881816846, + "grad_norm": 29.24769767384636, + "learning_rate": 2.7460858877053125e-06, + "loss": 0.049008655548095706, + "step": 121890 + }, + { + "epoch": 1.0539900217032279, + "grad_norm": 7.07520273113524, + "learning_rate": 2.7458828861632603e-06, + "loss": 0.11202850341796874, + "step": 121895 + }, + { + "epoch": 1.054033255224771, + "grad_norm": 0.22111643926743976, + "learning_rate": 2.745679885793173e-06, + "loss": 0.07275238037109374, + "step": 121900 + }, + { + "epoch": 1.0540764887463143, + "grad_norm": 14.043195773051078, + "learning_rate": 2.745476886595989e-06, + "loss": 0.19298744201660156, + "step": 121905 + }, + { + "epoch": 1.0541197222678576, + "grad_norm": 0.34348727404434015, + "learning_rate": 2.745273888572642e-06, + "loss": 0.09588947296142578, + "step": 121910 + }, + { + "epoch": 1.0541629557894008, + "grad_norm": 9.351900761092574, + "learning_rate": 2.7450708917240698e-06, + "loss": 0.08120098114013671, + "step": 121915 + }, + { + "epoch": 1.0542061893109442, + "grad_norm": 3.2862567347731417, + "learning_rate": 2.7448678960512067e-06, + "loss": 0.01598224639892578, + "step": 121920 + }, + { + "epoch": 1.0542494228324875, + "grad_norm": 24.374187695715193, + "learning_rate": 2.744664901554992e-06, + "loss": 0.1186309814453125, + "step": 121925 + }, + { + "epoch": 1.0542926563540307, + "grad_norm": 40.448546604440644, + "learning_rate": 2.744461908236359e-06, + "loss": 0.16321182250976562, + "step": 121930 + }, + { + "epoch": 1.054335889875574, + "grad_norm": 0.2143759750839342, + "learning_rate": 2.7442589160962464e-06, + "loss": 0.04500732421875, + "step": 121935 + }, + { + "epoch": 1.0543791233971171, + "grad_norm": 0.2089028700061234, + "learning_rate": 2.7440559251355886e-06, + "loss": 0.024309730529785155, + "step": 121940 + }, + { + "epoch": 1.0544223569186604, + "grad_norm": 0.520944597608542, + "learning_rate": 2.7438529353553224e-06, + "loss": 0.2031341552734375, + "step": 121945 + }, + { + "epoch": 1.0544655904402038, + "grad_norm": 1.0405871823172355, + "learning_rate": 2.7436499467563826e-06, + "loss": 0.05559520721435547, + "step": 121950 + }, + { + "epoch": 1.054508823961747, + "grad_norm": 5.443944283897507, + "learning_rate": 2.7434469593397077e-06, + "loss": 0.033609771728515626, + "step": 121955 + }, + { + "epoch": 1.0545520574832903, + "grad_norm": 2.261083926534501, + "learning_rate": 2.743243973106232e-06, + "loss": 0.123443603515625, + "step": 121960 + }, + { + "epoch": 1.0545952910048335, + "grad_norm": 0.6405576477974849, + "learning_rate": 2.7430409880568934e-06, + "loss": 0.0103271484375, + "step": 121965 + }, + { + "epoch": 1.0546385245263767, + "grad_norm": 13.982260066886651, + "learning_rate": 2.742838004192627e-06, + "loss": 0.29798164367675783, + "step": 121970 + }, + { + "epoch": 1.05468175804792, + "grad_norm": 0.9378883721526994, + "learning_rate": 2.742635021514369e-06, + "loss": 0.1362152099609375, + "step": 121975 + }, + { + "epoch": 1.0547249915694632, + "grad_norm": 31.719425273632034, + "learning_rate": 2.7424320400230545e-06, + "loss": 0.09289703369140626, + "step": 121980 + }, + { + "epoch": 1.0547682250910067, + "grad_norm": 2.1689483964394545, + "learning_rate": 2.7422290597196212e-06, + "loss": 0.007654953002929688, + "step": 121985 + }, + { + "epoch": 1.0548114586125499, + "grad_norm": 0.311524855696389, + "learning_rate": 2.7420260806050046e-06, + "loss": 0.01367778778076172, + "step": 121990 + }, + { + "epoch": 1.054854692134093, + "grad_norm": 53.847623001152904, + "learning_rate": 2.741823102680141e-06, + "loss": 0.3438079833984375, + "step": 121995 + }, + { + "epoch": 1.0548979256556363, + "grad_norm": 3.6557719186674267, + "learning_rate": 2.741620125945967e-06, + "loss": 0.021736526489257814, + "step": 122000 + }, + { + "epoch": 1.0549411591771796, + "grad_norm": 1.175775832602887, + "learning_rate": 2.7414171504034184e-06, + "loss": 0.0132293701171875, + "step": 122005 + }, + { + "epoch": 1.0549843926987228, + "grad_norm": 2.637765603302813, + "learning_rate": 2.74121417605343e-06, + "loss": 0.18129959106445312, + "step": 122010 + }, + { + "epoch": 1.0550276262202662, + "grad_norm": 14.065864916188854, + "learning_rate": 2.741011202896938e-06, + "loss": 0.08001899719238281, + "step": 122015 + }, + { + "epoch": 1.0550708597418095, + "grad_norm": 6.084820444632215, + "learning_rate": 2.7408082309348814e-06, + "loss": 0.251971435546875, + "step": 122020 + }, + { + "epoch": 1.0551140932633527, + "grad_norm": 0.3926440411389335, + "learning_rate": 2.740605260168194e-06, + "loss": 0.041123580932617185, + "step": 122025 + }, + { + "epoch": 1.055157326784896, + "grad_norm": 0.6010046306933629, + "learning_rate": 2.7404022905978122e-06, + "loss": 0.05382194519042969, + "step": 122030 + }, + { + "epoch": 1.0552005603064392, + "grad_norm": 0.2004495649952842, + "learning_rate": 2.7401993222246716e-06, + "loss": 0.05563812255859375, + "step": 122035 + }, + { + "epoch": 1.0552437938279824, + "grad_norm": 1.0240146987765224, + "learning_rate": 2.739996355049709e-06, + "loss": 0.01077728271484375, + "step": 122040 + }, + { + "epoch": 1.0552870273495256, + "grad_norm": 0.23224873515777048, + "learning_rate": 2.739793389073861e-06, + "loss": 0.08067550659179687, + "step": 122045 + }, + { + "epoch": 1.055330260871069, + "grad_norm": 0.1552683774442143, + "learning_rate": 2.7395904242980607e-06, + "loss": 0.05099029541015625, + "step": 122050 + }, + { + "epoch": 1.0553734943926123, + "grad_norm": 22.771619973008594, + "learning_rate": 2.739387460723248e-06, + "loss": 0.06795120239257812, + "step": 122055 + }, + { + "epoch": 1.0554167279141555, + "grad_norm": 19.23554890808534, + "learning_rate": 2.7391844983503577e-06, + "loss": 0.07231941223144531, + "step": 122060 + }, + { + "epoch": 1.0554599614356988, + "grad_norm": 1.6401837586665333, + "learning_rate": 2.738981537180324e-06, + "loss": 0.132379150390625, + "step": 122065 + }, + { + "epoch": 1.055503194957242, + "grad_norm": 12.62069316230292, + "learning_rate": 2.738778577214086e-06, + "loss": 0.036328125, + "step": 122070 + }, + { + "epoch": 1.0555464284787852, + "grad_norm": 3.9993058543446587, + "learning_rate": 2.738575618452578e-06, + "loss": 0.10647010803222656, + "step": 122075 + }, + { + "epoch": 1.0555896620003287, + "grad_norm": 2.6698859952810126, + "learning_rate": 2.738372660896734e-06, + "loss": 0.058148193359375, + "step": 122080 + }, + { + "epoch": 1.055632895521872, + "grad_norm": 1.9811462365994414, + "learning_rate": 2.7381697045474942e-06, + "loss": 0.013373184204101562, + "step": 122085 + }, + { + "epoch": 1.0556761290434151, + "grad_norm": 10.507114043209224, + "learning_rate": 2.737966749405792e-06, + "loss": 0.0730926513671875, + "step": 122090 + }, + { + "epoch": 1.0557193625649584, + "grad_norm": 9.916033645538688, + "learning_rate": 2.7377637954725643e-06, + "loss": 0.1065338134765625, + "step": 122095 + }, + { + "epoch": 1.0557625960865016, + "grad_norm": 1.9778494167378038, + "learning_rate": 2.737560842748747e-06, + "loss": 0.06587295532226563, + "step": 122100 + }, + { + "epoch": 1.0558058296080448, + "grad_norm": 0.6675893995771043, + "learning_rate": 2.7373578912352758e-06, + "loss": 0.11641826629638671, + "step": 122105 + }, + { + "epoch": 1.0558490631295883, + "grad_norm": 12.518844697122434, + "learning_rate": 2.737154940933085e-06, + "loss": 0.1403656005859375, + "step": 122110 + }, + { + "epoch": 1.0558922966511315, + "grad_norm": 2.667160303909967, + "learning_rate": 2.736951991843114e-06, + "loss": 0.10498542785644531, + "step": 122115 + }, + { + "epoch": 1.0559355301726747, + "grad_norm": 25.09754581589295, + "learning_rate": 2.7367490439662974e-06, + "loss": 0.22586536407470703, + "step": 122120 + }, + { + "epoch": 1.055978763694218, + "grad_norm": 0.21685955121872333, + "learning_rate": 2.7365460973035705e-06, + "loss": 0.02547760009765625, + "step": 122125 + }, + { + "epoch": 1.0560219972157612, + "grad_norm": 23.526200345376907, + "learning_rate": 2.73634315185587e-06, + "loss": 0.1075775146484375, + "step": 122130 + }, + { + "epoch": 1.0560652307373044, + "grad_norm": 3.0069181859616396, + "learning_rate": 2.7361402076241318e-06, + "loss": 0.193115234375, + "step": 122135 + }, + { + "epoch": 1.0561084642588476, + "grad_norm": 0.40421438372015667, + "learning_rate": 2.7359372646092916e-06, + "loss": 0.0583989143371582, + "step": 122140 + }, + { + "epoch": 1.056151697780391, + "grad_norm": 0.09548631888783583, + "learning_rate": 2.735734322812284e-06, + "loss": 0.035028076171875, + "step": 122145 + }, + { + "epoch": 1.0561949313019343, + "grad_norm": 1.1086047185535814, + "learning_rate": 2.7355313822340475e-06, + "loss": 0.012230873107910156, + "step": 122150 + }, + { + "epoch": 1.0562381648234775, + "grad_norm": 0.03841085397573134, + "learning_rate": 2.7353284428755165e-06, + "loss": 0.07095870971679688, + "step": 122155 + }, + { + "epoch": 1.0562813983450208, + "grad_norm": 7.878470066395989, + "learning_rate": 2.7351255047376277e-06, + "loss": 0.018621826171875, + "step": 122160 + }, + { + "epoch": 1.056324631866564, + "grad_norm": 2.008692755664604, + "learning_rate": 2.734922567821317e-06, + "loss": 0.19271697998046874, + "step": 122165 + }, + { + "epoch": 1.0563678653881072, + "grad_norm": 5.981166538379129, + "learning_rate": 2.73471963212752e-06, + "loss": 0.0321014404296875, + "step": 122170 + }, + { + "epoch": 1.0564110989096507, + "grad_norm": 0.33290351651627215, + "learning_rate": 2.734516697657171e-06, + "loss": 0.12297801971435547, + "step": 122175 + }, + { + "epoch": 1.056454332431194, + "grad_norm": 1.9894848103201423, + "learning_rate": 2.7343137644112093e-06, + "loss": 0.06755599975585938, + "step": 122180 + }, + { + "epoch": 1.0564975659527371, + "grad_norm": 1.4860801367343537, + "learning_rate": 2.734110832390568e-06, + "loss": 0.06969757080078125, + "step": 122185 + }, + { + "epoch": 1.0565407994742804, + "grad_norm": 12.011963154966026, + "learning_rate": 2.733907901596185e-06, + "loss": 0.11038360595703126, + "step": 122190 + }, + { + "epoch": 1.0565840329958236, + "grad_norm": 0.02278865212642471, + "learning_rate": 2.733704972028995e-06, + "loss": 0.06447820663452149, + "step": 122195 + }, + { + "epoch": 1.0566272665173668, + "grad_norm": 8.324125446308688, + "learning_rate": 2.7335020436899342e-06, + "loss": 0.25396728515625, + "step": 122200 + }, + { + "epoch": 1.0566705000389103, + "grad_norm": 0.5772958999430784, + "learning_rate": 2.733299116579937e-06, + "loss": 0.08148612976074218, + "step": 122205 + }, + { + "epoch": 1.0567137335604535, + "grad_norm": 0.9551055641278264, + "learning_rate": 2.7330961906999415e-06, + "loss": 0.0160186767578125, + "step": 122210 + }, + { + "epoch": 1.0567569670819967, + "grad_norm": 27.44457179629451, + "learning_rate": 2.7328932660508834e-06, + "loss": 0.07266521453857422, + "step": 122215 + }, + { + "epoch": 1.05680020060354, + "grad_norm": 30.970120721217842, + "learning_rate": 2.7326903426336984e-06, + "loss": 0.1991607666015625, + "step": 122220 + }, + { + "epoch": 1.0568434341250832, + "grad_norm": 0.5553975238418067, + "learning_rate": 2.732487420449321e-06, + "loss": 0.11197357177734375, + "step": 122225 + }, + { + "epoch": 1.0568866676466264, + "grad_norm": 11.336893623297655, + "learning_rate": 2.732284499498689e-06, + "loss": 0.15001068115234376, + "step": 122230 + }, + { + "epoch": 1.0569299011681696, + "grad_norm": 2.479176451747547, + "learning_rate": 2.732081579782736e-06, + "loss": 0.3333133697509766, + "step": 122235 + }, + { + "epoch": 1.056973134689713, + "grad_norm": 0.2605494045413601, + "learning_rate": 2.731878661302398e-06, + "loss": 0.04399871826171875, + "step": 122240 + }, + { + "epoch": 1.0570163682112563, + "grad_norm": 1.6331665706588039, + "learning_rate": 2.731675744058614e-06, + "loss": 0.06237106323242188, + "step": 122245 + }, + { + "epoch": 1.0570596017327996, + "grad_norm": 2.7127843768064865, + "learning_rate": 2.7314728280523175e-06, + "loss": 0.0852081298828125, + "step": 122250 + }, + { + "epoch": 1.0571028352543428, + "grad_norm": 0.16257271938167836, + "learning_rate": 2.7312699132844443e-06, + "loss": 0.06788291931152343, + "step": 122255 + }, + { + "epoch": 1.057146068775886, + "grad_norm": 0.6774319098231926, + "learning_rate": 2.73106699975593e-06, + "loss": 0.015798568725585938, + "step": 122260 + }, + { + "epoch": 1.0571893022974292, + "grad_norm": 0.5236088635079112, + "learning_rate": 2.730864087467712e-06, + "loss": 0.05487136840820313, + "step": 122265 + }, + { + "epoch": 1.0572325358189727, + "grad_norm": 2.0343303755586226, + "learning_rate": 2.730661176420723e-06, + "loss": 0.1420520782470703, + "step": 122270 + }, + { + "epoch": 1.057275769340516, + "grad_norm": 4.769994332888572, + "learning_rate": 2.730458266615903e-06, + "loss": 0.037480926513671874, + "step": 122275 + }, + { + "epoch": 1.0573190028620592, + "grad_norm": 0.1861652682811451, + "learning_rate": 2.730255358054185e-06, + "loss": 0.27999114990234375, + "step": 122280 + }, + { + "epoch": 1.0573622363836024, + "grad_norm": 0.3754741049764716, + "learning_rate": 2.7300524507365057e-06, + "loss": 0.11335029602050781, + "step": 122285 + }, + { + "epoch": 1.0574054699051456, + "grad_norm": 39.68734181927762, + "learning_rate": 2.7298495446638e-06, + "loss": 0.09037704467773437, + "step": 122290 + }, + { + "epoch": 1.0574487034266888, + "grad_norm": 25.064519235966305, + "learning_rate": 2.7296466398370053e-06, + "loss": 0.22450008392333984, + "step": 122295 + }, + { + "epoch": 1.057491936948232, + "grad_norm": 5.070197608659083, + "learning_rate": 2.7294437362570545e-06, + "loss": 0.18146209716796874, + "step": 122300 + }, + { + "epoch": 1.0575351704697755, + "grad_norm": 1.12773515627149, + "learning_rate": 2.7292408339248874e-06, + "loss": 0.05963592529296875, + "step": 122305 + }, + { + "epoch": 1.0575784039913187, + "grad_norm": 5.6117398126483, + "learning_rate": 2.729037932841437e-06, + "loss": 0.18558216094970703, + "step": 122310 + }, + { + "epoch": 1.057621637512862, + "grad_norm": 0.6749393539436157, + "learning_rate": 2.7288350330076398e-06, + "loss": 0.13975372314453124, + "step": 122315 + }, + { + "epoch": 1.0576648710344052, + "grad_norm": 0.8368181454214982, + "learning_rate": 2.7286321344244307e-06, + "loss": 0.014328765869140624, + "step": 122320 + }, + { + "epoch": 1.0577081045559484, + "grad_norm": 1.1481485428430804, + "learning_rate": 2.7284292370927472e-06, + "loss": 0.026641082763671876, + "step": 122325 + }, + { + "epoch": 1.0577513380774917, + "grad_norm": 6.861236270772294, + "learning_rate": 2.728226341013524e-06, + "loss": 0.07834625244140625, + "step": 122330 + }, + { + "epoch": 1.0577945715990351, + "grad_norm": 8.66172988896939, + "learning_rate": 2.7280234461876954e-06, + "loss": 0.10438308715820313, + "step": 122335 + }, + { + "epoch": 1.0578378051205783, + "grad_norm": 2.1523985674783854, + "learning_rate": 2.7278205526162e-06, + "loss": 0.052242279052734375, + "step": 122340 + }, + { + "epoch": 1.0578810386421216, + "grad_norm": 1.4758740564929937, + "learning_rate": 2.727617660299972e-06, + "loss": 0.012680435180664062, + "step": 122345 + }, + { + "epoch": 1.0579242721636648, + "grad_norm": 0.47465150204242557, + "learning_rate": 2.7274147692399467e-06, + "loss": 0.024056434631347656, + "step": 122350 + }, + { + "epoch": 1.057967505685208, + "grad_norm": 5.182714159172626, + "learning_rate": 2.7272118794370617e-06, + "loss": 0.026607513427734375, + "step": 122355 + }, + { + "epoch": 1.0580107392067513, + "grad_norm": 3.689066129155174, + "learning_rate": 2.7270089908922506e-06, + "loss": 0.09838714599609374, + "step": 122360 + }, + { + "epoch": 1.0580539727282947, + "grad_norm": 1.1283533718195593, + "learning_rate": 2.7268061036064486e-06, + "loss": 0.05493087768554687, + "step": 122365 + }, + { + "epoch": 1.058097206249838, + "grad_norm": 46.8177632665161, + "learning_rate": 2.7266032175805945e-06, + "loss": 0.0824249267578125, + "step": 122370 + }, + { + "epoch": 1.0581404397713812, + "grad_norm": 6.288762294433497, + "learning_rate": 2.7264003328156218e-06, + "loss": 0.0495880126953125, + "step": 122375 + }, + { + "epoch": 1.0581836732929244, + "grad_norm": 5.157455403044999, + "learning_rate": 2.7261974493124664e-06, + "loss": 0.045354461669921874, + "step": 122380 + }, + { + "epoch": 1.0582269068144676, + "grad_norm": 0.3068664458930798, + "learning_rate": 2.725994567072065e-06, + "loss": 0.02089996337890625, + "step": 122385 + }, + { + "epoch": 1.0582701403360109, + "grad_norm": 2.4370777710636347, + "learning_rate": 2.7257916860953517e-06, + "loss": 0.01477813720703125, + "step": 122390 + }, + { + "epoch": 1.0583133738575543, + "grad_norm": 4.496823651262881, + "learning_rate": 2.725588806383263e-06, + "loss": 0.04205818176269531, + "step": 122395 + }, + { + "epoch": 1.0583566073790975, + "grad_norm": 0.3626102308247203, + "learning_rate": 2.7253859279367334e-06, + "loss": 0.03432197570800781, + "step": 122400 + }, + { + "epoch": 1.0583998409006408, + "grad_norm": 0.6633248123984056, + "learning_rate": 2.725183050756701e-06, + "loss": 0.03613128662109375, + "step": 122405 + }, + { + "epoch": 1.058443074422184, + "grad_norm": 0.23454116258099023, + "learning_rate": 2.7249801748440993e-06, + "loss": 0.07344093322753906, + "step": 122410 + }, + { + "epoch": 1.0584863079437272, + "grad_norm": 7.7203157653451076, + "learning_rate": 2.7247773001998654e-06, + "loss": 0.04670257568359375, + "step": 122415 + }, + { + "epoch": 1.0585295414652705, + "grad_norm": 18.214089392817918, + "learning_rate": 2.7245744268249346e-06, + "loss": 0.03699703216552734, + "step": 122420 + }, + { + "epoch": 1.0585727749868137, + "grad_norm": 0.897198151346742, + "learning_rate": 2.724371554720242e-06, + "loss": 0.07398834228515624, + "step": 122425 + }, + { + "epoch": 1.0586160085083571, + "grad_norm": 2.1202661903533366, + "learning_rate": 2.724168683886722e-06, + "loss": 0.04483795166015625, + "step": 122430 + }, + { + "epoch": 1.0586592420299004, + "grad_norm": 16.16268769084, + "learning_rate": 2.7239658143253123e-06, + "loss": 0.109613037109375, + "step": 122435 + }, + { + "epoch": 1.0587024755514436, + "grad_norm": 30.412206701527225, + "learning_rate": 2.7237629460369486e-06, + "loss": 0.151910400390625, + "step": 122440 + }, + { + "epoch": 1.0587457090729868, + "grad_norm": 0.4817931451636149, + "learning_rate": 2.7235600790225656e-06, + "loss": 0.1210968017578125, + "step": 122445 + }, + { + "epoch": 1.05878894259453, + "grad_norm": 4.099839189374964, + "learning_rate": 2.7233572132830993e-06, + "loss": 0.05961456298828125, + "step": 122450 + }, + { + "epoch": 1.0588321761160733, + "grad_norm": 3.287901528774556, + "learning_rate": 2.723154348819485e-06, + "loss": 0.03744354248046875, + "step": 122455 + }, + { + "epoch": 1.0588754096376167, + "grad_norm": 5.692834085917736, + "learning_rate": 2.722951485632657e-06, + "loss": 0.013510894775390626, + "step": 122460 + }, + { + "epoch": 1.05891864315916, + "grad_norm": 0.6797907682391209, + "learning_rate": 2.7227486237235534e-06, + "loss": 0.18189697265625, + "step": 122465 + }, + { + "epoch": 1.0589618766807032, + "grad_norm": 0.329526195879693, + "learning_rate": 2.722545763093109e-06, + "loss": 0.04698314666748047, + "step": 122470 + }, + { + "epoch": 1.0590051102022464, + "grad_norm": 0.08215025429189368, + "learning_rate": 2.722342903742259e-06, + "loss": 0.02109375, + "step": 122475 + }, + { + "epoch": 1.0590483437237896, + "grad_norm": 2.8428320818140307, + "learning_rate": 2.722140045671939e-06, + "loss": 0.0409027099609375, + "step": 122480 + }, + { + "epoch": 1.0590915772453329, + "grad_norm": 27.230814112949222, + "learning_rate": 2.721937188883084e-06, + "loss": 0.05543384552001953, + "step": 122485 + }, + { + "epoch": 1.059134810766876, + "grad_norm": 4.032592297820777, + "learning_rate": 2.7217343333766303e-06, + "loss": 0.08756866455078124, + "step": 122490 + }, + { + "epoch": 1.0591780442884196, + "grad_norm": 2.389008691285944, + "learning_rate": 2.7215314791535125e-06, + "loss": 0.10516357421875, + "step": 122495 + }, + { + "epoch": 1.0592212778099628, + "grad_norm": 0.6225183108712073, + "learning_rate": 2.7213286262146678e-06, + "loss": 0.027388381958007812, + "step": 122500 + }, + { + "epoch": 1.059264511331506, + "grad_norm": 3.0974568756537706, + "learning_rate": 2.721125774561031e-06, + "loss": 0.09154424667358399, + "step": 122505 + }, + { + "epoch": 1.0593077448530492, + "grad_norm": 0.7129703716437005, + "learning_rate": 2.7209229241935375e-06, + "loss": 0.08270187377929687, + "step": 122510 + }, + { + "epoch": 1.0593509783745925, + "grad_norm": 0.6111428790097059, + "learning_rate": 2.7207200751131222e-06, + "loss": 0.3154453277587891, + "step": 122515 + }, + { + "epoch": 1.0593942118961357, + "grad_norm": 5.582842255790206, + "learning_rate": 2.720517227320722e-06, + "loss": 0.13928604125976562, + "step": 122520 + }, + { + "epoch": 1.0594374454176791, + "grad_norm": 3.9397435036081396, + "learning_rate": 2.7203143808172704e-06, + "loss": 0.1149932861328125, + "step": 122525 + }, + { + "epoch": 1.0594806789392224, + "grad_norm": 3.801000304130073, + "learning_rate": 2.720111535603705e-06, + "loss": 0.0394500732421875, + "step": 122530 + }, + { + "epoch": 1.0595239124607656, + "grad_norm": 144.99922165151514, + "learning_rate": 2.7199086916809614e-06, + "loss": 0.33702545166015624, + "step": 122535 + }, + { + "epoch": 1.0595671459823088, + "grad_norm": 20.37574036290829, + "learning_rate": 2.719705849049973e-06, + "loss": 0.3147899627685547, + "step": 122540 + }, + { + "epoch": 1.059610379503852, + "grad_norm": 4.085542342184483, + "learning_rate": 2.7195030077116766e-06, + "loss": 0.17808837890625, + "step": 122545 + }, + { + "epoch": 1.0596536130253953, + "grad_norm": 2.996109910309492, + "learning_rate": 2.7193001676670078e-06, + "loss": 0.03895645141601563, + "step": 122550 + }, + { + "epoch": 1.0596968465469385, + "grad_norm": 1.691220084949105, + "learning_rate": 2.719097328916901e-06, + "loss": 0.036527252197265624, + "step": 122555 + }, + { + "epoch": 1.059740080068482, + "grad_norm": 16.2882317365817, + "learning_rate": 2.7188944914622936e-06, + "loss": 0.09102935791015625, + "step": 122560 + }, + { + "epoch": 1.0597833135900252, + "grad_norm": 0.40936041374117993, + "learning_rate": 2.71869165530412e-06, + "loss": 0.0272003173828125, + "step": 122565 + }, + { + "epoch": 1.0598265471115684, + "grad_norm": 4.314112338245794, + "learning_rate": 2.718488820443315e-06, + "loss": 0.08951759338378906, + "step": 122570 + }, + { + "epoch": 1.0598697806331117, + "grad_norm": 3.434965061683022, + "learning_rate": 2.718285986880815e-06, + "loss": 0.0386962890625, + "step": 122575 + }, + { + "epoch": 1.0599130141546549, + "grad_norm": 18.303780525213153, + "learning_rate": 2.7180831546175547e-06, + "loss": 0.12436103820800781, + "step": 122580 + }, + { + "epoch": 1.0599562476761981, + "grad_norm": 0.31283300600482944, + "learning_rate": 2.7178803236544707e-06, + "loss": 0.0677642822265625, + "step": 122585 + }, + { + "epoch": 1.0599994811977416, + "grad_norm": 4.667954784716332, + "learning_rate": 2.7176774939924967e-06, + "loss": 0.17754096984863282, + "step": 122590 + }, + { + "epoch": 1.0600427147192848, + "grad_norm": 30.665413475693416, + "learning_rate": 2.7174746656325694e-06, + "loss": 0.12876129150390625, + "step": 122595 + }, + { + "epoch": 1.060085948240828, + "grad_norm": 13.212008459497062, + "learning_rate": 2.7172718385756253e-06, + "loss": 0.0448883056640625, + "step": 122600 + }, + { + "epoch": 1.0601291817623713, + "grad_norm": 0.48972778657893284, + "learning_rate": 2.717069012822597e-06, + "loss": 0.043408203125, + "step": 122605 + }, + { + "epoch": 1.0601724152839145, + "grad_norm": 11.202715260332171, + "learning_rate": 2.716866188374422e-06, + "loss": 0.05560798645019531, + "step": 122610 + }, + { + "epoch": 1.0602156488054577, + "grad_norm": 1.616996898770103, + "learning_rate": 2.7166633652320355e-06, + "loss": 0.0317779541015625, + "step": 122615 + }, + { + "epoch": 1.0602588823270012, + "grad_norm": 0.1899405879584587, + "learning_rate": 2.716460543396371e-06, + "loss": 0.020523452758789064, + "step": 122620 + }, + { + "epoch": 1.0603021158485444, + "grad_norm": 4.539924650431623, + "learning_rate": 2.7162577228683666e-06, + "loss": 0.076934814453125, + "step": 122625 + }, + { + "epoch": 1.0603453493700876, + "grad_norm": 6.820262541498248, + "learning_rate": 2.716054903648957e-06, + "loss": 0.08779220581054688, + "step": 122630 + }, + { + "epoch": 1.0603885828916308, + "grad_norm": 3.9893337501068022, + "learning_rate": 2.7158520857390763e-06, + "loss": 0.030930328369140624, + "step": 122635 + }, + { + "epoch": 1.060431816413174, + "grad_norm": 1.2143837025360285, + "learning_rate": 2.7156492691396607e-06, + "loss": 0.40662269592285155, + "step": 122640 + }, + { + "epoch": 1.0604750499347173, + "grad_norm": 4.648538536369596, + "learning_rate": 2.715446453851646e-06, + "loss": 0.014950180053710937, + "step": 122645 + }, + { + "epoch": 1.0605182834562608, + "grad_norm": 4.988183764245783, + "learning_rate": 2.7152436398759675e-06, + "loss": 0.0506134033203125, + "step": 122650 + }, + { + "epoch": 1.060561516977804, + "grad_norm": 0.2544530264769386, + "learning_rate": 2.7150408272135584e-06, + "loss": 0.042047119140625, + "step": 122655 + }, + { + "epoch": 1.0606047504993472, + "grad_norm": 3.6252499057330017, + "learning_rate": 2.7148380158653566e-06, + "loss": 0.0896026611328125, + "step": 122660 + }, + { + "epoch": 1.0606479840208904, + "grad_norm": 0.25742314885276396, + "learning_rate": 2.7146352058322975e-06, + "loss": 0.009158515930175781, + "step": 122665 + }, + { + "epoch": 1.0606912175424337, + "grad_norm": 76.34739976311874, + "learning_rate": 2.7144323971153154e-06, + "loss": 0.32282028198242185, + "step": 122670 + }, + { + "epoch": 1.060734451063977, + "grad_norm": 14.327033682041122, + "learning_rate": 2.714229589715346e-06, + "loss": 0.04220123291015625, + "step": 122675 + }, + { + "epoch": 1.0607776845855201, + "grad_norm": 1.4905933831141376, + "learning_rate": 2.714026783633324e-06, + "loss": 0.12875518798828126, + "step": 122680 + }, + { + "epoch": 1.0608209181070636, + "grad_norm": 2.6832931849166592, + "learning_rate": 2.7138239788701846e-06, + "loss": 0.03397369384765625, + "step": 122685 + }, + { + "epoch": 1.0608641516286068, + "grad_norm": 14.728816691636752, + "learning_rate": 2.7136211754268635e-06, + "loss": 0.28054122924804686, + "step": 122690 + }, + { + "epoch": 1.06090738515015, + "grad_norm": 0.6040239929184861, + "learning_rate": 2.7134183733042976e-06, + "loss": 0.06380691528320312, + "step": 122695 + }, + { + "epoch": 1.0609506186716933, + "grad_norm": 2.750148439292341, + "learning_rate": 2.7132155725034208e-06, + "loss": 0.138690185546875, + "step": 122700 + }, + { + "epoch": 1.0609938521932365, + "grad_norm": 20.482595699705097, + "learning_rate": 2.713012773025168e-06, + "loss": 0.04676132202148438, + "step": 122705 + }, + { + "epoch": 1.0610370857147797, + "grad_norm": 1.0044667870746173, + "learning_rate": 2.7128099748704747e-06, + "loss": 0.013365936279296876, + "step": 122710 + }, + { + "epoch": 1.0610803192363232, + "grad_norm": 0.07844493347635022, + "learning_rate": 2.712607178040276e-06, + "loss": 0.12108955383300782, + "step": 122715 + }, + { + "epoch": 1.0611235527578664, + "grad_norm": 8.954544703418069, + "learning_rate": 2.7124043825355083e-06, + "loss": 0.04326324462890625, + "step": 122720 + }, + { + "epoch": 1.0611667862794096, + "grad_norm": 1.3825665526002615, + "learning_rate": 2.7122015883571064e-06, + "loss": 0.20971527099609374, + "step": 122725 + }, + { + "epoch": 1.0612100198009529, + "grad_norm": 9.660376001986304, + "learning_rate": 2.711998795506005e-06, + "loss": 0.0618560791015625, + "step": 122730 + }, + { + "epoch": 1.061253253322496, + "grad_norm": 0.22423093505230296, + "learning_rate": 2.7117960039831406e-06, + "loss": 0.013540077209472656, + "step": 122735 + }, + { + "epoch": 1.0612964868440393, + "grad_norm": 16.12526381693769, + "learning_rate": 2.7115932137894464e-06, + "loss": 0.0846334457397461, + "step": 122740 + }, + { + "epoch": 1.0613397203655826, + "grad_norm": 3.798173908890186, + "learning_rate": 2.7113904249258595e-06, + "loss": 0.07903518676757812, + "step": 122745 + }, + { + "epoch": 1.061382953887126, + "grad_norm": 5.646284899220553, + "learning_rate": 2.711187637393313e-06, + "loss": 0.04286346435546875, + "step": 122750 + }, + { + "epoch": 1.0614261874086692, + "grad_norm": 2.324521551193912, + "learning_rate": 2.710984851192745e-06, + "loss": 0.09303512573242187, + "step": 122755 + }, + { + "epoch": 1.0614694209302125, + "grad_norm": 1.998312448457712, + "learning_rate": 2.710782066325089e-06, + "loss": 0.06850051879882812, + "step": 122760 + }, + { + "epoch": 1.0615126544517557, + "grad_norm": 4.538872052794707, + "learning_rate": 2.710579282791281e-06, + "loss": 0.03294048309326172, + "step": 122765 + }, + { + "epoch": 1.061555887973299, + "grad_norm": 2.8138742499577676, + "learning_rate": 2.710376500592255e-06, + "loss": 0.01214447021484375, + "step": 122770 + }, + { + "epoch": 1.0615991214948421, + "grad_norm": 0.3629159285273663, + "learning_rate": 2.7101737197289476e-06, + "loss": 0.06463165283203125, + "step": 122775 + }, + { + "epoch": 1.0616423550163856, + "grad_norm": 0.24383864577013992, + "learning_rate": 2.709970940202292e-06, + "loss": 0.2359783172607422, + "step": 122780 + }, + { + "epoch": 1.0616855885379288, + "grad_norm": 0.4592672582306348, + "learning_rate": 2.709768162013226e-06, + "loss": 0.08638687133789062, + "step": 122785 + }, + { + "epoch": 1.061728822059472, + "grad_norm": 15.144496847677178, + "learning_rate": 2.709565385162684e-06, + "loss": 0.037740325927734374, + "step": 122790 + }, + { + "epoch": 1.0617720555810153, + "grad_norm": 32.612629725565824, + "learning_rate": 2.709362609651601e-06, + "loss": 0.11045379638671875, + "step": 122795 + }, + { + "epoch": 1.0618152891025585, + "grad_norm": 7.297930683347746, + "learning_rate": 2.709159835480911e-06, + "loss": 0.1180349349975586, + "step": 122800 + }, + { + "epoch": 1.0618585226241017, + "grad_norm": 0.5743761624908327, + "learning_rate": 2.7089570626515506e-06, + "loss": 0.050196075439453126, + "step": 122805 + }, + { + "epoch": 1.0619017561456452, + "grad_norm": 10.898087312460168, + "learning_rate": 2.7087542911644533e-06, + "loss": 0.06795425415039062, + "step": 122810 + }, + { + "epoch": 1.0619449896671884, + "grad_norm": 3.45769557573507, + "learning_rate": 2.7085515210205566e-06, + "loss": 0.029981231689453124, + "step": 122815 + }, + { + "epoch": 1.0619882231887317, + "grad_norm": 0.22595753756426698, + "learning_rate": 2.708348752220795e-06, + "loss": 0.03499298095703125, + "step": 122820 + }, + { + "epoch": 1.0620314567102749, + "grad_norm": 1.5379011306305013, + "learning_rate": 2.7081459847661027e-06, + "loss": 0.03000946044921875, + "step": 122825 + }, + { + "epoch": 1.062074690231818, + "grad_norm": 0.9468852284903552, + "learning_rate": 2.707943218657415e-06, + "loss": 0.05687103271484375, + "step": 122830 + }, + { + "epoch": 1.0621179237533613, + "grad_norm": 1.3805021016630878, + "learning_rate": 2.707740453895668e-06, + "loss": 0.05159149169921875, + "step": 122835 + }, + { + "epoch": 1.0621611572749046, + "grad_norm": 1.3599164456575887, + "learning_rate": 2.707537690481796e-06, + "loss": 0.04164886474609375, + "step": 122840 + }, + { + "epoch": 1.062204390796448, + "grad_norm": 0.92327075284823, + "learning_rate": 2.7073349284167334e-06, + "loss": 0.030687332153320312, + "step": 122845 + }, + { + "epoch": 1.0622476243179912, + "grad_norm": 41.85054019170058, + "learning_rate": 2.707132167701417e-06, + "loss": 0.10348014831542969, + "step": 122850 + }, + { + "epoch": 1.0622908578395345, + "grad_norm": 2.3326568240978616, + "learning_rate": 2.7069294083367814e-06, + "loss": 0.05048027038574219, + "step": 122855 + }, + { + "epoch": 1.0623340913610777, + "grad_norm": 5.856808379988086, + "learning_rate": 2.706726650323761e-06, + "loss": 0.10974655151367188, + "step": 122860 + }, + { + "epoch": 1.062377324882621, + "grad_norm": 22.893588768676718, + "learning_rate": 2.706523893663292e-06, + "loss": 0.21668777465820313, + "step": 122865 + }, + { + "epoch": 1.0624205584041642, + "grad_norm": 2.701322749473929, + "learning_rate": 2.7063211383563087e-06, + "loss": 0.05645675659179687, + "step": 122870 + }, + { + "epoch": 1.0624637919257076, + "grad_norm": 32.216855687603136, + "learning_rate": 2.706118384403745e-06, + "loss": 0.0785682201385498, + "step": 122875 + }, + { + "epoch": 1.0625070254472508, + "grad_norm": 13.245415877725836, + "learning_rate": 2.705915631806539e-06, + "loss": 0.05465984344482422, + "step": 122880 + }, + { + "epoch": 1.062550258968794, + "grad_norm": 0.6416433092964637, + "learning_rate": 2.705712880565623e-06, + "loss": 0.05153350830078125, + "step": 122885 + }, + { + "epoch": 1.0625934924903373, + "grad_norm": 14.27982114536815, + "learning_rate": 2.7055101306819347e-06, + "loss": 0.05821914672851562, + "step": 122890 + }, + { + "epoch": 1.0626367260118805, + "grad_norm": 1.066420543533883, + "learning_rate": 2.705307382156407e-06, + "loss": 0.052545166015625, + "step": 122895 + }, + { + "epoch": 1.0626799595334238, + "grad_norm": 0.08685502056489802, + "learning_rate": 2.7051046349899755e-06, + "loss": 0.03565788269042969, + "step": 122900 + }, + { + "epoch": 1.0627231930549672, + "grad_norm": 9.872872383576706, + "learning_rate": 2.704901889183574e-06, + "loss": 0.03936691284179687, + "step": 122905 + }, + { + "epoch": 1.0627664265765104, + "grad_norm": 4.1747571200171505, + "learning_rate": 2.7046991447381405e-06, + "loss": 0.08432731628417969, + "step": 122910 + }, + { + "epoch": 1.0628096600980537, + "grad_norm": 0.2837096828103351, + "learning_rate": 2.7044964016546075e-06, + "loss": 0.08983783721923828, + "step": 122915 + }, + { + "epoch": 1.062852893619597, + "grad_norm": 1.6521473367677728, + "learning_rate": 2.7042936599339123e-06, + "loss": 0.05443572998046875, + "step": 122920 + }, + { + "epoch": 1.0628961271411401, + "grad_norm": 15.831961241998306, + "learning_rate": 2.704090919576988e-06, + "loss": 0.16784210205078126, + "step": 122925 + }, + { + "epoch": 1.0629393606626834, + "grad_norm": 3.914976262971921, + "learning_rate": 2.7038881805847703e-06, + "loss": 0.09077606201171876, + "step": 122930 + }, + { + "epoch": 1.0629825941842266, + "grad_norm": 1.0514494612558776, + "learning_rate": 2.7036854429581937e-06, + "loss": 0.04853515625, + "step": 122935 + }, + { + "epoch": 1.06302582770577, + "grad_norm": 1.7613382696286892, + "learning_rate": 2.7034827066981934e-06, + "loss": 0.23343048095703126, + "step": 122940 + }, + { + "epoch": 1.0630690612273133, + "grad_norm": 2.6792292146711, + "learning_rate": 2.7032799718057053e-06, + "loss": 0.037268447875976565, + "step": 122945 + }, + { + "epoch": 1.0631122947488565, + "grad_norm": 0.30709282693296086, + "learning_rate": 2.7030772382816637e-06, + "loss": 0.08361892700195313, + "step": 122950 + }, + { + "epoch": 1.0631555282703997, + "grad_norm": 8.172953654193627, + "learning_rate": 2.7028745061270037e-06, + "loss": 0.093267822265625, + "step": 122955 + }, + { + "epoch": 1.063198761791943, + "grad_norm": 1.502841883402884, + "learning_rate": 2.7026717753426606e-06, + "loss": 0.0635995864868164, + "step": 122960 + }, + { + "epoch": 1.0632419953134862, + "grad_norm": 3.2786271499433597, + "learning_rate": 2.702469045929568e-06, + "loss": 0.039461517333984376, + "step": 122965 + }, + { + "epoch": 1.0632852288350296, + "grad_norm": 1.2883483261917004, + "learning_rate": 2.702266317888662e-06, + "loss": 0.010303783416748046, + "step": 122970 + }, + { + "epoch": 1.0633284623565729, + "grad_norm": 2.9054864569958108, + "learning_rate": 2.7020635912208775e-06, + "loss": 0.15180740356445313, + "step": 122975 + }, + { + "epoch": 1.063371695878116, + "grad_norm": 0.5271866219813265, + "learning_rate": 2.70186086592715e-06, + "loss": 0.04703216552734375, + "step": 122980 + }, + { + "epoch": 1.0634149293996593, + "grad_norm": 0.06875410127636061, + "learning_rate": 2.701658142008414e-06, + "loss": 0.017327499389648438, + "step": 122985 + }, + { + "epoch": 1.0634581629212025, + "grad_norm": 1.7674114415211153, + "learning_rate": 2.701455419465604e-06, + "loss": 0.02393684387207031, + "step": 122990 + }, + { + "epoch": 1.0635013964427458, + "grad_norm": 5.944258380787609, + "learning_rate": 2.701252698299655e-06, + "loss": 0.2879486083984375, + "step": 122995 + }, + { + "epoch": 1.063544629964289, + "grad_norm": 0.6354483568138771, + "learning_rate": 2.7010499785115023e-06, + "loss": 0.1599720001220703, + "step": 123000 + }, + { + "epoch": 1.0635878634858325, + "grad_norm": 2.4186039785787763, + "learning_rate": 2.7008472601020797e-06, + "loss": 0.057660675048828124, + "step": 123005 + }, + { + "epoch": 1.0636310970073757, + "grad_norm": 7.426235361727964, + "learning_rate": 2.700644543072324e-06, + "loss": 0.0140106201171875, + "step": 123010 + }, + { + "epoch": 1.063674330528919, + "grad_norm": 12.6634700007365, + "learning_rate": 2.7004418274231694e-06, + "loss": 0.046825408935546875, + "step": 123015 + }, + { + "epoch": 1.0637175640504621, + "grad_norm": 0.10854967112748105, + "learning_rate": 2.7002391131555507e-06, + "loss": 0.027005767822265624, + "step": 123020 + }, + { + "epoch": 1.0637607975720054, + "grad_norm": 0.6315570294605748, + "learning_rate": 2.7000364002704025e-06, + "loss": 0.09237003326416016, + "step": 123025 + }, + { + "epoch": 1.0638040310935486, + "grad_norm": 3.176255860794952, + "learning_rate": 2.6998336887686597e-06, + "loss": 0.0484771728515625, + "step": 123030 + }, + { + "epoch": 1.063847264615092, + "grad_norm": 1.164674338463069, + "learning_rate": 2.699630978651257e-06, + "loss": 0.048919677734375, + "step": 123035 + }, + { + "epoch": 1.0638904981366353, + "grad_norm": 1.5649884490341106, + "learning_rate": 2.69942826991913e-06, + "loss": 0.09136238098144531, + "step": 123040 + }, + { + "epoch": 1.0639337316581785, + "grad_norm": 0.35311971503402917, + "learning_rate": 2.699225562573214e-06, + "loss": 0.038873291015625, + "step": 123045 + }, + { + "epoch": 1.0639769651797217, + "grad_norm": 3.3954932190528253, + "learning_rate": 2.699022856614443e-06, + "loss": 0.2914289474487305, + "step": 123050 + }, + { + "epoch": 1.064020198701265, + "grad_norm": 12.881557377906843, + "learning_rate": 2.6988201520437515e-06, + "loss": 0.1580667495727539, + "step": 123055 + }, + { + "epoch": 1.0640634322228082, + "grad_norm": 0.46098187996308204, + "learning_rate": 2.698617448862075e-06, + "loss": 0.03554840087890625, + "step": 123060 + }, + { + "epoch": 1.0641066657443516, + "grad_norm": 4.020853172202026, + "learning_rate": 2.698414747070347e-06, + "loss": 0.06619873046875, + "step": 123065 + }, + { + "epoch": 1.0641498992658949, + "grad_norm": 0.05975114447508883, + "learning_rate": 2.698212046669505e-06, + "loss": 0.026703643798828124, + "step": 123070 + }, + { + "epoch": 1.064193132787438, + "grad_norm": 1.3920949324524285, + "learning_rate": 2.698009347660483e-06, + "loss": 0.02060394287109375, + "step": 123075 + }, + { + "epoch": 1.0642363663089813, + "grad_norm": 1.8721228695987986, + "learning_rate": 2.697806650044214e-06, + "loss": 0.0661844253540039, + "step": 123080 + }, + { + "epoch": 1.0642795998305246, + "grad_norm": 5.638576574408495, + "learning_rate": 2.6976039538216343e-06, + "loss": 0.0972869873046875, + "step": 123085 + }, + { + "epoch": 1.0643228333520678, + "grad_norm": 0.8514481012090365, + "learning_rate": 2.697401258993679e-06, + "loss": 0.00650787353515625, + "step": 123090 + }, + { + "epoch": 1.0643660668736112, + "grad_norm": 38.68197551219729, + "learning_rate": 2.697198565561282e-06, + "loss": 0.5013916015625, + "step": 123095 + }, + { + "epoch": 1.0644093003951545, + "grad_norm": 1.0397643376614845, + "learning_rate": 2.696995873525377e-06, + "loss": 0.17052879333496093, + "step": 123100 + }, + { + "epoch": 1.0644525339166977, + "grad_norm": 1.430448388244408, + "learning_rate": 2.696793182886902e-06, + "loss": 0.0580596923828125, + "step": 123105 + }, + { + "epoch": 1.064495767438241, + "grad_norm": 3.6620105339495748, + "learning_rate": 2.696590493646789e-06, + "loss": 0.046783447265625, + "step": 123110 + }, + { + "epoch": 1.0645390009597842, + "grad_norm": 2.8433345133335837, + "learning_rate": 2.696387805805975e-06, + "loss": 0.37256393432617185, + "step": 123115 + }, + { + "epoch": 1.0645822344813274, + "grad_norm": 9.374019822360646, + "learning_rate": 2.6961851193653935e-06, + "loss": 0.12473258972167969, + "step": 123120 + }, + { + "epoch": 1.0646254680028706, + "grad_norm": 0.30371364624097336, + "learning_rate": 2.695982434325979e-06, + "loss": 0.057501220703125, + "step": 123125 + }, + { + "epoch": 1.064668701524414, + "grad_norm": 1.5018191110874057, + "learning_rate": 2.6957797506886654e-06, + "loss": 0.06425018310546875, + "step": 123130 + }, + { + "epoch": 1.0647119350459573, + "grad_norm": 0.3004220494137264, + "learning_rate": 2.6955770684543903e-06, + "loss": 0.081201171875, + "step": 123135 + }, + { + "epoch": 1.0647551685675005, + "grad_norm": 48.644211020846434, + "learning_rate": 2.6953743876240863e-06, + "loss": 0.5131332397460937, + "step": 123140 + }, + { + "epoch": 1.0647984020890437, + "grad_norm": 3.029574314002475, + "learning_rate": 2.695171708198689e-06, + "loss": 0.06373138427734375, + "step": 123145 + }, + { + "epoch": 1.064841635610587, + "grad_norm": 7.033073368774794, + "learning_rate": 2.6949690301791328e-06, + "loss": 0.07365913391113281, + "step": 123150 + }, + { + "epoch": 1.0648848691321302, + "grad_norm": 1.8668060765369152, + "learning_rate": 2.6947663535663527e-06, + "loss": 0.0292694091796875, + "step": 123155 + }, + { + "epoch": 1.0649281026536737, + "grad_norm": 0.14968893155875093, + "learning_rate": 2.694563678361282e-06, + "loss": 0.36032257080078123, + "step": 123160 + }, + { + "epoch": 1.0649713361752169, + "grad_norm": 0.6616230367200463, + "learning_rate": 2.694361004564858e-06, + "loss": 0.0316619873046875, + "step": 123165 + }, + { + "epoch": 1.0650145696967601, + "grad_norm": 3.791657206428038, + "learning_rate": 2.694158332178013e-06, + "loss": 0.03277740478515625, + "step": 123170 + }, + { + "epoch": 1.0650578032183033, + "grad_norm": 98.735816924438, + "learning_rate": 2.6939556612016836e-06, + "loss": 0.24504737854003905, + "step": 123175 + }, + { + "epoch": 1.0651010367398466, + "grad_norm": 24.28421190296856, + "learning_rate": 2.693752991636804e-06, + "loss": 0.0836482048034668, + "step": 123180 + }, + { + "epoch": 1.0651442702613898, + "grad_norm": 5.362201425974884, + "learning_rate": 2.693550323484308e-06, + "loss": 0.033118820190429686, + "step": 123185 + }, + { + "epoch": 1.065187503782933, + "grad_norm": 9.409919458167781, + "learning_rate": 2.69334765674513e-06, + "loss": 0.0466796875, + "step": 123190 + }, + { + "epoch": 1.0652307373044765, + "grad_norm": 0.9378705350744925, + "learning_rate": 2.6931449914202058e-06, + "loss": 0.042353057861328126, + "step": 123195 + }, + { + "epoch": 1.0652739708260197, + "grad_norm": 0.7965506628147877, + "learning_rate": 2.69294232751047e-06, + "loss": 0.033599853515625, + "step": 123200 + }, + { + "epoch": 1.065317204347563, + "grad_norm": 0.06781742553805459, + "learning_rate": 2.6927396650168575e-06, + "loss": 0.04218101501464844, + "step": 123205 + }, + { + "epoch": 1.0653604378691062, + "grad_norm": 23.04765241918782, + "learning_rate": 2.6925370039403024e-06, + "loss": 0.15731735229492189, + "step": 123210 + }, + { + "epoch": 1.0654036713906494, + "grad_norm": 6.443826035301455, + "learning_rate": 2.6923343442817398e-06, + "loss": 0.04475555419921875, + "step": 123215 + }, + { + "epoch": 1.0654469049121926, + "grad_norm": 0.3251753321869574, + "learning_rate": 2.6921316860421034e-06, + "loss": 0.2665191650390625, + "step": 123220 + }, + { + "epoch": 1.065490138433736, + "grad_norm": 0.6739272235899936, + "learning_rate": 2.691929029222328e-06, + "loss": 0.0182464599609375, + "step": 123225 + }, + { + "epoch": 1.0655333719552793, + "grad_norm": 3.8140315452297315, + "learning_rate": 2.6917263738233493e-06, + "loss": 0.131683349609375, + "step": 123230 + }, + { + "epoch": 1.0655766054768225, + "grad_norm": 0.47277803697819365, + "learning_rate": 2.6915237198461014e-06, + "loss": 0.08007431030273438, + "step": 123235 + }, + { + "epoch": 1.0656198389983658, + "grad_norm": 0.18878978305981034, + "learning_rate": 2.691321067291519e-06, + "loss": 0.0266815185546875, + "step": 123240 + }, + { + "epoch": 1.065663072519909, + "grad_norm": 0.6634220866839924, + "learning_rate": 2.6911184161605367e-06, + "loss": 0.09690933227539063, + "step": 123245 + }, + { + "epoch": 1.0657063060414522, + "grad_norm": 51.61421046480833, + "learning_rate": 2.6909157664540886e-06, + "loss": 0.0742218017578125, + "step": 123250 + }, + { + "epoch": 1.0657495395629955, + "grad_norm": 0.5404537357592456, + "learning_rate": 2.6907131181731093e-06, + "loss": 0.08450775146484375, + "step": 123255 + }, + { + "epoch": 1.065792773084539, + "grad_norm": 2.6802595098641517, + "learning_rate": 2.690510471318534e-06, + "loss": 0.02444915771484375, + "step": 123260 + }, + { + "epoch": 1.0658360066060821, + "grad_norm": 1.32706848510185, + "learning_rate": 2.6903078258912974e-06, + "loss": 0.057969284057617185, + "step": 123265 + }, + { + "epoch": 1.0658792401276254, + "grad_norm": 2.7892658740531586, + "learning_rate": 2.690105181892334e-06, + "loss": 0.03773231506347656, + "step": 123270 + }, + { + "epoch": 1.0659224736491686, + "grad_norm": 0.7206008136083408, + "learning_rate": 2.689902539322578e-06, + "loss": 0.15592803955078124, + "step": 123275 + }, + { + "epoch": 1.0659657071707118, + "grad_norm": 58.59141135804429, + "learning_rate": 2.689699898182963e-06, + "loss": 0.32726287841796875, + "step": 123280 + }, + { + "epoch": 1.066008940692255, + "grad_norm": 0.5118542705501131, + "learning_rate": 2.6894972584744255e-06, + "loss": 0.08017997741699219, + "step": 123285 + }, + { + "epoch": 1.0660521742137985, + "grad_norm": 1.4825558023266845, + "learning_rate": 2.6892946201978984e-06, + "loss": 0.025856781005859374, + "step": 123290 + }, + { + "epoch": 1.0660954077353417, + "grad_norm": 3.98906565478256, + "learning_rate": 2.689091983354318e-06, + "loss": 0.029854583740234374, + "step": 123295 + }, + { + "epoch": 1.066138641256885, + "grad_norm": 3.481406638998241, + "learning_rate": 2.688889347944618e-06, + "loss": 0.3698890686035156, + "step": 123300 + }, + { + "epoch": 1.0661818747784282, + "grad_norm": 3.9910919121510937, + "learning_rate": 2.6886867139697326e-06, + "loss": 0.17815895080566407, + "step": 123305 + }, + { + "epoch": 1.0662251082999714, + "grad_norm": 0.6429354296005899, + "learning_rate": 2.6884840814305963e-06, + "loss": 0.026853561401367188, + "step": 123310 + }, + { + "epoch": 1.0662683418215146, + "grad_norm": 0.9038780760680288, + "learning_rate": 2.6882814503281444e-06, + "loss": 0.03872756958007813, + "step": 123315 + }, + { + "epoch": 1.066311575343058, + "grad_norm": 1.2990758397387667, + "learning_rate": 2.6880788206633098e-06, + "loss": 0.03658580780029297, + "step": 123320 + }, + { + "epoch": 1.0663548088646013, + "grad_norm": 25.486109745170445, + "learning_rate": 2.687876192437029e-06, + "loss": 0.12952804565429688, + "step": 123325 + }, + { + "epoch": 1.0663980423861446, + "grad_norm": 1.1452721263501882, + "learning_rate": 2.687673565650236e-06, + "loss": 0.15145339965820312, + "step": 123330 + }, + { + "epoch": 1.0664412759076878, + "grad_norm": 17.465844646693768, + "learning_rate": 2.687470940303864e-06, + "loss": 0.0615875244140625, + "step": 123335 + }, + { + "epoch": 1.066484509429231, + "grad_norm": 2.8737348719485496, + "learning_rate": 2.687268316398849e-06, + "loss": 0.18852996826171875, + "step": 123340 + }, + { + "epoch": 1.0665277429507742, + "grad_norm": 2.744660369573161, + "learning_rate": 2.687065693936125e-06, + "loss": 0.013589859008789062, + "step": 123345 + }, + { + "epoch": 1.0665709764723177, + "grad_norm": 20.475261282807576, + "learning_rate": 2.686863072916627e-06, + "loss": 0.15721664428710938, + "step": 123350 + }, + { + "epoch": 1.066614209993861, + "grad_norm": 0.27197567305815407, + "learning_rate": 2.6866604533412866e-06, + "loss": 0.02504730224609375, + "step": 123355 + }, + { + "epoch": 1.0666574435154041, + "grad_norm": 7.76394950087231, + "learning_rate": 2.686457835211042e-06, + "loss": 0.04573822021484375, + "step": 123360 + }, + { + "epoch": 1.0667006770369474, + "grad_norm": 0.710725402590435, + "learning_rate": 2.686255218526826e-06, + "loss": 0.017779541015625, + "step": 123365 + }, + { + "epoch": 1.0667439105584906, + "grad_norm": 0.27896195601199664, + "learning_rate": 2.6860526032895736e-06, + "loss": 0.11445465087890624, + "step": 123370 + }, + { + "epoch": 1.0667871440800338, + "grad_norm": 0.5862931452247137, + "learning_rate": 2.685849989500219e-06, + "loss": 0.0062770843505859375, + "step": 123375 + }, + { + "epoch": 1.066830377601577, + "grad_norm": 0.551539983260346, + "learning_rate": 2.6856473771596958e-06, + "loss": 0.0626556396484375, + "step": 123380 + }, + { + "epoch": 1.0668736111231205, + "grad_norm": 0.16166864315214677, + "learning_rate": 2.685444766268938e-06, + "loss": 0.0580108642578125, + "step": 123385 + }, + { + "epoch": 1.0669168446446637, + "grad_norm": 31.31909774802007, + "learning_rate": 2.6852421568288833e-06, + "loss": 0.126824951171875, + "step": 123390 + }, + { + "epoch": 1.066960078166207, + "grad_norm": 0.07058386053669768, + "learning_rate": 2.6850395488404625e-06, + "loss": 0.04035873413085937, + "step": 123395 + }, + { + "epoch": 1.0670033116877502, + "grad_norm": 12.94879825945549, + "learning_rate": 2.684836942304612e-06, + "loss": 0.06228561401367187, + "step": 123400 + }, + { + "epoch": 1.0670465452092934, + "grad_norm": 0.039825719866431755, + "learning_rate": 2.6846343372222666e-06, + "loss": 0.02237110137939453, + "step": 123405 + }, + { + "epoch": 1.0670897787308367, + "grad_norm": 0.5592299945192538, + "learning_rate": 2.684431733594359e-06, + "loss": 0.005856132507324219, + "step": 123410 + }, + { + "epoch": 1.06713301225238, + "grad_norm": 6.1291397243395185, + "learning_rate": 2.6842291314218232e-06, + "loss": 0.06360378265380859, + "step": 123415 + }, + { + "epoch": 1.0671762457739233, + "grad_norm": 4.217098264490751, + "learning_rate": 2.684026530705596e-06, + "loss": 0.06272506713867188, + "step": 123420 + }, + { + "epoch": 1.0672194792954666, + "grad_norm": 0.978839164219865, + "learning_rate": 2.68382393144661e-06, + "loss": 0.012617874145507812, + "step": 123425 + }, + { + "epoch": 1.0672627128170098, + "grad_norm": 0.21493838304677312, + "learning_rate": 2.683621333645801e-06, + "loss": 0.023276901245117186, + "step": 123430 + }, + { + "epoch": 1.067305946338553, + "grad_norm": 0.7889588169369023, + "learning_rate": 2.683418737304102e-06, + "loss": 0.1505807876586914, + "step": 123435 + }, + { + "epoch": 1.0673491798600963, + "grad_norm": 3.9019691818466153, + "learning_rate": 2.6832161424224483e-06, + "loss": 0.02295379638671875, + "step": 123440 + }, + { + "epoch": 1.0673924133816395, + "grad_norm": 24.940401727175388, + "learning_rate": 2.6830135490017727e-06, + "loss": 0.09042491912841796, + "step": 123445 + }, + { + "epoch": 1.067435646903183, + "grad_norm": 0.6398620652977697, + "learning_rate": 2.6828109570430104e-06, + "loss": 0.03198089599609375, + "step": 123450 + }, + { + "epoch": 1.0674788804247262, + "grad_norm": 1.777304443511737, + "learning_rate": 2.682608366547096e-06, + "loss": 0.03529510498046875, + "step": 123455 + }, + { + "epoch": 1.0675221139462694, + "grad_norm": 3.9425966401960215, + "learning_rate": 2.6824057775149647e-06, + "loss": 0.0486053466796875, + "step": 123460 + }, + { + "epoch": 1.0675653474678126, + "grad_norm": 2.269330495334927, + "learning_rate": 2.6822031899475503e-06, + "loss": 0.0749053955078125, + "step": 123465 + }, + { + "epoch": 1.0676085809893558, + "grad_norm": 6.196399832065868, + "learning_rate": 2.682000603845786e-06, + "loss": 0.06121368408203125, + "step": 123470 + }, + { + "epoch": 1.067651814510899, + "grad_norm": 46.381051617871066, + "learning_rate": 2.6817980192106067e-06, + "loss": 0.1923084259033203, + "step": 123475 + }, + { + "epoch": 1.0676950480324425, + "grad_norm": 23.631581172072636, + "learning_rate": 2.6815954360429457e-06, + "loss": 0.08067626953125, + "step": 123480 + }, + { + "epoch": 1.0677382815539858, + "grad_norm": 26.426179162380055, + "learning_rate": 2.68139285434374e-06, + "loss": 0.2461620330810547, + "step": 123485 + }, + { + "epoch": 1.067781515075529, + "grad_norm": 0.11607180493682152, + "learning_rate": 2.681190274113922e-06, + "loss": 0.057521820068359375, + "step": 123490 + }, + { + "epoch": 1.0678247485970722, + "grad_norm": 0.13209926992652635, + "learning_rate": 2.6809876953544267e-06, + "loss": 0.13173828125, + "step": 123495 + }, + { + "epoch": 1.0678679821186154, + "grad_norm": 13.646570101077451, + "learning_rate": 2.680785118066188e-06, + "loss": 0.08852252960205079, + "step": 123500 + }, + { + "epoch": 1.0679112156401587, + "grad_norm": 35.26516682876308, + "learning_rate": 2.6805825422501388e-06, + "loss": 0.13947982788085939, + "step": 123505 + }, + { + "epoch": 1.067954449161702, + "grad_norm": 13.424943177307943, + "learning_rate": 2.6803799679072143e-06, + "loss": 0.06335678100585937, + "step": 123510 + }, + { + "epoch": 1.0679976826832454, + "grad_norm": 3.730147472253294, + "learning_rate": 2.6801773950383507e-06, + "loss": 0.035461044311523436, + "step": 123515 + }, + { + "epoch": 1.0680409162047886, + "grad_norm": 3.450434267540867, + "learning_rate": 2.6799748236444807e-06, + "loss": 0.09684867858886718, + "step": 123520 + }, + { + "epoch": 1.0680841497263318, + "grad_norm": 3.7092566800064946, + "learning_rate": 2.6797722537265385e-06, + "loss": 0.09965667724609376, + "step": 123525 + }, + { + "epoch": 1.068127383247875, + "grad_norm": 14.956950385365914, + "learning_rate": 2.679569685285458e-06, + "loss": 0.15755577087402345, + "step": 123530 + }, + { + "epoch": 1.0681706167694183, + "grad_norm": 4.863733996676338, + "learning_rate": 2.6793671183221737e-06, + "loss": 0.046149826049804686, + "step": 123535 + }, + { + "epoch": 1.0682138502909615, + "grad_norm": 9.183441288973382, + "learning_rate": 2.6791645528376206e-06, + "loss": 0.067291259765625, + "step": 123540 + }, + { + "epoch": 1.068257083812505, + "grad_norm": 2.9160210932487964, + "learning_rate": 2.6789619888327302e-06, + "loss": 0.08730087280273438, + "step": 123545 + }, + { + "epoch": 1.0683003173340482, + "grad_norm": 1.3556944086240394, + "learning_rate": 2.6787594263084408e-06, + "loss": 0.039821624755859375, + "step": 123550 + }, + { + "epoch": 1.0683435508555914, + "grad_norm": 5.962455004093565, + "learning_rate": 2.678556865265684e-06, + "loss": 0.09803848266601563, + "step": 123555 + }, + { + "epoch": 1.0683867843771346, + "grad_norm": 39.4111887886401, + "learning_rate": 2.6783543057053944e-06, + "loss": 0.13175506591796876, + "step": 123560 + }, + { + "epoch": 1.0684300178986779, + "grad_norm": 11.662976000466092, + "learning_rate": 2.678151747628507e-06, + "loss": 0.06235504150390625, + "step": 123565 + }, + { + "epoch": 1.068473251420221, + "grad_norm": 9.533964267167827, + "learning_rate": 2.677949191035955e-06, + "loss": 0.02730560302734375, + "step": 123570 + }, + { + "epoch": 1.0685164849417645, + "grad_norm": 3.4034501951533356, + "learning_rate": 2.677746635928671e-06, + "loss": 0.1121368408203125, + "step": 123575 + }, + { + "epoch": 1.0685597184633078, + "grad_norm": 7.936163366135417, + "learning_rate": 2.677544082307593e-06, + "loss": 0.0773345947265625, + "step": 123580 + }, + { + "epoch": 1.068602951984851, + "grad_norm": 0.7952409051267908, + "learning_rate": 2.6773415301736535e-06, + "loss": 0.03594284057617188, + "step": 123585 + }, + { + "epoch": 1.0686461855063942, + "grad_norm": 5.488387139948549, + "learning_rate": 2.677138979527785e-06, + "loss": 0.13425083160400392, + "step": 123590 + }, + { + "epoch": 1.0686894190279375, + "grad_norm": 16.60882924476899, + "learning_rate": 2.676936430370924e-06, + "loss": 0.05074329376220703, + "step": 123595 + }, + { + "epoch": 1.0687326525494807, + "grad_norm": 18.724973958276824, + "learning_rate": 2.6767338827040034e-06, + "loss": 0.12401466369628907, + "step": 123600 + }, + { + "epoch": 1.0687758860710241, + "grad_norm": 1.112046719483195, + "learning_rate": 2.6765313365279568e-06, + "loss": 0.0209930419921875, + "step": 123605 + }, + { + "epoch": 1.0688191195925674, + "grad_norm": 19.668867396047787, + "learning_rate": 2.67632879184372e-06, + "loss": 0.0906768798828125, + "step": 123610 + }, + { + "epoch": 1.0688623531141106, + "grad_norm": 0.21818348784885672, + "learning_rate": 2.6761262486522264e-06, + "loss": 0.029750823974609375, + "step": 123615 + }, + { + "epoch": 1.0689055866356538, + "grad_norm": 1.1068789639177656, + "learning_rate": 2.675923706954409e-06, + "loss": 0.19017486572265624, + "step": 123620 + }, + { + "epoch": 1.068948820157197, + "grad_norm": 3.045942292328969, + "learning_rate": 2.6757211667512034e-06, + "loss": 0.05350341796875, + "step": 123625 + }, + { + "epoch": 1.0689920536787403, + "grad_norm": 0.4819178122373705, + "learning_rate": 2.6755186280435437e-06, + "loss": 0.0753509521484375, + "step": 123630 + }, + { + "epoch": 1.0690352872002835, + "grad_norm": 2.581910456192036, + "learning_rate": 2.675316090832363e-06, + "loss": 0.04796867370605469, + "step": 123635 + }, + { + "epoch": 1.069078520721827, + "grad_norm": 0.7520786230964711, + "learning_rate": 2.6751135551185945e-06, + "loss": 0.13257064819335937, + "step": 123640 + }, + { + "epoch": 1.0691217542433702, + "grad_norm": 1.9580647721500333, + "learning_rate": 2.674911020903175e-06, + "loss": 0.033475494384765624, + "step": 123645 + }, + { + "epoch": 1.0691649877649134, + "grad_norm": 0.7059453778317757, + "learning_rate": 2.6747084881870366e-06, + "loss": 0.05823974609375, + "step": 123650 + }, + { + "epoch": 1.0692082212864567, + "grad_norm": 0.7689386267621877, + "learning_rate": 2.6745059569711144e-06, + "loss": 0.03763275146484375, + "step": 123655 + }, + { + "epoch": 1.0692514548079999, + "grad_norm": 0.4619944125423066, + "learning_rate": 2.6743034272563416e-06, + "loss": 0.14239044189453126, + "step": 123660 + }, + { + "epoch": 1.069294688329543, + "grad_norm": 1.1945433128864233, + "learning_rate": 2.674100899043653e-06, + "loss": 0.04358596801757812, + "step": 123665 + }, + { + "epoch": 1.0693379218510866, + "grad_norm": 0.4501401245051406, + "learning_rate": 2.6738983723339812e-06, + "loss": 0.020950508117675782, + "step": 123670 + }, + { + "epoch": 1.0693811553726298, + "grad_norm": 0.5726502364124088, + "learning_rate": 2.673695847128262e-06, + "loss": 0.223931884765625, + "step": 123675 + }, + { + "epoch": 1.069424388894173, + "grad_norm": 1.6901323433541586, + "learning_rate": 2.6734933234274284e-06, + "loss": 0.044746780395507814, + "step": 123680 + }, + { + "epoch": 1.0694676224157162, + "grad_norm": 1.4795291421797288, + "learning_rate": 2.673290801232416e-06, + "loss": 0.01004486083984375, + "step": 123685 + }, + { + "epoch": 1.0695108559372595, + "grad_norm": 1.157248618609548, + "learning_rate": 2.6730882805441565e-06, + "loss": 0.015355682373046875, + "step": 123690 + }, + { + "epoch": 1.0695540894588027, + "grad_norm": 11.96827089119085, + "learning_rate": 2.6728857613635857e-06, + "loss": 0.09300460815429687, + "step": 123695 + }, + { + "epoch": 1.069597322980346, + "grad_norm": 1.3295552208929833, + "learning_rate": 2.6726832436916356e-06, + "loss": 0.0430267333984375, + "step": 123700 + }, + { + "epoch": 1.0696405565018894, + "grad_norm": 18.096319757340158, + "learning_rate": 2.6724807275292413e-06, + "loss": 0.0412200927734375, + "step": 123705 + }, + { + "epoch": 1.0696837900234326, + "grad_norm": 13.57699651168808, + "learning_rate": 2.672278212877338e-06, + "loss": 0.08598861694335938, + "step": 123710 + }, + { + "epoch": 1.0697270235449758, + "grad_norm": 2.387704123525143, + "learning_rate": 2.6720756997368588e-06, + "loss": 0.06346874237060547, + "step": 123715 + }, + { + "epoch": 1.069770257066519, + "grad_norm": 7.00189161305316, + "learning_rate": 2.6718731881087374e-06, + "loss": 0.049383544921875, + "step": 123720 + }, + { + "epoch": 1.0698134905880623, + "grad_norm": 4.2012071148614405, + "learning_rate": 2.6716706779939076e-06, + "loss": 0.0442474365234375, + "step": 123725 + }, + { + "epoch": 1.0698567241096055, + "grad_norm": 4.0961112104608866, + "learning_rate": 2.6714681693933035e-06, + "loss": 0.021091651916503907, + "step": 123730 + }, + { + "epoch": 1.069899957631149, + "grad_norm": 12.2606583135108, + "learning_rate": 2.6712656623078583e-06, + "loss": 0.053195953369140625, + "step": 123735 + }, + { + "epoch": 1.0699431911526922, + "grad_norm": 28.325490591964854, + "learning_rate": 2.671063156738508e-06, + "loss": 0.3842134475708008, + "step": 123740 + }, + { + "epoch": 1.0699864246742354, + "grad_norm": 1.5624445691063087, + "learning_rate": 2.6708606526861853e-06, + "loss": 0.022190093994140625, + "step": 123745 + }, + { + "epoch": 1.0700296581957787, + "grad_norm": 1.1957088958815678, + "learning_rate": 2.6706581501518244e-06, + "loss": 0.16230010986328125, + "step": 123750 + }, + { + "epoch": 1.070072891717322, + "grad_norm": 0.10565795127978406, + "learning_rate": 2.6704556491363584e-06, + "loss": 0.017098236083984374, + "step": 123755 + }, + { + "epoch": 1.0701161252388651, + "grad_norm": 4.295483570374866, + "learning_rate": 2.6702531496407223e-06, + "loss": 0.0394775390625, + "step": 123760 + }, + { + "epoch": 1.0701593587604084, + "grad_norm": 2.5183982334203328, + "learning_rate": 2.6700506516658482e-06, + "loss": 0.028184127807617188, + "step": 123765 + }, + { + "epoch": 1.0702025922819518, + "grad_norm": 1.1132597943576021, + "learning_rate": 2.6698481552126727e-06, + "loss": 0.078466796875, + "step": 123770 + }, + { + "epoch": 1.070245825803495, + "grad_norm": 18.274752648365773, + "learning_rate": 2.6696456602821283e-06, + "loss": 0.1017425537109375, + "step": 123775 + }, + { + "epoch": 1.0702890593250383, + "grad_norm": 32.254964526049385, + "learning_rate": 2.6694431668751493e-06, + "loss": 0.09021034240722656, + "step": 123780 + }, + { + "epoch": 1.0703322928465815, + "grad_norm": 2.44975757142824, + "learning_rate": 2.6692406749926677e-06, + "loss": 0.07518997192382812, + "step": 123785 + }, + { + "epoch": 1.0703755263681247, + "grad_norm": 0.13530934247194654, + "learning_rate": 2.6690381846356205e-06, + "loss": 0.053102874755859376, + "step": 123790 + }, + { + "epoch": 1.0704187598896682, + "grad_norm": 0.7144336570033806, + "learning_rate": 2.6688356958049394e-06, + "loss": 0.010543060302734376, + "step": 123795 + }, + { + "epoch": 1.0704619934112114, + "grad_norm": 11.777923599449574, + "learning_rate": 2.6686332085015574e-06, + "loss": 0.05914154052734375, + "step": 123800 + }, + { + "epoch": 1.0705052269327546, + "grad_norm": 21.822578003902855, + "learning_rate": 2.6684307227264116e-06, + "loss": 0.04707260131835937, + "step": 123805 + }, + { + "epoch": 1.0705484604542979, + "grad_norm": 31.547857958311397, + "learning_rate": 2.6682282384804336e-06, + "loss": 0.309100341796875, + "step": 123810 + }, + { + "epoch": 1.070591693975841, + "grad_norm": 0.6200112452470227, + "learning_rate": 2.668025755764557e-06, + "loss": 0.12026081085205079, + "step": 123815 + }, + { + "epoch": 1.0706349274973843, + "grad_norm": 1.027509799493826, + "learning_rate": 2.667823274579717e-06, + "loss": 0.11064872741699219, + "step": 123820 + }, + { + "epoch": 1.0706781610189275, + "grad_norm": 2.1416694612978406, + "learning_rate": 2.667620794926847e-06, + "loss": 0.043947601318359376, + "step": 123825 + }, + { + "epoch": 1.070721394540471, + "grad_norm": 1.8960874208036014, + "learning_rate": 2.667418316806879e-06, + "loss": 0.024148941040039062, + "step": 123830 + }, + { + "epoch": 1.0707646280620142, + "grad_norm": 4.743916777966711, + "learning_rate": 2.6672158402207494e-06, + "loss": 0.10158767700195312, + "step": 123835 + }, + { + "epoch": 1.0708078615835575, + "grad_norm": 1.3970694605675211, + "learning_rate": 2.6670133651693915e-06, + "loss": 0.04275703430175781, + "step": 123840 + }, + { + "epoch": 1.0708510951051007, + "grad_norm": 23.02747626539508, + "learning_rate": 2.6668108916537373e-06, + "loss": 0.212957763671875, + "step": 123845 + }, + { + "epoch": 1.070894328626644, + "grad_norm": 1.288772233227615, + "learning_rate": 2.666608419674723e-06, + "loss": 0.05095748901367188, + "step": 123850 + }, + { + "epoch": 1.0709375621481871, + "grad_norm": 0.025985347040512526, + "learning_rate": 2.6664059492332804e-06, + "loss": 0.0417999267578125, + "step": 123855 + }, + { + "epoch": 1.0709807956697306, + "grad_norm": 5.6645908286277935, + "learning_rate": 2.6662034803303436e-06, + "loss": 0.08437347412109375, + "step": 123860 + }, + { + "epoch": 1.0710240291912738, + "grad_norm": 5.9945274335916, + "learning_rate": 2.666001012966848e-06, + "loss": 0.02627143859863281, + "step": 123865 + }, + { + "epoch": 1.071067262712817, + "grad_norm": 0.43919458436990655, + "learning_rate": 2.665798547143726e-06, + "loss": 0.019174957275390626, + "step": 123870 + }, + { + "epoch": 1.0711104962343603, + "grad_norm": 27.070640462633058, + "learning_rate": 2.665596082861911e-06, + "loss": 0.3402587890625, + "step": 123875 + }, + { + "epoch": 1.0711537297559035, + "grad_norm": 0.4558381910719648, + "learning_rate": 2.665393620122338e-06, + "loss": 0.0527740478515625, + "step": 123880 + }, + { + "epoch": 1.0711969632774467, + "grad_norm": 0.8648524447843914, + "learning_rate": 2.66519115892594e-06, + "loss": 0.018564605712890626, + "step": 123885 + }, + { + "epoch": 1.07124019679899, + "grad_norm": 13.43833479056939, + "learning_rate": 2.6649886992736506e-06, + "loss": 0.0668426513671875, + "step": 123890 + }, + { + "epoch": 1.0712834303205334, + "grad_norm": 2.055719993508754, + "learning_rate": 2.6647862411664024e-06, + "loss": 0.33298797607421876, + "step": 123895 + }, + { + "epoch": 1.0713266638420766, + "grad_norm": 26.336004016351374, + "learning_rate": 2.664583784605132e-06, + "loss": 0.08995094299316406, + "step": 123900 + }, + { + "epoch": 1.0713698973636199, + "grad_norm": 1.3248105349474693, + "learning_rate": 2.664381329590771e-06, + "loss": 0.01112823486328125, + "step": 123905 + }, + { + "epoch": 1.071413130885163, + "grad_norm": 0.619641509937588, + "learning_rate": 2.664178876124254e-06, + "loss": 0.09061698913574219, + "step": 123910 + }, + { + "epoch": 1.0714563644067063, + "grad_norm": 1.3679281920399815, + "learning_rate": 2.6639764242065145e-06, + "loss": 0.046724700927734376, + "step": 123915 + }, + { + "epoch": 1.0714995979282496, + "grad_norm": 0.3315412246111638, + "learning_rate": 2.663773973838486e-06, + "loss": 0.03289356231689453, + "step": 123920 + }, + { + "epoch": 1.071542831449793, + "grad_norm": 3.410283380041028, + "learning_rate": 2.6635715250211014e-06, + "loss": 0.018927001953125, + "step": 123925 + }, + { + "epoch": 1.0715860649713362, + "grad_norm": 0.43573722195094217, + "learning_rate": 2.663369077755295e-06, + "loss": 0.09302635192871093, + "step": 123930 + }, + { + "epoch": 1.0716292984928795, + "grad_norm": 9.037178719644801, + "learning_rate": 2.6631666320420017e-06, + "loss": 0.039776611328125, + "step": 123935 + }, + { + "epoch": 1.0716725320144227, + "grad_norm": 8.273898596009666, + "learning_rate": 2.6629641878821543e-06, + "loss": 0.40774688720703123, + "step": 123940 + }, + { + "epoch": 1.071715765535966, + "grad_norm": 1.3773023735929486, + "learning_rate": 2.662761745276686e-06, + "loss": 0.012819671630859375, + "step": 123945 + }, + { + "epoch": 1.0717589990575092, + "grad_norm": 0.3113933537640994, + "learning_rate": 2.6625593042265304e-06, + "loss": 0.11989707946777343, + "step": 123950 + }, + { + "epoch": 1.0718022325790524, + "grad_norm": 10.444452399790709, + "learning_rate": 2.662356864732621e-06, + "loss": 0.023630523681640626, + "step": 123955 + }, + { + "epoch": 1.0718454661005958, + "grad_norm": 10.049266278597548, + "learning_rate": 2.6621544267958927e-06, + "loss": 0.09390449523925781, + "step": 123960 + }, + { + "epoch": 1.071888699622139, + "grad_norm": 15.844534906274001, + "learning_rate": 2.6619519904172783e-06, + "loss": 0.24493637084960937, + "step": 123965 + }, + { + "epoch": 1.0719319331436823, + "grad_norm": 3.756552740117459, + "learning_rate": 2.661749555597712e-06, + "loss": 0.02471771240234375, + "step": 123970 + }, + { + "epoch": 1.0719751666652255, + "grad_norm": 3.598973129904365, + "learning_rate": 2.661547122338126e-06, + "loss": 0.107464599609375, + "step": 123975 + }, + { + "epoch": 1.0720184001867687, + "grad_norm": 4.3156861626158545, + "learning_rate": 2.6613446906394548e-06, + "loss": 0.09022235870361328, + "step": 123980 + }, + { + "epoch": 1.072061633708312, + "grad_norm": 0.12043947976151043, + "learning_rate": 2.6611422605026325e-06, + "loss": 0.020064926147460936, + "step": 123985 + }, + { + "epoch": 1.0721048672298554, + "grad_norm": 2.188850192582435, + "learning_rate": 2.6609398319285908e-06, + "loss": 0.13271141052246094, + "step": 123990 + }, + { + "epoch": 1.0721481007513987, + "grad_norm": 21.15336824125891, + "learning_rate": 2.660737404918266e-06, + "loss": 0.0915924072265625, + "step": 123995 + }, + { + "epoch": 1.0721913342729419, + "grad_norm": 0.2611700804946431, + "learning_rate": 2.6605349794725903e-06, + "loss": 0.05009613037109375, + "step": 124000 + }, + { + "epoch": 1.0722345677944851, + "grad_norm": 2.335505725478338, + "learning_rate": 2.660332555592497e-06, + "loss": 0.10824899673461914, + "step": 124005 + }, + { + "epoch": 1.0722778013160283, + "grad_norm": 0.5095669881660896, + "learning_rate": 2.6601301332789196e-06, + "loss": 0.08033084869384766, + "step": 124010 + }, + { + "epoch": 1.0723210348375716, + "grad_norm": 5.01454242715693, + "learning_rate": 2.6599277125327925e-06, + "loss": 0.02595977783203125, + "step": 124015 + }, + { + "epoch": 1.072364268359115, + "grad_norm": 0.31600302811003056, + "learning_rate": 2.6597252933550473e-06, + "loss": 0.022832107543945313, + "step": 124020 + }, + { + "epoch": 1.0724075018806583, + "grad_norm": 0.3774216712945353, + "learning_rate": 2.6595228757466208e-06, + "loss": 0.06137542724609375, + "step": 124025 + }, + { + "epoch": 1.0724507354022015, + "grad_norm": 73.78301531640928, + "learning_rate": 2.6593204597084443e-06, + "loss": 0.272613525390625, + "step": 124030 + }, + { + "epoch": 1.0724939689237447, + "grad_norm": 4.963886188070404, + "learning_rate": 2.6591180452414516e-06, + "loss": 0.1245361328125, + "step": 124035 + }, + { + "epoch": 1.072537202445288, + "grad_norm": 3.55212528618704, + "learning_rate": 2.6589156323465763e-06, + "loss": 0.08770370483398438, + "step": 124040 + }, + { + "epoch": 1.0725804359668312, + "grad_norm": 47.7504429817132, + "learning_rate": 2.658713221024752e-06, + "loss": 0.024912071228027344, + "step": 124045 + }, + { + "epoch": 1.0726236694883746, + "grad_norm": 71.6803513852415, + "learning_rate": 2.6585108112769124e-06, + "loss": 0.40375213623046874, + "step": 124050 + }, + { + "epoch": 1.0726669030099178, + "grad_norm": 1.1399579620088283, + "learning_rate": 2.6583084031039895e-06, + "loss": 0.023522186279296874, + "step": 124055 + }, + { + "epoch": 1.072710136531461, + "grad_norm": 3.946709351626646, + "learning_rate": 2.6581059965069193e-06, + "loss": 0.031713104248046874, + "step": 124060 + }, + { + "epoch": 1.0727533700530043, + "grad_norm": 2.3068288045916123, + "learning_rate": 2.6579035914866344e-06, + "loss": 0.26361083984375, + "step": 124065 + }, + { + "epoch": 1.0727966035745475, + "grad_norm": 19.855791005749147, + "learning_rate": 2.6577011880440665e-06, + "loss": 0.14927825927734376, + "step": 124070 + }, + { + "epoch": 1.0728398370960908, + "grad_norm": 31.387277505659206, + "learning_rate": 2.6574987861801516e-06, + "loss": 0.1270416259765625, + "step": 124075 + }, + { + "epoch": 1.072883070617634, + "grad_norm": 0.14151805520336358, + "learning_rate": 2.657296385895822e-06, + "loss": 0.02589588165283203, + "step": 124080 + }, + { + "epoch": 1.0729263041391774, + "grad_norm": 20.806266624780445, + "learning_rate": 2.6570939871920095e-06, + "loss": 0.21037521362304687, + "step": 124085 + }, + { + "epoch": 1.0729695376607207, + "grad_norm": 1.803128667956689, + "learning_rate": 2.6568915900696512e-06, + "loss": 0.11691150665283204, + "step": 124090 + }, + { + "epoch": 1.073012771182264, + "grad_norm": 2.462784237928514, + "learning_rate": 2.6566891945296784e-06, + "loss": 0.027404022216796876, + "step": 124095 + }, + { + "epoch": 1.0730560047038071, + "grad_norm": 6.0741137304247435, + "learning_rate": 2.656486800573024e-06, + "loss": 0.04763526916503906, + "step": 124100 + }, + { + "epoch": 1.0730992382253504, + "grad_norm": 19.28321592501675, + "learning_rate": 2.6562844082006224e-06, + "loss": 0.09220809936523437, + "step": 124105 + }, + { + "epoch": 1.0731424717468936, + "grad_norm": 0.21092680859722143, + "learning_rate": 2.6560820174134073e-06, + "loss": 0.10558624267578125, + "step": 124110 + }, + { + "epoch": 1.073185705268437, + "grad_norm": 17.857975700571995, + "learning_rate": 2.6558796282123097e-06, + "loss": 0.1000213623046875, + "step": 124115 + }, + { + "epoch": 1.0732289387899803, + "grad_norm": 9.202661271994566, + "learning_rate": 2.6556772405982664e-06, + "loss": 0.07648687362670899, + "step": 124120 + }, + { + "epoch": 1.0732721723115235, + "grad_norm": 5.482853024287322, + "learning_rate": 2.6554748545722096e-06, + "loss": 0.144183349609375, + "step": 124125 + }, + { + "epoch": 1.0733154058330667, + "grad_norm": 8.206736941801584, + "learning_rate": 2.655272470135071e-06, + "loss": 0.03675270080566406, + "step": 124130 + }, + { + "epoch": 1.07335863935461, + "grad_norm": 2.558820665877372, + "learning_rate": 2.655070087287786e-06, + "loss": 0.034905624389648435, + "step": 124135 + }, + { + "epoch": 1.0734018728761532, + "grad_norm": 12.243039001166768, + "learning_rate": 2.654867706031288e-06, + "loss": 0.06404151916503906, + "step": 124140 + }, + { + "epoch": 1.0734451063976964, + "grad_norm": 3.515311459948503, + "learning_rate": 2.654665326366509e-06, + "loss": 0.02573089599609375, + "step": 124145 + }, + { + "epoch": 1.0734883399192399, + "grad_norm": 11.680357387325053, + "learning_rate": 2.6544629482943817e-06, + "loss": 0.08365936279296875, + "step": 124150 + }, + { + "epoch": 1.073531573440783, + "grad_norm": 0.856601035490619, + "learning_rate": 2.6542605718158418e-06, + "loss": 0.024315834045410156, + "step": 124155 + }, + { + "epoch": 1.0735748069623263, + "grad_norm": 0.537747783581112, + "learning_rate": 2.6540581969318223e-06, + "loss": 0.04879684448242187, + "step": 124160 + }, + { + "epoch": 1.0736180404838696, + "grad_norm": 14.557725641329935, + "learning_rate": 2.6538558236432555e-06, + "loss": 0.2049285888671875, + "step": 124165 + }, + { + "epoch": 1.0736612740054128, + "grad_norm": 13.793766701810135, + "learning_rate": 2.6536534519510748e-06, + "loss": 0.037000274658203124, + "step": 124170 + }, + { + "epoch": 1.073704507526956, + "grad_norm": 8.700192529497402, + "learning_rate": 2.653451081856214e-06, + "loss": 0.13540496826171874, + "step": 124175 + }, + { + "epoch": 1.0737477410484995, + "grad_norm": 0.07555827174536038, + "learning_rate": 2.6532487133596056e-06, + "loss": 0.027756309509277342, + "step": 124180 + }, + { + "epoch": 1.0737909745700427, + "grad_norm": 22.387719426575913, + "learning_rate": 2.6530463464621836e-06, + "loss": 0.15556793212890624, + "step": 124185 + }, + { + "epoch": 1.073834208091586, + "grad_norm": 0.40273421786237706, + "learning_rate": 2.6528439811648813e-06, + "loss": 0.03519363403320312, + "step": 124190 + }, + { + "epoch": 1.0738774416131291, + "grad_norm": 0.5301344674066506, + "learning_rate": 2.6526416174686327e-06, + "loss": 0.02898712158203125, + "step": 124195 + }, + { + "epoch": 1.0739206751346724, + "grad_norm": 1.1227051991815642, + "learning_rate": 2.65243925537437e-06, + "loss": 0.04737396240234375, + "step": 124200 + }, + { + "epoch": 1.0739639086562156, + "grad_norm": 4.8029094408568715, + "learning_rate": 2.652236894883026e-06, + "loss": 0.05108833312988281, + "step": 124205 + }, + { + "epoch": 1.0740071421777588, + "grad_norm": 0.7765266341136428, + "learning_rate": 2.6520345359955345e-06, + "loss": 0.09827499389648438, + "step": 124210 + }, + { + "epoch": 1.0740503756993023, + "grad_norm": 10.455038305836261, + "learning_rate": 2.6518321787128293e-06, + "loss": 0.32633056640625, + "step": 124215 + }, + { + "epoch": 1.0740936092208455, + "grad_norm": 19.009447667492946, + "learning_rate": 2.651629823035844e-06, + "loss": 0.11060905456542969, + "step": 124220 + }, + { + "epoch": 1.0741368427423887, + "grad_norm": 7.1736419417745605, + "learning_rate": 2.651427468965511e-06, + "loss": 0.14603900909423828, + "step": 124225 + }, + { + "epoch": 1.074180076263932, + "grad_norm": 3.080872054454041, + "learning_rate": 2.6512251165027638e-06, + "loss": 0.06602096557617188, + "step": 124230 + }, + { + "epoch": 1.0742233097854752, + "grad_norm": 7.906640590859201, + "learning_rate": 2.651022765648535e-06, + "loss": 0.029657363891601562, + "step": 124235 + }, + { + "epoch": 1.0742665433070184, + "grad_norm": 12.099620906305173, + "learning_rate": 2.6508204164037586e-06, + "loss": 0.10275192260742187, + "step": 124240 + }, + { + "epoch": 1.0743097768285619, + "grad_norm": 1.168436935018217, + "learning_rate": 2.6506180687693668e-06, + "loss": 0.2289590835571289, + "step": 124245 + }, + { + "epoch": 1.074353010350105, + "grad_norm": 19.886411285822764, + "learning_rate": 2.6504157227462946e-06, + "loss": 0.07969093322753906, + "step": 124250 + }, + { + "epoch": 1.0743962438716483, + "grad_norm": 3.1258548746743804, + "learning_rate": 2.6502133783354743e-06, + "loss": 0.20403594970703126, + "step": 124255 + }, + { + "epoch": 1.0744394773931916, + "grad_norm": 19.103953691888595, + "learning_rate": 2.6500110355378388e-06, + "loss": 0.1000152587890625, + "step": 124260 + }, + { + "epoch": 1.0744827109147348, + "grad_norm": 2.191819242380069, + "learning_rate": 2.649808694354321e-06, + "loss": 0.039617919921875, + "step": 124265 + }, + { + "epoch": 1.074525944436278, + "grad_norm": 42.645167873109, + "learning_rate": 2.649606354785855e-06, + "loss": 0.09363079071044922, + "step": 124270 + }, + { + "epoch": 1.0745691779578215, + "grad_norm": 0.6751981865079801, + "learning_rate": 2.6494040168333726e-06, + "loss": 0.10940132141113282, + "step": 124275 + }, + { + "epoch": 1.0746124114793647, + "grad_norm": 4.231137217274866, + "learning_rate": 2.649201680497809e-06, + "loss": 0.09935493469238281, + "step": 124280 + }, + { + "epoch": 1.074655645000908, + "grad_norm": 29.001123236838875, + "learning_rate": 2.6489993457800966e-06, + "loss": 0.0884490966796875, + "step": 124285 + }, + { + "epoch": 1.0746988785224512, + "grad_norm": 5.262577238100174, + "learning_rate": 2.648797012681168e-06, + "loss": 0.04082183837890625, + "step": 124290 + }, + { + "epoch": 1.0747421120439944, + "grad_norm": 22.412570426090547, + "learning_rate": 2.6485946812019555e-06, + "loss": 0.08103179931640625, + "step": 124295 + }, + { + "epoch": 1.0747853455655376, + "grad_norm": 4.841946762058717, + "learning_rate": 2.648392351343394e-06, + "loss": 0.03737335205078125, + "step": 124300 + }, + { + "epoch": 1.074828579087081, + "grad_norm": 5.728512021925279, + "learning_rate": 2.6481900231064164e-06, + "loss": 0.0612213134765625, + "step": 124305 + }, + { + "epoch": 1.0748718126086243, + "grad_norm": 10.092537362641593, + "learning_rate": 2.6479876964919535e-06, + "loss": 0.09432144165039062, + "step": 124310 + }, + { + "epoch": 1.0749150461301675, + "grad_norm": 0.37209269993589594, + "learning_rate": 2.647785371500942e-06, + "loss": 0.04419403076171875, + "step": 124315 + }, + { + "epoch": 1.0749582796517108, + "grad_norm": 0.16591581862700064, + "learning_rate": 2.647583048134313e-06, + "loss": 0.023949432373046874, + "step": 124320 + }, + { + "epoch": 1.075001513173254, + "grad_norm": 9.142014083480582, + "learning_rate": 2.647380726392999e-06, + "loss": 0.03525390625, + "step": 124325 + }, + { + "epoch": 1.0750447466947972, + "grad_norm": 0.7553778998822561, + "learning_rate": 2.647178406277935e-06, + "loss": 0.0096343994140625, + "step": 124330 + }, + { + "epoch": 1.0750879802163404, + "grad_norm": 1.4927093607299706, + "learning_rate": 2.646976087790053e-06, + "loss": 0.045787811279296875, + "step": 124335 + }, + { + "epoch": 1.075131213737884, + "grad_norm": 1.3243251301188925, + "learning_rate": 2.646773770930284e-06, + "loss": 0.1051849365234375, + "step": 124340 + }, + { + "epoch": 1.0751744472594271, + "grad_norm": 4.540690562368273, + "learning_rate": 2.6465714556995655e-06, + "loss": 0.090557861328125, + "step": 124345 + }, + { + "epoch": 1.0752176807809704, + "grad_norm": 4.61565563045504, + "learning_rate": 2.646369142098827e-06, + "loss": 0.024276351928710936, + "step": 124350 + }, + { + "epoch": 1.0752609143025136, + "grad_norm": 23.439279099907917, + "learning_rate": 2.6461668301290035e-06, + "loss": 0.15572586059570312, + "step": 124355 + }, + { + "epoch": 1.0753041478240568, + "grad_norm": 13.414879748514656, + "learning_rate": 2.6459645197910274e-06, + "loss": 0.045957183837890624, + "step": 124360 + }, + { + "epoch": 1.0753473813456, + "grad_norm": 10.927563058824122, + "learning_rate": 2.6457622110858315e-06, + "loss": 0.02539215087890625, + "step": 124365 + }, + { + "epoch": 1.0753906148671435, + "grad_norm": 1.9410740638852295, + "learning_rate": 2.6455599040143483e-06, + "loss": 0.011990737915039063, + "step": 124370 + }, + { + "epoch": 1.0754338483886867, + "grad_norm": 0.5151065651142999, + "learning_rate": 2.645357598577512e-06, + "loss": 0.030197525024414064, + "step": 124375 + }, + { + "epoch": 1.07547708191023, + "grad_norm": 4.209640812491961, + "learning_rate": 2.645155294776255e-06, + "loss": 0.02453155517578125, + "step": 124380 + }, + { + "epoch": 1.0755203154317732, + "grad_norm": 2.431138868246455, + "learning_rate": 2.6449529926115106e-06, + "loss": 0.12684669494628906, + "step": 124385 + }, + { + "epoch": 1.0755635489533164, + "grad_norm": 0.22516505112200064, + "learning_rate": 2.644750692084212e-06, + "loss": 0.045476531982421874, + "step": 124390 + }, + { + "epoch": 1.0756067824748596, + "grad_norm": 2.116294304218249, + "learning_rate": 2.6445483931952914e-06, + "loss": 0.09004974365234375, + "step": 124395 + }, + { + "epoch": 1.0756500159964029, + "grad_norm": 0.6409082709070917, + "learning_rate": 2.6443460959456816e-06, + "loss": 0.35759849548339845, + "step": 124400 + }, + { + "epoch": 1.0756932495179463, + "grad_norm": 22.747403557811765, + "learning_rate": 2.644143800336317e-06, + "loss": 0.04823150634765625, + "step": 124405 + }, + { + "epoch": 1.0757364830394895, + "grad_norm": 0.8170930255154182, + "learning_rate": 2.6439415063681288e-06, + "loss": 0.03721466064453125, + "step": 124410 + }, + { + "epoch": 1.0757797165610328, + "grad_norm": 5.571419738206097, + "learning_rate": 2.6437392140420517e-06, + "loss": 0.110406494140625, + "step": 124415 + }, + { + "epoch": 1.075822950082576, + "grad_norm": 23.772356218683782, + "learning_rate": 2.6435369233590183e-06, + "loss": 0.17498931884765626, + "step": 124420 + }, + { + "epoch": 1.0758661836041192, + "grad_norm": 8.292303779258532, + "learning_rate": 2.643334634319961e-06, + "loss": 0.02851524353027344, + "step": 124425 + }, + { + "epoch": 1.0759094171256625, + "grad_norm": 14.182493226544166, + "learning_rate": 2.643132346925812e-06, + "loss": 0.3925323486328125, + "step": 124430 + }, + { + "epoch": 1.075952650647206, + "grad_norm": 24.1093016658964, + "learning_rate": 2.6429300611775053e-06, + "loss": 0.153302001953125, + "step": 124435 + }, + { + "epoch": 1.0759958841687491, + "grad_norm": 21.708655694738464, + "learning_rate": 2.6427277770759733e-06, + "loss": 0.08442878723144531, + "step": 124440 + }, + { + "epoch": 1.0760391176902924, + "grad_norm": 0.4910374708390304, + "learning_rate": 2.6425254946221504e-06, + "loss": 0.060914039611816406, + "step": 124445 + }, + { + "epoch": 1.0760823512118356, + "grad_norm": 0.5913077811817617, + "learning_rate": 2.642323213816968e-06, + "loss": 0.010791778564453125, + "step": 124450 + }, + { + "epoch": 1.0761255847333788, + "grad_norm": 3.6918709603963777, + "learning_rate": 2.642120934661359e-06, + "loss": 0.044393062591552734, + "step": 124455 + }, + { + "epoch": 1.076168818254922, + "grad_norm": 7.218167757612888, + "learning_rate": 2.6419186571562563e-06, + "loss": 0.06697158813476563, + "step": 124460 + }, + { + "epoch": 1.0762120517764653, + "grad_norm": 4.557291716613008, + "learning_rate": 2.6417163813025933e-06, + "loss": 0.1153228759765625, + "step": 124465 + }, + { + "epoch": 1.0762552852980087, + "grad_norm": 10.181262739865593, + "learning_rate": 2.6415141071013022e-06, + "loss": 0.181732177734375, + "step": 124470 + }, + { + "epoch": 1.076298518819552, + "grad_norm": 0.44788623202440503, + "learning_rate": 2.6413118345533175e-06, + "loss": 0.01795654296875, + "step": 124475 + }, + { + "epoch": 1.0763417523410952, + "grad_norm": 3.947509935797016, + "learning_rate": 2.641109563659571e-06, + "loss": 0.13392486572265624, + "step": 124480 + }, + { + "epoch": 1.0763849858626384, + "grad_norm": 4.447745460630573, + "learning_rate": 2.6409072944209953e-06, + "loss": 0.05103225708007812, + "step": 124485 + }, + { + "epoch": 1.0764282193841817, + "grad_norm": 0.36282809744540767, + "learning_rate": 2.6407050268385226e-06, + "loss": 0.01273956298828125, + "step": 124490 + }, + { + "epoch": 1.0764714529057249, + "grad_norm": 3.3813042357546426, + "learning_rate": 2.6405027609130873e-06, + "loss": 0.016097831726074218, + "step": 124495 + }, + { + "epoch": 1.0765146864272683, + "grad_norm": 3.3738294250834397, + "learning_rate": 2.64030049664562e-06, + "loss": 0.06932754516601562, + "step": 124500 + }, + { + "epoch": 1.0765579199488116, + "grad_norm": 36.99098469362473, + "learning_rate": 2.6400982340370565e-06, + "loss": 0.07583961486816407, + "step": 124505 + }, + { + "epoch": 1.0766011534703548, + "grad_norm": 8.414711832723553, + "learning_rate": 2.639895973088328e-06, + "loss": 0.07280502319335938, + "step": 124510 + }, + { + "epoch": 1.076644386991898, + "grad_norm": 9.972350453046829, + "learning_rate": 2.639693713800368e-06, + "loss": 0.071875, + "step": 124515 + }, + { + "epoch": 1.0766876205134412, + "grad_norm": 0.4794233317745948, + "learning_rate": 2.639491456174108e-06, + "loss": 0.07219963073730469, + "step": 124520 + }, + { + "epoch": 1.0767308540349845, + "grad_norm": 0.23009948904397323, + "learning_rate": 2.639289200210482e-06, + "loss": 0.08745498657226562, + "step": 124525 + }, + { + "epoch": 1.076774087556528, + "grad_norm": 11.070381745726959, + "learning_rate": 2.6390869459104205e-06, + "loss": 0.08542938232421875, + "step": 124530 + }, + { + "epoch": 1.0768173210780712, + "grad_norm": 17.644266228352866, + "learning_rate": 2.63888469327486e-06, + "loss": 0.09748916625976563, + "step": 124535 + }, + { + "epoch": 1.0768605545996144, + "grad_norm": 19.130532937083096, + "learning_rate": 2.6386824423047314e-06, + "loss": 0.09362564086914063, + "step": 124540 + }, + { + "epoch": 1.0769037881211576, + "grad_norm": 37.476952273096, + "learning_rate": 2.638480193000967e-06, + "loss": 0.07985649108886719, + "step": 124545 + }, + { + "epoch": 1.0769470216427008, + "grad_norm": 2.3879532252495825, + "learning_rate": 2.6382779453645e-06, + "loss": 0.03216972351074219, + "step": 124550 + }, + { + "epoch": 1.076990255164244, + "grad_norm": 0.5959114722571542, + "learning_rate": 2.638075699396264e-06, + "loss": 0.038553619384765626, + "step": 124555 + }, + { + "epoch": 1.0770334886857875, + "grad_norm": 30.513172390321166, + "learning_rate": 2.6378734550971887e-06, + "loss": 0.19181442260742188, + "step": 124560 + }, + { + "epoch": 1.0770767222073308, + "grad_norm": 22.1567893799865, + "learning_rate": 2.6376712124682105e-06, + "loss": 0.14278106689453124, + "step": 124565 + }, + { + "epoch": 1.077119955728874, + "grad_norm": 1.217073974258837, + "learning_rate": 2.637468971510261e-06, + "loss": 0.04407501220703125, + "step": 124570 + }, + { + "epoch": 1.0771631892504172, + "grad_norm": 1.2751554072255578, + "learning_rate": 2.637266732224272e-06, + "loss": 0.030742645263671875, + "step": 124575 + }, + { + "epoch": 1.0772064227719604, + "grad_norm": 1.961971113628123, + "learning_rate": 2.637064494611177e-06, + "loss": 0.023044586181640625, + "step": 124580 + }, + { + "epoch": 1.0772496562935037, + "grad_norm": 12.15591282376099, + "learning_rate": 2.6368622586719087e-06, + "loss": 0.06488838195800781, + "step": 124585 + }, + { + "epoch": 1.077292889815047, + "grad_norm": 10.138730170325967, + "learning_rate": 2.6366600244073995e-06, + "loss": 0.15713233947753907, + "step": 124590 + }, + { + "epoch": 1.0773361233365903, + "grad_norm": 0.5183097241613255, + "learning_rate": 2.636457791818581e-06, + "loss": 0.012454605102539063, + "step": 124595 + }, + { + "epoch": 1.0773793568581336, + "grad_norm": 9.461931159866445, + "learning_rate": 2.636255560906388e-06, + "loss": 0.1485443115234375, + "step": 124600 + }, + { + "epoch": 1.0774225903796768, + "grad_norm": 75.06838299415188, + "learning_rate": 2.6360533316717525e-06, + "loss": 0.2860321044921875, + "step": 124605 + }, + { + "epoch": 1.07746582390122, + "grad_norm": 4.642062289020282, + "learning_rate": 2.6358511041156066e-06, + "loss": 0.29347896575927734, + "step": 124610 + }, + { + "epoch": 1.0775090574227633, + "grad_norm": 3.7102224747611894, + "learning_rate": 2.6356488782388835e-06, + "loss": 0.37360153198242185, + "step": 124615 + }, + { + "epoch": 1.0775522909443065, + "grad_norm": 1.8785547917818117, + "learning_rate": 2.6354466540425154e-06, + "loss": 0.10615882873535157, + "step": 124620 + }, + { + "epoch": 1.07759552446585, + "grad_norm": 0.10479816602998836, + "learning_rate": 2.6352444315274344e-06, + "loss": 0.030888748168945313, + "step": 124625 + }, + { + "epoch": 1.0776387579873932, + "grad_norm": 12.065269456014864, + "learning_rate": 2.6350422106945744e-06, + "loss": 0.08546714782714844, + "step": 124630 + }, + { + "epoch": 1.0776819915089364, + "grad_norm": 1.627748420126682, + "learning_rate": 2.634839991544867e-06, + "loss": 0.1305643081665039, + "step": 124635 + }, + { + "epoch": 1.0777252250304796, + "grad_norm": 4.932409250436669, + "learning_rate": 2.634637774079246e-06, + "loss": 0.04506702423095703, + "step": 124640 + }, + { + "epoch": 1.0777684585520229, + "grad_norm": 0.2379282852888097, + "learning_rate": 2.634435558298644e-06, + "loss": 0.018762588500976562, + "step": 124645 + }, + { + "epoch": 1.077811692073566, + "grad_norm": 24.127303228390804, + "learning_rate": 2.634233344203992e-06, + "loss": 0.07514076232910157, + "step": 124650 + }, + { + "epoch": 1.0778549255951093, + "grad_norm": 0.4729935986828824, + "learning_rate": 2.6340311317962233e-06, + "loss": 0.10782976150512695, + "step": 124655 + }, + { + "epoch": 1.0778981591166528, + "grad_norm": 55.147680737565416, + "learning_rate": 2.6338289210762705e-06, + "loss": 0.21872177124023437, + "step": 124660 + }, + { + "epoch": 1.077941392638196, + "grad_norm": 3.822164846824936, + "learning_rate": 2.6336267120450664e-06, + "loss": 0.20695953369140624, + "step": 124665 + }, + { + "epoch": 1.0779846261597392, + "grad_norm": 0.9102603636296116, + "learning_rate": 2.6334245047035443e-06, + "loss": 0.1569976806640625, + "step": 124670 + }, + { + "epoch": 1.0780278596812825, + "grad_norm": 1.7519757932235231, + "learning_rate": 2.633222299052636e-06, + "loss": 0.06206855773925781, + "step": 124675 + }, + { + "epoch": 1.0780710932028257, + "grad_norm": 15.523917138820822, + "learning_rate": 2.6330200950932738e-06, + "loss": 0.1507293701171875, + "step": 124680 + }, + { + "epoch": 1.078114326724369, + "grad_norm": 1.7192034867241444, + "learning_rate": 2.6328178928263896e-06, + "loss": 0.1399555206298828, + "step": 124685 + }, + { + "epoch": 1.0781575602459124, + "grad_norm": 0.1960495299378065, + "learning_rate": 2.632615692252918e-06, + "loss": 0.04328842163085937, + "step": 124690 + }, + { + "epoch": 1.0782007937674556, + "grad_norm": 1.0683821512179874, + "learning_rate": 2.632413493373789e-06, + "loss": 0.012671661376953126, + "step": 124695 + }, + { + "epoch": 1.0782440272889988, + "grad_norm": 51.1737683285928, + "learning_rate": 2.6322112961899377e-06, + "loss": 0.2769935607910156, + "step": 124700 + }, + { + "epoch": 1.078287260810542, + "grad_norm": 0.39145161501035625, + "learning_rate": 2.6320091007022953e-06, + "loss": 0.013473129272460938, + "step": 124705 + }, + { + "epoch": 1.0783304943320853, + "grad_norm": 2.251457785418518, + "learning_rate": 2.6318069069117948e-06, + "loss": 0.01701183319091797, + "step": 124710 + }, + { + "epoch": 1.0783737278536285, + "grad_norm": 28.858457483197178, + "learning_rate": 2.6316047148193675e-06, + "loss": 0.1625232696533203, + "step": 124715 + }, + { + "epoch": 1.078416961375172, + "grad_norm": 3.499207992520897, + "learning_rate": 2.6314025244259464e-06, + "loss": 0.047253990173339845, + "step": 124720 + }, + { + "epoch": 1.0784601948967152, + "grad_norm": 3.8163734146688912, + "learning_rate": 2.6312003357324645e-06, + "loss": 0.07762794494628907, + "step": 124725 + }, + { + "epoch": 1.0785034284182584, + "grad_norm": 1.450267132353513, + "learning_rate": 2.6309981487398544e-06, + "loss": 0.027927207946777343, + "step": 124730 + }, + { + "epoch": 1.0785466619398016, + "grad_norm": 1.7172758838083506, + "learning_rate": 2.6307959634490487e-06, + "loss": 0.0701446533203125, + "step": 124735 + }, + { + "epoch": 1.0785898954613449, + "grad_norm": 2.950947221696246, + "learning_rate": 2.630593779860979e-06, + "loss": 0.011151123046875, + "step": 124740 + }, + { + "epoch": 1.078633128982888, + "grad_norm": 2.272856381719036, + "learning_rate": 2.6303915979765783e-06, + "loss": 0.03326988220214844, + "step": 124745 + }, + { + "epoch": 1.0786763625044316, + "grad_norm": 19.489313132260982, + "learning_rate": 2.6301894177967788e-06, + "loss": 0.10294189453125, + "step": 124750 + }, + { + "epoch": 1.0787195960259748, + "grad_norm": 5.359937320983228, + "learning_rate": 2.629987239322512e-06, + "loss": 0.13187332153320314, + "step": 124755 + }, + { + "epoch": 1.078762829547518, + "grad_norm": 2.790041230418494, + "learning_rate": 2.629785062554712e-06, + "loss": 0.010089874267578125, + "step": 124760 + }, + { + "epoch": 1.0788060630690612, + "grad_norm": 2.610160147091847, + "learning_rate": 2.6295828874943115e-06, + "loss": 0.02107391357421875, + "step": 124765 + }, + { + "epoch": 1.0788492965906045, + "grad_norm": 0.25220389525089687, + "learning_rate": 2.6293807141422413e-06, + "loss": 0.0556182861328125, + "step": 124770 + }, + { + "epoch": 1.0788925301121477, + "grad_norm": 2.2063982053712667, + "learning_rate": 2.6291785424994344e-06, + "loss": 0.02864227294921875, + "step": 124775 + }, + { + "epoch": 1.078935763633691, + "grad_norm": 0.26468965370880737, + "learning_rate": 2.628976372566824e-06, + "loss": 0.028685760498046876, + "step": 124780 + }, + { + "epoch": 1.0789789971552344, + "grad_norm": 4.135651798325665, + "learning_rate": 2.62877420434534e-06, + "loss": 0.05379180908203125, + "step": 124785 + }, + { + "epoch": 1.0790222306767776, + "grad_norm": 0.42348462645305374, + "learning_rate": 2.628572037835918e-06, + "loss": 0.09991111755371093, + "step": 124790 + }, + { + "epoch": 1.0790654641983208, + "grad_norm": 9.639487156359774, + "learning_rate": 2.628369873039489e-06, + "loss": 0.06065788269042969, + "step": 124795 + }, + { + "epoch": 1.079108697719864, + "grad_norm": 0.3453293916975167, + "learning_rate": 2.6281677099569848e-06, + "loss": 0.039064788818359376, + "step": 124800 + }, + { + "epoch": 1.0791519312414073, + "grad_norm": 14.896197922045241, + "learning_rate": 2.627965548589339e-06, + "loss": 0.15157546997070312, + "step": 124805 + }, + { + "epoch": 1.0791951647629505, + "grad_norm": 2.2325037797472005, + "learning_rate": 2.6277633889374828e-06, + "loss": 0.04598388671875, + "step": 124810 + }, + { + "epoch": 1.079238398284494, + "grad_norm": 0.44784302114145114, + "learning_rate": 2.6275612310023477e-06, + "loss": 0.055106735229492186, + "step": 124815 + }, + { + "epoch": 1.0792816318060372, + "grad_norm": 28.53539884261944, + "learning_rate": 2.627359074784869e-06, + "loss": 0.16580963134765625, + "step": 124820 + }, + { + "epoch": 1.0793248653275804, + "grad_norm": 0.9519230342490004, + "learning_rate": 2.6271569202859766e-06, + "loss": 0.03971786499023437, + "step": 124825 + }, + { + "epoch": 1.0793680988491237, + "grad_norm": 1.0066359810992365, + "learning_rate": 2.6269547675066036e-06, + "loss": 0.03156585693359375, + "step": 124830 + }, + { + "epoch": 1.0794113323706669, + "grad_norm": 4.60756994840534, + "learning_rate": 2.626752616447683e-06, + "loss": 0.07071876525878906, + "step": 124835 + }, + { + "epoch": 1.0794545658922101, + "grad_norm": 0.8627866194066234, + "learning_rate": 2.6265504671101456e-06, + "loss": 0.31976966857910155, + "step": 124840 + }, + { + "epoch": 1.0794977994137533, + "grad_norm": 3.162805495933327, + "learning_rate": 2.626348319494925e-06, + "loss": 0.06861648559570313, + "step": 124845 + }, + { + "epoch": 1.0795410329352968, + "grad_norm": 2.640488529015642, + "learning_rate": 2.6261461736029513e-06, + "loss": 0.018317794799804686, + "step": 124850 + }, + { + "epoch": 1.07958426645684, + "grad_norm": 0.47632310221999163, + "learning_rate": 2.6259440294351603e-06, + "loss": 0.17193222045898438, + "step": 124855 + }, + { + "epoch": 1.0796274999783833, + "grad_norm": 0.061172589573833966, + "learning_rate": 2.625741886992481e-06, + "loss": 0.004595184326171875, + "step": 124860 + }, + { + "epoch": 1.0796707334999265, + "grad_norm": 24.302490544414322, + "learning_rate": 2.625539746275848e-06, + "loss": 0.20144882202148437, + "step": 124865 + }, + { + "epoch": 1.0797139670214697, + "grad_norm": 0.8891880948709387, + "learning_rate": 2.6253376072861923e-06, + "loss": 0.11515684127807617, + "step": 124870 + }, + { + "epoch": 1.079757200543013, + "grad_norm": 2.095883460095654, + "learning_rate": 2.625135470024447e-06, + "loss": 0.04014663696289063, + "step": 124875 + }, + { + "epoch": 1.0798004340645564, + "grad_norm": 59.278926450545995, + "learning_rate": 2.6249333344915426e-06, + "loss": 0.4886371612548828, + "step": 124880 + }, + { + "epoch": 1.0798436675860996, + "grad_norm": 0.8127754272017229, + "learning_rate": 2.6247312006884135e-06, + "loss": 0.11948623657226562, + "step": 124885 + }, + { + "epoch": 1.0798869011076428, + "grad_norm": 1.4144663902150316, + "learning_rate": 2.62452906861599e-06, + "loss": 0.10699691772460937, + "step": 124890 + }, + { + "epoch": 1.079930134629186, + "grad_norm": 13.8559323653689, + "learning_rate": 2.624326938275206e-06, + "loss": 0.10378532409667969, + "step": 124895 + }, + { + "epoch": 1.0799733681507293, + "grad_norm": 9.249379852735649, + "learning_rate": 2.6241248096669937e-06, + "loss": 0.030336761474609376, + "step": 124900 + }, + { + "epoch": 1.0800166016722725, + "grad_norm": 1.3208412699119307, + "learning_rate": 2.623922682792284e-06, + "loss": 0.08553428649902343, + "step": 124905 + }, + { + "epoch": 1.0800598351938158, + "grad_norm": 4.108253216420494, + "learning_rate": 2.623720557652008e-06, + "loss": 0.017301177978515624, + "step": 124910 + }, + { + "epoch": 1.0801030687153592, + "grad_norm": 0.3115970827281073, + "learning_rate": 2.6235184342471012e-06, + "loss": 0.090606689453125, + "step": 124915 + }, + { + "epoch": 1.0801463022369024, + "grad_norm": 3.8290729122051874, + "learning_rate": 2.6233163125784937e-06, + "loss": 0.008762359619140625, + "step": 124920 + }, + { + "epoch": 1.0801895357584457, + "grad_norm": 2.1373413115159012, + "learning_rate": 2.6231141926471185e-06, + "loss": 0.07700424194335938, + "step": 124925 + }, + { + "epoch": 1.080232769279989, + "grad_norm": 2.9867929287651345, + "learning_rate": 2.6229120744539075e-06, + "loss": 0.05775527954101563, + "step": 124930 + }, + { + "epoch": 1.0802760028015321, + "grad_norm": 0.19619123907066013, + "learning_rate": 2.6227099579997923e-06, + "loss": 0.023382568359375, + "step": 124935 + }, + { + "epoch": 1.0803192363230754, + "grad_norm": 15.468328399465674, + "learning_rate": 2.622507843285705e-06, + "loss": 0.12346458435058594, + "step": 124940 + }, + { + "epoch": 1.0803624698446188, + "grad_norm": 2.8539872696875674, + "learning_rate": 2.622305730312578e-06, + "loss": 0.1478656768798828, + "step": 124945 + }, + { + "epoch": 1.080405703366162, + "grad_norm": 0.13331386333544692, + "learning_rate": 2.6221036190813443e-06, + "loss": 0.08829574584960938, + "step": 124950 + }, + { + "epoch": 1.0804489368877053, + "grad_norm": 0.20740788425686604, + "learning_rate": 2.6219015095929355e-06, + "loss": 0.040071868896484376, + "step": 124955 + }, + { + "epoch": 1.0804921704092485, + "grad_norm": 3.84747960418852, + "learning_rate": 2.6216994018482832e-06, + "loss": 0.01425018310546875, + "step": 124960 + }, + { + "epoch": 1.0805354039307917, + "grad_norm": 0.9971731658094368, + "learning_rate": 2.6214972958483203e-06, + "loss": 0.011871337890625, + "step": 124965 + }, + { + "epoch": 1.080578637452335, + "grad_norm": 2.06289497724179, + "learning_rate": 2.6212951915939774e-06, + "loss": 0.01992034912109375, + "step": 124970 + }, + { + "epoch": 1.0806218709738784, + "grad_norm": 0.16063436969788789, + "learning_rate": 2.6210930890861873e-06, + "loss": 0.025431060791015626, + "step": 124975 + }, + { + "epoch": 1.0806651044954216, + "grad_norm": 0.45446046675468404, + "learning_rate": 2.6208909883258837e-06, + "loss": 0.020800209045410155, + "step": 124980 + }, + { + "epoch": 1.0807083380169649, + "grad_norm": 1.7190587022781114, + "learning_rate": 2.6206888893139972e-06, + "loss": 0.03424835205078125, + "step": 124985 + }, + { + "epoch": 1.080751571538508, + "grad_norm": 0.46094183822004775, + "learning_rate": 2.62048679205146e-06, + "loss": 0.06633186340332031, + "step": 124990 + }, + { + "epoch": 1.0807948050600513, + "grad_norm": 8.078675633730644, + "learning_rate": 2.6202846965392043e-06, + "loss": 0.02910308837890625, + "step": 124995 + }, + { + "epoch": 1.0808380385815946, + "grad_norm": 2.9270390214652413, + "learning_rate": 2.620082602778161e-06, + "loss": 0.08830413818359376, + "step": 125000 + }, + { + "epoch": 1.080881272103138, + "grad_norm": 2.2062405955458058, + "learning_rate": 2.6198805107692642e-06, + "loss": 0.41790237426757815, + "step": 125005 + }, + { + "epoch": 1.0809245056246812, + "grad_norm": 4.140095812202555, + "learning_rate": 2.6196784205134436e-06, + "loss": 0.030872344970703125, + "step": 125010 + }, + { + "epoch": 1.0809677391462245, + "grad_norm": 7.419499067681822, + "learning_rate": 2.6194763320116338e-06, + "loss": 0.07973861694335938, + "step": 125015 + }, + { + "epoch": 1.0810109726677677, + "grad_norm": 24.61970365460869, + "learning_rate": 2.6192742452647657e-06, + "loss": 0.29282684326171876, + "step": 125020 + }, + { + "epoch": 1.081054206189311, + "grad_norm": 10.631349434261034, + "learning_rate": 2.6190721602737702e-06, + "loss": 0.09350624084472656, + "step": 125025 + }, + { + "epoch": 1.0810974397108541, + "grad_norm": 11.408519201121534, + "learning_rate": 2.618870077039581e-06, + "loss": 0.024407196044921874, + "step": 125030 + }, + { + "epoch": 1.0811406732323974, + "grad_norm": 2.3171881569915582, + "learning_rate": 2.618667995563129e-06, + "loss": 0.11470489501953125, + "step": 125035 + }, + { + "epoch": 1.0811839067539408, + "grad_norm": 3.490794066448306, + "learning_rate": 2.6184659158453454e-06, + "loss": 0.08768196105957031, + "step": 125040 + }, + { + "epoch": 1.081227140275484, + "grad_norm": 2.1035721291789526, + "learning_rate": 2.618263837887165e-06, + "loss": 0.09283599853515626, + "step": 125045 + }, + { + "epoch": 1.0812703737970273, + "grad_norm": 3.664739120775382, + "learning_rate": 2.6180617616895175e-06, + "loss": 0.0361572265625, + "step": 125050 + }, + { + "epoch": 1.0813136073185705, + "grad_norm": 0.3321344311737583, + "learning_rate": 2.6178596872533352e-06, + "loss": 0.05041990280151367, + "step": 125055 + }, + { + "epoch": 1.0813568408401137, + "grad_norm": 14.649513671318081, + "learning_rate": 2.6176576145795502e-06, + "loss": 0.052829742431640625, + "step": 125060 + }, + { + "epoch": 1.081400074361657, + "grad_norm": 0.6744633088189409, + "learning_rate": 2.617455543669095e-06, + "loss": 0.019431686401367186, + "step": 125065 + }, + { + "epoch": 1.0814433078832004, + "grad_norm": 7.7825158616035655, + "learning_rate": 2.6172534745229e-06, + "loss": 0.026112747192382813, + "step": 125070 + }, + { + "epoch": 1.0814865414047437, + "grad_norm": 4.219789732240645, + "learning_rate": 2.617051407141899e-06, + "loss": 0.04156646728515625, + "step": 125075 + }, + { + "epoch": 1.0815297749262869, + "grad_norm": 0.555654617796529, + "learning_rate": 2.6168493415270233e-06, + "loss": 0.03400611877441406, + "step": 125080 + }, + { + "epoch": 1.08157300844783, + "grad_norm": 1.717355596701259, + "learning_rate": 2.616647277679204e-06, + "loss": 0.020787429809570313, + "step": 125085 + }, + { + "epoch": 1.0816162419693733, + "grad_norm": 15.928426150928312, + "learning_rate": 2.6164452155993743e-06, + "loss": 0.11854381561279297, + "step": 125090 + }, + { + "epoch": 1.0816594754909166, + "grad_norm": 9.191704179076087, + "learning_rate": 2.616243155288465e-06, + "loss": 0.06920356750488281, + "step": 125095 + }, + { + "epoch": 1.0817027090124598, + "grad_norm": 1.1869058153376189, + "learning_rate": 2.6160410967474083e-06, + "loss": 0.093341064453125, + "step": 125100 + }, + { + "epoch": 1.0817459425340032, + "grad_norm": 1.412536669662038, + "learning_rate": 2.6158390399771354e-06, + "loss": 0.3984569549560547, + "step": 125105 + }, + { + "epoch": 1.0817891760555465, + "grad_norm": 8.150250819203448, + "learning_rate": 2.61563698497858e-06, + "loss": 0.09862136840820312, + "step": 125110 + }, + { + "epoch": 1.0818324095770897, + "grad_norm": 14.245030420590913, + "learning_rate": 2.615434931752672e-06, + "loss": 0.07506675720214843, + "step": 125115 + }, + { + "epoch": 1.081875643098633, + "grad_norm": 1.5668471575590153, + "learning_rate": 2.615232880300345e-06, + "loss": 0.08513565063476562, + "step": 125120 + }, + { + "epoch": 1.0819188766201762, + "grad_norm": 1.824073075447018, + "learning_rate": 2.6150308306225296e-06, + "loss": 0.01719512939453125, + "step": 125125 + }, + { + "epoch": 1.0819621101417194, + "grad_norm": 20.9200025285676, + "learning_rate": 2.614828782720158e-06, + "loss": 0.18225669860839844, + "step": 125130 + }, + { + "epoch": 1.0820053436632628, + "grad_norm": 0.6694354742770925, + "learning_rate": 2.6146267365941613e-06, + "loss": 0.08328304290771485, + "step": 125135 + }, + { + "epoch": 1.082048577184806, + "grad_norm": 5.876922309548196, + "learning_rate": 2.614424692245473e-06, + "loss": 0.040305709838867186, + "step": 125140 + }, + { + "epoch": 1.0820918107063493, + "grad_norm": 1.0523085769093115, + "learning_rate": 2.6142226496750228e-06, + "loss": 0.0391021728515625, + "step": 125145 + }, + { + "epoch": 1.0821350442278925, + "grad_norm": 0.38581956840268933, + "learning_rate": 2.6140206088837447e-06, + "loss": 0.0222503662109375, + "step": 125150 + }, + { + "epoch": 1.0821782777494358, + "grad_norm": 0.8013188928974595, + "learning_rate": 2.6138185698725695e-06, + "loss": 0.0255828857421875, + "step": 125155 + }, + { + "epoch": 1.082221511270979, + "grad_norm": 6.307950148439616, + "learning_rate": 2.613616532642429e-06, + "loss": 0.2179168701171875, + "step": 125160 + }, + { + "epoch": 1.0822647447925222, + "grad_norm": 35.30400014365819, + "learning_rate": 2.6134144971942537e-06, + "loss": 0.2283203125, + "step": 125165 + }, + { + "epoch": 1.0823079783140657, + "grad_norm": 0.0936447675795698, + "learning_rate": 2.6132124635289774e-06, + "loss": 0.017731475830078124, + "step": 125170 + }, + { + "epoch": 1.082351211835609, + "grad_norm": 0.5731151195715691, + "learning_rate": 2.6130104316475313e-06, + "loss": 0.0534881591796875, + "step": 125175 + }, + { + "epoch": 1.0823944453571521, + "grad_norm": 7.403868332382267, + "learning_rate": 2.6128084015508468e-06, + "loss": 0.037441253662109375, + "step": 125180 + }, + { + "epoch": 1.0824376788786954, + "grad_norm": 7.66005827573753, + "learning_rate": 2.612606373239856e-06, + "loss": 0.05689239501953125, + "step": 125185 + }, + { + "epoch": 1.0824809124002386, + "grad_norm": 4.971510711617409, + "learning_rate": 2.6124043467154905e-06, + "loss": 0.06424560546875, + "step": 125190 + }, + { + "epoch": 1.0825241459217818, + "grad_norm": 16.552889164794284, + "learning_rate": 2.612202321978681e-06, + "loss": 0.10007495880126953, + "step": 125195 + }, + { + "epoch": 1.0825673794433253, + "grad_norm": 1.5231402330906376, + "learning_rate": 2.6120002990303594e-06, + "loss": 0.1982147216796875, + "step": 125200 + }, + { + "epoch": 1.0826106129648685, + "grad_norm": 0.13474845119810036, + "learning_rate": 2.6117982778714594e-06, + "loss": 0.10719985961914062, + "step": 125205 + }, + { + "epoch": 1.0826538464864117, + "grad_norm": 1.8569249831925114, + "learning_rate": 2.6115962585029113e-06, + "loss": 0.25894737243652344, + "step": 125210 + }, + { + "epoch": 1.082697080007955, + "grad_norm": 2.283509167703708, + "learning_rate": 2.6113942409256473e-06, + "loss": 0.2703582763671875, + "step": 125215 + }, + { + "epoch": 1.0827403135294982, + "grad_norm": 0.07765848235489972, + "learning_rate": 2.611192225140598e-06, + "loss": 0.26209564208984376, + "step": 125220 + }, + { + "epoch": 1.0827835470510414, + "grad_norm": 22.314340627955364, + "learning_rate": 2.610990211148697e-06, + "loss": 0.04214324951171875, + "step": 125225 + }, + { + "epoch": 1.0828267805725849, + "grad_norm": 9.579700147338169, + "learning_rate": 2.6107881989508722e-06, + "loss": 0.02948474884033203, + "step": 125230 + }, + { + "epoch": 1.082870014094128, + "grad_norm": 10.961494399132148, + "learning_rate": 2.6105861885480603e-06, + "loss": 0.09084129333496094, + "step": 125235 + }, + { + "epoch": 1.0829132476156713, + "grad_norm": 4.102115843366206, + "learning_rate": 2.61038417994119e-06, + "loss": 0.04121971130371094, + "step": 125240 + }, + { + "epoch": 1.0829564811372145, + "grad_norm": 4.175489720716708, + "learning_rate": 2.610182173131193e-06, + "loss": 0.0356353759765625, + "step": 125245 + }, + { + "epoch": 1.0829997146587578, + "grad_norm": 4.067654974183416, + "learning_rate": 2.609980168119002e-06, + "loss": 0.037192249298095705, + "step": 125250 + }, + { + "epoch": 1.083042948180301, + "grad_norm": 4.295628990869322, + "learning_rate": 2.6097781649055476e-06, + "loss": 0.033149337768554686, + "step": 125255 + }, + { + "epoch": 1.0830861817018445, + "grad_norm": 0.04552967220944727, + "learning_rate": 2.6095761634917607e-06, + "loss": 0.032209014892578124, + "step": 125260 + }, + { + "epoch": 1.0831294152233877, + "grad_norm": 12.728831286220741, + "learning_rate": 2.6093741638785755e-06, + "loss": 0.09433517456054688, + "step": 125265 + }, + { + "epoch": 1.083172648744931, + "grad_norm": 0.8425186211596181, + "learning_rate": 2.609172166066922e-06, + "loss": 0.0553192138671875, + "step": 125270 + }, + { + "epoch": 1.0832158822664741, + "grad_norm": 3.6511944565655607, + "learning_rate": 2.608970170057732e-06, + "loss": 0.05721588134765625, + "step": 125275 + }, + { + "epoch": 1.0832591157880174, + "grad_norm": 6.970418932993234, + "learning_rate": 2.608768175851936e-06, + "loss": 0.05053558349609375, + "step": 125280 + }, + { + "epoch": 1.0833023493095606, + "grad_norm": 3.710191541392522, + "learning_rate": 2.608566183450468e-06, + "loss": 0.030975532531738282, + "step": 125285 + }, + { + "epoch": 1.0833455828311038, + "grad_norm": 1.5130817616180303, + "learning_rate": 2.6083641928542577e-06, + "loss": 0.12900390625, + "step": 125290 + }, + { + "epoch": 1.0833888163526473, + "grad_norm": 2.071829123392011, + "learning_rate": 2.6081622040642356e-06, + "loss": 0.05002670288085938, + "step": 125295 + }, + { + "epoch": 1.0834320498741905, + "grad_norm": 11.19463641925144, + "learning_rate": 2.6079602170813366e-06, + "loss": 0.3891937255859375, + "step": 125300 + }, + { + "epoch": 1.0834752833957337, + "grad_norm": 8.108067862510607, + "learning_rate": 2.6077582319064905e-06, + "loss": 0.028661346435546874, + "step": 125305 + }, + { + "epoch": 1.083518516917277, + "grad_norm": 24.456212948205383, + "learning_rate": 2.607556248540628e-06, + "loss": 0.05661220550537109, + "step": 125310 + }, + { + "epoch": 1.0835617504388202, + "grad_norm": 8.224029878322835, + "learning_rate": 2.6073542669846814e-06, + "loss": 0.05932273864746094, + "step": 125315 + }, + { + "epoch": 1.0836049839603634, + "grad_norm": 1.4547061258160632, + "learning_rate": 2.6071522872395833e-06, + "loss": 0.038336181640625, + "step": 125320 + }, + { + "epoch": 1.0836482174819069, + "grad_norm": 13.486574967702964, + "learning_rate": 2.606950309306262e-06, + "loss": 0.0444854736328125, + "step": 125325 + }, + { + "epoch": 1.08369145100345, + "grad_norm": 0.7140365226714938, + "learning_rate": 2.606748333185653e-06, + "loss": 0.01970367431640625, + "step": 125330 + }, + { + "epoch": 1.0837346845249933, + "grad_norm": 1.9456271825381763, + "learning_rate": 2.6065463588786855e-06, + "loss": 0.04253311157226562, + "step": 125335 + }, + { + "epoch": 1.0837779180465366, + "grad_norm": 44.677623086843695, + "learning_rate": 2.606344386386291e-06, + "loss": 0.27633514404296877, + "step": 125340 + }, + { + "epoch": 1.0838211515680798, + "grad_norm": 0.4851236739225348, + "learning_rate": 2.606142415709402e-06, + "loss": 0.15786666870117189, + "step": 125345 + }, + { + "epoch": 1.083864385089623, + "grad_norm": 5.8601845296320985, + "learning_rate": 2.6059404468489492e-06, + "loss": 0.07321510314941407, + "step": 125350 + }, + { + "epoch": 1.0839076186111662, + "grad_norm": 0.2710480361512436, + "learning_rate": 2.6057384798058644e-06, + "loss": 0.17398109436035156, + "step": 125355 + }, + { + "epoch": 1.0839508521327097, + "grad_norm": 10.825864262862236, + "learning_rate": 2.6055365145810777e-06, + "loss": 0.0723388671875, + "step": 125360 + }, + { + "epoch": 1.083994085654253, + "grad_norm": 1.9767638730942587, + "learning_rate": 2.605334551175523e-06, + "loss": 0.041424560546875, + "step": 125365 + }, + { + "epoch": 1.0840373191757962, + "grad_norm": 5.25986844481743, + "learning_rate": 2.6051325895901298e-06, + "loss": 0.09025115966796875, + "step": 125370 + }, + { + "epoch": 1.0840805526973394, + "grad_norm": 2.720503093259128, + "learning_rate": 2.604930629825831e-06, + "loss": 0.11331710815429688, + "step": 125375 + }, + { + "epoch": 1.0841237862188826, + "grad_norm": 38.40289342942509, + "learning_rate": 2.604728671883557e-06, + "loss": 0.06724395751953124, + "step": 125380 + }, + { + "epoch": 1.0841670197404258, + "grad_norm": 2.986508159349019, + "learning_rate": 2.6045267157642395e-06, + "loss": 0.044049072265625, + "step": 125385 + }, + { + "epoch": 1.0842102532619693, + "grad_norm": 2.57855249016782, + "learning_rate": 2.6043247614688083e-06, + "loss": 0.040191650390625, + "step": 125390 + }, + { + "epoch": 1.0842534867835125, + "grad_norm": 0.09828026328542276, + "learning_rate": 2.604122808998198e-06, + "loss": 0.056708335876464844, + "step": 125395 + }, + { + "epoch": 1.0842967203050558, + "grad_norm": 0.5799755552480088, + "learning_rate": 2.6039208583533375e-06, + "loss": 0.0418426513671875, + "step": 125400 + }, + { + "epoch": 1.084339953826599, + "grad_norm": 0.7618957013634301, + "learning_rate": 2.60371890953516e-06, + "loss": 0.0414581298828125, + "step": 125405 + }, + { + "epoch": 1.0843831873481422, + "grad_norm": 15.188841289789643, + "learning_rate": 2.6035169625445952e-06, + "loss": 0.171661376953125, + "step": 125410 + }, + { + "epoch": 1.0844264208696854, + "grad_norm": 0.6603321246394966, + "learning_rate": 2.603315017382576e-06, + "loss": 0.011782073974609375, + "step": 125415 + }, + { + "epoch": 1.0844696543912287, + "grad_norm": 0.5591100122377758, + "learning_rate": 2.603113074050031e-06, + "loss": 0.11857109069824219, + "step": 125420 + }, + { + "epoch": 1.0845128879127721, + "grad_norm": 9.369667708720826, + "learning_rate": 2.602911132547894e-06, + "loss": 0.03488779067993164, + "step": 125425 + }, + { + "epoch": 1.0845561214343153, + "grad_norm": 1.590202093414969, + "learning_rate": 2.6027091928770967e-06, + "loss": 0.024907684326171874, + "step": 125430 + }, + { + "epoch": 1.0845993549558586, + "grad_norm": 0.8041284897661509, + "learning_rate": 2.6025072550385695e-06, + "loss": 0.04776487350463867, + "step": 125435 + }, + { + "epoch": 1.0846425884774018, + "grad_norm": 3.218875310553519, + "learning_rate": 2.602305319033244e-06, + "loss": 0.07965240478515626, + "step": 125440 + }, + { + "epoch": 1.084685821998945, + "grad_norm": 6.536209801700542, + "learning_rate": 2.60210338486205e-06, + "loss": 0.031595611572265626, + "step": 125445 + }, + { + "epoch": 1.0847290555204885, + "grad_norm": 3.619449434019156, + "learning_rate": 2.601901452525921e-06, + "loss": 0.15724334716796876, + "step": 125450 + }, + { + "epoch": 1.0847722890420317, + "grad_norm": 0.5479864358063962, + "learning_rate": 2.6016995220257856e-06, + "loss": 0.036583900451660156, + "step": 125455 + }, + { + "epoch": 1.084815522563575, + "grad_norm": 14.736642810937068, + "learning_rate": 2.6014975933625784e-06, + "loss": 0.05168895721435547, + "step": 125460 + }, + { + "epoch": 1.0848587560851182, + "grad_norm": 0.9412809246818354, + "learning_rate": 2.6012956665372295e-06, + "loss": 0.0731719970703125, + "step": 125465 + }, + { + "epoch": 1.0849019896066614, + "grad_norm": 21.67963356989237, + "learning_rate": 2.6010937415506693e-06, + "loss": 0.09212875366210938, + "step": 125470 + }, + { + "epoch": 1.0849452231282046, + "grad_norm": 41.9840432314756, + "learning_rate": 2.600891818403829e-06, + "loss": 0.0992584228515625, + "step": 125475 + }, + { + "epoch": 1.0849884566497479, + "grad_norm": 8.882006699219856, + "learning_rate": 2.600689897097641e-06, + "loss": 0.08808746337890624, + "step": 125480 + }, + { + "epoch": 1.0850316901712913, + "grad_norm": 1.3757775986580874, + "learning_rate": 2.600487977633034e-06, + "loss": 0.19360580444335937, + "step": 125485 + }, + { + "epoch": 1.0850749236928345, + "grad_norm": 26.3658770084373, + "learning_rate": 2.600286060010943e-06, + "loss": 0.1108154296875, + "step": 125490 + }, + { + "epoch": 1.0851181572143778, + "grad_norm": 1.8222006745160984, + "learning_rate": 2.6000841442322975e-06, + "loss": 0.03721542358398437, + "step": 125495 + }, + { + "epoch": 1.085161390735921, + "grad_norm": 9.48418491814481, + "learning_rate": 2.5998822302980285e-06, + "loss": 0.11158905029296876, + "step": 125500 + }, + { + "epoch": 1.0852046242574642, + "grad_norm": 3.819324319633642, + "learning_rate": 2.5996803182090666e-06, + "loss": 0.01661376953125, + "step": 125505 + }, + { + "epoch": 1.0852478577790075, + "grad_norm": 0.25915298086345473, + "learning_rate": 2.5994784079663447e-06, + "loss": 0.04110565185546875, + "step": 125510 + }, + { + "epoch": 1.085291091300551, + "grad_norm": 8.680264609721673, + "learning_rate": 2.5992764995707908e-06, + "loss": 0.36189994812011717, + "step": 125515 + }, + { + "epoch": 1.0853343248220941, + "grad_norm": 0.07098922188121701, + "learning_rate": 2.5990745930233403e-06, + "loss": 0.029152297973632814, + "step": 125520 + }, + { + "epoch": 1.0853775583436374, + "grad_norm": 1.5452545683008125, + "learning_rate": 2.5988726883249225e-06, + "loss": 0.0358917236328125, + "step": 125525 + }, + { + "epoch": 1.0854207918651806, + "grad_norm": 15.868925478794822, + "learning_rate": 2.598670785476468e-06, + "loss": 0.27010955810546877, + "step": 125530 + }, + { + "epoch": 1.0854640253867238, + "grad_norm": 0.7760719034676486, + "learning_rate": 2.598468884478908e-06, + "loss": 0.016503143310546874, + "step": 125535 + }, + { + "epoch": 1.085507258908267, + "grad_norm": 6.59219030653389, + "learning_rate": 2.5982669853331742e-06, + "loss": 0.20447196960449218, + "step": 125540 + }, + { + "epoch": 1.0855504924298103, + "grad_norm": 0.206877028553668, + "learning_rate": 2.598065088040198e-06, + "loss": 0.059452056884765625, + "step": 125545 + }, + { + "epoch": 1.0855937259513537, + "grad_norm": 16.35130299255665, + "learning_rate": 2.5978631926009085e-06, + "loss": 0.06188201904296875, + "step": 125550 + }, + { + "epoch": 1.085636959472897, + "grad_norm": 0.9652498187469307, + "learning_rate": 2.59766129901624e-06, + "loss": 0.02022552490234375, + "step": 125555 + }, + { + "epoch": 1.0856801929944402, + "grad_norm": 4.1036302808501, + "learning_rate": 2.597459407287122e-06, + "loss": 0.2531440734863281, + "step": 125560 + }, + { + "epoch": 1.0857234265159834, + "grad_norm": 32.06230029818644, + "learning_rate": 2.597257517414485e-06, + "loss": 0.08999176025390625, + "step": 125565 + }, + { + "epoch": 1.0857666600375266, + "grad_norm": 2.6141805422546533, + "learning_rate": 2.5970556293992612e-06, + "loss": 0.0483154296875, + "step": 125570 + }, + { + "epoch": 1.0858098935590699, + "grad_norm": 1.1343089226212442, + "learning_rate": 2.596853743242381e-06, + "loss": 0.41386566162109373, + "step": 125575 + }, + { + "epoch": 1.0858531270806133, + "grad_norm": 9.550181809083755, + "learning_rate": 2.596651858944775e-06, + "loss": 0.158154296875, + "step": 125580 + }, + { + "epoch": 1.0858963606021566, + "grad_norm": 0.9244062782980864, + "learning_rate": 2.596449976507376e-06, + "loss": 0.03930397033691406, + "step": 125585 + }, + { + "epoch": 1.0859395941236998, + "grad_norm": 34.44265353554068, + "learning_rate": 2.5962480959311144e-06, + "loss": 0.039190673828125, + "step": 125590 + }, + { + "epoch": 1.085982827645243, + "grad_norm": 0.11612874234295134, + "learning_rate": 2.5960462172169197e-06, + "loss": 0.0797698974609375, + "step": 125595 + }, + { + "epoch": 1.0860260611667862, + "grad_norm": 12.068750527726598, + "learning_rate": 2.5958443403657252e-06, + "loss": 0.08061676025390625, + "step": 125600 + }, + { + "epoch": 1.0860692946883295, + "grad_norm": 22.690009959052315, + "learning_rate": 2.5956424653784606e-06, + "loss": 0.15888519287109376, + "step": 125605 + }, + { + "epoch": 1.0861125282098727, + "grad_norm": 0.16233542449136917, + "learning_rate": 2.5954405922560572e-06, + "loss": 0.008541297912597657, + "step": 125610 + }, + { + "epoch": 1.0861557617314161, + "grad_norm": 3.2181106070845136, + "learning_rate": 2.595238720999445e-06, + "loss": 0.39173431396484376, + "step": 125615 + }, + { + "epoch": 1.0861989952529594, + "grad_norm": 1.734213273511591, + "learning_rate": 2.5950368516095568e-06, + "loss": 0.023504638671875, + "step": 125620 + }, + { + "epoch": 1.0862422287745026, + "grad_norm": 3.9828893994629397, + "learning_rate": 2.594834984087323e-06, + "loss": 0.02402839660644531, + "step": 125625 + }, + { + "epoch": 1.0862854622960458, + "grad_norm": 9.186432071349438, + "learning_rate": 2.5946331184336747e-06, + "loss": 0.09586544036865234, + "step": 125630 + }, + { + "epoch": 1.086328695817589, + "grad_norm": 28.502076064464486, + "learning_rate": 2.594431254649543e-06, + "loss": 0.058551025390625, + "step": 125635 + }, + { + "epoch": 1.0863719293391323, + "grad_norm": 53.30380065743373, + "learning_rate": 2.594229392735858e-06, + "loss": 0.12749481201171875, + "step": 125640 + }, + { + "epoch": 1.0864151628606757, + "grad_norm": 0.31744816175125784, + "learning_rate": 2.5940275326935506e-06, + "loss": 0.01625823974609375, + "step": 125645 + }, + { + "epoch": 1.086458396382219, + "grad_norm": 1.1635541420475868, + "learning_rate": 2.5938256745235526e-06, + "loss": 0.03060150146484375, + "step": 125650 + }, + { + "epoch": 1.0865016299037622, + "grad_norm": 10.45908223488004, + "learning_rate": 2.593623818226795e-06, + "loss": 0.09137992858886719, + "step": 125655 + }, + { + "epoch": 1.0865448634253054, + "grad_norm": 0.7213854808800897, + "learning_rate": 2.593421963804209e-06, + "loss": 0.11598587036132812, + "step": 125660 + }, + { + "epoch": 1.0865880969468487, + "grad_norm": 17.28802525662125, + "learning_rate": 2.5932201112567245e-06, + "loss": 0.11108245849609374, + "step": 125665 + }, + { + "epoch": 1.0866313304683919, + "grad_norm": 0.7807072352832423, + "learning_rate": 2.593018260585272e-06, + "loss": 0.13138465881347655, + "step": 125670 + }, + { + "epoch": 1.0866745639899353, + "grad_norm": 3.367386130738581, + "learning_rate": 2.592816411790784e-06, + "loss": 0.024263763427734376, + "step": 125675 + }, + { + "epoch": 1.0867177975114786, + "grad_norm": 7.442398657203446, + "learning_rate": 2.592614564874191e-06, + "loss": 0.15803375244140624, + "step": 125680 + }, + { + "epoch": 1.0867610310330218, + "grad_norm": 6.261962264737084, + "learning_rate": 2.592412719836423e-06, + "loss": 0.03526229858398437, + "step": 125685 + }, + { + "epoch": 1.086804264554565, + "grad_norm": 0.7188987077094737, + "learning_rate": 2.5922108766784124e-06, + "loss": 0.06363067626953126, + "step": 125690 + }, + { + "epoch": 1.0868474980761083, + "grad_norm": 12.608641314360774, + "learning_rate": 2.5920090354010893e-06, + "loss": 0.12275543212890624, + "step": 125695 + }, + { + "epoch": 1.0868907315976515, + "grad_norm": 2.3397810214353703, + "learning_rate": 2.5918071960053835e-06, + "loss": 0.031452178955078125, + "step": 125700 + }, + { + "epoch": 1.086933965119195, + "grad_norm": 0.37432189724128606, + "learning_rate": 2.591605358492228e-06, + "loss": 0.1731048583984375, + "step": 125705 + }, + { + "epoch": 1.0869771986407382, + "grad_norm": 8.62121571878457, + "learning_rate": 2.5914035228625503e-06, + "loss": 0.09457559585571289, + "step": 125710 + }, + { + "epoch": 1.0870204321622814, + "grad_norm": 1.8448187251962973, + "learning_rate": 2.5912016891172856e-06, + "loss": 0.037371063232421876, + "step": 125715 + }, + { + "epoch": 1.0870636656838246, + "grad_norm": 6.883146413521629, + "learning_rate": 2.590999857257362e-06, + "loss": 0.09587516784667968, + "step": 125720 + }, + { + "epoch": 1.0871068992053678, + "grad_norm": 0.16708358073581459, + "learning_rate": 2.5907980272837115e-06, + "loss": 0.05343437194824219, + "step": 125725 + }, + { + "epoch": 1.087150132726911, + "grad_norm": 1.2007001552721426, + "learning_rate": 2.5905961991972635e-06, + "loss": 0.022403526306152343, + "step": 125730 + }, + { + "epoch": 1.0871933662484543, + "grad_norm": 7.128886613289803, + "learning_rate": 2.5903943729989497e-06, + "loss": 0.2631385803222656, + "step": 125735 + }, + { + "epoch": 1.0872365997699978, + "grad_norm": 6.978218211607044, + "learning_rate": 2.5901925486897002e-06, + "loss": 0.059290313720703126, + "step": 125740 + }, + { + "epoch": 1.087279833291541, + "grad_norm": 0.8502200587878918, + "learning_rate": 2.5899907262704477e-06, + "loss": 0.13589935302734374, + "step": 125745 + }, + { + "epoch": 1.0873230668130842, + "grad_norm": 2.9069486990287494, + "learning_rate": 2.5897889057421216e-06, + "loss": 0.264874267578125, + "step": 125750 + }, + { + "epoch": 1.0873663003346274, + "grad_norm": 0.5650090826419563, + "learning_rate": 2.589587087105653e-06, + "loss": 0.06066970825195313, + "step": 125755 + }, + { + "epoch": 1.0874095338561707, + "grad_norm": 10.392711971680951, + "learning_rate": 2.5893852703619713e-06, + "loss": 0.08326892852783203, + "step": 125760 + }, + { + "epoch": 1.087452767377714, + "grad_norm": 22.980759182683588, + "learning_rate": 2.5891834555120093e-06, + "loss": 0.10613479614257812, + "step": 125765 + }, + { + "epoch": 1.0874960008992574, + "grad_norm": 29.230547741685584, + "learning_rate": 2.5889816425566957e-06, + "loss": 0.2099853515625, + "step": 125770 + }, + { + "epoch": 1.0875392344208006, + "grad_norm": 1.0299391336253445, + "learning_rate": 2.588779831496964e-06, + "loss": 0.16592979431152344, + "step": 125775 + }, + { + "epoch": 1.0875824679423438, + "grad_norm": 1.1815605996836598, + "learning_rate": 2.588578022333743e-06, + "loss": 0.07610397338867188, + "step": 125780 + }, + { + "epoch": 1.087625701463887, + "grad_norm": 2.5732292646893566, + "learning_rate": 2.588376215067964e-06, + "loss": 0.09249725341796874, + "step": 125785 + }, + { + "epoch": 1.0876689349854303, + "grad_norm": 10.052278078822873, + "learning_rate": 2.588174409700557e-06, + "loss": 0.10550537109375, + "step": 125790 + }, + { + "epoch": 1.0877121685069735, + "grad_norm": 15.691783136931932, + "learning_rate": 2.5879726062324538e-06, + "loss": 0.10125732421875, + "step": 125795 + }, + { + "epoch": 1.0877554020285167, + "grad_norm": 61.434044164253336, + "learning_rate": 2.5877708046645846e-06, + "loss": 0.10738458633422851, + "step": 125800 + }, + { + "epoch": 1.0877986355500602, + "grad_norm": 2.2043637747092295, + "learning_rate": 2.5875690049978787e-06, + "loss": 0.012194061279296875, + "step": 125805 + }, + { + "epoch": 1.0878418690716034, + "grad_norm": 6.276900243875299, + "learning_rate": 2.5873672072332696e-06, + "loss": 0.015412139892578124, + "step": 125810 + }, + { + "epoch": 1.0878851025931466, + "grad_norm": 10.547331771500145, + "learning_rate": 2.587165411371686e-06, + "loss": 0.07340660095214843, + "step": 125815 + }, + { + "epoch": 1.0879283361146899, + "grad_norm": 2.300326973677208, + "learning_rate": 2.5869636174140585e-06, + "loss": 0.48090248107910155, + "step": 125820 + }, + { + "epoch": 1.087971569636233, + "grad_norm": 2.0946694405008945, + "learning_rate": 2.586761825361319e-06, + "loss": 0.09488677978515625, + "step": 125825 + }, + { + "epoch": 1.0880148031577763, + "grad_norm": 55.442706583513164, + "learning_rate": 2.5865600352143975e-06, + "loss": 0.11649932861328124, + "step": 125830 + }, + { + "epoch": 1.0880580366793198, + "grad_norm": 43.997927924885005, + "learning_rate": 2.5863582469742237e-06, + "loss": 0.598541259765625, + "step": 125835 + }, + { + "epoch": 1.088101270200863, + "grad_norm": 3.8382795565373136, + "learning_rate": 2.5861564606417295e-06, + "loss": 0.18003997802734376, + "step": 125840 + }, + { + "epoch": 1.0881445037224062, + "grad_norm": 0.4209106050753982, + "learning_rate": 2.585954676217845e-06, + "loss": 0.2174509048461914, + "step": 125845 + }, + { + "epoch": 1.0881877372439495, + "grad_norm": 4.888630611125046, + "learning_rate": 2.585752893703502e-06, + "loss": 0.03318367004394531, + "step": 125850 + }, + { + "epoch": 1.0882309707654927, + "grad_norm": 1.1094050707195735, + "learning_rate": 2.585551113099629e-06, + "loss": 0.053081512451171875, + "step": 125855 + }, + { + "epoch": 1.088274204287036, + "grad_norm": 0.661462857614849, + "learning_rate": 2.5853493344071586e-06, + "loss": 0.06896438598632812, + "step": 125860 + }, + { + "epoch": 1.0883174378085791, + "grad_norm": 4.196879738145282, + "learning_rate": 2.5851475576270187e-06, + "loss": 0.29764862060546876, + "step": 125865 + }, + { + "epoch": 1.0883606713301226, + "grad_norm": 2.9948941472817134, + "learning_rate": 2.5849457827601433e-06, + "loss": 0.03280181884765625, + "step": 125870 + }, + { + "epoch": 1.0884039048516658, + "grad_norm": 3.568815531064413, + "learning_rate": 2.5847440098074603e-06, + "loss": 0.047339248657226565, + "step": 125875 + }, + { + "epoch": 1.088447138373209, + "grad_norm": 2.111426283406367, + "learning_rate": 2.584542238769902e-06, + "loss": 0.0371429443359375, + "step": 125880 + }, + { + "epoch": 1.0884903718947523, + "grad_norm": 0.43049947906568375, + "learning_rate": 2.584340469648398e-06, + "loss": 0.05040130615234375, + "step": 125885 + }, + { + "epoch": 1.0885336054162955, + "grad_norm": 4.383535805672127, + "learning_rate": 2.584138702443879e-06, + "loss": 0.03286552429199219, + "step": 125890 + }, + { + "epoch": 1.0885768389378387, + "grad_norm": 1.4921399349437419, + "learning_rate": 2.583936937157275e-06, + "loss": 0.03332443237304687, + "step": 125895 + }, + { + "epoch": 1.0886200724593822, + "grad_norm": 9.25501585750999, + "learning_rate": 2.5837351737895172e-06, + "loss": 0.051129150390625, + "step": 125900 + }, + { + "epoch": 1.0886633059809254, + "grad_norm": 4.142436550512779, + "learning_rate": 2.5835334123415364e-06, + "loss": 0.014232635498046875, + "step": 125905 + }, + { + "epoch": 1.0887065395024687, + "grad_norm": 5.643951517900261, + "learning_rate": 2.5833316528142625e-06, + "loss": 0.032705497741699216, + "step": 125910 + }, + { + "epoch": 1.0887497730240119, + "grad_norm": 0.3157689347439976, + "learning_rate": 2.5831298952086268e-06, + "loss": 0.06941680908203125, + "step": 125915 + }, + { + "epoch": 1.088793006545555, + "grad_norm": 9.846832147540429, + "learning_rate": 2.582928139525559e-06, + "loss": 0.07600860595703125, + "step": 125920 + }, + { + "epoch": 1.0888362400670983, + "grad_norm": 22.53324871357279, + "learning_rate": 2.582726385765989e-06, + "loss": 0.0928466796875, + "step": 125925 + }, + { + "epoch": 1.0888794735886418, + "grad_norm": 2.49812250431258, + "learning_rate": 2.582524633930848e-06, + "loss": 0.022971343994140626, + "step": 125930 + }, + { + "epoch": 1.088922707110185, + "grad_norm": 12.420501582027727, + "learning_rate": 2.5823228840210664e-06, + "loss": 0.1315399169921875, + "step": 125935 + }, + { + "epoch": 1.0889659406317282, + "grad_norm": 0.27584257894341274, + "learning_rate": 2.582121136037576e-06, + "loss": 0.0465362548828125, + "step": 125940 + }, + { + "epoch": 1.0890091741532715, + "grad_norm": 55.462059663508924, + "learning_rate": 2.5819193899813056e-06, + "loss": 0.11120681762695313, + "step": 125945 + }, + { + "epoch": 1.0890524076748147, + "grad_norm": 0.327248180426448, + "learning_rate": 2.581717645853186e-06, + "loss": 0.042401123046875, + "step": 125950 + }, + { + "epoch": 1.089095641196358, + "grad_norm": 3.995715690384805, + "learning_rate": 2.581515903654147e-06, + "loss": 0.035533905029296875, + "step": 125955 + }, + { + "epoch": 1.0891388747179014, + "grad_norm": 0.404573883387141, + "learning_rate": 2.5813141633851205e-06, + "loss": 0.007073211669921875, + "step": 125960 + }, + { + "epoch": 1.0891821082394446, + "grad_norm": 5.659531992941227, + "learning_rate": 2.5811124250470343e-06, + "loss": 0.0569580078125, + "step": 125965 + }, + { + "epoch": 1.0892253417609878, + "grad_norm": 8.106490867180892, + "learning_rate": 2.5809106886408223e-06, + "loss": 0.03275909423828125, + "step": 125970 + }, + { + "epoch": 1.089268575282531, + "grad_norm": 8.205949845166607, + "learning_rate": 2.5807089541674133e-06, + "loss": 0.03855476379394531, + "step": 125975 + }, + { + "epoch": 1.0893118088040743, + "grad_norm": 6.119972666477863, + "learning_rate": 2.5805072216277373e-06, + "loss": 0.08980941772460938, + "step": 125980 + }, + { + "epoch": 1.0893550423256175, + "grad_norm": 10.070431123548651, + "learning_rate": 2.5803054910227244e-06, + "loss": 0.059398651123046875, + "step": 125985 + }, + { + "epoch": 1.0893982758471608, + "grad_norm": 9.807062371819022, + "learning_rate": 2.580103762353306e-06, + "loss": 0.1686767578125, + "step": 125990 + }, + { + "epoch": 1.0894415093687042, + "grad_norm": 0.7629929309617719, + "learning_rate": 2.5799020356204105e-06, + "loss": 0.04203166961669922, + "step": 125995 + }, + { + "epoch": 1.0894847428902474, + "grad_norm": 1.130117939371068, + "learning_rate": 2.5797003108249717e-06, + "loss": 0.01276092529296875, + "step": 126000 + }, + { + "epoch": 1.0895279764117907, + "grad_norm": 1.4312817486187108, + "learning_rate": 2.5794985879679174e-06, + "loss": 0.11271200180053711, + "step": 126005 + }, + { + "epoch": 1.089571209933334, + "grad_norm": 26.121283416517006, + "learning_rate": 2.5792968670501784e-06, + "loss": 0.12481155395507812, + "step": 126010 + }, + { + "epoch": 1.0896144434548771, + "grad_norm": 6.659806268262051, + "learning_rate": 2.5790951480726845e-06, + "loss": 0.04219398498535156, + "step": 126015 + }, + { + "epoch": 1.0896576769764204, + "grad_norm": 3.0621291380454805, + "learning_rate": 2.5788934310363673e-06, + "loss": 0.190380859375, + "step": 126020 + }, + { + "epoch": 1.0897009104979638, + "grad_norm": 3.251282436575269, + "learning_rate": 2.5786917159421553e-06, + "loss": 0.048781967163085936, + "step": 126025 + }, + { + "epoch": 1.089744144019507, + "grad_norm": 1.209034108844813, + "learning_rate": 2.5784900027909804e-06, + "loss": 0.0175323486328125, + "step": 126030 + }, + { + "epoch": 1.0897873775410503, + "grad_norm": 2.5669689508869347, + "learning_rate": 2.5782882915837733e-06, + "loss": 0.1490142822265625, + "step": 126035 + }, + { + "epoch": 1.0898306110625935, + "grad_norm": 0.9702729882184149, + "learning_rate": 2.578086582321463e-06, + "loss": 0.04560279846191406, + "step": 126040 + }, + { + "epoch": 1.0898738445841367, + "grad_norm": 2.298395546434495, + "learning_rate": 2.577884875004979e-06, + "loss": 0.16833038330078126, + "step": 126045 + }, + { + "epoch": 1.08991707810568, + "grad_norm": 2.1940770687361923, + "learning_rate": 2.577683169635254e-06, + "loss": 0.21212234497070312, + "step": 126050 + }, + { + "epoch": 1.0899603116272232, + "grad_norm": 21.318307455755804, + "learning_rate": 2.5774814662132166e-06, + "loss": 0.09651260375976563, + "step": 126055 + }, + { + "epoch": 1.0900035451487666, + "grad_norm": 1.2650595784806546, + "learning_rate": 2.5772797647397958e-06, + "loss": 0.021315765380859376, + "step": 126060 + }, + { + "epoch": 1.0900467786703099, + "grad_norm": 9.26124513649271, + "learning_rate": 2.577078065215925e-06, + "loss": 0.0350799560546875, + "step": 126065 + }, + { + "epoch": 1.090090012191853, + "grad_norm": 18.71191508620381, + "learning_rate": 2.5768763676425323e-06, + "loss": 0.04223976135253906, + "step": 126070 + }, + { + "epoch": 1.0901332457133963, + "grad_norm": 9.372130391039445, + "learning_rate": 2.5766746720205484e-06, + "loss": 0.04644966125488281, + "step": 126075 + }, + { + "epoch": 1.0901764792349395, + "grad_norm": 3.740575532865607, + "learning_rate": 2.576472978350904e-06, + "loss": 0.01908416748046875, + "step": 126080 + }, + { + "epoch": 1.0902197127564828, + "grad_norm": 25.029048977902875, + "learning_rate": 2.576271286634529e-06, + "loss": 0.09224700927734375, + "step": 126085 + }, + { + "epoch": 1.0902629462780262, + "grad_norm": 3.0444012776017275, + "learning_rate": 2.5760695968723516e-06, + "loss": 0.162139892578125, + "step": 126090 + }, + { + "epoch": 1.0903061797995695, + "grad_norm": 2.8287896038860127, + "learning_rate": 2.575867909065305e-06, + "loss": 0.1351898193359375, + "step": 126095 + }, + { + "epoch": 1.0903494133211127, + "grad_norm": 3.029447504964181, + "learning_rate": 2.575666223214318e-06, + "loss": 0.05283546447753906, + "step": 126100 + }, + { + "epoch": 1.090392646842656, + "grad_norm": 6.174517418629725, + "learning_rate": 2.5754645393203216e-06, + "loss": 0.02294158935546875, + "step": 126105 + }, + { + "epoch": 1.0904358803641991, + "grad_norm": 0.6209461177039588, + "learning_rate": 2.5752628573842446e-06, + "loss": 0.06289176940917969, + "step": 126110 + }, + { + "epoch": 1.0904791138857424, + "grad_norm": 4.619609832009123, + "learning_rate": 2.575061177407018e-06, + "loss": 0.03323974609375, + "step": 126115 + }, + { + "epoch": 1.0905223474072856, + "grad_norm": 0.9574008536244486, + "learning_rate": 2.574859499389571e-06, + "loss": 0.030841827392578125, + "step": 126120 + }, + { + "epoch": 1.090565580928829, + "grad_norm": 1.9648782253015342, + "learning_rate": 2.574657823332835e-06, + "loss": 0.028015899658203124, + "step": 126125 + }, + { + "epoch": 1.0906088144503723, + "grad_norm": 15.772664092555623, + "learning_rate": 2.574456149237739e-06, + "loss": 0.07410316467285157, + "step": 126130 + }, + { + "epoch": 1.0906520479719155, + "grad_norm": 10.719661007907403, + "learning_rate": 2.5742544771052146e-06, + "loss": 0.035289764404296875, + "step": 126135 + }, + { + "epoch": 1.0906952814934587, + "grad_norm": 34.94273987798253, + "learning_rate": 2.5740528069361905e-06, + "loss": 0.128009033203125, + "step": 126140 + }, + { + "epoch": 1.090738515015002, + "grad_norm": 0.08198088451925004, + "learning_rate": 2.5738511387315972e-06, + "loss": 0.031907272338867185, + "step": 126145 + }, + { + "epoch": 1.0907817485365452, + "grad_norm": 0.05680497523755723, + "learning_rate": 2.5736494724923643e-06, + "loss": 0.20530948638916016, + "step": 126150 + }, + { + "epoch": 1.0908249820580886, + "grad_norm": 5.201738555004474, + "learning_rate": 2.5734478082194226e-06, + "loss": 0.04212799072265625, + "step": 126155 + }, + { + "epoch": 1.0908682155796319, + "grad_norm": 25.31939937445175, + "learning_rate": 2.573246145913702e-06, + "loss": 0.0780726432800293, + "step": 126160 + }, + { + "epoch": 1.090911449101175, + "grad_norm": 0.19237967738702816, + "learning_rate": 2.5730444855761324e-06, + "loss": 0.08054046630859375, + "step": 126165 + }, + { + "epoch": 1.0909546826227183, + "grad_norm": 1.6649182469427373, + "learning_rate": 2.572842827207645e-06, + "loss": 0.02956390380859375, + "step": 126170 + }, + { + "epoch": 1.0909979161442616, + "grad_norm": 4.555388004886682, + "learning_rate": 2.572641170809168e-06, + "loss": 0.0203033447265625, + "step": 126175 + }, + { + "epoch": 1.0910411496658048, + "grad_norm": 33.815774252871414, + "learning_rate": 2.5724395163816315e-06, + "loss": 0.16572914123535157, + "step": 126180 + }, + { + "epoch": 1.0910843831873482, + "grad_norm": 40.872567880909635, + "learning_rate": 2.572237863925966e-06, + "loss": 0.17661819458007813, + "step": 126185 + }, + { + "epoch": 1.0911276167088915, + "grad_norm": 46.59103149361331, + "learning_rate": 2.572036213443102e-06, + "loss": 0.10164566040039062, + "step": 126190 + }, + { + "epoch": 1.0911708502304347, + "grad_norm": 1.7850104461101732, + "learning_rate": 2.57183456493397e-06, + "loss": 0.0290679931640625, + "step": 126195 + }, + { + "epoch": 1.091214083751978, + "grad_norm": 0.49425638375204034, + "learning_rate": 2.5716329183994987e-06, + "loss": 0.027681922912597655, + "step": 126200 + }, + { + "epoch": 1.0912573172735212, + "grad_norm": 1.2805699382011453, + "learning_rate": 2.571431273840619e-06, + "loss": 0.03307938575744629, + "step": 126205 + }, + { + "epoch": 1.0913005507950644, + "grad_norm": 2.996328820048216, + "learning_rate": 2.5712296312582596e-06, + "loss": 0.058011436462402345, + "step": 126210 + }, + { + "epoch": 1.0913437843166078, + "grad_norm": 18.19829141416977, + "learning_rate": 2.571027990653351e-06, + "loss": 0.07431983947753906, + "step": 126215 + }, + { + "epoch": 1.091387017838151, + "grad_norm": 0.1273607090397378, + "learning_rate": 2.5708263520268237e-06, + "loss": 0.08337211608886719, + "step": 126220 + }, + { + "epoch": 1.0914302513596943, + "grad_norm": 20.40739193431919, + "learning_rate": 2.5706247153796075e-06, + "loss": 0.1288595199584961, + "step": 126225 + }, + { + "epoch": 1.0914734848812375, + "grad_norm": 1.4002776903838245, + "learning_rate": 2.570423080712633e-06, + "loss": 0.0376373291015625, + "step": 126230 + }, + { + "epoch": 1.0915167184027808, + "grad_norm": 0.62039421611293, + "learning_rate": 2.5702214480268288e-06, + "loss": 0.04801177978515625, + "step": 126235 + }, + { + "epoch": 1.091559951924324, + "grad_norm": 0.26519226583523, + "learning_rate": 2.5700198173231246e-06, + "loss": 0.017247772216796874, + "step": 126240 + }, + { + "epoch": 1.0916031854458672, + "grad_norm": 0.988670860286894, + "learning_rate": 2.5698181886024516e-06, + "loss": 0.10937728881835937, + "step": 126245 + }, + { + "epoch": 1.0916464189674107, + "grad_norm": 1.012353891746183, + "learning_rate": 2.569616561865738e-06, + "loss": 0.27127227783203123, + "step": 126250 + }, + { + "epoch": 1.0916896524889539, + "grad_norm": 3.0159873219751074, + "learning_rate": 2.569414937113916e-06, + "loss": 0.03813934326171875, + "step": 126255 + }, + { + "epoch": 1.0917328860104971, + "grad_norm": 1.5946186165033993, + "learning_rate": 2.5692133143479142e-06, + "loss": 0.056317138671875, + "step": 126260 + }, + { + "epoch": 1.0917761195320403, + "grad_norm": 2.2630814753786206, + "learning_rate": 2.5690116935686626e-06, + "loss": 0.0163299560546875, + "step": 126265 + }, + { + "epoch": 1.0918193530535836, + "grad_norm": 1.7399809180231778, + "learning_rate": 2.5688100747770907e-06, + "loss": 0.05600738525390625, + "step": 126270 + }, + { + "epoch": 1.0918625865751268, + "grad_norm": 14.26228159245031, + "learning_rate": 2.5686084579741285e-06, + "loss": 0.07850799560546876, + "step": 126275 + }, + { + "epoch": 1.0919058200966703, + "grad_norm": 2.897724411377956, + "learning_rate": 2.5684068431607052e-06, + "loss": 0.0734527587890625, + "step": 126280 + }, + { + "epoch": 1.0919490536182135, + "grad_norm": 23.99750568502485, + "learning_rate": 2.5682052303377526e-06, + "loss": 0.05619125366210938, + "step": 126285 + }, + { + "epoch": 1.0919922871397567, + "grad_norm": 1.2078125059344662, + "learning_rate": 2.5680036195061995e-06, + "loss": 0.04102325439453125, + "step": 126290 + }, + { + "epoch": 1.0920355206613, + "grad_norm": 1.9899664002555473, + "learning_rate": 2.5678020106669746e-06, + "loss": 0.03185005187988281, + "step": 126295 + }, + { + "epoch": 1.0920787541828432, + "grad_norm": 2.840523184968063, + "learning_rate": 2.5676004038210096e-06, + "loss": 0.019893646240234375, + "step": 126300 + }, + { + "epoch": 1.0921219877043864, + "grad_norm": 7.316384431182565, + "learning_rate": 2.567398798969233e-06, + "loss": 0.15887680053710937, + "step": 126305 + }, + { + "epoch": 1.0921652212259296, + "grad_norm": 0.24127297635856132, + "learning_rate": 2.567197196112575e-06, + "loss": 0.009359931945800782, + "step": 126310 + }, + { + "epoch": 1.092208454747473, + "grad_norm": 8.784169694905444, + "learning_rate": 2.5669955952519643e-06, + "loss": 0.0589569091796875, + "step": 126315 + }, + { + "epoch": 1.0922516882690163, + "grad_norm": 0.3454507591853524, + "learning_rate": 2.566793996388333e-06, + "loss": 0.03307838439941406, + "step": 126320 + }, + { + "epoch": 1.0922949217905595, + "grad_norm": 1.9282878824925453, + "learning_rate": 2.566592399522608e-06, + "loss": 0.045682525634765624, + "step": 126325 + }, + { + "epoch": 1.0923381553121028, + "grad_norm": 1.0000822091805748, + "learning_rate": 2.566390804655722e-06, + "loss": 0.0220855712890625, + "step": 126330 + }, + { + "epoch": 1.092381388833646, + "grad_norm": 25.19761407080171, + "learning_rate": 2.566189211788603e-06, + "loss": 0.30867538452148435, + "step": 126335 + }, + { + "epoch": 1.0924246223551892, + "grad_norm": 0.30727032413539856, + "learning_rate": 2.565987620922181e-06, + "loss": 0.061376190185546874, + "step": 126340 + }, + { + "epoch": 1.0924678558767327, + "grad_norm": 0.2324572521919113, + "learning_rate": 2.5657860320573846e-06, + "loss": 0.04014892578125, + "step": 126345 + }, + { + "epoch": 1.092511089398276, + "grad_norm": 2.5693511385848335, + "learning_rate": 2.5655844451951457e-06, + "loss": 0.12418975830078124, + "step": 126350 + }, + { + "epoch": 1.0925543229198191, + "grad_norm": 9.354218385414379, + "learning_rate": 2.5653828603363925e-06, + "loss": 0.03421630859375, + "step": 126355 + }, + { + "epoch": 1.0925975564413624, + "grad_norm": 3.6041712072474335, + "learning_rate": 2.565181277482056e-06, + "loss": 0.02120208740234375, + "step": 126360 + }, + { + "epoch": 1.0926407899629056, + "grad_norm": 0.4572242371700367, + "learning_rate": 2.564979696633065e-06, + "loss": 0.05143804550170898, + "step": 126365 + }, + { + "epoch": 1.0926840234844488, + "grad_norm": 13.672156915467742, + "learning_rate": 2.5647781177903488e-06, + "loss": 0.24105911254882811, + "step": 126370 + }, + { + "epoch": 1.0927272570059923, + "grad_norm": 1.119920200479603, + "learning_rate": 2.564576540954836e-06, + "loss": 0.0209228515625, + "step": 126375 + }, + { + "epoch": 1.0927704905275355, + "grad_norm": 10.74895438189953, + "learning_rate": 2.56437496612746e-06, + "loss": 0.04196014404296875, + "step": 126380 + }, + { + "epoch": 1.0928137240490787, + "grad_norm": 19.448044879303286, + "learning_rate": 2.5641733933091466e-06, + "loss": 0.07385330200195313, + "step": 126385 + }, + { + "epoch": 1.092856957570622, + "grad_norm": 1.6126930726678377, + "learning_rate": 2.5639718225008277e-06, + "loss": 0.13748130798339844, + "step": 126390 + }, + { + "epoch": 1.0929001910921652, + "grad_norm": 4.074215168783285, + "learning_rate": 2.563770253703433e-06, + "loss": 0.044451141357421876, + "step": 126395 + }, + { + "epoch": 1.0929434246137084, + "grad_norm": 11.885088662032194, + "learning_rate": 2.5635686869178907e-06, + "loss": 0.08440132141113281, + "step": 126400 + }, + { + "epoch": 1.0929866581352519, + "grad_norm": 2.3473741543261197, + "learning_rate": 2.56336712214513e-06, + "loss": 0.11869735717773437, + "step": 126405 + }, + { + "epoch": 1.093029891656795, + "grad_norm": 17.997632231527625, + "learning_rate": 2.5631655593860823e-06, + "loss": 0.18024444580078125, + "step": 126410 + }, + { + "epoch": 1.0930731251783383, + "grad_norm": 1.1395235605141474, + "learning_rate": 2.5629639986416765e-06, + "loss": 0.05663986206054687, + "step": 126415 + }, + { + "epoch": 1.0931163586998816, + "grad_norm": 0.7847284415612186, + "learning_rate": 2.5627624399128424e-06, + "loss": 0.08871841430664062, + "step": 126420 + }, + { + "epoch": 1.0931595922214248, + "grad_norm": 0.052561135283007834, + "learning_rate": 2.5625608832005096e-06, + "loss": 0.13736696243286134, + "step": 126425 + }, + { + "epoch": 1.093202825742968, + "grad_norm": 0.16367780573489427, + "learning_rate": 2.5623593285056068e-06, + "loss": 0.06711158752441407, + "step": 126430 + }, + { + "epoch": 1.0932460592645112, + "grad_norm": 7.194212902059272, + "learning_rate": 2.562157775829064e-06, + "loss": 0.3201080322265625, + "step": 126435 + }, + { + "epoch": 1.0932892927860547, + "grad_norm": 0.7264828274687393, + "learning_rate": 2.5619562251718106e-06, + "loss": 0.04585418701171875, + "step": 126440 + }, + { + "epoch": 1.093332526307598, + "grad_norm": 21.545435161545125, + "learning_rate": 2.561754676534776e-06, + "loss": 0.07808189392089844, + "step": 126445 + }, + { + "epoch": 1.0933757598291411, + "grad_norm": 0.4605062323123653, + "learning_rate": 2.5615531299188913e-06, + "loss": 0.02704925537109375, + "step": 126450 + }, + { + "epoch": 1.0934189933506844, + "grad_norm": 0.12473429208707512, + "learning_rate": 2.561351585325085e-06, + "loss": 0.06950836181640625, + "step": 126455 + }, + { + "epoch": 1.0934622268722276, + "grad_norm": 18.625925641943862, + "learning_rate": 2.561150042754286e-06, + "loss": 0.2655021667480469, + "step": 126460 + }, + { + "epoch": 1.0935054603937708, + "grad_norm": 0.28136246569924717, + "learning_rate": 2.560948502207424e-06, + "loss": 0.024978828430175782, + "step": 126465 + }, + { + "epoch": 1.0935486939153143, + "grad_norm": 33.315325345958826, + "learning_rate": 2.5607469636854272e-06, + "loss": 0.16916656494140625, + "step": 126470 + }, + { + "epoch": 1.0935919274368575, + "grad_norm": 3.729800778038888, + "learning_rate": 2.560545427189229e-06, + "loss": 0.08625907897949218, + "step": 126475 + }, + { + "epoch": 1.0936351609584007, + "grad_norm": 1.7049576685006544, + "learning_rate": 2.5603438927197555e-06, + "loss": 0.06757965087890624, + "step": 126480 + }, + { + "epoch": 1.093678394479944, + "grad_norm": 8.667867525318389, + "learning_rate": 2.560142360277938e-06, + "loss": 0.0507171630859375, + "step": 126485 + }, + { + "epoch": 1.0937216280014872, + "grad_norm": 60.98829834274525, + "learning_rate": 2.5599408298647038e-06, + "loss": 0.34991607666015623, + "step": 126490 + }, + { + "epoch": 1.0937648615230304, + "grad_norm": 1.1473408309982986, + "learning_rate": 2.5597393014809842e-06, + "loss": 0.017258071899414064, + "step": 126495 + }, + { + "epoch": 1.0938080950445737, + "grad_norm": 6.537091284415796, + "learning_rate": 2.5595377751277086e-06, + "loss": 0.0627227783203125, + "step": 126500 + }, + { + "epoch": 1.093851328566117, + "grad_norm": 0.38122714870888824, + "learning_rate": 2.559336250805804e-06, + "loss": 0.032693099975585935, + "step": 126505 + }, + { + "epoch": 1.0938945620876603, + "grad_norm": 2.0991299704885065, + "learning_rate": 2.559134728516203e-06, + "loss": 0.19425201416015625, + "step": 126510 + }, + { + "epoch": 1.0939377956092036, + "grad_norm": 2.412054073759596, + "learning_rate": 2.558933208259834e-06, + "loss": 0.015478515625, + "step": 126515 + }, + { + "epoch": 1.0939810291307468, + "grad_norm": 2.25281017258805, + "learning_rate": 2.5587316900376253e-06, + "loss": 0.047734832763671874, + "step": 126520 + }, + { + "epoch": 1.09402426265229, + "grad_norm": 6.742652362681152, + "learning_rate": 2.5585301738505077e-06, + "loss": 0.05656890869140625, + "step": 126525 + }, + { + "epoch": 1.0940674961738333, + "grad_norm": 18.29014274663834, + "learning_rate": 2.55832865969941e-06, + "loss": 0.34366912841796876, + "step": 126530 + }, + { + "epoch": 1.0941107296953767, + "grad_norm": 3.361682514792818, + "learning_rate": 2.55812714758526e-06, + "loss": 0.08774948120117188, + "step": 126535 + }, + { + "epoch": 1.09415396321692, + "grad_norm": 2.2263121471481555, + "learning_rate": 2.55792563750899e-06, + "loss": 0.08189697265625, + "step": 126540 + }, + { + "epoch": 1.0941971967384632, + "grad_norm": 2.080306317524999, + "learning_rate": 2.557724129471528e-06, + "loss": 0.04792575836181641, + "step": 126545 + }, + { + "epoch": 1.0942404302600064, + "grad_norm": 2.2662930817757556, + "learning_rate": 2.5575226234738024e-06, + "loss": 0.0551788330078125, + "step": 126550 + }, + { + "epoch": 1.0942836637815496, + "grad_norm": 1.466694100514354, + "learning_rate": 2.557321119516744e-06, + "loss": 0.02351226806640625, + "step": 126555 + }, + { + "epoch": 1.0943268973030928, + "grad_norm": 8.638118783539623, + "learning_rate": 2.5571196176012815e-06, + "loss": 0.053851318359375, + "step": 126560 + }, + { + "epoch": 1.094370130824636, + "grad_norm": 14.806263228087952, + "learning_rate": 2.556918117728343e-06, + "loss": 0.12444381713867188, + "step": 126565 + }, + { + "epoch": 1.0944133643461795, + "grad_norm": 6.141162915218446, + "learning_rate": 2.55671661989886e-06, + "loss": 0.18307266235351563, + "step": 126570 + }, + { + "epoch": 1.0944565978677228, + "grad_norm": 10.532554565517904, + "learning_rate": 2.556515124113761e-06, + "loss": 0.04589080810546875, + "step": 126575 + }, + { + "epoch": 1.094499831389266, + "grad_norm": 40.292439326662304, + "learning_rate": 2.5563136303739745e-06, + "loss": 0.35198211669921875, + "step": 126580 + }, + { + "epoch": 1.0945430649108092, + "grad_norm": 0.7056036637151603, + "learning_rate": 2.556112138680431e-06, + "loss": 0.0799560546875, + "step": 126585 + }, + { + "epoch": 1.0945862984323524, + "grad_norm": 0.08977093345090696, + "learning_rate": 2.5559106490340587e-06, + "loss": 0.04991970062255859, + "step": 126590 + }, + { + "epoch": 1.0946295319538957, + "grad_norm": 34.906385774820734, + "learning_rate": 2.555709161435788e-06, + "loss": 0.3605934143066406, + "step": 126595 + }, + { + "epoch": 1.0946727654754391, + "grad_norm": 0.6070032986170061, + "learning_rate": 2.5555076758865453e-06, + "loss": 0.018102455139160156, + "step": 126600 + }, + { + "epoch": 1.0947159989969824, + "grad_norm": 5.856917782092961, + "learning_rate": 2.555306192387264e-06, + "loss": 0.2399932861328125, + "step": 126605 + }, + { + "epoch": 1.0947592325185256, + "grad_norm": 1.836387500519282, + "learning_rate": 2.5551047109388698e-06, + "loss": 0.021047592163085938, + "step": 126610 + }, + { + "epoch": 1.0948024660400688, + "grad_norm": 19.721078111263143, + "learning_rate": 2.554903231542294e-06, + "loss": 0.0570037841796875, + "step": 126615 + }, + { + "epoch": 1.094845699561612, + "grad_norm": 8.986431842088738, + "learning_rate": 2.554701754198466e-06, + "loss": 0.3056549072265625, + "step": 126620 + }, + { + "epoch": 1.0948889330831553, + "grad_norm": 1.1425641354302292, + "learning_rate": 2.5545002789083135e-06, + "loss": 0.016759490966796874, + "step": 126625 + }, + { + "epoch": 1.0949321666046987, + "grad_norm": 3.939713642931747, + "learning_rate": 2.5542988056727653e-06, + "loss": 0.023193359375, + "step": 126630 + }, + { + "epoch": 1.094975400126242, + "grad_norm": 5.443499104180097, + "learning_rate": 2.554097334492753e-06, + "loss": 0.03409576416015625, + "step": 126635 + }, + { + "epoch": 1.0950186336477852, + "grad_norm": 11.286134548550054, + "learning_rate": 2.5538958653692034e-06, + "loss": 0.15680389404296874, + "step": 126640 + }, + { + "epoch": 1.0950618671693284, + "grad_norm": 0.3900438149643544, + "learning_rate": 2.5536943983030476e-06, + "loss": 0.1418182373046875, + "step": 126645 + }, + { + "epoch": 1.0951051006908716, + "grad_norm": 6.068884382599252, + "learning_rate": 2.5534929332952137e-06, + "loss": 0.018990325927734374, + "step": 126650 + }, + { + "epoch": 1.0951483342124149, + "grad_norm": 31.36992346721951, + "learning_rate": 2.553291470346631e-06, + "loss": 0.16757984161376954, + "step": 126655 + }, + { + "epoch": 1.0951915677339583, + "grad_norm": 8.9469866130204, + "learning_rate": 2.5530900094582277e-06, + "loss": 0.14795989990234376, + "step": 126660 + }, + { + "epoch": 1.0952348012555015, + "grad_norm": 0.14406123130892629, + "learning_rate": 2.552888550630933e-06, + "loss": 0.07484474182128906, + "step": 126665 + }, + { + "epoch": 1.0952780347770448, + "grad_norm": 12.331078126807181, + "learning_rate": 2.552687093865679e-06, + "loss": 0.09056396484375, + "step": 126670 + }, + { + "epoch": 1.095321268298588, + "grad_norm": 3.4740145022727966, + "learning_rate": 2.5524856391633916e-06, + "loss": 0.016197967529296874, + "step": 126675 + }, + { + "epoch": 1.0953645018201312, + "grad_norm": 0.24356557215727873, + "learning_rate": 2.5522841865250013e-06, + "loss": 0.07015914916992187, + "step": 126680 + }, + { + "epoch": 1.0954077353416745, + "grad_norm": 64.76648339658006, + "learning_rate": 2.5520827359514368e-06, + "loss": 0.2548679351806641, + "step": 126685 + }, + { + "epoch": 1.0954509688632177, + "grad_norm": 15.922622726720196, + "learning_rate": 2.5518812874436264e-06, + "loss": 0.15788841247558594, + "step": 126690 + }, + { + "epoch": 1.0954942023847611, + "grad_norm": 8.401001062011964, + "learning_rate": 2.5516798410024997e-06, + "loss": 0.2500953674316406, + "step": 126695 + }, + { + "epoch": 1.0955374359063044, + "grad_norm": 0.8627523685087327, + "learning_rate": 2.5514783966289864e-06, + "loss": 0.18740310668945312, + "step": 126700 + }, + { + "epoch": 1.0955806694278476, + "grad_norm": 6.088925413440865, + "learning_rate": 2.5512769543240156e-06, + "loss": 0.04059524536132812, + "step": 126705 + }, + { + "epoch": 1.0956239029493908, + "grad_norm": 0.5523465708730337, + "learning_rate": 2.5510755140885157e-06, + "loss": 0.13385238647460937, + "step": 126710 + }, + { + "epoch": 1.095667136470934, + "grad_norm": 0.09893585434844326, + "learning_rate": 2.5508740759234155e-06, + "loss": 0.04182844161987305, + "step": 126715 + }, + { + "epoch": 1.0957103699924773, + "grad_norm": 0.5268735465671579, + "learning_rate": 2.550672639829645e-06, + "loss": 0.08160934448242188, + "step": 126720 + }, + { + "epoch": 1.0957536035140207, + "grad_norm": 1.3677328658365941, + "learning_rate": 2.550471205808131e-06, + "loss": 0.04275054931640625, + "step": 126725 + }, + { + "epoch": 1.095796837035564, + "grad_norm": 0.030091003279083563, + "learning_rate": 2.5502697738598057e-06, + "loss": 0.04223194122314453, + "step": 126730 + }, + { + "epoch": 1.0958400705571072, + "grad_norm": 1.2241821174475638, + "learning_rate": 2.5500683439855957e-06, + "loss": 0.05161895751953125, + "step": 126735 + }, + { + "epoch": 1.0958833040786504, + "grad_norm": 1.3217811082071103, + "learning_rate": 2.5498669161864317e-06, + "loss": 0.013220977783203126, + "step": 126740 + }, + { + "epoch": 1.0959265376001937, + "grad_norm": 6.679770273568701, + "learning_rate": 2.5496654904632405e-06, + "loss": 0.08609390258789062, + "step": 126745 + }, + { + "epoch": 1.0959697711217369, + "grad_norm": 4.718646381356783, + "learning_rate": 2.549464066816953e-06, + "loss": 0.08561248779296875, + "step": 126750 + }, + { + "epoch": 1.09601300464328, + "grad_norm": 27.871104511267216, + "learning_rate": 2.5492626452484977e-06, + "loss": 0.12368192672729492, + "step": 126755 + }, + { + "epoch": 1.0960562381648236, + "grad_norm": 81.19678201549128, + "learning_rate": 2.5490612257588015e-06, + "loss": 0.16068649291992188, + "step": 126760 + }, + { + "epoch": 1.0960994716863668, + "grad_norm": 5.604459896047229, + "learning_rate": 2.5488598083487965e-06, + "loss": 0.19350090026855468, + "step": 126765 + }, + { + "epoch": 1.09614270520791, + "grad_norm": 0.39292200528788523, + "learning_rate": 2.5486583930194106e-06, + "loss": 0.019193267822265624, + "step": 126770 + }, + { + "epoch": 1.0961859387294532, + "grad_norm": 1.0951244687848172, + "learning_rate": 2.5484569797715714e-06, + "loss": 0.011670303344726563, + "step": 126775 + }, + { + "epoch": 1.0962291722509965, + "grad_norm": 1.0370774326335612, + "learning_rate": 2.548255568606209e-06, + "loss": 0.13425559997558595, + "step": 126780 + }, + { + "epoch": 1.0962724057725397, + "grad_norm": 0.8520466089012455, + "learning_rate": 2.5480541595242525e-06, + "loss": 0.131414794921875, + "step": 126785 + }, + { + "epoch": 1.0963156392940832, + "grad_norm": 2.419360534146402, + "learning_rate": 2.5478527525266287e-06, + "loss": 0.14532470703125, + "step": 126790 + }, + { + "epoch": 1.0963588728156264, + "grad_norm": 11.07381339905763, + "learning_rate": 2.54765134761427e-06, + "loss": 0.07726516723632812, + "step": 126795 + }, + { + "epoch": 1.0964021063371696, + "grad_norm": 0.7304237313024694, + "learning_rate": 2.5474499447881026e-06, + "loss": 0.06923942565917969, + "step": 126800 + }, + { + "epoch": 1.0964453398587128, + "grad_norm": 0.12190465464920058, + "learning_rate": 2.5472485440490553e-06, + "loss": 0.13769950866699218, + "step": 126805 + }, + { + "epoch": 1.096488573380256, + "grad_norm": 1.34773379564205, + "learning_rate": 2.5470471453980586e-06, + "loss": 0.10070037841796875, + "step": 126810 + }, + { + "epoch": 1.0965318069017993, + "grad_norm": 0.6040983194530404, + "learning_rate": 2.546845748836041e-06, + "loss": 0.104193115234375, + "step": 126815 + }, + { + "epoch": 1.0965750404233425, + "grad_norm": 23.398838708134665, + "learning_rate": 2.5466443543639286e-06, + "loss": 0.07343597412109375, + "step": 126820 + }, + { + "epoch": 1.096618273944886, + "grad_norm": 6.613351776408978, + "learning_rate": 2.546442961982654e-06, + "loss": 0.04634552001953125, + "step": 126825 + }, + { + "epoch": 1.0966615074664292, + "grad_norm": 0.6408161223438532, + "learning_rate": 2.546241571693145e-06, + "loss": 0.06701812744140626, + "step": 126830 + }, + { + "epoch": 1.0967047409879724, + "grad_norm": 0.2629889655452992, + "learning_rate": 2.5460401834963283e-06, + "loss": 0.026383209228515624, + "step": 126835 + }, + { + "epoch": 1.0967479745095157, + "grad_norm": 17.604375244514873, + "learning_rate": 2.545838797393135e-06, + "loss": 0.03127098083496094, + "step": 126840 + }, + { + "epoch": 1.096791208031059, + "grad_norm": 0.1534096997344158, + "learning_rate": 2.545637413384493e-06, + "loss": 0.056676483154296874, + "step": 126845 + }, + { + "epoch": 1.0968344415526021, + "grad_norm": 3.247342742084378, + "learning_rate": 2.5454360314713312e-06, + "loss": 0.16759872436523438, + "step": 126850 + }, + { + "epoch": 1.0968776750741456, + "grad_norm": 11.824774599092004, + "learning_rate": 2.545234651654577e-06, + "loss": 0.022796630859375, + "step": 126855 + }, + { + "epoch": 1.0969209085956888, + "grad_norm": 0.28055120848329695, + "learning_rate": 2.5450332739351617e-06, + "loss": 0.03705520629882812, + "step": 126860 + }, + { + "epoch": 1.096964142117232, + "grad_norm": 1.158086457427208, + "learning_rate": 2.544831898314012e-06, + "loss": 0.13382225036621093, + "step": 126865 + }, + { + "epoch": 1.0970073756387753, + "grad_norm": 4.985351263675938, + "learning_rate": 2.5446305247920583e-06, + "loss": 0.2891273498535156, + "step": 126870 + }, + { + "epoch": 1.0970506091603185, + "grad_norm": 2.980231015866108, + "learning_rate": 2.5444291533702283e-06, + "loss": 0.07153244018554687, + "step": 126875 + }, + { + "epoch": 1.0970938426818617, + "grad_norm": 2.0296084655327964, + "learning_rate": 2.5442277840494503e-06, + "loss": 0.034322357177734374, + "step": 126880 + }, + { + "epoch": 1.0971370762034052, + "grad_norm": 1.4184805643940963, + "learning_rate": 2.544026416830653e-06, + "loss": 0.10059852600097656, + "step": 126885 + }, + { + "epoch": 1.0971803097249484, + "grad_norm": 77.35719126744911, + "learning_rate": 2.5438250517147654e-06, + "loss": 0.14730873107910156, + "step": 126890 + }, + { + "epoch": 1.0972235432464916, + "grad_norm": 32.05659430719519, + "learning_rate": 2.5436236887027172e-06, + "loss": 0.11796302795410156, + "step": 126895 + }, + { + "epoch": 1.0972667767680349, + "grad_norm": 0.9039589987844138, + "learning_rate": 2.5434223277954362e-06, + "loss": 0.08088531494140624, + "step": 126900 + }, + { + "epoch": 1.097310010289578, + "grad_norm": 0.6430904270773754, + "learning_rate": 2.5432209689938516e-06, + "loss": 0.02356109619140625, + "step": 126905 + }, + { + "epoch": 1.0973532438111213, + "grad_norm": 0.3939967111630261, + "learning_rate": 2.543019612298891e-06, + "loss": 0.07890625, + "step": 126910 + }, + { + "epoch": 1.0973964773326648, + "grad_norm": 0.20301258704998168, + "learning_rate": 2.542818257711483e-06, + "loss": 0.054046630859375, + "step": 126915 + }, + { + "epoch": 1.097439710854208, + "grad_norm": 17.756027265196, + "learning_rate": 2.542616905232556e-06, + "loss": 0.092718505859375, + "step": 126920 + }, + { + "epoch": 1.0974829443757512, + "grad_norm": 5.438964758672557, + "learning_rate": 2.5424155548630413e-06, + "loss": 0.09976043701171874, + "step": 126925 + }, + { + "epoch": 1.0975261778972945, + "grad_norm": 2.856081681623022, + "learning_rate": 2.5422142066038655e-06, + "loss": 0.049483108520507815, + "step": 126930 + }, + { + "epoch": 1.0975694114188377, + "grad_norm": 4.225729492168167, + "learning_rate": 2.542012860455957e-06, + "loss": 0.01273965835571289, + "step": 126935 + }, + { + "epoch": 1.097612644940381, + "grad_norm": 2.854157701502331, + "learning_rate": 2.541811516420244e-06, + "loss": 0.013062286376953124, + "step": 126940 + }, + { + "epoch": 1.0976558784619241, + "grad_norm": 22.4667649249973, + "learning_rate": 2.541610174497657e-06, + "loss": 0.05207500457763672, + "step": 126945 + }, + { + "epoch": 1.0976991119834676, + "grad_norm": 30.025587363168686, + "learning_rate": 2.5414088346891214e-06, + "loss": 0.261962890625, + "step": 126950 + }, + { + "epoch": 1.0977423455050108, + "grad_norm": 2.7383406006881583, + "learning_rate": 2.5412074969955695e-06, + "loss": 0.02628173828125, + "step": 126955 + }, + { + "epoch": 1.097785579026554, + "grad_norm": 0.8614924520548833, + "learning_rate": 2.5410061614179283e-06, + "loss": 0.06175155639648437, + "step": 126960 + }, + { + "epoch": 1.0978288125480973, + "grad_norm": 42.700124062911954, + "learning_rate": 2.5408048279571253e-06, + "loss": 0.10916175842285156, + "step": 126965 + }, + { + "epoch": 1.0978720460696405, + "grad_norm": 3.312810976111082, + "learning_rate": 2.54060349661409e-06, + "loss": 0.0786651611328125, + "step": 126970 + }, + { + "epoch": 1.0979152795911837, + "grad_norm": 14.462753786559622, + "learning_rate": 2.540402167389751e-06, + "loss": 0.05170516967773438, + "step": 126975 + }, + { + "epoch": 1.0979585131127272, + "grad_norm": 4.737962011572079, + "learning_rate": 2.540200840285036e-06, + "loss": 0.0447479248046875, + "step": 126980 + }, + { + "epoch": 1.0980017466342704, + "grad_norm": 3.02619735519031, + "learning_rate": 2.5399995153008744e-06, + "loss": 0.0532135009765625, + "step": 126985 + }, + { + "epoch": 1.0980449801558136, + "grad_norm": 0.5115660274378127, + "learning_rate": 2.5397981924381947e-06, + "loss": 0.33786163330078123, + "step": 126990 + }, + { + "epoch": 1.0980882136773569, + "grad_norm": 13.535833249438436, + "learning_rate": 2.539596871697925e-06, + "loss": 0.0755218505859375, + "step": 126995 + }, + { + "epoch": 1.0981314471989, + "grad_norm": 0.08987802941370056, + "learning_rate": 2.5393955530809933e-06, + "loss": 0.05259361267089844, + "step": 127000 + }, + { + "epoch": 1.0981746807204433, + "grad_norm": 0.8570812373826054, + "learning_rate": 2.5391942365883296e-06, + "loss": 0.0220184326171875, + "step": 127005 + }, + { + "epoch": 1.0982179142419866, + "grad_norm": 13.197091864950073, + "learning_rate": 2.5389929222208606e-06, + "loss": 0.0779052734375, + "step": 127010 + }, + { + "epoch": 1.09826114776353, + "grad_norm": 11.944991994470996, + "learning_rate": 2.5387916099795154e-06, + "loss": 0.06171207427978516, + "step": 127015 + }, + { + "epoch": 1.0983043812850732, + "grad_norm": 0.8905445927811746, + "learning_rate": 2.5385902998652228e-06, + "loss": 0.1351837158203125, + "step": 127020 + }, + { + "epoch": 1.0983476148066165, + "grad_norm": 22.308511878187236, + "learning_rate": 2.538388991878911e-06, + "loss": 0.09975032806396485, + "step": 127025 + }, + { + "epoch": 1.0983908483281597, + "grad_norm": 0.6177895029401392, + "learning_rate": 2.538187686021508e-06, + "loss": 0.0383880615234375, + "step": 127030 + }, + { + "epoch": 1.098434081849703, + "grad_norm": 1.1127040893520568, + "learning_rate": 2.5379863822939433e-06, + "loss": 0.15639801025390626, + "step": 127035 + }, + { + "epoch": 1.0984773153712462, + "grad_norm": 4.701185742083848, + "learning_rate": 2.5377850806971447e-06, + "loss": 0.025128555297851563, + "step": 127040 + }, + { + "epoch": 1.0985205488927896, + "grad_norm": 4.476387784247552, + "learning_rate": 2.537583781232039e-06, + "loss": 0.22574310302734374, + "step": 127045 + }, + { + "epoch": 1.0985637824143328, + "grad_norm": 4.037542037015628, + "learning_rate": 2.537382483899557e-06, + "loss": 0.017414951324462892, + "step": 127050 + }, + { + "epoch": 1.098607015935876, + "grad_norm": 2.9882962654023295, + "learning_rate": 2.5371811887006267e-06, + "loss": 0.02310791015625, + "step": 127055 + }, + { + "epoch": 1.0986502494574193, + "grad_norm": 9.309444024931043, + "learning_rate": 2.536979895636175e-06, + "loss": 0.07610244750976562, + "step": 127060 + }, + { + "epoch": 1.0986934829789625, + "grad_norm": 4.462942803992017, + "learning_rate": 2.536778604707132e-06, + "loss": 0.107452392578125, + "step": 127065 + }, + { + "epoch": 1.0987367165005058, + "grad_norm": 0.6185248879198165, + "learning_rate": 2.5365773159144246e-06, + "loss": 0.09075469970703125, + "step": 127070 + }, + { + "epoch": 1.098779950022049, + "grad_norm": 1.8682413085249556, + "learning_rate": 2.536376029258981e-06, + "loss": 0.07170867919921875, + "step": 127075 + }, + { + "epoch": 1.0988231835435924, + "grad_norm": 2.561447058909358, + "learning_rate": 2.536174744741731e-06, + "loss": 0.1230743408203125, + "step": 127080 + }, + { + "epoch": 1.0988664170651357, + "grad_norm": 3.853070814471175, + "learning_rate": 2.535973462363602e-06, + "loss": 0.2787639617919922, + "step": 127085 + }, + { + "epoch": 1.0989096505866789, + "grad_norm": 5.985680296481991, + "learning_rate": 2.535772182125522e-06, + "loss": 0.06918487548828126, + "step": 127090 + }, + { + "epoch": 1.0989528841082221, + "grad_norm": 49.01577461089804, + "learning_rate": 2.5355709040284203e-06, + "loss": 0.061589813232421874, + "step": 127095 + }, + { + "epoch": 1.0989961176297653, + "grad_norm": 98.44345940934716, + "learning_rate": 2.5353696280732246e-06, + "loss": 0.22613525390625, + "step": 127100 + }, + { + "epoch": 1.0990393511513088, + "grad_norm": 3.212147779588484, + "learning_rate": 2.535168354260863e-06, + "loss": 0.09549560546875, + "step": 127105 + }, + { + "epoch": 1.099082584672852, + "grad_norm": 0.2983340037655175, + "learning_rate": 2.534967082592263e-06, + "loss": 0.09268875122070312, + "step": 127110 + }, + { + "epoch": 1.0991258181943953, + "grad_norm": 4.895856104584654, + "learning_rate": 2.5347658130683537e-06, + "loss": 0.035732269287109375, + "step": 127115 + }, + { + "epoch": 1.0991690517159385, + "grad_norm": 10.336709720782755, + "learning_rate": 2.534564545690064e-06, + "loss": 0.049893951416015624, + "step": 127120 + }, + { + "epoch": 1.0992122852374817, + "grad_norm": 37.4389176844435, + "learning_rate": 2.534363280458322e-06, + "loss": 0.14273300170898437, + "step": 127125 + }, + { + "epoch": 1.099255518759025, + "grad_norm": 2.9789967775119024, + "learning_rate": 2.5341620173740552e-06, + "loss": 0.08452033996582031, + "step": 127130 + }, + { + "epoch": 1.0992987522805682, + "grad_norm": 13.036414041870545, + "learning_rate": 2.5339607564381915e-06, + "loss": 0.137860107421875, + "step": 127135 + }, + { + "epoch": 1.0993419858021116, + "grad_norm": 18.165519530500042, + "learning_rate": 2.5337594976516594e-06, + "loss": 0.07625961303710938, + "step": 127140 + }, + { + "epoch": 1.0993852193236548, + "grad_norm": 9.93925627845199, + "learning_rate": 2.5335582410153874e-06, + "loss": 0.13929595947265624, + "step": 127145 + }, + { + "epoch": 1.099428452845198, + "grad_norm": 39.41355351792275, + "learning_rate": 2.5333569865303042e-06, + "loss": 0.19319000244140624, + "step": 127150 + }, + { + "epoch": 1.0994716863667413, + "grad_norm": 4.531021963916031, + "learning_rate": 2.5331557341973374e-06, + "loss": 0.220587158203125, + "step": 127155 + }, + { + "epoch": 1.0995149198882845, + "grad_norm": 0.2928553758357049, + "learning_rate": 2.532954484017415e-06, + "loss": 0.07268447875976562, + "step": 127160 + }, + { + "epoch": 1.0995581534098278, + "grad_norm": 5.303036932808788, + "learning_rate": 2.5327532359914648e-06, + "loss": 0.18446502685546876, + "step": 127165 + }, + { + "epoch": 1.0996013869313712, + "grad_norm": 0.913622723409364, + "learning_rate": 2.532551990120415e-06, + "loss": 0.167254638671875, + "step": 127170 + }, + { + "epoch": 1.0996446204529144, + "grad_norm": 0.5256718351740639, + "learning_rate": 2.5323507464051947e-06, + "loss": 0.058623123168945315, + "step": 127175 + }, + { + "epoch": 1.0996878539744577, + "grad_norm": 0.39925821397817673, + "learning_rate": 2.5321495048467317e-06, + "loss": 0.018684768676757814, + "step": 127180 + }, + { + "epoch": 1.099731087496001, + "grad_norm": 0.07872581988405497, + "learning_rate": 2.531948265445954e-06, + "loss": 0.07454833984375, + "step": 127185 + }, + { + "epoch": 1.0997743210175441, + "grad_norm": 0.5500944180927356, + "learning_rate": 2.531747028203789e-06, + "loss": 0.11940288543701172, + "step": 127190 + }, + { + "epoch": 1.0998175545390874, + "grad_norm": 7.935557928020133, + "learning_rate": 2.5315457931211653e-06, + "loss": 0.03408946990966797, + "step": 127195 + }, + { + "epoch": 1.0998607880606306, + "grad_norm": 0.6193155443992058, + "learning_rate": 2.5313445601990117e-06, + "loss": 0.012562942504882813, + "step": 127200 + }, + { + "epoch": 1.099904021582174, + "grad_norm": 2.2775303882840947, + "learning_rate": 2.5311433294382537e-06, + "loss": 0.023836517333984376, + "step": 127205 + }, + { + "epoch": 1.0999472551037173, + "grad_norm": 7.183234606413204, + "learning_rate": 2.530942100839823e-06, + "loss": 0.035407638549804686, + "step": 127210 + }, + { + "epoch": 1.0999904886252605, + "grad_norm": 0.2630982744053344, + "learning_rate": 2.5307408744046463e-06, + "loss": 0.0434417724609375, + "step": 127215 + }, + { + "epoch": 1.1000337221468037, + "grad_norm": 3.6245242538082407, + "learning_rate": 2.53053965013365e-06, + "loss": 0.05380439758300781, + "step": 127220 + }, + { + "epoch": 1.100076955668347, + "grad_norm": 1.3842665756562431, + "learning_rate": 2.530338428027764e-06, + "loss": 0.0402862548828125, + "step": 127225 + }, + { + "epoch": 1.1001201891898902, + "grad_norm": 1.2935671068231178, + "learning_rate": 2.5301372080879153e-06, + "loss": 0.034176063537597653, + "step": 127230 + }, + { + "epoch": 1.1001634227114336, + "grad_norm": 1.6628812887070643, + "learning_rate": 2.5299359903150315e-06, + "loss": 0.020551300048828124, + "step": 127235 + }, + { + "epoch": 1.1002066562329769, + "grad_norm": 21.63307657911172, + "learning_rate": 2.529734774710043e-06, + "loss": 0.060368824005126956, + "step": 127240 + }, + { + "epoch": 1.10024988975452, + "grad_norm": 8.423018958299144, + "learning_rate": 2.529533561273875e-06, + "loss": 0.1809417724609375, + "step": 127245 + }, + { + "epoch": 1.1002931232760633, + "grad_norm": 4.884167060895026, + "learning_rate": 2.5293323500074576e-06, + "loss": 0.02772998809814453, + "step": 127250 + }, + { + "epoch": 1.1003363567976066, + "grad_norm": 0.4643350699533376, + "learning_rate": 2.5291311409117174e-06, + "loss": 0.058917236328125, + "step": 127255 + }, + { + "epoch": 1.1003795903191498, + "grad_norm": 19.213102833380276, + "learning_rate": 2.5289299339875825e-06, + "loss": 0.121246337890625, + "step": 127260 + }, + { + "epoch": 1.100422823840693, + "grad_norm": 0.241626623476212, + "learning_rate": 2.528728729235982e-06, + "loss": 0.05409164428710937, + "step": 127265 + }, + { + "epoch": 1.1004660573622365, + "grad_norm": 4.25680438625649, + "learning_rate": 2.528527526657841e-06, + "loss": 0.037359619140625, + "step": 127270 + }, + { + "epoch": 1.1005092908837797, + "grad_norm": 6.056306058660274, + "learning_rate": 2.5283263262540912e-06, + "loss": 0.05934600830078125, + "step": 127275 + }, + { + "epoch": 1.100552524405323, + "grad_norm": 0.4068688616932093, + "learning_rate": 2.5281251280256584e-06, + "loss": 0.45074806213378904, + "step": 127280 + }, + { + "epoch": 1.1005957579268661, + "grad_norm": 9.609628182061572, + "learning_rate": 2.52792393197347e-06, + "loss": 0.19665908813476562, + "step": 127285 + }, + { + "epoch": 1.1006389914484094, + "grad_norm": 3.5971070823515308, + "learning_rate": 2.5277227380984554e-06, + "loss": 0.05870552062988281, + "step": 127290 + }, + { + "epoch": 1.1006822249699526, + "grad_norm": 12.28128144242855, + "learning_rate": 2.5275215464015423e-06, + "loss": 0.10170097351074218, + "step": 127295 + }, + { + "epoch": 1.100725458491496, + "grad_norm": 22.40515336252549, + "learning_rate": 2.527320356883656e-06, + "loss": 0.0956817626953125, + "step": 127300 + }, + { + "epoch": 1.1007686920130393, + "grad_norm": 0.17191106410303392, + "learning_rate": 2.527119169545729e-06, + "loss": 0.25529632568359373, + "step": 127305 + }, + { + "epoch": 1.1008119255345825, + "grad_norm": 26.282892108454867, + "learning_rate": 2.526917984388685e-06, + "loss": 0.10313568115234376, + "step": 127310 + }, + { + "epoch": 1.1008551590561257, + "grad_norm": 3.1739099172527614, + "learning_rate": 2.526716801413454e-06, + "loss": 0.03709259033203125, + "step": 127315 + }, + { + "epoch": 1.100898392577669, + "grad_norm": 4.04190188057112, + "learning_rate": 2.5265156206209634e-06, + "loss": 0.21924896240234376, + "step": 127320 + }, + { + "epoch": 1.1009416260992122, + "grad_norm": 53.26258858533458, + "learning_rate": 2.5263144420121413e-06, + "loss": 0.2661285400390625, + "step": 127325 + }, + { + "epoch": 1.1009848596207557, + "grad_norm": 0.44085267043924986, + "learning_rate": 2.5261132655879133e-06, + "loss": 0.041169357299804685, + "step": 127330 + }, + { + "epoch": 1.1010280931422989, + "grad_norm": 3.833721590870117, + "learning_rate": 2.5259120913492107e-06, + "loss": 0.03784255981445313, + "step": 127335 + }, + { + "epoch": 1.101071326663842, + "grad_norm": 66.21203252508826, + "learning_rate": 2.525710919296959e-06, + "loss": 0.382708740234375, + "step": 127340 + }, + { + "epoch": 1.1011145601853853, + "grad_norm": 3.081373703115743, + "learning_rate": 2.5255097494320875e-06, + "loss": 0.12411956787109375, + "step": 127345 + }, + { + "epoch": 1.1011577937069286, + "grad_norm": 8.668845797748453, + "learning_rate": 2.5253085817555228e-06, + "loss": 0.049615478515625, + "step": 127350 + }, + { + "epoch": 1.1012010272284718, + "grad_norm": 2.4390814521540363, + "learning_rate": 2.525107416268193e-06, + "loss": 0.0713714599609375, + "step": 127355 + }, + { + "epoch": 1.1012442607500152, + "grad_norm": 3.8350118757155784, + "learning_rate": 2.524906252971025e-06, + "loss": 0.015135383605957032, + "step": 127360 + }, + { + "epoch": 1.1012874942715585, + "grad_norm": 2.403802405196802, + "learning_rate": 2.5247050918649474e-06, + "loss": 0.17098922729492189, + "step": 127365 + }, + { + "epoch": 1.1013307277931017, + "grad_norm": 23.83473217583461, + "learning_rate": 2.524503932950888e-06, + "loss": 0.06944198608398437, + "step": 127370 + }, + { + "epoch": 1.101373961314645, + "grad_norm": 12.864842135199353, + "learning_rate": 2.5243027762297755e-06, + "loss": 0.09786834716796874, + "step": 127375 + }, + { + "epoch": 1.1014171948361882, + "grad_norm": 2.826932923460401, + "learning_rate": 2.5241016217025363e-06, + "loss": 0.010895538330078124, + "step": 127380 + }, + { + "epoch": 1.1014604283577314, + "grad_norm": 26.65656847955935, + "learning_rate": 2.5239004693700983e-06, + "loss": 0.09330215454101562, + "step": 127385 + }, + { + "epoch": 1.1015036618792746, + "grad_norm": 19.314627323423295, + "learning_rate": 2.5236993192333887e-06, + "loss": 0.23737640380859376, + "step": 127390 + }, + { + "epoch": 1.101546895400818, + "grad_norm": 4.704578318108213, + "learning_rate": 2.523498171293336e-06, + "loss": 0.13384742736816407, + "step": 127395 + }, + { + "epoch": 1.1015901289223613, + "grad_norm": 3.527864753668211, + "learning_rate": 2.5232970255508668e-06, + "loss": 0.04236335754394531, + "step": 127400 + }, + { + "epoch": 1.1016333624439045, + "grad_norm": 0.30972714934186557, + "learning_rate": 2.5230958820069106e-06, + "loss": 0.014772796630859375, + "step": 127405 + }, + { + "epoch": 1.1016765959654478, + "grad_norm": 1.562740599381434, + "learning_rate": 2.5228947406623946e-06, + "loss": 0.08317050933837891, + "step": 127410 + }, + { + "epoch": 1.101719829486991, + "grad_norm": 29.567166944038725, + "learning_rate": 2.5226936015182457e-06, + "loss": 0.21581954956054689, + "step": 127415 + }, + { + "epoch": 1.1017630630085342, + "grad_norm": 5.356129331053014, + "learning_rate": 2.522492464575391e-06, + "loss": 0.04698944091796875, + "step": 127420 + }, + { + "epoch": 1.1018062965300777, + "grad_norm": 10.654825048606344, + "learning_rate": 2.5222913298347583e-06, + "loss": 0.022833633422851562, + "step": 127425 + }, + { + "epoch": 1.101849530051621, + "grad_norm": 1.8776871134299293, + "learning_rate": 2.5220901972972767e-06, + "loss": 0.031855010986328126, + "step": 127430 + }, + { + "epoch": 1.1018927635731641, + "grad_norm": 5.824149280355347, + "learning_rate": 2.5218890669638726e-06, + "loss": 0.11652336120605469, + "step": 127435 + }, + { + "epoch": 1.1019359970947074, + "grad_norm": 2.940747688387108, + "learning_rate": 2.5216879388354743e-06, + "loss": 0.265325927734375, + "step": 127440 + }, + { + "epoch": 1.1019792306162506, + "grad_norm": 2.63011747988559, + "learning_rate": 2.521486812913009e-06, + "loss": 0.019387054443359374, + "step": 127445 + }, + { + "epoch": 1.1020224641377938, + "grad_norm": 30.050806669248747, + "learning_rate": 2.521285689197404e-06, + "loss": 0.13074283599853515, + "step": 127450 + }, + { + "epoch": 1.102065697659337, + "grad_norm": 2.4926874408136923, + "learning_rate": 2.5210845676895872e-06, + "loss": 0.017548465728759767, + "step": 127455 + }, + { + "epoch": 1.1021089311808805, + "grad_norm": 5.943786396687291, + "learning_rate": 2.5208834483904847e-06, + "loss": 0.027843284606933593, + "step": 127460 + }, + { + "epoch": 1.1021521647024237, + "grad_norm": 2.459725929291512, + "learning_rate": 2.520682331301027e-06, + "loss": 0.28995094299316404, + "step": 127465 + }, + { + "epoch": 1.102195398223967, + "grad_norm": 1.4119361730246713, + "learning_rate": 2.52048121642214e-06, + "loss": 0.033159637451171876, + "step": 127470 + }, + { + "epoch": 1.1022386317455102, + "grad_norm": 10.797667142790221, + "learning_rate": 2.5202801037547514e-06, + "loss": 0.09208106994628906, + "step": 127475 + }, + { + "epoch": 1.1022818652670534, + "grad_norm": 3.9527397695612785, + "learning_rate": 2.5200789932997876e-06, + "loss": 0.04429931640625, + "step": 127480 + }, + { + "epoch": 1.1023250987885966, + "grad_norm": 8.14761830778086, + "learning_rate": 2.519877885058178e-06, + "loss": 0.04005889892578125, + "step": 127485 + }, + { + "epoch": 1.10236833231014, + "grad_norm": 0.1958982998327337, + "learning_rate": 2.5196767790308476e-06, + "loss": 0.030185699462890625, + "step": 127490 + }, + { + "epoch": 1.1024115658316833, + "grad_norm": 0.4476114371801397, + "learning_rate": 2.519475675218727e-06, + "loss": 0.067633056640625, + "step": 127495 + }, + { + "epoch": 1.1024547993532265, + "grad_norm": 0.6135619840629819, + "learning_rate": 2.5192745736227417e-06, + "loss": 0.14574308395385743, + "step": 127500 + }, + { + "epoch": 1.1024980328747698, + "grad_norm": 4.1967195431397135, + "learning_rate": 2.51907347424382e-06, + "loss": 0.0654541015625, + "step": 127505 + }, + { + "epoch": 1.102541266396313, + "grad_norm": 11.94275933337, + "learning_rate": 2.518872377082888e-06, + "loss": 0.10273818969726563, + "step": 127510 + }, + { + "epoch": 1.1025844999178562, + "grad_norm": 0.5310876873146597, + "learning_rate": 2.518671282140875e-06, + "loss": 0.055828857421875, + "step": 127515 + }, + { + "epoch": 1.1026277334393995, + "grad_norm": 3.5841344323551394, + "learning_rate": 2.5184701894187065e-06, + "loss": 0.013318634033203125, + "step": 127520 + }, + { + "epoch": 1.102670966960943, + "grad_norm": 55.669025571281814, + "learning_rate": 2.5182690989173113e-06, + "loss": 0.39524497985839846, + "step": 127525 + }, + { + "epoch": 1.1027142004824861, + "grad_norm": 0.5238803877416146, + "learning_rate": 2.5180680106376175e-06, + "loss": 0.22019500732421876, + "step": 127530 + }, + { + "epoch": 1.1027574340040294, + "grad_norm": 2.8017571326042887, + "learning_rate": 2.51786692458055e-06, + "loss": 0.11536407470703125, + "step": 127535 + }, + { + "epoch": 1.1028006675255726, + "grad_norm": 0.6253171400812044, + "learning_rate": 2.517665840747039e-06, + "loss": 0.15098342895507813, + "step": 127540 + }, + { + "epoch": 1.1028439010471158, + "grad_norm": 25.971998975793618, + "learning_rate": 2.51746475913801e-06, + "loss": 0.0815399169921875, + "step": 127545 + }, + { + "epoch": 1.102887134568659, + "grad_norm": 4.165664144764669, + "learning_rate": 2.517263679754391e-06, + "loss": 0.028017616271972655, + "step": 127550 + }, + { + "epoch": 1.1029303680902025, + "grad_norm": 0.8670725295010133, + "learning_rate": 2.517062602597108e-06, + "loss": 0.02489166259765625, + "step": 127555 + }, + { + "epoch": 1.1029736016117457, + "grad_norm": 10.054094765239945, + "learning_rate": 2.5168615276670904e-06, + "loss": 0.11307296752929688, + "step": 127560 + }, + { + "epoch": 1.103016835133289, + "grad_norm": 5.422807687039429, + "learning_rate": 2.516660454965265e-06, + "loss": 0.1229888916015625, + "step": 127565 + }, + { + "epoch": 1.1030600686548322, + "grad_norm": 9.786229172547248, + "learning_rate": 2.516459384492559e-06, + "loss": 0.08174362182617187, + "step": 127570 + }, + { + "epoch": 1.1031033021763754, + "grad_norm": 1.9069288856731665, + "learning_rate": 2.5162583162498996e-06, + "loss": 0.014740371704101562, + "step": 127575 + }, + { + "epoch": 1.1031465356979187, + "grad_norm": 0.10984054961225495, + "learning_rate": 2.516057250238214e-06, + "loss": 0.014012908935546875, + "step": 127580 + }, + { + "epoch": 1.103189769219462, + "grad_norm": 0.5210090075473915, + "learning_rate": 2.5158561864584286e-06, + "loss": 0.013081645965576172, + "step": 127585 + }, + { + "epoch": 1.1032330027410053, + "grad_norm": 2.201760669687016, + "learning_rate": 2.515655124911473e-06, + "loss": 0.06486320495605469, + "step": 127590 + }, + { + "epoch": 1.1032762362625486, + "grad_norm": 3.439107803575216, + "learning_rate": 2.5154540655982725e-06, + "loss": 0.07898635864257812, + "step": 127595 + }, + { + "epoch": 1.1033194697840918, + "grad_norm": 3.7266121155529075, + "learning_rate": 2.5152530085197556e-06, + "loss": 0.02809600830078125, + "step": 127600 + }, + { + "epoch": 1.103362703305635, + "grad_norm": 7.300988519671526, + "learning_rate": 2.5150519536768487e-06, + "loss": 0.030767440795898438, + "step": 127605 + }, + { + "epoch": 1.1034059368271782, + "grad_norm": 55.09200741589161, + "learning_rate": 2.51485090107048e-06, + "loss": 0.2603607177734375, + "step": 127610 + }, + { + "epoch": 1.1034491703487217, + "grad_norm": 2.104566599816924, + "learning_rate": 2.5146498507015748e-06, + "loss": 0.08379669189453125, + "step": 127615 + }, + { + "epoch": 1.103492403870265, + "grad_norm": 0.5916444267718093, + "learning_rate": 2.514448802571062e-06, + "loss": 0.02451353073120117, + "step": 127620 + }, + { + "epoch": 1.1035356373918082, + "grad_norm": 32.38449770712314, + "learning_rate": 2.5142477566798685e-06, + "loss": 0.13473129272460938, + "step": 127625 + }, + { + "epoch": 1.1035788709133514, + "grad_norm": 0.36629942690420236, + "learning_rate": 2.5140467130289214e-06, + "loss": 0.0356658935546875, + "step": 127630 + }, + { + "epoch": 1.1036221044348946, + "grad_norm": 1.9803449549742305, + "learning_rate": 2.5138456716191486e-06, + "loss": 0.171173095703125, + "step": 127635 + }, + { + "epoch": 1.1036653379564378, + "grad_norm": 0.11132847150636456, + "learning_rate": 2.5136446324514763e-06, + "loss": 0.05691070556640625, + "step": 127640 + }, + { + "epoch": 1.103708571477981, + "grad_norm": 0.41659447169957947, + "learning_rate": 2.5134435955268314e-06, + "loss": 0.02701416015625, + "step": 127645 + }, + { + "epoch": 1.1037518049995245, + "grad_norm": 3.623254737037456, + "learning_rate": 2.5132425608461416e-06, + "loss": 0.06753578186035156, + "step": 127650 + }, + { + "epoch": 1.1037950385210678, + "grad_norm": 0.2653771366466892, + "learning_rate": 2.513041528410334e-06, + "loss": 0.018187713623046876, + "step": 127655 + }, + { + "epoch": 1.103838272042611, + "grad_norm": 2.7378576299558968, + "learning_rate": 2.5128404982203366e-06, + "loss": 0.19309463500976562, + "step": 127660 + }, + { + "epoch": 1.1038815055641542, + "grad_norm": 4.231540474251472, + "learning_rate": 2.512639470277076e-06, + "loss": 0.006848716735839843, + "step": 127665 + }, + { + "epoch": 1.1039247390856974, + "grad_norm": 51.29352853191653, + "learning_rate": 2.5124384445814788e-06, + "loss": 0.20087966918945313, + "step": 127670 + }, + { + "epoch": 1.1039679726072407, + "grad_norm": 1.0686900474452143, + "learning_rate": 2.512237421134472e-06, + "loss": 0.0436004638671875, + "step": 127675 + }, + { + "epoch": 1.1040112061287841, + "grad_norm": 39.574754287221836, + "learning_rate": 2.5120363999369826e-06, + "loss": 0.159136962890625, + "step": 127680 + }, + { + "epoch": 1.1040544396503273, + "grad_norm": 17.94795401951669, + "learning_rate": 2.511835380989939e-06, + "loss": 0.123529052734375, + "step": 127685 + }, + { + "epoch": 1.1040976731718706, + "grad_norm": 1.2477317335709832, + "learning_rate": 2.5116343642942672e-06, + "loss": 0.05896816253662109, + "step": 127690 + }, + { + "epoch": 1.1041409066934138, + "grad_norm": 1.3008453783960283, + "learning_rate": 2.5114333498508954e-06, + "loss": 0.014672088623046874, + "step": 127695 + }, + { + "epoch": 1.104184140214957, + "grad_norm": 0.29095336042373154, + "learning_rate": 2.5112323376607493e-06, + "loss": 0.06581268310546876, + "step": 127700 + }, + { + "epoch": 1.1042273737365003, + "grad_norm": 1.2715594134485577, + "learning_rate": 2.511031327724756e-06, + "loss": 0.04583930969238281, + "step": 127705 + }, + { + "epoch": 1.1042706072580435, + "grad_norm": 106.78335553736885, + "learning_rate": 2.5108303200438437e-06, + "loss": 0.5045120239257812, + "step": 127710 + }, + { + "epoch": 1.104313840779587, + "grad_norm": 0.10174593876454101, + "learning_rate": 2.5106293146189375e-06, + "loss": 0.04908943176269531, + "step": 127715 + }, + { + "epoch": 1.1043570743011302, + "grad_norm": 1.1558239526383867, + "learning_rate": 2.510428311450967e-06, + "loss": 0.12300033569335937, + "step": 127720 + }, + { + "epoch": 1.1044003078226734, + "grad_norm": 0.39089074347640396, + "learning_rate": 2.510227310540858e-06, + "loss": 0.04902706146240234, + "step": 127725 + }, + { + "epoch": 1.1044435413442166, + "grad_norm": 0.4574547439908715, + "learning_rate": 2.510026311889537e-06, + "loss": 0.19370670318603517, + "step": 127730 + }, + { + "epoch": 1.1044867748657599, + "grad_norm": 1.247871427905514, + "learning_rate": 2.5098253154979306e-06, + "loss": 0.0340118408203125, + "step": 127735 + }, + { + "epoch": 1.104530008387303, + "grad_norm": 9.632280898118989, + "learning_rate": 2.5096243213669674e-06, + "loss": 0.0788818359375, + "step": 127740 + }, + { + "epoch": 1.1045732419088465, + "grad_norm": 0.7269339365908695, + "learning_rate": 2.5094233294975724e-06, + "loss": 0.013411712646484376, + "step": 127745 + }, + { + "epoch": 1.1046164754303898, + "grad_norm": 0.06931187208570032, + "learning_rate": 2.509222339890675e-06, + "loss": 0.012879562377929688, + "step": 127750 + }, + { + "epoch": 1.104659708951933, + "grad_norm": 1.173896016382211, + "learning_rate": 2.5090213525472e-06, + "loss": 0.017243194580078124, + "step": 127755 + }, + { + "epoch": 1.1047029424734762, + "grad_norm": 20.844891399309546, + "learning_rate": 2.5088203674680754e-06, + "loss": 0.12577438354492188, + "step": 127760 + }, + { + "epoch": 1.1047461759950195, + "grad_norm": 0.38025692641221237, + "learning_rate": 2.508619384654228e-06, + "loss": 0.08081207275390626, + "step": 127765 + }, + { + "epoch": 1.1047894095165627, + "grad_norm": 19.866241095600568, + "learning_rate": 2.508418404106585e-06, + "loss": 0.057042694091796874, + "step": 127770 + }, + { + "epoch": 1.104832643038106, + "grad_norm": 4.548785603331432, + "learning_rate": 2.508217425826071e-06, + "loss": 0.019091796875, + "step": 127775 + }, + { + "epoch": 1.1048758765596494, + "grad_norm": 5.2029954067924775, + "learning_rate": 2.5080164498136165e-06, + "loss": 0.056451416015625, + "step": 127780 + }, + { + "epoch": 1.1049191100811926, + "grad_norm": 1.8586908193823741, + "learning_rate": 2.507815476070147e-06, + "loss": 0.008142662048339844, + "step": 127785 + }, + { + "epoch": 1.1049623436027358, + "grad_norm": 19.869838164972908, + "learning_rate": 2.5076145045965874e-06, + "loss": 0.27341346740722655, + "step": 127790 + }, + { + "epoch": 1.105005577124279, + "grad_norm": 0.26701813259210605, + "learning_rate": 2.5074135353938673e-06, + "loss": 0.07656936645507813, + "step": 127795 + }, + { + "epoch": 1.1050488106458223, + "grad_norm": 11.957059319666602, + "learning_rate": 2.507212568462913e-06, + "loss": 0.0537628173828125, + "step": 127800 + }, + { + "epoch": 1.1050920441673655, + "grad_norm": 0.4506671586937861, + "learning_rate": 2.50701160380465e-06, + "loss": 0.17034225463867186, + "step": 127805 + }, + { + "epoch": 1.105135277688909, + "grad_norm": 4.738937122183093, + "learning_rate": 2.506810641420005e-06, + "loss": 0.05400238037109375, + "step": 127810 + }, + { + "epoch": 1.1051785112104522, + "grad_norm": 5.297874308626703, + "learning_rate": 2.506609681309907e-06, + "loss": 0.0417388916015625, + "step": 127815 + }, + { + "epoch": 1.1052217447319954, + "grad_norm": 9.746476632029681, + "learning_rate": 2.506408723475281e-06, + "loss": 0.023974609375, + "step": 127820 + }, + { + "epoch": 1.1052649782535386, + "grad_norm": 13.412944860831297, + "learning_rate": 2.5062077679170548e-06, + "loss": 0.06099853515625, + "step": 127825 + }, + { + "epoch": 1.1053082117750819, + "grad_norm": 30.573875745793256, + "learning_rate": 2.506006814636155e-06, + "loss": 0.0848052978515625, + "step": 127830 + }, + { + "epoch": 1.105351445296625, + "grad_norm": 8.849108746111447, + "learning_rate": 2.505805863633508e-06, + "loss": 0.06045875549316406, + "step": 127835 + }, + { + "epoch": 1.1053946788181686, + "grad_norm": 8.193278706490862, + "learning_rate": 2.5056049149100395e-06, + "loss": 0.07678260803222656, + "step": 127840 + }, + { + "epoch": 1.1054379123397118, + "grad_norm": 7.295432321796844, + "learning_rate": 2.5054039684666785e-06, + "loss": 0.12874279022216797, + "step": 127845 + }, + { + "epoch": 1.105481145861255, + "grad_norm": 4.745013924817945, + "learning_rate": 2.50520302430435e-06, + "loss": 0.048328399658203125, + "step": 127850 + }, + { + "epoch": 1.1055243793827982, + "grad_norm": 0.12750734181486387, + "learning_rate": 2.5050020824239822e-06, + "loss": 0.03651371002197266, + "step": 127855 + }, + { + "epoch": 1.1055676129043415, + "grad_norm": 0.5740353081164992, + "learning_rate": 2.5048011428265016e-06, + "loss": 0.21185150146484374, + "step": 127860 + }, + { + "epoch": 1.1056108464258847, + "grad_norm": 3.1360637843106813, + "learning_rate": 2.5046002055128335e-06, + "loss": 0.0760162353515625, + "step": 127865 + }, + { + "epoch": 1.1056540799474281, + "grad_norm": 0.4814401022936033, + "learning_rate": 2.5043992704839046e-06, + "loss": 0.021514129638671876, + "step": 127870 + }, + { + "epoch": 1.1056973134689714, + "grad_norm": 2.984155360358486, + "learning_rate": 2.5041983377406434e-06, + "loss": 0.04073905944824219, + "step": 127875 + }, + { + "epoch": 1.1057405469905146, + "grad_norm": 7.71063003443004, + "learning_rate": 2.5039974072839755e-06, + "loss": 0.1240203857421875, + "step": 127880 + }, + { + "epoch": 1.1057837805120578, + "grad_norm": 7.043157516087351, + "learning_rate": 2.5037964791148286e-06, + "loss": 0.0416107177734375, + "step": 127885 + }, + { + "epoch": 1.105827014033601, + "grad_norm": 39.94061067177454, + "learning_rate": 2.503595553234128e-06, + "loss": 0.07963409423828124, + "step": 127890 + }, + { + "epoch": 1.1058702475551443, + "grad_norm": 4.775607763752272, + "learning_rate": 2.5033946296428005e-06, + "loss": 0.13946113586425782, + "step": 127895 + }, + { + "epoch": 1.1059134810766875, + "grad_norm": 3.709272312695761, + "learning_rate": 2.503193708341773e-06, + "loss": 0.03159408569335938, + "step": 127900 + }, + { + "epoch": 1.105956714598231, + "grad_norm": 0.08244208874549204, + "learning_rate": 2.502992789331972e-06, + "loss": 0.018611907958984375, + "step": 127905 + }, + { + "epoch": 1.1059999481197742, + "grad_norm": 35.35650990020282, + "learning_rate": 2.5027918726143244e-06, + "loss": 0.1156585693359375, + "step": 127910 + }, + { + "epoch": 1.1060431816413174, + "grad_norm": 12.633638689608082, + "learning_rate": 2.502590958189758e-06, + "loss": 0.03255882263183594, + "step": 127915 + }, + { + "epoch": 1.1060864151628607, + "grad_norm": 6.823880892680755, + "learning_rate": 2.5023900460591975e-06, + "loss": 0.42181930541992185, + "step": 127920 + }, + { + "epoch": 1.1061296486844039, + "grad_norm": 12.225429727933747, + "learning_rate": 2.5021891362235696e-06, + "loss": 0.05786895751953125, + "step": 127925 + }, + { + "epoch": 1.1061728822059471, + "grad_norm": 3.7630285966460684, + "learning_rate": 2.5019882286838017e-06, + "loss": 0.015865516662597657, + "step": 127930 + }, + { + "epoch": 1.1062161157274906, + "grad_norm": 11.913594967742187, + "learning_rate": 2.5017873234408194e-06, + "loss": 0.07052803039550781, + "step": 127935 + }, + { + "epoch": 1.1062593492490338, + "grad_norm": 0.21514030409804977, + "learning_rate": 2.501586420495551e-06, + "loss": 0.14121856689453124, + "step": 127940 + }, + { + "epoch": 1.106302582770577, + "grad_norm": 0.4616162030006324, + "learning_rate": 2.5013855198489214e-06, + "loss": 0.003179168701171875, + "step": 127945 + }, + { + "epoch": 1.1063458162921203, + "grad_norm": 6.635984965834555, + "learning_rate": 2.5011846215018584e-06, + "loss": 0.1706817626953125, + "step": 127950 + }, + { + "epoch": 1.1063890498136635, + "grad_norm": 9.375005983696923, + "learning_rate": 2.500983725455288e-06, + "loss": 0.034784698486328126, + "step": 127955 + }, + { + "epoch": 1.1064322833352067, + "grad_norm": 8.231637615024088, + "learning_rate": 2.5007828317101354e-06, + "loss": 0.21204032897949218, + "step": 127960 + }, + { + "epoch": 1.10647551685675, + "grad_norm": 1.376182018470664, + "learning_rate": 2.500581940267329e-06, + "loss": 0.14305191040039061, + "step": 127965 + }, + { + "epoch": 1.1065187503782934, + "grad_norm": 2.984544764477735, + "learning_rate": 2.5003810511277938e-06, + "loss": 0.05528564453125, + "step": 127970 + }, + { + "epoch": 1.1065619838998366, + "grad_norm": 5.89155128168492, + "learning_rate": 2.500180164292458e-06, + "loss": 0.2806816101074219, + "step": 127975 + }, + { + "epoch": 1.1066052174213798, + "grad_norm": 0.6649520942220347, + "learning_rate": 2.4999792797622468e-06, + "loss": 0.18996543884277345, + "step": 127980 + }, + { + "epoch": 1.106648450942923, + "grad_norm": 0.7956841027456066, + "learning_rate": 2.4997783975380866e-06, + "loss": 0.061235809326171876, + "step": 127985 + }, + { + "epoch": 1.1066916844644663, + "grad_norm": 8.811974961315759, + "learning_rate": 2.499577517620905e-06, + "loss": 0.2232452392578125, + "step": 127990 + }, + { + "epoch": 1.1067349179860095, + "grad_norm": 0.4356622815458921, + "learning_rate": 2.4993766400116278e-06, + "loss": 0.07556304931640626, + "step": 127995 + }, + { + "epoch": 1.106778151507553, + "grad_norm": 2.801565821142107, + "learning_rate": 2.4991757647111793e-06, + "loss": 0.21451797485351562, + "step": 128000 + }, + { + "epoch": 1.1068213850290962, + "grad_norm": 0.35269704633601406, + "learning_rate": 2.49897489172049e-06, + "loss": 0.090966796875, + "step": 128005 + }, + { + "epoch": 1.1068646185506394, + "grad_norm": 2.08445050139646, + "learning_rate": 2.4987740210404837e-06, + "loss": 0.06862335205078125, + "step": 128010 + }, + { + "epoch": 1.1069078520721827, + "grad_norm": 1.0173495224350961, + "learning_rate": 2.4985731526720873e-06, + "loss": 0.04008541107177734, + "step": 128015 + }, + { + "epoch": 1.106951085593726, + "grad_norm": 8.099138102911207, + "learning_rate": 2.498372286616227e-06, + "loss": 0.19085102081298827, + "step": 128020 + }, + { + "epoch": 1.1069943191152691, + "grad_norm": 0.18230089960472357, + "learning_rate": 2.49817142287383e-06, + "loss": 0.062202739715576175, + "step": 128025 + }, + { + "epoch": 1.1070375526368126, + "grad_norm": 45.02324408883329, + "learning_rate": 2.497970561445821e-06, + "loss": 0.24711761474609376, + "step": 128030 + }, + { + "epoch": 1.1070807861583558, + "grad_norm": 5.410042593639285, + "learning_rate": 2.497769702333128e-06, + "loss": 0.05330963134765625, + "step": 128035 + }, + { + "epoch": 1.107124019679899, + "grad_norm": 0.8307919560063542, + "learning_rate": 2.497568845536677e-06, + "loss": 0.030096435546875, + "step": 128040 + }, + { + "epoch": 1.1071672532014423, + "grad_norm": 0.8156261269447451, + "learning_rate": 2.4973679910573937e-06, + "loss": 0.02305145263671875, + "step": 128045 + }, + { + "epoch": 1.1072104867229855, + "grad_norm": 0.20385977952959955, + "learning_rate": 2.497167138896205e-06, + "loss": 0.040570831298828124, + "step": 128050 + }, + { + "epoch": 1.1072537202445287, + "grad_norm": 47.36295921819866, + "learning_rate": 2.4969662890540373e-06, + "loss": 0.12126007080078124, + "step": 128055 + }, + { + "epoch": 1.1072969537660722, + "grad_norm": 0.9179090145068984, + "learning_rate": 2.4967654415318167e-06, + "loss": 0.1287445068359375, + "step": 128060 + }, + { + "epoch": 1.1073401872876154, + "grad_norm": 15.945983744844593, + "learning_rate": 2.496564596330468e-06, + "loss": 0.1381500244140625, + "step": 128065 + }, + { + "epoch": 1.1073834208091586, + "grad_norm": 18.851230205181576, + "learning_rate": 2.49636375345092e-06, + "loss": 0.132855224609375, + "step": 128070 + }, + { + "epoch": 1.1074266543307019, + "grad_norm": 2.335743451306148, + "learning_rate": 2.4961629128940975e-06, + "loss": 0.023119735717773437, + "step": 128075 + }, + { + "epoch": 1.107469887852245, + "grad_norm": 10.10085957198504, + "learning_rate": 2.4959620746609277e-06, + "loss": 0.06400890350341797, + "step": 128080 + }, + { + "epoch": 1.1075131213737883, + "grad_norm": 0.26709598206875507, + "learning_rate": 2.4957612387523365e-06, + "loss": 0.04861793518066406, + "step": 128085 + }, + { + "epoch": 1.1075563548953316, + "grad_norm": 11.845330386658596, + "learning_rate": 2.4955604051692496e-06, + "loss": 0.16818618774414062, + "step": 128090 + }, + { + "epoch": 1.107599588416875, + "grad_norm": 3.8519798353125942, + "learning_rate": 2.4953595739125925e-06, + "loss": 0.14674072265625, + "step": 128095 + }, + { + "epoch": 1.1076428219384182, + "grad_norm": 0.18326399138721278, + "learning_rate": 2.4951587449832936e-06, + "loss": 0.024198150634765624, + "step": 128100 + }, + { + "epoch": 1.1076860554599615, + "grad_norm": 0.6896870272988314, + "learning_rate": 2.4949579183822773e-06, + "loss": 0.018868064880371092, + "step": 128105 + }, + { + "epoch": 1.1077292889815047, + "grad_norm": 3.252762157118422, + "learning_rate": 2.494757094110471e-06, + "loss": 0.03197784423828125, + "step": 128110 + }, + { + "epoch": 1.107772522503048, + "grad_norm": 1.279634832721045, + "learning_rate": 2.4945562721688005e-06, + "loss": 0.11417179107666016, + "step": 128115 + }, + { + "epoch": 1.1078157560245911, + "grad_norm": 0.55849258769185, + "learning_rate": 2.4943554525581917e-06, + "loss": 0.00832672119140625, + "step": 128120 + }, + { + "epoch": 1.1078589895461346, + "grad_norm": 12.29986573754067, + "learning_rate": 2.4941546352795696e-06, + "loss": 0.025952529907226563, + "step": 128125 + }, + { + "epoch": 1.1079022230676778, + "grad_norm": 0.31571948416682855, + "learning_rate": 2.493953820333863e-06, + "loss": 0.011966514587402343, + "step": 128130 + }, + { + "epoch": 1.107945456589221, + "grad_norm": 1.7020195797257685, + "learning_rate": 2.493753007721996e-06, + "loss": 0.02056427001953125, + "step": 128135 + }, + { + "epoch": 1.1079886901107643, + "grad_norm": 10.970155587637601, + "learning_rate": 2.493552197444896e-06, + "loss": 0.02491912841796875, + "step": 128140 + }, + { + "epoch": 1.1080319236323075, + "grad_norm": 37.45479108766299, + "learning_rate": 2.4933513895034885e-06, + "loss": 0.051218414306640626, + "step": 128145 + }, + { + "epoch": 1.1080751571538507, + "grad_norm": 4.438215414558965, + "learning_rate": 2.4931505838986995e-06, + "loss": 0.033466148376464847, + "step": 128150 + }, + { + "epoch": 1.108118390675394, + "grad_norm": 2.841982459086268, + "learning_rate": 2.492949780631455e-06, + "loss": 0.06522064208984375, + "step": 128155 + }, + { + "epoch": 1.1081616241969374, + "grad_norm": 3.709823310449401, + "learning_rate": 2.4927489797026805e-06, + "loss": 0.264959716796875, + "step": 128160 + }, + { + "epoch": 1.1082048577184807, + "grad_norm": 2.1321671316207285, + "learning_rate": 2.492548181113304e-06, + "loss": 0.1204193115234375, + "step": 128165 + }, + { + "epoch": 1.1082480912400239, + "grad_norm": 15.41959183073856, + "learning_rate": 2.4923473848642504e-06, + "loss": 0.04283447265625, + "step": 128170 + }, + { + "epoch": 1.108291324761567, + "grad_norm": 0.1522178062806543, + "learning_rate": 2.492146590956446e-06, + "loss": 0.022121238708496093, + "step": 128175 + }, + { + "epoch": 1.1083345582831103, + "grad_norm": 2.174507286649399, + "learning_rate": 2.4919457993908164e-06, + "loss": 0.04073333740234375, + "step": 128180 + }, + { + "epoch": 1.1083777918046536, + "grad_norm": 18.526301627686312, + "learning_rate": 2.4917450101682875e-06, + "loss": 0.11039133071899414, + "step": 128185 + }, + { + "epoch": 1.108421025326197, + "grad_norm": 13.02502892588257, + "learning_rate": 2.491544223289785e-06, + "loss": 0.06599273681640624, + "step": 128190 + }, + { + "epoch": 1.1084642588477402, + "grad_norm": 0.5171021061890388, + "learning_rate": 2.491343438756237e-06, + "loss": 0.034906768798828126, + "step": 128195 + }, + { + "epoch": 1.1085074923692835, + "grad_norm": 26.181222371811796, + "learning_rate": 2.491142656568568e-06, + "loss": 0.2290283203125, + "step": 128200 + }, + { + "epoch": 1.1085507258908267, + "grad_norm": 0.13911727669180388, + "learning_rate": 2.490941876727704e-06, + "loss": 0.017912673950195312, + "step": 128205 + }, + { + "epoch": 1.10859395941237, + "grad_norm": 0.3912580497219467, + "learning_rate": 2.4907410992345704e-06, + "loss": 0.029862213134765624, + "step": 128210 + }, + { + "epoch": 1.1086371929339132, + "grad_norm": 0.6041438574064841, + "learning_rate": 2.4905403240900945e-06, + "loss": 0.1489990234375, + "step": 128215 + }, + { + "epoch": 1.1086804264554564, + "grad_norm": 0.34349134897639794, + "learning_rate": 2.4903395512952004e-06, + "loss": 0.09855499267578124, + "step": 128220 + }, + { + "epoch": 1.1087236599769998, + "grad_norm": 1.1379871849744185, + "learning_rate": 2.490138780850817e-06, + "loss": 0.013252639770507812, + "step": 128225 + }, + { + "epoch": 1.108766893498543, + "grad_norm": 1.056249620983224, + "learning_rate": 2.4899380127578682e-06, + "loss": 0.029641342163085938, + "step": 128230 + }, + { + "epoch": 1.1088101270200863, + "grad_norm": 0.7180671638837529, + "learning_rate": 2.4897372470172797e-06, + "loss": 0.06623764038085937, + "step": 128235 + }, + { + "epoch": 1.1088533605416295, + "grad_norm": 26.027738116510076, + "learning_rate": 2.4895364836299777e-06, + "loss": 0.1487457275390625, + "step": 128240 + }, + { + "epoch": 1.1088965940631728, + "grad_norm": 0.30278170715374303, + "learning_rate": 2.489335722596889e-06, + "loss": 0.046355819702148436, + "step": 128245 + }, + { + "epoch": 1.108939827584716, + "grad_norm": 7.53027989784622, + "learning_rate": 2.4891349639189387e-06, + "loss": 0.07585334777832031, + "step": 128250 + }, + { + "epoch": 1.1089830611062594, + "grad_norm": 15.668282613896706, + "learning_rate": 2.4889342075970516e-06, + "loss": 0.09761314392089844, + "step": 128255 + }, + { + "epoch": 1.1090262946278027, + "grad_norm": 9.315210240140242, + "learning_rate": 2.488733453632156e-06, + "loss": 0.041839218139648436, + "step": 128260 + }, + { + "epoch": 1.109069528149346, + "grad_norm": 1.1846167441661, + "learning_rate": 2.488532702025177e-06, + "loss": 0.05684375762939453, + "step": 128265 + }, + { + "epoch": 1.1091127616708891, + "grad_norm": 8.913019632932889, + "learning_rate": 2.4883319527770386e-06, + "loss": 0.16549224853515626, + "step": 128270 + }, + { + "epoch": 1.1091559951924324, + "grad_norm": 16.47047697720694, + "learning_rate": 2.4881312058886692e-06, + "loss": 0.09337539672851562, + "step": 128275 + }, + { + "epoch": 1.1091992287139756, + "grad_norm": 2.1615282607640975, + "learning_rate": 2.487930461360993e-06, + "loss": 0.09237213134765625, + "step": 128280 + }, + { + "epoch": 1.109242462235519, + "grad_norm": 9.984032594688456, + "learning_rate": 2.4877297191949356e-06, + "loss": 0.09886245727539063, + "step": 128285 + }, + { + "epoch": 1.1092856957570623, + "grad_norm": 0.7788406161473383, + "learning_rate": 2.487528979391424e-06, + "loss": 0.04998855590820313, + "step": 128290 + }, + { + "epoch": 1.1093289292786055, + "grad_norm": 0.08263298292028046, + "learning_rate": 2.487328241951384e-06, + "loss": 0.019959259033203124, + "step": 128295 + }, + { + "epoch": 1.1093721628001487, + "grad_norm": 1.0931286082379992, + "learning_rate": 2.48712750687574e-06, + "loss": 0.3080482482910156, + "step": 128300 + }, + { + "epoch": 1.109415396321692, + "grad_norm": 19.27019494366795, + "learning_rate": 2.4869267741654194e-06, + "loss": 0.07622833251953125, + "step": 128305 + }, + { + "epoch": 1.1094586298432352, + "grad_norm": 6.244209732645498, + "learning_rate": 2.486726043821347e-06, + "loss": 0.04474029541015625, + "step": 128310 + }, + { + "epoch": 1.1095018633647786, + "grad_norm": 1.4609095879283887, + "learning_rate": 2.486525315844449e-06, + "loss": 0.03409423828125, + "step": 128315 + }, + { + "epoch": 1.1095450968863219, + "grad_norm": 8.827039316033531, + "learning_rate": 2.4863245902356495e-06, + "loss": 0.08356380462646484, + "step": 128320 + }, + { + "epoch": 1.109588330407865, + "grad_norm": 5.433522873170884, + "learning_rate": 2.4861238669958767e-06, + "loss": 0.152020263671875, + "step": 128325 + }, + { + "epoch": 1.1096315639294083, + "grad_norm": 4.03380528197156, + "learning_rate": 2.4859231461260547e-06, + "loss": 0.04441642761230469, + "step": 128330 + }, + { + "epoch": 1.1096747974509515, + "grad_norm": 0.34022743788079407, + "learning_rate": 2.48572242762711e-06, + "loss": 0.01196136474609375, + "step": 128335 + }, + { + "epoch": 1.1097180309724948, + "grad_norm": 1.1283479384271293, + "learning_rate": 2.4855217114999687e-06, + "loss": 0.1275327682495117, + "step": 128340 + }, + { + "epoch": 1.109761264494038, + "grad_norm": 8.904483673680215, + "learning_rate": 2.4853209977455554e-06, + "loss": 0.10310955047607422, + "step": 128345 + }, + { + "epoch": 1.1098044980155815, + "grad_norm": 8.04234066810237, + "learning_rate": 2.4851202863647953e-06, + "loss": 0.087060546875, + "step": 128350 + }, + { + "epoch": 1.1098477315371247, + "grad_norm": 0.30613007889317795, + "learning_rate": 2.484919577358616e-06, + "loss": 0.044788360595703125, + "step": 128355 + }, + { + "epoch": 1.109890965058668, + "grad_norm": 0.8566429345262514, + "learning_rate": 2.484718870727941e-06, + "loss": 0.034540557861328126, + "step": 128360 + }, + { + "epoch": 1.1099341985802111, + "grad_norm": 0.1484691522013297, + "learning_rate": 2.4845181664736982e-06, + "loss": 0.46805572509765625, + "step": 128365 + }, + { + "epoch": 1.1099774321017544, + "grad_norm": 11.956198364726879, + "learning_rate": 2.4843174645968117e-06, + "loss": 0.053824996948242186, + "step": 128370 + }, + { + "epoch": 1.1100206656232976, + "grad_norm": 2.123283122297173, + "learning_rate": 2.4841167650982078e-06, + "loss": 0.05436248779296875, + "step": 128375 + }, + { + "epoch": 1.110063899144841, + "grad_norm": 2.8382529495803457, + "learning_rate": 2.4839160679788105e-06, + "loss": 0.03891754150390625, + "step": 128380 + }, + { + "epoch": 1.1101071326663843, + "grad_norm": 2.3208658972460983, + "learning_rate": 2.483715373239547e-06, + "loss": 0.024231719970703124, + "step": 128385 + }, + { + "epoch": 1.1101503661879275, + "grad_norm": 8.14705296385232, + "learning_rate": 2.4835146808813436e-06, + "loss": 0.0927154541015625, + "step": 128390 + }, + { + "epoch": 1.1101935997094707, + "grad_norm": 0.5313361834436635, + "learning_rate": 2.4833139909051247e-06, + "loss": 0.03260955810546875, + "step": 128395 + }, + { + "epoch": 1.110236833231014, + "grad_norm": 6.1296506832514135, + "learning_rate": 2.4831133033118158e-06, + "loss": 0.04607200622558594, + "step": 128400 + }, + { + "epoch": 1.1102800667525572, + "grad_norm": 0.4823645197769045, + "learning_rate": 2.4829126181023426e-06, + "loss": 0.07298736572265625, + "step": 128405 + }, + { + "epoch": 1.1103233002741004, + "grad_norm": 11.656777382746773, + "learning_rate": 2.482711935277631e-06, + "loss": 0.11403465270996094, + "step": 128410 + }, + { + "epoch": 1.1103665337956439, + "grad_norm": 5.906433316493705, + "learning_rate": 2.482511254838605e-06, + "loss": 0.03730316162109375, + "step": 128415 + }, + { + "epoch": 1.110409767317187, + "grad_norm": 7.665839813524601, + "learning_rate": 2.482310576786193e-06, + "loss": 0.06486129760742188, + "step": 128420 + }, + { + "epoch": 1.1104530008387303, + "grad_norm": 5.805346550955966, + "learning_rate": 2.482109901121318e-06, + "loss": 0.03571662902832031, + "step": 128425 + }, + { + "epoch": 1.1104962343602736, + "grad_norm": 3.1557951072806594, + "learning_rate": 2.4819092278449065e-06, + "loss": 0.052973175048828126, + "step": 128430 + }, + { + "epoch": 1.1105394678818168, + "grad_norm": 0.4952458649210248, + "learning_rate": 2.481708556957884e-06, + "loss": 0.028166580200195312, + "step": 128435 + }, + { + "epoch": 1.11058270140336, + "grad_norm": 1.7733403389931064, + "learning_rate": 2.481507888461176e-06, + "loss": 0.03940582275390625, + "step": 128440 + }, + { + "epoch": 1.1106259349249035, + "grad_norm": 3.480627439713972, + "learning_rate": 2.481307222355707e-06, + "loss": 0.047483396530151364, + "step": 128445 + }, + { + "epoch": 1.1106691684464467, + "grad_norm": 3.628319319553592, + "learning_rate": 2.481106558642404e-06, + "loss": 0.0436370849609375, + "step": 128450 + }, + { + "epoch": 1.11071240196799, + "grad_norm": 10.225576512243242, + "learning_rate": 2.4809058973221914e-06, + "loss": 0.06779708862304687, + "step": 128455 + }, + { + "epoch": 1.1107556354895332, + "grad_norm": 0.9976935167487453, + "learning_rate": 2.4807052383959955e-06, + "loss": 0.01554107666015625, + "step": 128460 + }, + { + "epoch": 1.1107988690110764, + "grad_norm": 21.513997001901963, + "learning_rate": 2.480504581864741e-06, + "loss": 0.33359832763671876, + "step": 128465 + }, + { + "epoch": 1.1108421025326196, + "grad_norm": 1.280999793223249, + "learning_rate": 2.480303927729353e-06, + "loss": 0.06932373046875, + "step": 128470 + }, + { + "epoch": 1.1108853360541628, + "grad_norm": 5.269493875555309, + "learning_rate": 2.4801032759907565e-06, + "loss": 0.11346282958984374, + "step": 128475 + }, + { + "epoch": 1.1109285695757063, + "grad_norm": 26.083833588460482, + "learning_rate": 2.479902626649879e-06, + "loss": 0.06863212585449219, + "step": 128480 + }, + { + "epoch": 1.1109718030972495, + "grad_norm": 0.7943265806141693, + "learning_rate": 2.479701979707645e-06, + "loss": 0.0416351318359375, + "step": 128485 + }, + { + "epoch": 1.1110150366187928, + "grad_norm": 37.62764323341035, + "learning_rate": 2.479501335164979e-06, + "loss": 0.1715087890625, + "step": 128490 + }, + { + "epoch": 1.111058270140336, + "grad_norm": 2.5369072996839495, + "learning_rate": 2.4793006930228068e-06, + "loss": 0.021750259399414062, + "step": 128495 + }, + { + "epoch": 1.1111015036618792, + "grad_norm": 3.515959276050768, + "learning_rate": 2.4791000532820537e-06, + "loss": 0.035166168212890626, + "step": 128500 + }, + { + "epoch": 1.1111447371834224, + "grad_norm": 19.24703077254366, + "learning_rate": 2.4788994159436457e-06, + "loss": 0.1340728759765625, + "step": 128505 + }, + { + "epoch": 1.1111879707049659, + "grad_norm": 0.2918043683723793, + "learning_rate": 2.478698781008506e-06, + "loss": 0.059566497802734375, + "step": 128510 + }, + { + "epoch": 1.1112312042265091, + "grad_norm": 0.2777469842383504, + "learning_rate": 2.478498148477563e-06, + "loss": 0.062499618530273436, + "step": 128515 + }, + { + "epoch": 1.1112744377480523, + "grad_norm": 1.1525382808425488, + "learning_rate": 2.47829751835174e-06, + "loss": 0.09638595581054688, + "step": 128520 + }, + { + "epoch": 1.1113176712695956, + "grad_norm": 5.397667656422858, + "learning_rate": 2.4780968906319624e-06, + "loss": 0.29540481567382815, + "step": 128525 + }, + { + "epoch": 1.1113609047911388, + "grad_norm": 26.579651057841005, + "learning_rate": 2.477896265319157e-06, + "loss": 0.047620201110839845, + "step": 128530 + }, + { + "epoch": 1.111404138312682, + "grad_norm": 2.3685090117720877, + "learning_rate": 2.477695642414247e-06, + "loss": 0.010355377197265625, + "step": 128535 + }, + { + "epoch": 1.1114473718342255, + "grad_norm": 79.74472359172312, + "learning_rate": 2.4774950219181575e-06, + "loss": 0.30062255859375, + "step": 128540 + }, + { + "epoch": 1.1114906053557687, + "grad_norm": 88.72007270655982, + "learning_rate": 2.477294403831816e-06, + "loss": 0.19116287231445311, + "step": 128545 + }, + { + "epoch": 1.111533838877312, + "grad_norm": 11.78832860002922, + "learning_rate": 2.477093788156147e-06, + "loss": 0.05665283203125, + "step": 128550 + }, + { + "epoch": 1.1115770723988552, + "grad_norm": 5.153620098853675, + "learning_rate": 2.4768931748920743e-06, + "loss": 0.03179779052734375, + "step": 128555 + }, + { + "epoch": 1.1116203059203984, + "grad_norm": 29.419433575473192, + "learning_rate": 2.4766925640405247e-06, + "loss": 0.09038658142089843, + "step": 128560 + }, + { + "epoch": 1.1116635394419416, + "grad_norm": 10.611559833732626, + "learning_rate": 2.4764919556024226e-06, + "loss": 0.07852325439453126, + "step": 128565 + }, + { + "epoch": 1.111706772963485, + "grad_norm": 11.029222554806841, + "learning_rate": 2.4762913495786935e-06, + "loss": 0.11558742523193359, + "step": 128570 + }, + { + "epoch": 1.1117500064850283, + "grad_norm": 4.618354102756301, + "learning_rate": 2.4760907459702615e-06, + "loss": 0.03607254028320313, + "step": 128575 + }, + { + "epoch": 1.1117932400065715, + "grad_norm": 6.3780029067211474, + "learning_rate": 2.4758901447780532e-06, + "loss": 0.0487945556640625, + "step": 128580 + }, + { + "epoch": 1.1118364735281148, + "grad_norm": 17.351940345143895, + "learning_rate": 2.4756895460029933e-06, + "loss": 0.10655593872070312, + "step": 128585 + }, + { + "epoch": 1.111879707049658, + "grad_norm": 9.004492212432334, + "learning_rate": 2.475488949646007e-06, + "loss": 0.09480705261230468, + "step": 128590 + }, + { + "epoch": 1.1119229405712012, + "grad_norm": 0.37612681711217333, + "learning_rate": 2.4752883557080196e-06, + "loss": 0.014239883422851563, + "step": 128595 + }, + { + "epoch": 1.1119661740927445, + "grad_norm": 25.08736920760967, + "learning_rate": 2.475087764189956e-06, + "loss": 0.20391769409179689, + "step": 128600 + }, + { + "epoch": 1.112009407614288, + "grad_norm": 0.11039961671412989, + "learning_rate": 2.47488717509274e-06, + "loss": 0.10171737670898437, + "step": 128605 + }, + { + "epoch": 1.1120526411358311, + "grad_norm": 1.3695130653548924, + "learning_rate": 2.4746865884172982e-06, + "loss": 0.0358489990234375, + "step": 128610 + }, + { + "epoch": 1.1120958746573744, + "grad_norm": 0.46777935688340344, + "learning_rate": 2.474486004164556e-06, + "loss": 0.20883941650390625, + "step": 128615 + }, + { + "epoch": 1.1121391081789176, + "grad_norm": 2.9415148698051174, + "learning_rate": 2.4742854223354386e-06, + "loss": 0.1318897247314453, + "step": 128620 + }, + { + "epoch": 1.1121823417004608, + "grad_norm": 65.01441724447392, + "learning_rate": 2.4740848429308698e-06, + "loss": 0.4215518951416016, + "step": 128625 + }, + { + "epoch": 1.112225575222004, + "grad_norm": 6.8564297406545585, + "learning_rate": 2.473884265951775e-06, + "loss": 0.0390625, + "step": 128630 + }, + { + "epoch": 1.1122688087435475, + "grad_norm": 24.566685999531746, + "learning_rate": 2.473683691399079e-06, + "loss": 0.10760574340820313, + "step": 128635 + }, + { + "epoch": 1.1123120422650907, + "grad_norm": 1.6690061305274502, + "learning_rate": 2.473483119273708e-06, + "loss": 0.037908172607421874, + "step": 128640 + }, + { + "epoch": 1.112355275786634, + "grad_norm": 13.545329712656685, + "learning_rate": 2.4732825495765866e-06, + "loss": 0.09880523681640625, + "step": 128645 + }, + { + "epoch": 1.1123985093081772, + "grad_norm": 0.9468397488081716, + "learning_rate": 2.473081982308639e-06, + "loss": 0.019762611389160155, + "step": 128650 + }, + { + "epoch": 1.1124417428297204, + "grad_norm": 2.6853534854554546, + "learning_rate": 2.4728814174707914e-06, + "loss": 0.0269195556640625, + "step": 128655 + }, + { + "epoch": 1.1124849763512636, + "grad_norm": 4.93339150221075, + "learning_rate": 2.4726808550639673e-06, + "loss": 0.10246238708496094, + "step": 128660 + }, + { + "epoch": 1.1125282098728069, + "grad_norm": 5.131122891178282, + "learning_rate": 2.4724802950890933e-06, + "loss": 0.08380661010742188, + "step": 128665 + }, + { + "epoch": 1.1125714433943503, + "grad_norm": 9.69318518655052, + "learning_rate": 2.472279737547092e-06, + "loss": 0.03834457397460937, + "step": 128670 + }, + { + "epoch": 1.1126146769158936, + "grad_norm": 6.302064993427666, + "learning_rate": 2.4720791824388915e-06, + "loss": 0.0340667724609375, + "step": 128675 + }, + { + "epoch": 1.1126579104374368, + "grad_norm": 0.9355113306223594, + "learning_rate": 2.471878629765415e-06, + "loss": 0.10605583190917969, + "step": 128680 + }, + { + "epoch": 1.11270114395898, + "grad_norm": 0.12658934111666556, + "learning_rate": 2.471678079527588e-06, + "loss": 0.05934906005859375, + "step": 128685 + }, + { + "epoch": 1.1127443774805232, + "grad_norm": 0.16221819098024176, + "learning_rate": 2.4714775317263336e-06, + "loss": 0.06027030944824219, + "step": 128690 + }, + { + "epoch": 1.1127876110020665, + "grad_norm": 1.0419200024015933, + "learning_rate": 2.4712769863625793e-06, + "loss": 0.041191482543945314, + "step": 128695 + }, + { + "epoch": 1.11283084452361, + "grad_norm": 6.778049874156387, + "learning_rate": 2.4710764434372474e-06, + "loss": 0.02994232177734375, + "step": 128700 + }, + { + "epoch": 1.1128740780451531, + "grad_norm": 4.294197555105622, + "learning_rate": 2.4708759029512654e-06, + "loss": 0.018209075927734374, + "step": 128705 + }, + { + "epoch": 1.1129173115666964, + "grad_norm": 0.2056530013939974, + "learning_rate": 2.470675364905557e-06, + "loss": 0.1766387939453125, + "step": 128710 + }, + { + "epoch": 1.1129605450882396, + "grad_norm": 1.94911740075105, + "learning_rate": 2.470474829301047e-06, + "loss": 0.06995849609375, + "step": 128715 + }, + { + "epoch": 1.1130037786097828, + "grad_norm": 0.20845075814399686, + "learning_rate": 2.4702742961386595e-06, + "loss": 0.03762664794921875, + "step": 128720 + }, + { + "epoch": 1.113047012131326, + "grad_norm": 3.8653704157491426, + "learning_rate": 2.4700737654193214e-06, + "loss": 0.016852569580078126, + "step": 128725 + }, + { + "epoch": 1.1130902456528693, + "grad_norm": 5.113215477087886, + "learning_rate": 2.4698732371439546e-06, + "loss": 0.027898597717285156, + "step": 128730 + }, + { + "epoch": 1.1131334791744127, + "grad_norm": 0.07521851991998214, + "learning_rate": 2.4696727113134865e-06, + "loss": 0.022142410278320312, + "step": 128735 + }, + { + "epoch": 1.113176712695956, + "grad_norm": 4.0458776821260205, + "learning_rate": 2.4694721879288418e-06, + "loss": 0.319244384765625, + "step": 128740 + }, + { + "epoch": 1.1132199462174992, + "grad_norm": 5.802268677278056, + "learning_rate": 2.469271666990944e-06, + "loss": 0.02206573486328125, + "step": 128745 + }, + { + "epoch": 1.1132631797390424, + "grad_norm": 9.800074236012902, + "learning_rate": 2.4690711485007178e-06, + "loss": 0.126639461517334, + "step": 128750 + }, + { + "epoch": 1.1133064132605857, + "grad_norm": 1.18454960432322, + "learning_rate": 2.4688706324590893e-06, + "loss": 0.010637664794921875, + "step": 128755 + }, + { + "epoch": 1.1133496467821289, + "grad_norm": 0.47289167994715275, + "learning_rate": 2.4686701188669823e-06, + "loss": 0.07029190063476562, + "step": 128760 + }, + { + "epoch": 1.1133928803036723, + "grad_norm": 5.517949052857015, + "learning_rate": 2.468469607725321e-06, + "loss": 0.09764785766601562, + "step": 128765 + }, + { + "epoch": 1.1134361138252156, + "grad_norm": 0.16880436800042084, + "learning_rate": 2.4682690990350316e-06, + "loss": 0.08714828491210938, + "step": 128770 + }, + { + "epoch": 1.1134793473467588, + "grad_norm": 0.19910502390091758, + "learning_rate": 2.4680685927970382e-06, + "loss": 0.11855459213256836, + "step": 128775 + }, + { + "epoch": 1.113522580868302, + "grad_norm": 17.07725078853877, + "learning_rate": 2.467868089012265e-06, + "loss": 0.0954132080078125, + "step": 128780 + }, + { + "epoch": 1.1135658143898453, + "grad_norm": 6.407707165465006, + "learning_rate": 2.4676675876816374e-06, + "loss": 0.09162673950195313, + "step": 128785 + }, + { + "epoch": 1.1136090479113885, + "grad_norm": 0.5024148016414017, + "learning_rate": 2.4674670888060803e-06, + "loss": 0.024100542068481445, + "step": 128790 + }, + { + "epoch": 1.113652281432932, + "grad_norm": 19.268722627630204, + "learning_rate": 2.4672665923865164e-06, + "loss": 0.134942626953125, + "step": 128795 + }, + { + "epoch": 1.1136955149544752, + "grad_norm": 2.902876059752807, + "learning_rate": 2.4670660984238736e-06, + "loss": 0.126873779296875, + "step": 128800 + }, + { + "epoch": 1.1137387484760184, + "grad_norm": 7.00480563980368, + "learning_rate": 2.4668656069190735e-06, + "loss": 0.04051647186279297, + "step": 128805 + }, + { + "epoch": 1.1137819819975616, + "grad_norm": 0.6168238356761221, + "learning_rate": 2.4666651178730432e-06, + "loss": 0.02293853759765625, + "step": 128810 + }, + { + "epoch": 1.1138252155191048, + "grad_norm": 21.849950144359518, + "learning_rate": 2.4664646312867064e-06, + "loss": 0.12610321044921874, + "step": 128815 + }, + { + "epoch": 1.113868449040648, + "grad_norm": 1.1320397476419055, + "learning_rate": 2.4662641471609876e-06, + "loss": 0.09395923614501953, + "step": 128820 + }, + { + "epoch": 1.1139116825621915, + "grad_norm": 11.726657628660467, + "learning_rate": 2.4660636654968096e-06, + "loss": 0.058496856689453126, + "step": 128825 + }, + { + "epoch": 1.1139549160837348, + "grad_norm": 17.05948534834884, + "learning_rate": 2.4658631862951004e-06, + "loss": 0.09270172119140625, + "step": 128830 + }, + { + "epoch": 1.113998149605278, + "grad_norm": 4.592787626338808, + "learning_rate": 2.4656627095567824e-06, + "loss": 0.023855209350585938, + "step": 128835 + }, + { + "epoch": 1.1140413831268212, + "grad_norm": 2.8672629204313176, + "learning_rate": 2.4654622352827814e-06, + "loss": 0.052405166625976565, + "step": 128840 + }, + { + "epoch": 1.1140846166483644, + "grad_norm": 0.11622629005859907, + "learning_rate": 2.465261763474021e-06, + "loss": 0.09842300415039062, + "step": 128845 + }, + { + "epoch": 1.1141278501699077, + "grad_norm": 1.6282329164397618, + "learning_rate": 2.465061294131427e-06, + "loss": 0.136492919921875, + "step": 128850 + }, + { + "epoch": 1.114171083691451, + "grad_norm": 9.450757489964547, + "learning_rate": 2.4648608272559215e-06, + "loss": 0.11243476867675781, + "step": 128855 + }, + { + "epoch": 1.1142143172129944, + "grad_norm": 12.904370025324507, + "learning_rate": 2.464660362848431e-06, + "loss": 0.0309539794921875, + "step": 128860 + }, + { + "epoch": 1.1142575507345376, + "grad_norm": 0.0750652038359846, + "learning_rate": 2.4644599009098795e-06, + "loss": 0.0033756256103515624, + "step": 128865 + }, + { + "epoch": 1.1143007842560808, + "grad_norm": 5.291413497580685, + "learning_rate": 2.464259441441192e-06, + "loss": 0.27081146240234377, + "step": 128870 + }, + { + "epoch": 1.114344017777624, + "grad_norm": 0.31276442759029405, + "learning_rate": 2.4640589844432927e-06, + "loss": 0.0459136962890625, + "step": 128875 + }, + { + "epoch": 1.1143872512991673, + "grad_norm": 0.695019964098629, + "learning_rate": 2.4638585299171063e-06, + "loss": 0.01963958740234375, + "step": 128880 + }, + { + "epoch": 1.1144304848207105, + "grad_norm": 0.05106156313926565, + "learning_rate": 2.463658077863556e-06, + "loss": 0.01900625228881836, + "step": 128885 + }, + { + "epoch": 1.114473718342254, + "grad_norm": 0.12660486630043546, + "learning_rate": 2.463457628283568e-06, + "loss": 0.023853492736816407, + "step": 128890 + }, + { + "epoch": 1.1145169518637972, + "grad_norm": 5.817511648155389, + "learning_rate": 2.463257181178065e-06, + "loss": 0.12703857421875, + "step": 128895 + }, + { + "epoch": 1.1145601853853404, + "grad_norm": 0.6359467064885671, + "learning_rate": 2.4630567365479735e-06, + "loss": 0.13144378662109374, + "step": 128900 + }, + { + "epoch": 1.1146034189068836, + "grad_norm": 0.9484244639154906, + "learning_rate": 2.462856294394217e-06, + "loss": 0.06836700439453125, + "step": 128905 + }, + { + "epoch": 1.1146466524284269, + "grad_norm": 6.901328891146763, + "learning_rate": 2.4626558547177197e-06, + "loss": 0.0341796875, + "step": 128910 + }, + { + "epoch": 1.11468988594997, + "grad_norm": 109.03328183675386, + "learning_rate": 2.4624554175194054e-06, + "loss": 0.13748016357421874, + "step": 128915 + }, + { + "epoch": 1.1147331194715133, + "grad_norm": 1.2862813269629378, + "learning_rate": 2.4622549828002e-06, + "loss": 0.2707611083984375, + "step": 128920 + }, + { + "epoch": 1.1147763529930568, + "grad_norm": 1.935896224246226, + "learning_rate": 2.462054550561026e-06, + "loss": 0.05865345001220703, + "step": 128925 + }, + { + "epoch": 1.1148195865146, + "grad_norm": 1.7545835583855354, + "learning_rate": 2.46185412080281e-06, + "loss": 0.1125579833984375, + "step": 128930 + }, + { + "epoch": 1.1148628200361432, + "grad_norm": 0.17706436391630023, + "learning_rate": 2.461653693526475e-06, + "loss": 0.019864654541015624, + "step": 128935 + }, + { + "epoch": 1.1149060535576865, + "grad_norm": 6.242268168725739, + "learning_rate": 2.461453268732946e-06, + "loss": 0.06516456604003906, + "step": 128940 + }, + { + "epoch": 1.1149492870792297, + "grad_norm": 58.31104433517442, + "learning_rate": 2.4612528464231458e-06, + "loss": 0.1902141571044922, + "step": 128945 + }, + { + "epoch": 1.114992520600773, + "grad_norm": 7.176947246293251, + "learning_rate": 2.461052426598001e-06, + "loss": 0.074053955078125, + "step": 128950 + }, + { + "epoch": 1.1150357541223164, + "grad_norm": 0.839034886781232, + "learning_rate": 2.4608520092584335e-06, + "loss": 0.05518035888671875, + "step": 128955 + }, + { + "epoch": 1.1150789876438596, + "grad_norm": 23.351960791851244, + "learning_rate": 2.4606515944053696e-06, + "loss": 0.19757041931152344, + "step": 128960 + }, + { + "epoch": 1.1151222211654028, + "grad_norm": 1.4454271979010138, + "learning_rate": 2.460451182039733e-06, + "loss": 0.10550003051757813, + "step": 128965 + }, + { + "epoch": 1.115165454686946, + "grad_norm": 5.028378418183841, + "learning_rate": 2.4602507721624483e-06, + "loss": 0.1897125244140625, + "step": 128970 + }, + { + "epoch": 1.1152086882084893, + "grad_norm": 5.167092437434355, + "learning_rate": 2.4600503647744387e-06, + "loss": 0.09083709716796876, + "step": 128975 + }, + { + "epoch": 1.1152519217300325, + "grad_norm": 0.27755966139602284, + "learning_rate": 2.4598499598766298e-06, + "loss": 0.08263778686523438, + "step": 128980 + }, + { + "epoch": 1.115295155251576, + "grad_norm": 6.468421757447942, + "learning_rate": 2.459649557469944e-06, + "loss": 0.06519699096679688, + "step": 128985 + }, + { + "epoch": 1.1153383887731192, + "grad_norm": 9.51826738429706, + "learning_rate": 2.459449157555307e-06, + "loss": 0.168994140625, + "step": 128990 + }, + { + "epoch": 1.1153816222946624, + "grad_norm": 23.048559089109105, + "learning_rate": 2.4592487601336435e-06, + "loss": 0.20736541748046874, + "step": 128995 + }, + { + "epoch": 1.1154248558162057, + "grad_norm": 16.222813532571394, + "learning_rate": 2.459048365205877e-06, + "loss": 0.11198768615722657, + "step": 129000 + }, + { + "epoch": 1.1154680893377489, + "grad_norm": 16.35885809254722, + "learning_rate": 2.4588479727729307e-06, + "loss": 0.06026802062988281, + "step": 129005 + }, + { + "epoch": 1.115511322859292, + "grad_norm": 8.566860098552745, + "learning_rate": 2.4586475828357306e-06, + "loss": 0.033329010009765625, + "step": 129010 + }, + { + "epoch": 1.1155545563808356, + "grad_norm": 7.558647000792401, + "learning_rate": 2.4584471953951996e-06, + "loss": 0.08492813110351563, + "step": 129015 + }, + { + "epoch": 1.1155977899023788, + "grad_norm": 3.0782187430751664, + "learning_rate": 2.4582468104522616e-06, + "loss": 0.0167694091796875, + "step": 129020 + }, + { + "epoch": 1.115641023423922, + "grad_norm": 12.414176071547748, + "learning_rate": 2.4580464280078425e-06, + "loss": 0.6591766357421875, + "step": 129025 + }, + { + "epoch": 1.1156842569454652, + "grad_norm": 4.113946255695669, + "learning_rate": 2.4578460480628644e-06, + "loss": 0.026751708984375, + "step": 129030 + }, + { + "epoch": 1.1157274904670085, + "grad_norm": 9.194795422092787, + "learning_rate": 2.4576456706182534e-06, + "loss": 0.02647819519042969, + "step": 129035 + }, + { + "epoch": 1.1157707239885517, + "grad_norm": 107.33451867045729, + "learning_rate": 2.457445295674933e-06, + "loss": 0.21303482055664064, + "step": 129040 + }, + { + "epoch": 1.115813957510095, + "grad_norm": 3.609034520276888, + "learning_rate": 2.457244923233826e-06, + "loss": 0.239019775390625, + "step": 129045 + }, + { + "epoch": 1.1158571910316384, + "grad_norm": 6.868452021535879, + "learning_rate": 2.4570445532958564e-06, + "loss": 0.4442718505859375, + "step": 129050 + }, + { + "epoch": 1.1159004245531816, + "grad_norm": 2.2723306452734313, + "learning_rate": 2.4568441858619513e-06, + "loss": 0.06168785095214844, + "step": 129055 + }, + { + "epoch": 1.1159436580747248, + "grad_norm": 2.5145222908176597, + "learning_rate": 2.456643820933032e-06, + "loss": 0.02373199462890625, + "step": 129060 + }, + { + "epoch": 1.115986891596268, + "grad_norm": 0.12373028508780196, + "learning_rate": 2.4564434585100237e-06, + "loss": 0.013599967956542969, + "step": 129065 + }, + { + "epoch": 1.1160301251178113, + "grad_norm": 0.4426312813362358, + "learning_rate": 2.4562430985938503e-06, + "loss": 0.05526580810546875, + "step": 129070 + }, + { + "epoch": 1.1160733586393545, + "grad_norm": 2.4832179335332, + "learning_rate": 2.4560427411854356e-06, + "loss": 0.0371185302734375, + "step": 129075 + }, + { + "epoch": 1.116116592160898, + "grad_norm": 4.935807043030666, + "learning_rate": 2.455842386285702e-06, + "loss": 0.20384368896484376, + "step": 129080 + }, + { + "epoch": 1.1161598256824412, + "grad_norm": 4.139821313908692, + "learning_rate": 2.4556420338955774e-06, + "loss": 0.008806228637695312, + "step": 129085 + }, + { + "epoch": 1.1162030592039844, + "grad_norm": 18.093266521961727, + "learning_rate": 2.4554416840159825e-06, + "loss": 0.22956695556640624, + "step": 129090 + }, + { + "epoch": 1.1162462927255277, + "grad_norm": 3.526497396100708, + "learning_rate": 2.4552413366478432e-06, + "loss": 0.07874298095703125, + "step": 129095 + }, + { + "epoch": 1.116289526247071, + "grad_norm": 10.348001001704285, + "learning_rate": 2.455040991792083e-06, + "loss": 0.1040740966796875, + "step": 129100 + }, + { + "epoch": 1.1163327597686141, + "grad_norm": 39.356075518889234, + "learning_rate": 2.454840649449625e-06, + "loss": 0.3778387069702148, + "step": 129105 + }, + { + "epoch": 1.1163759932901574, + "grad_norm": 1.6908054770861192, + "learning_rate": 2.4546403096213933e-06, + "loss": 0.02889862060546875, + "step": 129110 + }, + { + "epoch": 1.1164192268117008, + "grad_norm": 1.0099127173535467, + "learning_rate": 2.4544399723083127e-06, + "loss": 0.046095848083496094, + "step": 129115 + }, + { + "epoch": 1.116462460333244, + "grad_norm": 0.8140034140301695, + "learning_rate": 2.454239637511306e-06, + "loss": 0.1416778564453125, + "step": 129120 + }, + { + "epoch": 1.1165056938547873, + "grad_norm": 4.499911425576928, + "learning_rate": 2.454039305231299e-06, + "loss": 0.06763992309570313, + "step": 129125 + }, + { + "epoch": 1.1165489273763305, + "grad_norm": 18.820733752384186, + "learning_rate": 2.4538389754692145e-06, + "loss": 0.0863433837890625, + "step": 129130 + }, + { + "epoch": 1.1165921608978737, + "grad_norm": 16.500947503453375, + "learning_rate": 2.4536386482259766e-06, + "loss": 0.12577285766601562, + "step": 129135 + }, + { + "epoch": 1.116635394419417, + "grad_norm": 1.4415404731265387, + "learning_rate": 2.4534383235025077e-06, + "loss": 0.04596786499023438, + "step": 129140 + }, + { + "epoch": 1.1166786279409604, + "grad_norm": 31.8109056647859, + "learning_rate": 2.4532380012997334e-06, + "loss": 0.09944496154785157, + "step": 129145 + }, + { + "epoch": 1.1167218614625036, + "grad_norm": 4.387398371005212, + "learning_rate": 2.453037681618577e-06, + "loss": 0.021966552734375, + "step": 129150 + }, + { + "epoch": 1.1167650949840469, + "grad_norm": 8.90360756495817, + "learning_rate": 2.452837364459963e-06, + "loss": 0.087939453125, + "step": 129155 + }, + { + "epoch": 1.11680832850559, + "grad_norm": 0.8897386885111613, + "learning_rate": 2.4526370498248153e-06, + "loss": 0.016481399536132812, + "step": 129160 + }, + { + "epoch": 1.1168515620271333, + "grad_norm": 0.6755651690333191, + "learning_rate": 2.4524367377140566e-06, + "loss": 0.13145751953125, + "step": 129165 + }, + { + "epoch": 1.1168947955486765, + "grad_norm": 1.069944486895794, + "learning_rate": 2.4522364281286105e-06, + "loss": 0.029886627197265626, + "step": 129170 + }, + { + "epoch": 1.1169380290702198, + "grad_norm": 10.517615196400142, + "learning_rate": 2.452036121069402e-06, + "loss": 0.05557594299316406, + "step": 129175 + }, + { + "epoch": 1.1169812625917632, + "grad_norm": 2.253329249267648, + "learning_rate": 2.4518358165373546e-06, + "loss": 0.08336639404296875, + "step": 129180 + }, + { + "epoch": 1.1170244961133065, + "grad_norm": 0.4820098694308349, + "learning_rate": 2.451635514533392e-06, + "loss": 0.009965133666992188, + "step": 129185 + }, + { + "epoch": 1.1170677296348497, + "grad_norm": 8.669611490234749, + "learning_rate": 2.4514352150584384e-06, + "loss": 0.1832763671875, + "step": 129190 + }, + { + "epoch": 1.117110963156393, + "grad_norm": 0.2868082749692815, + "learning_rate": 2.4512349181134175e-06, + "loss": 0.03393096923828125, + "step": 129195 + }, + { + "epoch": 1.1171541966779361, + "grad_norm": 5.476730545036447, + "learning_rate": 2.4510346236992518e-06, + "loss": 0.15752029418945312, + "step": 129200 + }, + { + "epoch": 1.1171974301994794, + "grad_norm": 3.6336509495525973, + "learning_rate": 2.450834331816866e-06, + "loss": 0.09536361694335938, + "step": 129205 + }, + { + "epoch": 1.1172406637210228, + "grad_norm": 1.3672197784100368, + "learning_rate": 2.4506340424671826e-06, + "loss": 0.03809967041015625, + "step": 129210 + }, + { + "epoch": 1.117283897242566, + "grad_norm": 7.106344025993316, + "learning_rate": 2.450433755651128e-06, + "loss": 0.10278396606445313, + "step": 129215 + }, + { + "epoch": 1.1173271307641093, + "grad_norm": 7.485958134810211, + "learning_rate": 2.4502334713696242e-06, + "loss": 0.11156120300292968, + "step": 129220 + }, + { + "epoch": 1.1173703642856525, + "grad_norm": 6.666525150807128, + "learning_rate": 2.4500331896235952e-06, + "loss": 0.042597198486328126, + "step": 129225 + }, + { + "epoch": 1.1174135978071957, + "grad_norm": 0.7547005296772529, + "learning_rate": 2.4498329104139636e-06, + "loss": 0.1266754150390625, + "step": 129230 + }, + { + "epoch": 1.117456831328739, + "grad_norm": 4.68266706235304, + "learning_rate": 2.449632633741655e-06, + "loss": 0.07550277709960937, + "step": 129235 + }, + { + "epoch": 1.1175000648502824, + "grad_norm": 0.08320650251815176, + "learning_rate": 2.449432359607591e-06, + "loss": 0.013329315185546874, + "step": 129240 + }, + { + "epoch": 1.1175432983718256, + "grad_norm": 1.8881077528799624, + "learning_rate": 2.449232088012697e-06, + "loss": 0.048366546630859375, + "step": 129245 + }, + { + "epoch": 1.1175865318933689, + "grad_norm": 0.35561758731201504, + "learning_rate": 2.449031818957896e-06, + "loss": 0.023885726928710938, + "step": 129250 + }, + { + "epoch": 1.117629765414912, + "grad_norm": 0.3478401736052178, + "learning_rate": 2.448831552444111e-06, + "loss": 0.1354595184326172, + "step": 129255 + }, + { + "epoch": 1.1176729989364553, + "grad_norm": 10.4768519487916, + "learning_rate": 2.448631288472267e-06, + "loss": 0.05406036376953125, + "step": 129260 + }, + { + "epoch": 1.1177162324579986, + "grad_norm": 0.16140925794615507, + "learning_rate": 2.448431027043287e-06, + "loss": 0.13077850341796876, + "step": 129265 + }, + { + "epoch": 1.117759465979542, + "grad_norm": 3.6030191547081536, + "learning_rate": 2.448230768158094e-06, + "loss": 0.04654388427734375, + "step": 129270 + }, + { + "epoch": 1.1178026995010852, + "grad_norm": 9.31523562439022, + "learning_rate": 2.4480305118176108e-06, + "loss": 0.05986976623535156, + "step": 129275 + }, + { + "epoch": 1.1178459330226285, + "grad_norm": 4.133533815277936, + "learning_rate": 2.447830258022763e-06, + "loss": 0.046661376953125, + "step": 129280 + }, + { + "epoch": 1.1178891665441717, + "grad_norm": 0.12752318631146436, + "learning_rate": 2.447630006774473e-06, + "loss": 0.032160186767578126, + "step": 129285 + }, + { + "epoch": 1.117932400065715, + "grad_norm": 0.6643042331104896, + "learning_rate": 2.4474297580736654e-06, + "loss": 0.01519012451171875, + "step": 129290 + }, + { + "epoch": 1.1179756335872582, + "grad_norm": 4.124062422922054, + "learning_rate": 2.4472295119212627e-06, + "loss": 0.11646575927734375, + "step": 129295 + }, + { + "epoch": 1.1180188671088014, + "grad_norm": 28.687639414007045, + "learning_rate": 2.4470292683181887e-06, + "loss": 0.2136455535888672, + "step": 129300 + }, + { + "epoch": 1.1180621006303448, + "grad_norm": 3.281613961581954, + "learning_rate": 2.4468290272653652e-06, + "loss": 0.053156280517578126, + "step": 129305 + }, + { + "epoch": 1.118105334151888, + "grad_norm": 1.558469249964307, + "learning_rate": 2.4466287887637195e-06, + "loss": 0.08493804931640625, + "step": 129310 + }, + { + "epoch": 1.1181485676734313, + "grad_norm": 0.41007191742602705, + "learning_rate": 2.4464285528141713e-06, + "loss": 0.06744537353515626, + "step": 129315 + }, + { + "epoch": 1.1181918011949745, + "grad_norm": 22.118788770948875, + "learning_rate": 2.446228319417647e-06, + "loss": 0.07305755615234374, + "step": 129320 + }, + { + "epoch": 1.1182350347165178, + "grad_norm": 6.180464252053541, + "learning_rate": 2.4460280885750682e-06, + "loss": 0.1985177993774414, + "step": 129325 + }, + { + "epoch": 1.118278268238061, + "grad_norm": 3.116553828639024, + "learning_rate": 2.4458278602873595e-06, + "loss": 0.062404727935791014, + "step": 129330 + }, + { + "epoch": 1.1183215017596044, + "grad_norm": 1.094958522851053, + "learning_rate": 2.4456276345554423e-06, + "loss": 0.11815185546875, + "step": 129335 + }, + { + "epoch": 1.1183647352811477, + "grad_norm": 1.1849626251480596, + "learning_rate": 2.4454274113802423e-06, + "loss": 0.01590099334716797, + "step": 129340 + }, + { + "epoch": 1.1184079688026909, + "grad_norm": 0.5659556833102706, + "learning_rate": 2.445227190762682e-06, + "loss": 0.058530235290527345, + "step": 129345 + }, + { + "epoch": 1.1184512023242341, + "grad_norm": 4.058453233028733, + "learning_rate": 2.4450269727036856e-06, + "loss": 0.0762237548828125, + "step": 129350 + }, + { + "epoch": 1.1184944358457773, + "grad_norm": 3.022250608489265, + "learning_rate": 2.4448267572041754e-06, + "loss": 0.05557174682617187, + "step": 129355 + }, + { + "epoch": 1.1185376693673206, + "grad_norm": 3.413543695878069, + "learning_rate": 2.444626544265075e-06, + "loss": 0.012615966796875, + "step": 129360 + }, + { + "epoch": 1.1185809028888638, + "grad_norm": 5.053285679387739, + "learning_rate": 2.444426333887307e-06, + "loss": 0.09356842041015626, + "step": 129365 + }, + { + "epoch": 1.1186241364104073, + "grad_norm": 0.04086089351238834, + "learning_rate": 2.4442261260717966e-06, + "loss": 0.08013343811035156, + "step": 129370 + }, + { + "epoch": 1.1186673699319505, + "grad_norm": 3.6528846480396218, + "learning_rate": 2.4440259208194656e-06, + "loss": 0.03523101806640625, + "step": 129375 + }, + { + "epoch": 1.1187106034534937, + "grad_norm": 3.388086416405599, + "learning_rate": 2.443825718131239e-06, + "loss": 0.09580860137939454, + "step": 129380 + }, + { + "epoch": 1.118753836975037, + "grad_norm": 1.092958255956287, + "learning_rate": 2.4436255180080386e-06, + "loss": 0.0196044921875, + "step": 129385 + }, + { + "epoch": 1.1187970704965802, + "grad_norm": 35.797733393107734, + "learning_rate": 2.4434253204507885e-06, + "loss": 0.02130126953125, + "step": 129390 + }, + { + "epoch": 1.1188403040181234, + "grad_norm": 42.04616623725835, + "learning_rate": 2.443225125460411e-06, + "loss": 0.22657470703125, + "step": 129395 + }, + { + "epoch": 1.1188835375396669, + "grad_norm": 2.052792785954826, + "learning_rate": 2.4430249330378296e-06, + "loss": 0.08375625610351563, + "step": 129400 + }, + { + "epoch": 1.11892677106121, + "grad_norm": 0.5011105530035483, + "learning_rate": 2.4428247431839677e-06, + "loss": 0.04423942565917969, + "step": 129405 + }, + { + "epoch": 1.1189700045827533, + "grad_norm": 0.5963910219797406, + "learning_rate": 2.4426245558997505e-06, + "loss": 0.03286819458007813, + "step": 129410 + }, + { + "epoch": 1.1190132381042965, + "grad_norm": 8.2656008131628, + "learning_rate": 2.442424371186099e-06, + "loss": 0.07642440795898438, + "step": 129415 + }, + { + "epoch": 1.1190564716258398, + "grad_norm": 5.7501606330326505, + "learning_rate": 2.442224189043937e-06, + "loss": 0.05054931640625, + "step": 129420 + }, + { + "epoch": 1.119099705147383, + "grad_norm": 5.988016792522696, + "learning_rate": 2.4420240094741875e-06, + "loss": 0.10507888793945312, + "step": 129425 + }, + { + "epoch": 1.1191429386689262, + "grad_norm": 2.697509495005077, + "learning_rate": 2.441823832477773e-06, + "loss": 0.11071243286132812, + "step": 129430 + }, + { + "epoch": 1.1191861721904697, + "grad_norm": 4.2308443547202135, + "learning_rate": 2.441623658055619e-06, + "loss": 0.0299163818359375, + "step": 129435 + }, + { + "epoch": 1.119229405712013, + "grad_norm": 16.399222210422455, + "learning_rate": 2.4414234862086474e-06, + "loss": 0.10907783508300781, + "step": 129440 + }, + { + "epoch": 1.1192726392335561, + "grad_norm": 39.758290976557554, + "learning_rate": 2.4412233169377814e-06, + "loss": 0.15641021728515625, + "step": 129445 + }, + { + "epoch": 1.1193158727550994, + "grad_norm": 23.93100962693997, + "learning_rate": 2.4410231502439437e-06, + "loss": 0.11891450881958007, + "step": 129450 + }, + { + "epoch": 1.1193591062766426, + "grad_norm": 0.246108005405333, + "learning_rate": 2.4408229861280576e-06, + "loss": 0.02971649169921875, + "step": 129455 + }, + { + "epoch": 1.1194023397981858, + "grad_norm": 4.118743161175308, + "learning_rate": 2.440622824591047e-06, + "loss": 0.027733230590820314, + "step": 129460 + }, + { + "epoch": 1.1194455733197293, + "grad_norm": 0.6256948549258573, + "learning_rate": 2.4404226656338334e-06, + "loss": 0.013328170776367188, + "step": 129465 + }, + { + "epoch": 1.1194888068412725, + "grad_norm": 0.7964346040594993, + "learning_rate": 2.440222509257342e-06, + "loss": 0.0277435302734375, + "step": 129470 + }, + { + "epoch": 1.1195320403628157, + "grad_norm": 58.752534676793104, + "learning_rate": 2.440022355462495e-06, + "loss": 0.1400165557861328, + "step": 129475 + }, + { + "epoch": 1.119575273884359, + "grad_norm": 21.27392460891537, + "learning_rate": 2.439822204250215e-06, + "loss": 0.057872772216796875, + "step": 129480 + }, + { + "epoch": 1.1196185074059022, + "grad_norm": 6.84742071545106, + "learning_rate": 2.4396220556214257e-06, + "loss": 0.032133865356445315, + "step": 129485 + }, + { + "epoch": 1.1196617409274454, + "grad_norm": 7.752985752934132, + "learning_rate": 2.43942190957705e-06, + "loss": 0.08486328125, + "step": 129490 + }, + { + "epoch": 1.1197049744489889, + "grad_norm": 1.03356297376939, + "learning_rate": 2.43922176611801e-06, + "loss": 0.008939552307128906, + "step": 129495 + }, + { + "epoch": 1.119748207970532, + "grad_norm": 0.2546149473499946, + "learning_rate": 2.439021625245231e-06, + "loss": 0.04080390930175781, + "step": 129500 + }, + { + "epoch": 1.1197914414920753, + "grad_norm": 0.4757219524292941, + "learning_rate": 2.4388214869596346e-06, + "loss": 0.08426666259765625, + "step": 129505 + }, + { + "epoch": 1.1198346750136186, + "grad_norm": 4.194725186007996, + "learning_rate": 2.438621351262143e-06, + "loss": 0.021083831787109375, + "step": 129510 + }, + { + "epoch": 1.1198779085351618, + "grad_norm": 20.805247215487828, + "learning_rate": 2.438421218153681e-06, + "loss": 0.05172882080078125, + "step": 129515 + }, + { + "epoch": 1.119921142056705, + "grad_norm": 13.872291510803887, + "learning_rate": 2.4382210876351707e-06, + "loss": 0.1251251220703125, + "step": 129520 + }, + { + "epoch": 1.1199643755782485, + "grad_norm": 6.339675136275614, + "learning_rate": 2.4380209597075335e-06, + "loss": 0.04058818817138672, + "step": 129525 + }, + { + "epoch": 1.1200076090997917, + "grad_norm": 2.458122998987788, + "learning_rate": 2.437820834371696e-06, + "loss": 0.21901893615722656, + "step": 129530 + }, + { + "epoch": 1.120050842621335, + "grad_norm": 5.598718157760477, + "learning_rate": 2.437620711628578e-06, + "loss": 0.044928359985351565, + "step": 129535 + }, + { + "epoch": 1.1200940761428781, + "grad_norm": 16.091141201804408, + "learning_rate": 2.437420591479104e-06, + "loss": 0.04638595581054687, + "step": 129540 + }, + { + "epoch": 1.1201373096644214, + "grad_norm": 1.436261938218011, + "learning_rate": 2.4372204739241966e-06, + "loss": 0.1976837158203125, + "step": 129545 + }, + { + "epoch": 1.1201805431859646, + "grad_norm": 15.096216280375657, + "learning_rate": 2.4370203589647786e-06, + "loss": 0.14300918579101562, + "step": 129550 + }, + { + "epoch": 1.1202237767075078, + "grad_norm": 0.4349432207733599, + "learning_rate": 2.4368202466017738e-06, + "loss": 0.03554534912109375, + "step": 129555 + }, + { + "epoch": 1.1202670102290513, + "grad_norm": 2.1419531935677885, + "learning_rate": 2.4366201368361023e-06, + "loss": 0.215087890625, + "step": 129560 + }, + { + "epoch": 1.1203102437505945, + "grad_norm": 7.984575830048299, + "learning_rate": 2.43642002966869e-06, + "loss": 0.03888721466064453, + "step": 129565 + }, + { + "epoch": 1.1203534772721377, + "grad_norm": 1.6477726633234882, + "learning_rate": 2.4362199251004584e-06, + "loss": 0.00708770751953125, + "step": 129570 + }, + { + "epoch": 1.120396710793681, + "grad_norm": 26.60016392990625, + "learning_rate": 2.4360198231323316e-06, + "loss": 0.045040130615234375, + "step": 129575 + }, + { + "epoch": 1.1204399443152242, + "grad_norm": 39.571773928977066, + "learning_rate": 2.4358197237652313e-06, + "loss": 0.18645896911621093, + "step": 129580 + }, + { + "epoch": 1.1204831778367674, + "grad_norm": 36.91934777501643, + "learning_rate": 2.4356196270000806e-06, + "loss": 0.09421577453613281, + "step": 129585 + }, + { + "epoch": 1.1205264113583109, + "grad_norm": 17.836100663016165, + "learning_rate": 2.435419532837801e-06, + "loss": 0.05631179809570312, + "step": 129590 + }, + { + "epoch": 1.120569644879854, + "grad_norm": 0.0746087771419101, + "learning_rate": 2.4352194412793173e-06, + "loss": 0.1833059310913086, + "step": 129595 + }, + { + "epoch": 1.1206128784013973, + "grad_norm": 15.828818758074387, + "learning_rate": 2.435019352325552e-06, + "loss": 0.05340118408203125, + "step": 129600 + }, + { + "epoch": 1.1206561119229406, + "grad_norm": 2.101689534207482, + "learning_rate": 2.4348192659774276e-06, + "loss": 0.01017608642578125, + "step": 129605 + }, + { + "epoch": 1.1206993454444838, + "grad_norm": 1.959512920520942, + "learning_rate": 2.4346191822358668e-06, + "loss": 0.030478668212890626, + "step": 129610 + }, + { + "epoch": 1.120742578966027, + "grad_norm": 14.116752292624199, + "learning_rate": 2.4344191011017927e-06, + "loss": 0.08555374145507813, + "step": 129615 + }, + { + "epoch": 1.1207858124875703, + "grad_norm": 28.289993117716396, + "learning_rate": 2.434219022576127e-06, + "loss": 0.2812961578369141, + "step": 129620 + }, + { + "epoch": 1.1208290460091137, + "grad_norm": 4.895680152196263, + "learning_rate": 2.4340189466597927e-06, + "loss": 0.09074525833129883, + "step": 129625 + }, + { + "epoch": 1.120872279530657, + "grad_norm": 0.8033454669604191, + "learning_rate": 2.433818873353714e-06, + "loss": 0.015899658203125, + "step": 129630 + }, + { + "epoch": 1.1209155130522002, + "grad_norm": 1.2934473075658985, + "learning_rate": 2.4336188026588126e-06, + "loss": 0.028504180908203124, + "step": 129635 + }, + { + "epoch": 1.1209587465737434, + "grad_norm": 0.8362283863491842, + "learning_rate": 2.433418734576011e-06, + "loss": 0.021947479248046874, + "step": 129640 + }, + { + "epoch": 1.1210019800952866, + "grad_norm": 1.0102292175293384, + "learning_rate": 2.433218669106232e-06, + "loss": 0.13198699951171874, + "step": 129645 + }, + { + "epoch": 1.1210452136168298, + "grad_norm": 0.2333617568071232, + "learning_rate": 2.433018606250399e-06, + "loss": 0.026702022552490233, + "step": 129650 + }, + { + "epoch": 1.1210884471383733, + "grad_norm": 8.517484662105508, + "learning_rate": 2.432818546009432e-06, + "loss": 0.0743377685546875, + "step": 129655 + }, + { + "epoch": 1.1211316806599165, + "grad_norm": 3.392555751123456, + "learning_rate": 2.4326184883842577e-06, + "loss": 0.0771240234375, + "step": 129660 + }, + { + "epoch": 1.1211749141814598, + "grad_norm": 0.9193406730674825, + "learning_rate": 2.4324184333757972e-06, + "loss": 0.028565216064453124, + "step": 129665 + }, + { + "epoch": 1.121218147703003, + "grad_norm": 0.20125857950409745, + "learning_rate": 2.4322183809849722e-06, + "loss": 0.100897216796875, + "step": 129670 + }, + { + "epoch": 1.1212613812245462, + "grad_norm": 3.1722487843111247, + "learning_rate": 2.432018331212705e-06, + "loss": 0.05390167236328125, + "step": 129675 + }, + { + "epoch": 1.1213046147460894, + "grad_norm": 25.980714737916003, + "learning_rate": 2.43181828405992e-06, + "loss": 0.17925796508789063, + "step": 129680 + }, + { + "epoch": 1.1213478482676327, + "grad_norm": 26.653026478251437, + "learning_rate": 2.4316182395275375e-06, + "loss": 0.06979827880859375, + "step": 129685 + }, + { + "epoch": 1.1213910817891761, + "grad_norm": 4.813060820705038, + "learning_rate": 2.4314181976164833e-06, + "loss": 0.0214263916015625, + "step": 129690 + }, + { + "epoch": 1.1214343153107194, + "grad_norm": 4.035252494806043, + "learning_rate": 2.4312181583276774e-06, + "loss": 0.03651351928710937, + "step": 129695 + }, + { + "epoch": 1.1214775488322626, + "grad_norm": 15.50069051288528, + "learning_rate": 2.4310181216620436e-06, + "loss": 0.098638916015625, + "step": 129700 + }, + { + "epoch": 1.1215207823538058, + "grad_norm": 24.04083417143351, + "learning_rate": 2.430818087620503e-06, + "loss": 0.12762069702148438, + "step": 129705 + }, + { + "epoch": 1.121564015875349, + "grad_norm": 0.1442259566003493, + "learning_rate": 2.43061805620398e-06, + "loss": 0.17641983032226563, + "step": 129710 + }, + { + "epoch": 1.1216072493968925, + "grad_norm": 6.67903977104618, + "learning_rate": 2.4304180274133956e-06, + "loss": 0.2449462890625, + "step": 129715 + }, + { + "epoch": 1.1216504829184357, + "grad_norm": 2.2186287590823435, + "learning_rate": 2.4302180012496723e-06, + "loss": 0.021591567993164064, + "step": 129720 + }, + { + "epoch": 1.121693716439979, + "grad_norm": 3.078201918541156, + "learning_rate": 2.4300179777137344e-06, + "loss": 0.02807273864746094, + "step": 129725 + }, + { + "epoch": 1.1217369499615222, + "grad_norm": 28.442712809045585, + "learning_rate": 2.429817956806503e-06, + "loss": 0.16834449768066406, + "step": 129730 + }, + { + "epoch": 1.1217801834830654, + "grad_norm": 0.2683904737392309, + "learning_rate": 2.4296179385289e-06, + "loss": 0.019685745239257812, + "step": 129735 + }, + { + "epoch": 1.1218234170046086, + "grad_norm": 1.134823643910375, + "learning_rate": 2.42941792288185e-06, + "loss": 0.018853378295898438, + "step": 129740 + }, + { + "epoch": 1.1218666505261519, + "grad_norm": 2.389589816287315, + "learning_rate": 2.4292179098662738e-06, + "loss": 0.194659423828125, + "step": 129745 + }, + { + "epoch": 1.1219098840476953, + "grad_norm": 0.582761923367258, + "learning_rate": 2.4290178994830923e-06, + "loss": 0.011437797546386718, + "step": 129750 + }, + { + "epoch": 1.1219531175692385, + "grad_norm": 3.73410843083112, + "learning_rate": 2.4288178917332317e-06, + "loss": 0.2260406494140625, + "step": 129755 + }, + { + "epoch": 1.1219963510907818, + "grad_norm": 0.2189597684905988, + "learning_rate": 2.4286178866176126e-06, + "loss": 0.024475860595703124, + "step": 129760 + }, + { + "epoch": 1.122039584612325, + "grad_norm": 7.190229159024774, + "learning_rate": 2.4284178841371562e-06, + "loss": 0.0779541015625, + "step": 129765 + }, + { + "epoch": 1.1220828181338682, + "grad_norm": 0.43826381507673645, + "learning_rate": 2.428217884292787e-06, + "loss": 0.06812515258789062, + "step": 129770 + }, + { + "epoch": 1.1221260516554115, + "grad_norm": 6.666376144408245, + "learning_rate": 2.4280178870854263e-06, + "loss": 0.1099395751953125, + "step": 129775 + }, + { + "epoch": 1.122169285176955, + "grad_norm": 1.1785768877276763, + "learning_rate": 2.427817892515995e-06, + "loss": 0.043233489990234374, + "step": 129780 + }, + { + "epoch": 1.1222125186984981, + "grad_norm": 12.015339076295046, + "learning_rate": 2.4276179005854188e-06, + "loss": 0.03632774353027344, + "step": 129785 + }, + { + "epoch": 1.1222557522200414, + "grad_norm": 26.491373925881458, + "learning_rate": 2.427417911294618e-06, + "loss": 0.129595947265625, + "step": 129790 + }, + { + "epoch": 1.1222989857415846, + "grad_norm": 1.2821376183535755, + "learning_rate": 2.4272179246445146e-06, + "loss": 0.0536041259765625, + "step": 129795 + }, + { + "epoch": 1.1223422192631278, + "grad_norm": 1.2905227133331278, + "learning_rate": 2.4270179406360325e-06, + "loss": 0.0154541015625, + "step": 129800 + }, + { + "epoch": 1.122385452784671, + "grad_norm": 0.5214401150670556, + "learning_rate": 2.4268179592700924e-06, + "loss": 0.026103973388671875, + "step": 129805 + }, + { + "epoch": 1.1224286863062143, + "grad_norm": 14.568070886437113, + "learning_rate": 2.4266179805476176e-06, + "loss": 0.06459083557128906, + "step": 129810 + }, + { + "epoch": 1.1224719198277577, + "grad_norm": 21.083297492578684, + "learning_rate": 2.4264180044695284e-06, + "loss": 0.11588592529296875, + "step": 129815 + }, + { + "epoch": 1.122515153349301, + "grad_norm": 5.098225861670774, + "learning_rate": 2.4262180310367503e-06, + "loss": 0.06291885375976562, + "step": 129820 + }, + { + "epoch": 1.1225583868708442, + "grad_norm": 0.19850888763086194, + "learning_rate": 2.4260180602502034e-06, + "loss": 0.07981491088867188, + "step": 129825 + }, + { + "epoch": 1.1226016203923874, + "grad_norm": 4.0008582289927865, + "learning_rate": 2.4258180921108107e-06, + "loss": 0.028519439697265624, + "step": 129830 + }, + { + "epoch": 1.1226448539139307, + "grad_norm": 4.314487177676377, + "learning_rate": 2.425618126619495e-06, + "loss": 0.0189971923828125, + "step": 129835 + }, + { + "epoch": 1.1226880874354739, + "grad_norm": 0.09933296757614568, + "learning_rate": 2.425418163777177e-06, + "loss": 0.00412750244140625, + "step": 129840 + }, + { + "epoch": 1.1227313209570173, + "grad_norm": 1.2865966343940325, + "learning_rate": 2.425218203584778e-06, + "loss": 0.051439666748046876, + "step": 129845 + }, + { + "epoch": 1.1227745544785606, + "grad_norm": 19.72750206691372, + "learning_rate": 2.425018246043224e-06, + "loss": 0.08943462371826172, + "step": 129850 + }, + { + "epoch": 1.1228177880001038, + "grad_norm": 4.863109489997008, + "learning_rate": 2.424818291153434e-06, + "loss": 0.03917388916015625, + "step": 129855 + }, + { + "epoch": 1.122861021521647, + "grad_norm": 4.347311819367541, + "learning_rate": 2.4246183389163324e-06, + "loss": 0.03983001708984375, + "step": 129860 + }, + { + "epoch": 1.1229042550431902, + "grad_norm": 13.603750433809994, + "learning_rate": 2.4244183893328396e-06, + "loss": 0.14839324951171876, + "step": 129865 + }, + { + "epoch": 1.1229474885647335, + "grad_norm": 2.223797128600227, + "learning_rate": 2.4242184424038786e-06, + "loss": 0.0722747802734375, + "step": 129870 + }, + { + "epoch": 1.1229907220862767, + "grad_norm": 2.6023436637101622, + "learning_rate": 2.42401849813037e-06, + "loss": 0.019399261474609374, + "step": 129875 + }, + { + "epoch": 1.1230339556078202, + "grad_norm": 21.130087141883518, + "learning_rate": 2.4238185565132375e-06, + "loss": 0.14051895141601561, + "step": 129880 + }, + { + "epoch": 1.1230771891293634, + "grad_norm": 0.17328602828812933, + "learning_rate": 2.423618617553404e-06, + "loss": 0.11939926147460937, + "step": 129885 + }, + { + "epoch": 1.1231204226509066, + "grad_norm": 1.1449084012110229, + "learning_rate": 2.4234186812517897e-06, + "loss": 0.01788330078125, + "step": 129890 + }, + { + "epoch": 1.1231636561724498, + "grad_norm": 2.043521986634794, + "learning_rate": 2.423218747609318e-06, + "loss": 0.023251724243164063, + "step": 129895 + }, + { + "epoch": 1.123206889693993, + "grad_norm": 20.843394584855673, + "learning_rate": 2.4230188166269103e-06, + "loss": 0.035076904296875, + "step": 129900 + }, + { + "epoch": 1.1232501232155363, + "grad_norm": 14.49170281700355, + "learning_rate": 2.422818888305489e-06, + "loss": 0.04379539489746094, + "step": 129905 + }, + { + "epoch": 1.1232933567370798, + "grad_norm": 23.222635685568303, + "learning_rate": 2.4226189626459747e-06, + "loss": 0.07717971801757813, + "step": 129910 + }, + { + "epoch": 1.123336590258623, + "grad_norm": 0.8538125415180645, + "learning_rate": 2.4224190396492923e-06, + "loss": 0.03067779541015625, + "step": 129915 + }, + { + "epoch": 1.1233798237801662, + "grad_norm": 3.7141128391538185, + "learning_rate": 2.422219119316362e-06, + "loss": 0.05117321014404297, + "step": 129920 + }, + { + "epoch": 1.1234230573017094, + "grad_norm": 28.666159576393444, + "learning_rate": 2.422019201648106e-06, + "loss": 0.06418380737304688, + "step": 129925 + }, + { + "epoch": 1.1234662908232527, + "grad_norm": 0.7903319858724523, + "learning_rate": 2.421819286645446e-06, + "loss": 0.09811515808105468, + "step": 129930 + }, + { + "epoch": 1.123509524344796, + "grad_norm": 1.7153314516505036, + "learning_rate": 2.4216193743093047e-06, + "loss": 0.037934494018554685, + "step": 129935 + }, + { + "epoch": 1.1235527578663393, + "grad_norm": 1.5472718285085518, + "learning_rate": 2.4214194646406027e-06, + "loss": 0.0308349609375, + "step": 129940 + }, + { + "epoch": 1.1235959913878826, + "grad_norm": 1.948036033743227, + "learning_rate": 2.4212195576402636e-06, + "loss": 0.029730224609375, + "step": 129945 + }, + { + "epoch": 1.1236392249094258, + "grad_norm": 0.3440359181655275, + "learning_rate": 2.4210196533092096e-06, + "loss": 0.02484588623046875, + "step": 129950 + }, + { + "epoch": 1.123682458430969, + "grad_norm": 3.7948437921799085, + "learning_rate": 2.4208197516483613e-06, + "loss": 0.047052001953125, + "step": 129955 + }, + { + "epoch": 1.1237256919525123, + "grad_norm": 12.296186438194454, + "learning_rate": 2.420619852658641e-06, + "loss": 0.07321701049804688, + "step": 129960 + }, + { + "epoch": 1.1237689254740555, + "grad_norm": 1.6539774270533687, + "learning_rate": 2.420419956340971e-06, + "loss": 0.11542205810546875, + "step": 129965 + }, + { + "epoch": 1.123812158995599, + "grad_norm": 0.29055284920252455, + "learning_rate": 2.4202200626962732e-06, + "loss": 0.17341690063476561, + "step": 129970 + }, + { + "epoch": 1.1238553925171422, + "grad_norm": 9.784549095261385, + "learning_rate": 2.4200201717254678e-06, + "loss": 0.16091766357421874, + "step": 129975 + }, + { + "epoch": 1.1238986260386854, + "grad_norm": 1.6167475855674172, + "learning_rate": 2.4198202834294793e-06, + "loss": 0.063262939453125, + "step": 129980 + }, + { + "epoch": 1.1239418595602286, + "grad_norm": 1.3705422079264644, + "learning_rate": 2.4196203978092285e-06, + "loss": 0.11477928161621094, + "step": 129985 + }, + { + "epoch": 1.1239850930817719, + "grad_norm": 0.5469401180343632, + "learning_rate": 2.4194205148656365e-06, + "loss": 0.14423446655273436, + "step": 129990 + }, + { + "epoch": 1.124028326603315, + "grad_norm": 5.381559150797281, + "learning_rate": 2.4192206345996262e-06, + "loss": 0.04717597961425781, + "step": 129995 + }, + { + "epoch": 1.1240715601248583, + "grad_norm": 1.0867822491342127, + "learning_rate": 2.4190207570121196e-06, + "loss": 0.01404876708984375, + "step": 130000 + }, + { + "epoch": 1.1241147936464018, + "grad_norm": 24.105745626300507, + "learning_rate": 2.418820882104036e-06, + "loss": 0.10480518341064453, + "step": 130005 + }, + { + "epoch": 1.124158027167945, + "grad_norm": 1.969151629474722, + "learning_rate": 2.418621009876301e-06, + "loss": 0.021805572509765624, + "step": 130010 + }, + { + "epoch": 1.1242012606894882, + "grad_norm": 14.421677199258081, + "learning_rate": 2.4184211403298342e-06, + "loss": 0.03281688690185547, + "step": 130015 + }, + { + "epoch": 1.1242444942110315, + "grad_norm": 9.687045171178545, + "learning_rate": 2.418221273465557e-06, + "loss": 0.024824142456054688, + "step": 130020 + }, + { + "epoch": 1.1242877277325747, + "grad_norm": 0.6532212739083526, + "learning_rate": 2.418021409284393e-06, + "loss": 0.01830120086669922, + "step": 130025 + }, + { + "epoch": 1.124330961254118, + "grad_norm": 2.8712918562596608, + "learning_rate": 2.417821547787262e-06, + "loss": 0.019451141357421875, + "step": 130030 + }, + { + "epoch": 1.1243741947756614, + "grad_norm": 8.8276354057482, + "learning_rate": 2.417621688975086e-06, + "loss": 0.02166461944580078, + "step": 130035 + }, + { + "epoch": 1.1244174282972046, + "grad_norm": 5.000482079642082, + "learning_rate": 2.4174218328487883e-06, + "loss": 0.03842620849609375, + "step": 130040 + }, + { + "epoch": 1.1244606618187478, + "grad_norm": 3.333950134912475, + "learning_rate": 2.4172219794092894e-06, + "loss": 0.19676132202148439, + "step": 130045 + }, + { + "epoch": 1.124503895340291, + "grad_norm": 0.578304495571765, + "learning_rate": 2.4170221286575116e-06, + "loss": 0.044217681884765624, + "step": 130050 + }, + { + "epoch": 1.1245471288618343, + "grad_norm": 1.183549421591273, + "learning_rate": 2.416822280594376e-06, + "loss": 0.03266448974609375, + "step": 130055 + }, + { + "epoch": 1.1245903623833775, + "grad_norm": 0.31629381576886006, + "learning_rate": 2.4166224352208046e-06, + "loss": 0.035915374755859375, + "step": 130060 + }, + { + "epoch": 1.1246335959049207, + "grad_norm": 19.86234626475194, + "learning_rate": 2.416422592537719e-06, + "loss": 0.06229743957519531, + "step": 130065 + }, + { + "epoch": 1.1246768294264642, + "grad_norm": 5.02767564187529, + "learning_rate": 2.4162227525460396e-06, + "loss": 0.07434234619140626, + "step": 130070 + }, + { + "epoch": 1.1247200629480074, + "grad_norm": 1.4976483782686287, + "learning_rate": 2.41602291524669e-06, + "loss": 0.0525177001953125, + "step": 130075 + }, + { + "epoch": 1.1247632964695506, + "grad_norm": 27.122285676348817, + "learning_rate": 2.4158230806405916e-06, + "loss": 0.05092620849609375, + "step": 130080 + }, + { + "epoch": 1.1248065299910939, + "grad_norm": 0.3340537780724422, + "learning_rate": 2.4156232487286654e-06, + "loss": 0.05706024169921875, + "step": 130085 + }, + { + "epoch": 1.124849763512637, + "grad_norm": 5.730812307596468, + "learning_rate": 2.4154234195118338e-06, + "loss": 0.07664337158203124, + "step": 130090 + }, + { + "epoch": 1.1248929970341803, + "grad_norm": 0.3387074263348743, + "learning_rate": 2.415223592991017e-06, + "loss": 0.04565010070800781, + "step": 130095 + }, + { + "epoch": 1.1249362305557238, + "grad_norm": 19.98365530689773, + "learning_rate": 2.4150237691671364e-06, + "loss": 0.2855854034423828, + "step": 130100 + }, + { + "epoch": 1.124979464077267, + "grad_norm": 1.2982385587482173, + "learning_rate": 2.4148239480411148e-06, + "loss": 0.06724071502685547, + "step": 130105 + }, + { + "epoch": 1.1250226975988102, + "grad_norm": 16.20112431623182, + "learning_rate": 2.4146241296138747e-06, + "loss": 0.09181270599365235, + "step": 130110 + }, + { + "epoch": 1.1250659311203535, + "grad_norm": 0.36238685993188696, + "learning_rate": 2.414424313886336e-06, + "loss": 0.01257781982421875, + "step": 130115 + }, + { + "epoch": 1.1251091646418967, + "grad_norm": 0.22633783845293326, + "learning_rate": 2.4142245008594204e-06, + "loss": 0.1639190673828125, + "step": 130120 + }, + { + "epoch": 1.12515239816344, + "grad_norm": 4.125399781884792, + "learning_rate": 2.4140246905340493e-06, + "loss": 0.0485687255859375, + "step": 130125 + }, + { + "epoch": 1.1251956316849832, + "grad_norm": 19.44771134393238, + "learning_rate": 2.4138248829111444e-06, + "loss": 0.17191619873046876, + "step": 130130 + }, + { + "epoch": 1.1252388652065266, + "grad_norm": 2.1878607244454837, + "learning_rate": 2.4136250779916273e-06, + "loss": 0.02235565185546875, + "step": 130135 + }, + { + "epoch": 1.1252820987280698, + "grad_norm": 3.329899529705424, + "learning_rate": 2.4134252757764204e-06, + "loss": 0.15943603515625, + "step": 130140 + }, + { + "epoch": 1.125325332249613, + "grad_norm": 15.664039652310846, + "learning_rate": 2.4132254762664445e-06, + "loss": 0.03887100219726562, + "step": 130145 + }, + { + "epoch": 1.1253685657711563, + "grad_norm": 0.2537432175815919, + "learning_rate": 2.4130256794626203e-06, + "loss": 0.06259384155273437, + "step": 130150 + }, + { + "epoch": 1.1254117992926995, + "grad_norm": 44.34633443038543, + "learning_rate": 2.4128258853658697e-06, + "loss": 0.23897705078125, + "step": 130155 + }, + { + "epoch": 1.125455032814243, + "grad_norm": 0.8499610015026176, + "learning_rate": 2.4126260939771144e-06, + "loss": 0.025521469116210938, + "step": 130160 + }, + { + "epoch": 1.1254982663357862, + "grad_norm": 0.07830659472528445, + "learning_rate": 2.4124263052972747e-06, + "loss": 0.010802841186523438, + "step": 130165 + }, + { + "epoch": 1.1255414998573294, + "grad_norm": 1.6426983536573267, + "learning_rate": 2.412226519327274e-06, + "loss": 0.028675079345703125, + "step": 130170 + }, + { + "epoch": 1.1255847333788727, + "grad_norm": 0.19927631113184954, + "learning_rate": 2.4120267360680328e-06, + "loss": 0.2770587921142578, + "step": 130175 + }, + { + "epoch": 1.1256279669004159, + "grad_norm": 5.567512026011445, + "learning_rate": 2.411826955520472e-06, + "loss": 0.06480979919433594, + "step": 130180 + }, + { + "epoch": 1.1256712004219591, + "grad_norm": 1.3154703566197692, + "learning_rate": 2.4116271776855132e-06, + "loss": 0.22843017578125, + "step": 130185 + }, + { + "epoch": 1.1257144339435023, + "grad_norm": 32.41698220758414, + "learning_rate": 2.4114274025640786e-06, + "loss": 0.13696670532226562, + "step": 130190 + }, + { + "epoch": 1.1257576674650456, + "grad_norm": 6.193773267090808, + "learning_rate": 2.411227630157087e-06, + "loss": 0.061224365234375, + "step": 130195 + }, + { + "epoch": 1.125800900986589, + "grad_norm": 5.015784714101083, + "learning_rate": 2.411027860465463e-06, + "loss": 0.06276321411132812, + "step": 130200 + }, + { + "epoch": 1.1258441345081323, + "grad_norm": 1.4448836597053767, + "learning_rate": 2.4108280934901267e-06, + "loss": 0.14979782104492187, + "step": 130205 + }, + { + "epoch": 1.1258873680296755, + "grad_norm": 0.30672795226264077, + "learning_rate": 2.410628329231999e-06, + "loss": 0.0608551025390625, + "step": 130210 + }, + { + "epoch": 1.1259306015512187, + "grad_norm": 3.8475008876005243, + "learning_rate": 2.4104285676920003e-06, + "loss": 0.03739013671875, + "step": 130215 + }, + { + "epoch": 1.125973835072762, + "grad_norm": 1.5154772452025025, + "learning_rate": 2.4102288088710545e-06, + "loss": 0.01399688720703125, + "step": 130220 + }, + { + "epoch": 1.1260170685943054, + "grad_norm": 0.4934967246273204, + "learning_rate": 2.4100290527700806e-06, + "loss": 0.09933547973632813, + "step": 130225 + }, + { + "epoch": 1.1260603021158486, + "grad_norm": 0.33958116303489216, + "learning_rate": 2.4098292993899992e-06, + "loss": 0.052036285400390625, + "step": 130230 + }, + { + "epoch": 1.1261035356373919, + "grad_norm": 1.3249907775530163, + "learning_rate": 2.409629548731735e-06, + "loss": 0.36257190704345704, + "step": 130235 + }, + { + "epoch": 1.126146769158935, + "grad_norm": 0.08650849693018096, + "learning_rate": 2.4094298007962066e-06, + "loss": 0.03408660888671875, + "step": 130240 + }, + { + "epoch": 1.1261900026804783, + "grad_norm": 55.03872389734682, + "learning_rate": 2.409230055584335e-06, + "loss": 0.13957633972167968, + "step": 130245 + }, + { + "epoch": 1.1262332362020215, + "grad_norm": 3.4071373206235185, + "learning_rate": 2.409030313097043e-06, + "loss": 0.022745513916015626, + "step": 130250 + }, + { + "epoch": 1.1262764697235648, + "grad_norm": 1.2337424396989318, + "learning_rate": 2.4088305733352514e-06, + "loss": 0.0808563232421875, + "step": 130255 + }, + { + "epoch": 1.1263197032451082, + "grad_norm": 90.14426591665985, + "learning_rate": 2.408630836299879e-06, + "loss": 0.20479278564453124, + "step": 130260 + }, + { + "epoch": 1.1263629367666514, + "grad_norm": 6.131007136385726, + "learning_rate": 2.4084311019918508e-06, + "loss": 0.023312759399414063, + "step": 130265 + }, + { + "epoch": 1.1264061702881947, + "grad_norm": 12.087698143295768, + "learning_rate": 2.4082313704120855e-06, + "loss": 0.1130615234375, + "step": 130270 + }, + { + "epoch": 1.126449403809738, + "grad_norm": 42.151902721687, + "learning_rate": 2.4080316415615045e-06, + "loss": 0.1490558624267578, + "step": 130275 + }, + { + "epoch": 1.1264926373312811, + "grad_norm": 3.321899586156715, + "learning_rate": 2.40783191544103e-06, + "loss": 0.0179656982421875, + "step": 130280 + }, + { + "epoch": 1.1265358708528244, + "grad_norm": 1.1213947233393333, + "learning_rate": 2.407632192051582e-06, + "loss": 0.023278045654296874, + "step": 130285 + }, + { + "epoch": 1.1265791043743678, + "grad_norm": 17.356231539586688, + "learning_rate": 2.407432471394081e-06, + "loss": 0.046785736083984376, + "step": 130290 + }, + { + "epoch": 1.126622337895911, + "grad_norm": 8.638595918823354, + "learning_rate": 2.4072327534694504e-06, + "loss": 0.02599945068359375, + "step": 130295 + }, + { + "epoch": 1.1266655714174543, + "grad_norm": 0.8903719686479115, + "learning_rate": 2.4070330382786092e-06, + "loss": 0.018367767333984375, + "step": 130300 + }, + { + "epoch": 1.1267088049389975, + "grad_norm": 47.38543659852822, + "learning_rate": 2.40683332582248e-06, + "loss": 0.22424659729003907, + "step": 130305 + }, + { + "epoch": 1.1267520384605407, + "grad_norm": 25.44204648560008, + "learning_rate": 2.4066336161019827e-06, + "loss": 0.09042739868164062, + "step": 130310 + }, + { + "epoch": 1.126795271982084, + "grad_norm": 3.699088528907577, + "learning_rate": 2.406433909118039e-06, + "loss": 0.05389251708984375, + "step": 130315 + }, + { + "epoch": 1.1268385055036272, + "grad_norm": 3.4866347982266315, + "learning_rate": 2.40623420487157e-06, + "loss": 0.029985427856445312, + "step": 130320 + }, + { + "epoch": 1.1268817390251706, + "grad_norm": 3.432791287365315, + "learning_rate": 2.4060345033634947e-06, + "loss": 0.04243927001953125, + "step": 130325 + }, + { + "epoch": 1.1269249725467139, + "grad_norm": 1.8607362906813405, + "learning_rate": 2.405834804594737e-06, + "loss": 0.14273395538330078, + "step": 130330 + }, + { + "epoch": 1.126968206068257, + "grad_norm": 4.905986357887532, + "learning_rate": 2.4056351085662163e-06, + "loss": 0.04957160949707031, + "step": 130335 + }, + { + "epoch": 1.1270114395898003, + "grad_norm": 2.3239442245663224, + "learning_rate": 2.405435415278855e-06, + "loss": 0.1741161346435547, + "step": 130340 + }, + { + "epoch": 1.1270546731113436, + "grad_norm": 2.825524958263059, + "learning_rate": 2.4052357247335723e-06, + "loss": 0.015578460693359376, + "step": 130345 + }, + { + "epoch": 1.1270979066328868, + "grad_norm": 4.081144409859378, + "learning_rate": 2.4050360369312896e-06, + "loss": 0.016614532470703124, + "step": 130350 + }, + { + "epoch": 1.1271411401544302, + "grad_norm": 4.349673723246907, + "learning_rate": 2.404836351872928e-06, + "loss": 0.13524017333984376, + "step": 130355 + }, + { + "epoch": 1.1271843736759735, + "grad_norm": 0.7553751451898975, + "learning_rate": 2.4046366695594087e-06, + "loss": 0.04749565124511719, + "step": 130360 + }, + { + "epoch": 1.1272276071975167, + "grad_norm": 2.1208318961290993, + "learning_rate": 2.404436989991653e-06, + "loss": 0.01803245544433594, + "step": 130365 + }, + { + "epoch": 1.12727084071906, + "grad_norm": 23.269326298389498, + "learning_rate": 2.404237313170581e-06, + "loss": 0.1262725830078125, + "step": 130370 + }, + { + "epoch": 1.1273140742406031, + "grad_norm": 9.449425618101715, + "learning_rate": 2.404037639097114e-06, + "loss": 0.04236106872558594, + "step": 130375 + }, + { + "epoch": 1.1273573077621464, + "grad_norm": 0.28216038114517683, + "learning_rate": 2.4038379677721725e-06, + "loss": 0.23225059509277343, + "step": 130380 + }, + { + "epoch": 1.1274005412836896, + "grad_norm": 2.5526085886926366, + "learning_rate": 2.403638299196677e-06, + "loss": 0.0345184326171875, + "step": 130385 + }, + { + "epoch": 1.127443774805233, + "grad_norm": 32.70928733135999, + "learning_rate": 2.4034386333715497e-06, + "loss": 0.07659025192260742, + "step": 130390 + }, + { + "epoch": 1.1274870083267763, + "grad_norm": 8.45091167412519, + "learning_rate": 2.4032389702977107e-06, + "loss": 0.091998291015625, + "step": 130395 + }, + { + "epoch": 1.1275302418483195, + "grad_norm": 5.294757099156754, + "learning_rate": 2.4030393099760813e-06, + "loss": 0.14073944091796875, + "step": 130400 + }, + { + "epoch": 1.1275734753698627, + "grad_norm": 21.418266122279018, + "learning_rate": 2.4028396524075816e-06, + "loss": 0.08676719665527344, + "step": 130405 + }, + { + "epoch": 1.127616708891406, + "grad_norm": 29.78562332680584, + "learning_rate": 2.402639997593132e-06, + "loss": 0.16210289001464845, + "step": 130410 + }, + { + "epoch": 1.1276599424129494, + "grad_norm": 1.106257305995257, + "learning_rate": 2.4024403455336547e-06, + "loss": 0.06763687133789062, + "step": 130415 + }, + { + "epoch": 1.1277031759344927, + "grad_norm": 1.1214738774791322, + "learning_rate": 2.4022406962300684e-06, + "loss": 0.09829330444335938, + "step": 130420 + }, + { + "epoch": 1.1277464094560359, + "grad_norm": 32.666849554857365, + "learning_rate": 2.4020410496832966e-06, + "loss": 0.15104751586914061, + "step": 130425 + }, + { + "epoch": 1.127789642977579, + "grad_norm": 3.6937755711753186, + "learning_rate": 2.4018414058942586e-06, + "loss": 0.06689929962158203, + "step": 130430 + }, + { + "epoch": 1.1278328764991223, + "grad_norm": 4.001091052468305, + "learning_rate": 2.401641764863875e-06, + "loss": 0.09403305053710938, + "step": 130435 + }, + { + "epoch": 1.1278761100206656, + "grad_norm": 5.980216026405885, + "learning_rate": 2.4014421265930663e-06, + "loss": 0.17611236572265626, + "step": 130440 + }, + { + "epoch": 1.1279193435422088, + "grad_norm": 0.30115816211090396, + "learning_rate": 2.401242491082754e-06, + "loss": 0.00945587158203125, + "step": 130445 + }, + { + "epoch": 1.1279625770637522, + "grad_norm": 0.7167089268383885, + "learning_rate": 2.4010428583338574e-06, + "loss": 0.049932861328125, + "step": 130450 + }, + { + "epoch": 1.1280058105852955, + "grad_norm": 8.659829026999311, + "learning_rate": 2.4008432283472992e-06, + "loss": 0.034334754943847655, + "step": 130455 + }, + { + "epoch": 1.1280490441068387, + "grad_norm": 0.5046145441077828, + "learning_rate": 2.4006436011239993e-06, + "loss": 0.017794036865234376, + "step": 130460 + }, + { + "epoch": 1.128092277628382, + "grad_norm": 5.449856944842705, + "learning_rate": 2.4004439766648783e-06, + "loss": 0.03371734619140625, + "step": 130465 + }, + { + "epoch": 1.1281355111499252, + "grad_norm": 2.807741371304039, + "learning_rate": 2.4002443549708557e-06, + "loss": 0.03457221984863281, + "step": 130470 + }, + { + "epoch": 1.1281787446714684, + "grad_norm": 0.6780923005764238, + "learning_rate": 2.400044736042854e-06, + "loss": 0.029914093017578126, + "step": 130475 + }, + { + "epoch": 1.1282219781930118, + "grad_norm": 7.265038216692382, + "learning_rate": 2.3998451198817915e-06, + "loss": 0.1776031494140625, + "step": 130480 + }, + { + "epoch": 1.128265211714555, + "grad_norm": 3.7830521812761466, + "learning_rate": 2.399645506488592e-06, + "loss": 0.073095703125, + "step": 130485 + }, + { + "epoch": 1.1283084452360983, + "grad_norm": 4.1853913856534435, + "learning_rate": 2.399445895864174e-06, + "loss": 0.02955322265625, + "step": 130490 + }, + { + "epoch": 1.1283516787576415, + "grad_norm": 1.1215049983800967, + "learning_rate": 2.3992462880094584e-06, + "loss": 0.0096923828125, + "step": 130495 + }, + { + "epoch": 1.1283949122791848, + "grad_norm": 4.962525584174519, + "learning_rate": 2.3990466829253652e-06, + "loss": 0.048828125, + "step": 130500 + }, + { + "epoch": 1.128438145800728, + "grad_norm": 68.34790741608663, + "learning_rate": 2.398847080612817e-06, + "loss": 0.09713516235351563, + "step": 130505 + }, + { + "epoch": 1.1284813793222712, + "grad_norm": 4.596172238021557, + "learning_rate": 2.3986474810727317e-06, + "loss": 0.010513114929199218, + "step": 130510 + }, + { + "epoch": 1.1285246128438147, + "grad_norm": 1.1698324684553212, + "learning_rate": 2.3984478843060307e-06, + "loss": 0.044847869873046876, + "step": 130515 + }, + { + "epoch": 1.128567846365358, + "grad_norm": 14.267178750495415, + "learning_rate": 2.3982482903136358e-06, + "loss": 0.04775238037109375, + "step": 130520 + }, + { + "epoch": 1.1286110798869011, + "grad_norm": 3.0888974613010687, + "learning_rate": 2.3980486990964664e-06, + "loss": 0.02202911376953125, + "step": 130525 + }, + { + "epoch": 1.1286543134084444, + "grad_norm": 2.516431759760648, + "learning_rate": 2.397849110655443e-06, + "loss": 0.03640289306640625, + "step": 130530 + }, + { + "epoch": 1.1286975469299876, + "grad_norm": 6.276250293444347, + "learning_rate": 2.397649524991487e-06, + "loss": 0.06855697631835937, + "step": 130535 + }, + { + "epoch": 1.1287407804515308, + "grad_norm": 0.21324907678518445, + "learning_rate": 2.397449942105518e-06, + "loss": 0.05607795715332031, + "step": 130540 + }, + { + "epoch": 1.1287840139730743, + "grad_norm": 1.706487591127589, + "learning_rate": 2.3972503619984555e-06, + "loss": 0.04973869323730469, + "step": 130545 + }, + { + "epoch": 1.1288272474946175, + "grad_norm": 40.21627502590464, + "learning_rate": 2.397050784671222e-06, + "loss": 0.08361968994140626, + "step": 130550 + }, + { + "epoch": 1.1288704810161607, + "grad_norm": 1.5127972702926604, + "learning_rate": 2.3968512101247367e-06, + "loss": 0.049530029296875, + "step": 130555 + }, + { + "epoch": 1.128913714537704, + "grad_norm": 0.17518038214215312, + "learning_rate": 2.3966516383599208e-06, + "loss": 0.11936874389648437, + "step": 130560 + }, + { + "epoch": 1.1289569480592472, + "grad_norm": 24.776814457010612, + "learning_rate": 2.396452069377694e-06, + "loss": 0.31107635498046876, + "step": 130565 + }, + { + "epoch": 1.1290001815807904, + "grad_norm": 3.298573577429537, + "learning_rate": 2.3962525031789772e-06, + "loss": 0.14271926879882812, + "step": 130570 + }, + { + "epoch": 1.1290434151023336, + "grad_norm": 0.04714515809178726, + "learning_rate": 2.39605293976469e-06, + "loss": 0.064288330078125, + "step": 130575 + }, + { + "epoch": 1.129086648623877, + "grad_norm": 2.4366023280519538, + "learning_rate": 2.395853379135753e-06, + "loss": 0.15999908447265626, + "step": 130580 + }, + { + "epoch": 1.1291298821454203, + "grad_norm": 3.7105689693501103, + "learning_rate": 2.395653821293087e-06, + "loss": 0.09331893920898438, + "step": 130585 + }, + { + "epoch": 1.1291731156669635, + "grad_norm": 0.5997908450941423, + "learning_rate": 2.3954542662376127e-06, + "loss": 0.17507171630859375, + "step": 130590 + }, + { + "epoch": 1.1292163491885068, + "grad_norm": 1.2480810422573363, + "learning_rate": 2.3952547139702497e-06, + "loss": 0.012926101684570312, + "step": 130595 + }, + { + "epoch": 1.12925958271005, + "grad_norm": 5.854697338871371, + "learning_rate": 2.395055164491919e-06, + "loss": 0.013048362731933594, + "step": 130600 + }, + { + "epoch": 1.1293028162315932, + "grad_norm": 19.185083118365682, + "learning_rate": 2.3948556178035395e-06, + "loss": 0.065545654296875, + "step": 130605 + }, + { + "epoch": 1.1293460497531367, + "grad_norm": 6.090475667649019, + "learning_rate": 2.3946560739060325e-06, + "loss": 0.08683958053588867, + "step": 130610 + }, + { + "epoch": 1.12938928327468, + "grad_norm": 9.839412829249843, + "learning_rate": 2.3944565328003182e-06, + "loss": 0.12168121337890625, + "step": 130615 + }, + { + "epoch": 1.1294325167962231, + "grad_norm": 0.11959893054036173, + "learning_rate": 2.3942569944873177e-06, + "loss": 0.04172592163085938, + "step": 130620 + }, + { + "epoch": 1.1294757503177664, + "grad_norm": 5.238494635812987, + "learning_rate": 2.39405745896795e-06, + "loss": 0.11026725769042969, + "step": 130625 + }, + { + "epoch": 1.1295189838393096, + "grad_norm": 0.768690883687102, + "learning_rate": 2.393857926243136e-06, + "loss": 0.04550991058349609, + "step": 130630 + }, + { + "epoch": 1.1295622173608528, + "grad_norm": 9.74787686488396, + "learning_rate": 2.393658396313795e-06, + "loss": 0.06011505126953125, + "step": 130635 + }, + { + "epoch": 1.129605450882396, + "grad_norm": 11.133656903970442, + "learning_rate": 2.393458869180848e-06, + "loss": 0.05550689697265625, + "step": 130640 + }, + { + "epoch": 1.1296486844039395, + "grad_norm": 4.700646205916001, + "learning_rate": 2.3932593448452147e-06, + "loss": 0.02383003234863281, + "step": 130645 + }, + { + "epoch": 1.1296919179254827, + "grad_norm": 0.9131948197482903, + "learning_rate": 2.3930598233078173e-06, + "loss": 0.10628852844238282, + "step": 130650 + }, + { + "epoch": 1.129735151447026, + "grad_norm": 0.7846072069875861, + "learning_rate": 2.3928603045695733e-06, + "loss": 0.32866058349609373, + "step": 130655 + }, + { + "epoch": 1.1297783849685692, + "grad_norm": 0.5443731934182863, + "learning_rate": 2.3926607886314042e-06, + "loss": 0.06565513610839843, + "step": 130660 + }, + { + "epoch": 1.1298216184901124, + "grad_norm": 11.328769763238359, + "learning_rate": 2.3924612754942295e-06, + "loss": 0.030529022216796875, + "step": 130665 + }, + { + "epoch": 1.1298648520116559, + "grad_norm": 5.393569624735401, + "learning_rate": 2.3922617651589704e-06, + "loss": 0.08470039367675782, + "step": 130670 + }, + { + "epoch": 1.129908085533199, + "grad_norm": 9.51137410736586, + "learning_rate": 2.392062257626545e-06, + "loss": 0.03761749267578125, + "step": 130675 + }, + { + "epoch": 1.1299513190547423, + "grad_norm": 22.518822507773073, + "learning_rate": 2.391862752897876e-06, + "loss": 0.05949859619140625, + "step": 130680 + }, + { + "epoch": 1.1299945525762856, + "grad_norm": 5.877176055919506, + "learning_rate": 2.391663250973882e-06, + "loss": 0.03220062255859375, + "step": 130685 + }, + { + "epoch": 1.1300377860978288, + "grad_norm": 0.2084260574431441, + "learning_rate": 2.391463751855483e-06, + "loss": 0.200634765625, + "step": 130690 + }, + { + "epoch": 1.130081019619372, + "grad_norm": 5.098561747657495, + "learning_rate": 2.3912642555436e-06, + "loss": 0.21053385734558105, + "step": 130695 + }, + { + "epoch": 1.1301242531409152, + "grad_norm": 3.5864101704039477, + "learning_rate": 2.391064762039152e-06, + "loss": 0.17829971313476561, + "step": 130700 + }, + { + "epoch": 1.1301674866624587, + "grad_norm": 24.464507114516294, + "learning_rate": 2.3908652713430587e-06, + "loss": 0.061370849609375, + "step": 130705 + }, + { + "epoch": 1.130210720184002, + "grad_norm": 5.90651905912604, + "learning_rate": 2.390665783456242e-06, + "loss": 0.26253204345703124, + "step": 130710 + }, + { + "epoch": 1.1302539537055452, + "grad_norm": 2.1006864143828454, + "learning_rate": 2.390466298379621e-06, + "loss": 0.018061065673828126, + "step": 130715 + }, + { + "epoch": 1.1302971872270884, + "grad_norm": 18.25444338692442, + "learning_rate": 2.3902668161141154e-06, + "loss": 0.08713760375976562, + "step": 130720 + }, + { + "epoch": 1.1303404207486316, + "grad_norm": 3.0614345784548753, + "learning_rate": 2.390067336660645e-06, + "loss": 0.011758804321289062, + "step": 130725 + }, + { + "epoch": 1.1303836542701748, + "grad_norm": 13.854664514349992, + "learning_rate": 2.3898678600201307e-06, + "loss": 0.06876029968261718, + "step": 130730 + }, + { + "epoch": 1.1304268877917183, + "grad_norm": 74.01520502983873, + "learning_rate": 2.3896683861934908e-06, + "loss": 0.22469329833984375, + "step": 130735 + }, + { + "epoch": 1.1304701213132615, + "grad_norm": 0.49343220524110687, + "learning_rate": 2.3894689151816472e-06, + "loss": 0.043209075927734375, + "step": 130740 + }, + { + "epoch": 1.1305133548348048, + "grad_norm": 1.6460603398812295, + "learning_rate": 2.3892694469855195e-06, + "loss": 0.05319690704345703, + "step": 130745 + }, + { + "epoch": 1.130556588356348, + "grad_norm": 2.222216792158799, + "learning_rate": 2.3890699816060262e-06, + "loss": 0.0122589111328125, + "step": 130750 + }, + { + "epoch": 1.1305998218778912, + "grad_norm": 2.165067498337972, + "learning_rate": 2.388870519044089e-06, + "loss": 0.08023948669433593, + "step": 130755 + }, + { + "epoch": 1.1306430553994344, + "grad_norm": 5.086883364204721, + "learning_rate": 2.3886710593006266e-06, + "loss": 0.049919509887695314, + "step": 130760 + }, + { + "epoch": 1.1306862889209777, + "grad_norm": 5.000471866826411, + "learning_rate": 2.3884716023765597e-06, + "loss": 0.0843130111694336, + "step": 130765 + }, + { + "epoch": 1.1307295224425211, + "grad_norm": 0.4617486834343443, + "learning_rate": 2.3882721482728063e-06, + "loss": 0.05847244262695313, + "step": 130770 + }, + { + "epoch": 1.1307727559640643, + "grad_norm": 1.3378081528479444, + "learning_rate": 2.3880726969902888e-06, + "loss": 0.042031478881835935, + "step": 130775 + }, + { + "epoch": 1.1308159894856076, + "grad_norm": 0.3034582885973815, + "learning_rate": 2.3878732485299254e-06, + "loss": 0.03419647216796875, + "step": 130780 + }, + { + "epoch": 1.1308592230071508, + "grad_norm": 5.2992038623731155, + "learning_rate": 2.3876738028926373e-06, + "loss": 0.10980072021484374, + "step": 130785 + }, + { + "epoch": 1.130902456528694, + "grad_norm": 0.838389224486613, + "learning_rate": 2.3874743600793434e-06, + "loss": 0.046254730224609374, + "step": 130790 + }, + { + "epoch": 1.1309456900502373, + "grad_norm": 2.1152789502716107, + "learning_rate": 2.387274920090964e-06, + "loss": 0.06644439697265625, + "step": 130795 + }, + { + "epoch": 1.1309889235717807, + "grad_norm": 1.6101081329480045, + "learning_rate": 2.3870754829284165e-06, + "loss": 0.05599212646484375, + "step": 130800 + }, + { + "epoch": 1.131032157093324, + "grad_norm": 1.1528027250978345, + "learning_rate": 2.3868760485926244e-06, + "loss": 0.1907745361328125, + "step": 130805 + }, + { + "epoch": 1.1310753906148672, + "grad_norm": 0.7913861652434896, + "learning_rate": 2.3866766170845052e-06, + "loss": 0.08598709106445312, + "step": 130810 + }, + { + "epoch": 1.1311186241364104, + "grad_norm": 0.08355758834802576, + "learning_rate": 2.3864771884049798e-06, + "loss": 0.08393096923828125, + "step": 130815 + }, + { + "epoch": 1.1311618576579536, + "grad_norm": 0.10306931140776397, + "learning_rate": 2.3862777625549674e-06, + "loss": 0.010329437255859376, + "step": 130820 + }, + { + "epoch": 1.1312050911794969, + "grad_norm": 1.7181101535368015, + "learning_rate": 2.3860783395353873e-06, + "loss": 0.0368896484375, + "step": 130825 + }, + { + "epoch": 1.13124832470104, + "grad_norm": 0.33907782435431366, + "learning_rate": 2.385878919347159e-06, + "loss": 0.1844097137451172, + "step": 130830 + }, + { + "epoch": 1.1312915582225835, + "grad_norm": 7.687126930776388, + "learning_rate": 2.3856795019912043e-06, + "loss": 0.10375823974609374, + "step": 130835 + }, + { + "epoch": 1.1313347917441268, + "grad_norm": 0.04270085971363794, + "learning_rate": 2.3854800874684403e-06, + "loss": 0.027728271484375, + "step": 130840 + }, + { + "epoch": 1.13137802526567, + "grad_norm": 0.5652381058060988, + "learning_rate": 2.3852806757797884e-06, + "loss": 0.017796707153320313, + "step": 130845 + }, + { + "epoch": 1.1314212587872132, + "grad_norm": 0.2501107766166256, + "learning_rate": 2.385081266926168e-06, + "loss": 0.016051483154296876, + "step": 130850 + }, + { + "epoch": 1.1314644923087565, + "grad_norm": 0.0750808686557652, + "learning_rate": 2.3848818609084984e-06, + "loss": 0.028424072265625, + "step": 130855 + }, + { + "epoch": 1.1315077258303, + "grad_norm": 0.7036415047973932, + "learning_rate": 2.3846824577276987e-06, + "loss": 0.02358722686767578, + "step": 130860 + }, + { + "epoch": 1.1315509593518431, + "grad_norm": 2.65990237335832, + "learning_rate": 2.3844830573846894e-06, + "loss": 0.015920066833496095, + "step": 130865 + }, + { + "epoch": 1.1315941928733864, + "grad_norm": 0.33754616574821633, + "learning_rate": 2.3842836598803894e-06, + "loss": 0.017026519775390624, + "step": 130870 + }, + { + "epoch": 1.1316374263949296, + "grad_norm": 0.10043498435788283, + "learning_rate": 2.3840842652157196e-06, + "loss": 0.03178348541259766, + "step": 130875 + }, + { + "epoch": 1.1316806599164728, + "grad_norm": 6.588075177592265, + "learning_rate": 2.3838848733915984e-06, + "loss": 0.07910614013671875, + "step": 130880 + }, + { + "epoch": 1.131723893438016, + "grad_norm": 36.98311394806454, + "learning_rate": 2.3836854844089462e-06, + "loss": 0.14513282775878905, + "step": 130885 + }, + { + "epoch": 1.1317671269595593, + "grad_norm": 1.0094973637069402, + "learning_rate": 2.3834860982686813e-06, + "loss": 0.09891510009765625, + "step": 130890 + }, + { + "epoch": 1.1318103604811025, + "grad_norm": 23.958262318180378, + "learning_rate": 2.383286714971724e-06, + "loss": 0.17240371704101562, + "step": 130895 + }, + { + "epoch": 1.131853594002646, + "grad_norm": 6.255912141666483, + "learning_rate": 2.3830873345189937e-06, + "loss": 0.102691650390625, + "step": 130900 + }, + { + "epoch": 1.1318968275241892, + "grad_norm": 12.829885994612232, + "learning_rate": 2.382887956911411e-06, + "loss": 0.04127655029296875, + "step": 130905 + }, + { + "epoch": 1.1319400610457324, + "grad_norm": 0.5076964615602746, + "learning_rate": 2.3826885821498943e-06, + "loss": 0.01739921569824219, + "step": 130910 + }, + { + "epoch": 1.1319832945672756, + "grad_norm": 3.6125295967112114, + "learning_rate": 2.382489210235364e-06, + "loss": 0.04216842651367188, + "step": 130915 + }, + { + "epoch": 1.1320265280888189, + "grad_norm": 12.30054672777113, + "learning_rate": 2.3822898411687374e-06, + "loss": 0.11297111511230469, + "step": 130920 + }, + { + "epoch": 1.1320697616103623, + "grad_norm": 0.23579132277147644, + "learning_rate": 2.3820904749509364e-06, + "loss": 0.173333740234375, + "step": 130925 + }, + { + "epoch": 1.1321129951319056, + "grad_norm": 2.401260350570301, + "learning_rate": 2.381891111582878e-06, + "loss": 0.0415313720703125, + "step": 130930 + }, + { + "epoch": 1.1321562286534488, + "grad_norm": 0.392394214762265, + "learning_rate": 2.381691751065485e-06, + "loss": 0.005471038818359375, + "step": 130935 + }, + { + "epoch": 1.132199462174992, + "grad_norm": 5.81102908826477, + "learning_rate": 2.3814923933996744e-06, + "loss": 0.10786895751953125, + "step": 130940 + }, + { + "epoch": 1.1322426956965352, + "grad_norm": 9.299856940109771, + "learning_rate": 2.381293038586366e-06, + "loss": 0.02996978759765625, + "step": 130945 + }, + { + "epoch": 1.1322859292180785, + "grad_norm": 6.414184337074598, + "learning_rate": 2.38109368662648e-06, + "loss": 0.23330459594726563, + "step": 130950 + }, + { + "epoch": 1.1323291627396217, + "grad_norm": 0.8601515933718639, + "learning_rate": 2.3808943375209355e-06, + "loss": 0.10400543212890626, + "step": 130955 + }, + { + "epoch": 1.1323723962611651, + "grad_norm": 1.9710786346172506, + "learning_rate": 2.3806949912706495e-06, + "loss": 0.07421417236328125, + "step": 130960 + }, + { + "epoch": 1.1324156297827084, + "grad_norm": 2.1477744150933207, + "learning_rate": 2.3804956478765456e-06, + "loss": 0.09634475708007813, + "step": 130965 + }, + { + "epoch": 1.1324588633042516, + "grad_norm": 0.28797426786141783, + "learning_rate": 2.380296307339541e-06, + "loss": 0.15162811279296876, + "step": 130970 + }, + { + "epoch": 1.1325020968257948, + "grad_norm": 6.247897929547166, + "learning_rate": 2.380096969660554e-06, + "loss": 0.1563507080078125, + "step": 130975 + }, + { + "epoch": 1.132545330347338, + "grad_norm": 3.394130657293977, + "learning_rate": 2.3798976348405057e-06, + "loss": 0.04962005615234375, + "step": 130980 + }, + { + "epoch": 1.1325885638688813, + "grad_norm": 1.2350548068227105, + "learning_rate": 2.379698302880315e-06, + "loss": 0.09965133666992188, + "step": 130985 + }, + { + "epoch": 1.1326317973904247, + "grad_norm": 0.47219471860268886, + "learning_rate": 2.3794989737808994e-06, + "loss": 0.5746139526367188, + "step": 130990 + }, + { + "epoch": 1.132675030911968, + "grad_norm": 4.16366432956136, + "learning_rate": 2.379299647543181e-06, + "loss": 0.11861896514892578, + "step": 130995 + }, + { + "epoch": 1.1327182644335112, + "grad_norm": 3.269762994757199, + "learning_rate": 2.3791003241680777e-06, + "loss": 0.0750091552734375, + "step": 131000 + }, + { + "epoch": 1.1327614979550544, + "grad_norm": 0.15143873333657693, + "learning_rate": 2.3789010036565083e-06, + "loss": 0.09971046447753906, + "step": 131005 + }, + { + "epoch": 1.1328047314765977, + "grad_norm": 1.3688816678969453, + "learning_rate": 2.3787016860093933e-06, + "loss": 0.1671703815460205, + "step": 131010 + }, + { + "epoch": 1.1328479649981409, + "grad_norm": 4.759134550454212, + "learning_rate": 2.3785023712276515e-06, + "loss": 0.17004776000976562, + "step": 131015 + }, + { + "epoch": 1.1328911985196841, + "grad_norm": 9.777096952650904, + "learning_rate": 2.3783030593122015e-06, + "loss": 0.058612060546875, + "step": 131020 + }, + { + "epoch": 1.1329344320412276, + "grad_norm": 4.42849844650717, + "learning_rate": 2.3781037502639617e-06, + "loss": 0.06453914642333984, + "step": 131025 + }, + { + "epoch": 1.1329776655627708, + "grad_norm": 1.2903761251530566, + "learning_rate": 2.3779044440838535e-06, + "loss": 0.038385009765625, + "step": 131030 + }, + { + "epoch": 1.133020899084314, + "grad_norm": 15.295721912903277, + "learning_rate": 2.3777051407727945e-06, + "loss": 0.0842437744140625, + "step": 131035 + }, + { + "epoch": 1.1330641326058573, + "grad_norm": 23.216413416265276, + "learning_rate": 2.377505840331705e-06, + "loss": 0.06247100830078125, + "step": 131040 + }, + { + "epoch": 1.1331073661274005, + "grad_norm": 2.4119796730533745, + "learning_rate": 2.377306542761504e-06, + "loss": 0.10854339599609375, + "step": 131045 + }, + { + "epoch": 1.1331505996489437, + "grad_norm": 6.897443557974375, + "learning_rate": 2.3771072480631096e-06, + "loss": 0.1571868896484375, + "step": 131050 + }, + { + "epoch": 1.1331938331704872, + "grad_norm": 1.0698333133595554, + "learning_rate": 2.376907956237441e-06, + "loss": 0.017888641357421874, + "step": 131055 + }, + { + "epoch": 1.1332370666920304, + "grad_norm": 48.09815475029834, + "learning_rate": 2.376708667285418e-06, + "loss": 0.11748199462890625, + "step": 131060 + }, + { + "epoch": 1.1332803002135736, + "grad_norm": 0.4008759197025411, + "learning_rate": 2.3765093812079594e-06, + "loss": 0.08431396484375, + "step": 131065 + }, + { + "epoch": 1.1333235337351169, + "grad_norm": 3.5074040378149074, + "learning_rate": 2.3763100980059856e-06, + "loss": 0.042078399658203126, + "step": 131070 + }, + { + "epoch": 1.13336676725666, + "grad_norm": 12.555245942905595, + "learning_rate": 2.376110817680414e-06, + "loss": 0.05492715835571289, + "step": 131075 + }, + { + "epoch": 1.1334100007782033, + "grad_norm": 28.388983740790923, + "learning_rate": 2.3759115402321645e-06, + "loss": 0.12063751220703126, + "step": 131080 + }, + { + "epoch": 1.1334532342997465, + "grad_norm": 44.47593855772626, + "learning_rate": 2.3757122656621543e-06, + "loss": 0.17005386352539062, + "step": 131085 + }, + { + "epoch": 1.13349646782129, + "grad_norm": 3.5558175257818947, + "learning_rate": 2.3755129939713054e-06, + "loss": 0.02641448974609375, + "step": 131090 + }, + { + "epoch": 1.1335397013428332, + "grad_norm": 20.253562821762788, + "learning_rate": 2.3753137251605344e-06, + "loss": 0.10867767333984375, + "step": 131095 + }, + { + "epoch": 1.1335829348643764, + "grad_norm": 34.614774538470456, + "learning_rate": 2.3751144592307622e-06, + "loss": 0.07886314392089844, + "step": 131100 + }, + { + "epoch": 1.1336261683859197, + "grad_norm": 20.723802423649936, + "learning_rate": 2.374915196182907e-06, + "loss": 0.08101959228515625, + "step": 131105 + }, + { + "epoch": 1.133669401907463, + "grad_norm": 6.62136150782422, + "learning_rate": 2.3747159360178877e-06, + "loss": 0.18260345458984376, + "step": 131110 + }, + { + "epoch": 1.1337126354290064, + "grad_norm": 4.26929008940407, + "learning_rate": 2.3745166787366227e-06, + "loss": 0.04099578857421875, + "step": 131115 + }, + { + "epoch": 1.1337558689505496, + "grad_norm": 6.1300124765810295, + "learning_rate": 2.3743174243400303e-06, + "loss": 0.042376708984375, + "step": 131120 + }, + { + "epoch": 1.1337991024720928, + "grad_norm": 0.4584457458663842, + "learning_rate": 2.3741181728290324e-06, + "loss": 0.046835136413574216, + "step": 131125 + }, + { + "epoch": 1.133842335993636, + "grad_norm": 2.970569509584825, + "learning_rate": 2.373918924204546e-06, + "loss": 0.06495018005371093, + "step": 131130 + }, + { + "epoch": 1.1338855695151793, + "grad_norm": 1.0293287057941745, + "learning_rate": 2.3737196784674905e-06, + "loss": 0.10255279541015624, + "step": 131135 + }, + { + "epoch": 1.1339288030367225, + "grad_norm": 0.1744907868569138, + "learning_rate": 2.373520435618784e-06, + "loss": 0.005064964294433594, + "step": 131140 + }, + { + "epoch": 1.1339720365582657, + "grad_norm": 0.8355107674243837, + "learning_rate": 2.373321195659346e-06, + "loss": 0.07483673095703125, + "step": 131145 + }, + { + "epoch": 1.134015270079809, + "grad_norm": 5.458460366058709, + "learning_rate": 2.373121958590094e-06, + "loss": 0.131304931640625, + "step": 131150 + }, + { + "epoch": 1.1340585036013524, + "grad_norm": 2.786650477809914, + "learning_rate": 2.3729227244119494e-06, + "loss": 0.20649185180664062, + "step": 131155 + }, + { + "epoch": 1.1341017371228956, + "grad_norm": 86.89610568162468, + "learning_rate": 2.37272349312583e-06, + "loss": 0.3582599639892578, + "step": 131160 + }, + { + "epoch": 1.1341449706444389, + "grad_norm": 2.8520376435613097, + "learning_rate": 2.3725242647326544e-06, + "loss": 0.024849700927734374, + "step": 131165 + }, + { + "epoch": 1.134188204165982, + "grad_norm": 5.834206755403312, + "learning_rate": 2.372325039233341e-06, + "loss": 0.1740743637084961, + "step": 131170 + }, + { + "epoch": 1.1342314376875253, + "grad_norm": 0.9705390569299069, + "learning_rate": 2.3721258166288095e-06, + "loss": 0.03651351928710937, + "step": 131175 + }, + { + "epoch": 1.1342746712090688, + "grad_norm": 1.7480778881712746, + "learning_rate": 2.371926596919977e-06, + "loss": 0.06610870361328125, + "step": 131180 + }, + { + "epoch": 1.134317904730612, + "grad_norm": 0.9692228469788184, + "learning_rate": 2.3717273801077645e-06, + "loss": 0.06375160217285156, + "step": 131185 + }, + { + "epoch": 1.1343611382521552, + "grad_norm": 3.2488612488704103, + "learning_rate": 2.37152816619309e-06, + "loss": 0.04710884094238281, + "step": 131190 + }, + { + "epoch": 1.1344043717736985, + "grad_norm": 0.33686729040619245, + "learning_rate": 2.3713289551768725e-06, + "loss": 0.04938011169433594, + "step": 131195 + }, + { + "epoch": 1.1344476052952417, + "grad_norm": 5.825266243746381, + "learning_rate": 2.3711297470600293e-06, + "loss": 0.035364532470703126, + "step": 131200 + }, + { + "epoch": 1.134490838816785, + "grad_norm": 0.9834080773242712, + "learning_rate": 2.370930541843481e-06, + "loss": 0.18348388671875, + "step": 131205 + }, + { + "epoch": 1.1345340723383281, + "grad_norm": 15.503396098811423, + "learning_rate": 2.370731339528145e-06, + "loss": 0.05804786682128906, + "step": 131210 + }, + { + "epoch": 1.1345773058598716, + "grad_norm": 9.697037141771077, + "learning_rate": 2.3705321401149395e-06, + "loss": 0.108892822265625, + "step": 131215 + }, + { + "epoch": 1.1346205393814148, + "grad_norm": 1.3890120726639275, + "learning_rate": 2.3703329436047855e-06, + "loss": 0.011087989807128907, + "step": 131220 + }, + { + "epoch": 1.134663772902958, + "grad_norm": 1.154916357906128, + "learning_rate": 2.3701337499986e-06, + "loss": 0.009391212463378906, + "step": 131225 + }, + { + "epoch": 1.1347070064245013, + "grad_norm": 2.470681557131281, + "learning_rate": 2.369934559297302e-06, + "loss": 0.06676597595214843, + "step": 131230 + }, + { + "epoch": 1.1347502399460445, + "grad_norm": 37.43617848723914, + "learning_rate": 2.36973537150181e-06, + "loss": 0.07153587341308594, + "step": 131235 + }, + { + "epoch": 1.1347934734675877, + "grad_norm": 13.812216139852412, + "learning_rate": 2.369536186613043e-06, + "loss": 0.08155946731567383, + "step": 131240 + }, + { + "epoch": 1.1348367069891312, + "grad_norm": 7.138103417192575, + "learning_rate": 2.3693370046319182e-06, + "loss": 0.11747589111328124, + "step": 131245 + }, + { + "epoch": 1.1348799405106744, + "grad_norm": 3.375415713979491, + "learning_rate": 2.369137825559357e-06, + "loss": 0.01619415283203125, + "step": 131250 + }, + { + "epoch": 1.1349231740322177, + "grad_norm": 3.7036340078137884, + "learning_rate": 2.368938649396276e-06, + "loss": 0.13564071655273438, + "step": 131255 + }, + { + "epoch": 1.1349664075537609, + "grad_norm": 31.47255301410065, + "learning_rate": 2.3687394761435936e-06, + "loss": 0.09851951599121093, + "step": 131260 + }, + { + "epoch": 1.135009641075304, + "grad_norm": 3.266847456197476, + "learning_rate": 2.36854030580223e-06, + "loss": 0.027984619140625, + "step": 131265 + }, + { + "epoch": 1.1350528745968473, + "grad_norm": 45.0967150103001, + "learning_rate": 2.368341138373102e-06, + "loss": 0.1514739990234375, + "step": 131270 + }, + { + "epoch": 1.1350961081183906, + "grad_norm": 0.45808197458525873, + "learning_rate": 2.3681419738571295e-06, + "loss": 0.0529052734375, + "step": 131275 + }, + { + "epoch": 1.135139341639934, + "grad_norm": 6.956351984766862, + "learning_rate": 2.3679428122552285e-06, + "loss": 0.0158416748046875, + "step": 131280 + }, + { + "epoch": 1.1351825751614772, + "grad_norm": 0.16635220899819286, + "learning_rate": 2.367743653568321e-06, + "loss": 0.013567352294921875, + "step": 131285 + }, + { + "epoch": 1.1352258086830205, + "grad_norm": 5.414640423023082, + "learning_rate": 2.3675444977973237e-06, + "loss": 0.029825210571289062, + "step": 131290 + }, + { + "epoch": 1.1352690422045637, + "grad_norm": 1.9440353389151175, + "learning_rate": 2.3673453449431554e-06, + "loss": 0.1902679443359375, + "step": 131295 + }, + { + "epoch": 1.135312275726107, + "grad_norm": 44.26614474084407, + "learning_rate": 2.3671461950067345e-06, + "loss": 0.3874786376953125, + "step": 131300 + }, + { + "epoch": 1.1353555092476502, + "grad_norm": 2.5624703916659404, + "learning_rate": 2.3669470479889794e-06, + "loss": 0.043536376953125, + "step": 131305 + }, + { + "epoch": 1.1353987427691936, + "grad_norm": 0.3696114019802846, + "learning_rate": 2.3667479038908076e-06, + "loss": 0.016654205322265626, + "step": 131310 + }, + { + "epoch": 1.1354419762907368, + "grad_norm": 1.0311376967920072, + "learning_rate": 2.3665487627131397e-06, + "loss": 0.04490509033203125, + "step": 131315 + }, + { + "epoch": 1.13548520981228, + "grad_norm": 15.511742015667267, + "learning_rate": 2.366349624456892e-06, + "loss": 0.26392478942871095, + "step": 131320 + }, + { + "epoch": 1.1355284433338233, + "grad_norm": 2.2957375614648043, + "learning_rate": 2.3661504891229847e-06, + "loss": 0.1367584228515625, + "step": 131325 + }, + { + "epoch": 1.1355716768553665, + "grad_norm": 7.113626233982482, + "learning_rate": 2.3659513567123353e-06, + "loss": 0.030694580078125, + "step": 131330 + }, + { + "epoch": 1.1356149103769098, + "grad_norm": 1.4458495519537395, + "learning_rate": 2.3657522272258625e-06, + "loss": 0.010990524291992187, + "step": 131335 + }, + { + "epoch": 1.135658143898453, + "grad_norm": 0.3389245187051944, + "learning_rate": 2.3655531006644825e-06, + "loss": 0.03451814651489258, + "step": 131340 + }, + { + "epoch": 1.1357013774199964, + "grad_norm": 3.753271035125928, + "learning_rate": 2.365353977029117e-06, + "loss": 0.009270858764648438, + "step": 131345 + }, + { + "epoch": 1.1357446109415397, + "grad_norm": 2.8958087359617988, + "learning_rate": 2.365154856320683e-06, + "loss": 0.0334716796875, + "step": 131350 + }, + { + "epoch": 1.135787844463083, + "grad_norm": 33.69095229499008, + "learning_rate": 2.3649557385400982e-06, + "loss": 0.14059829711914062, + "step": 131355 + }, + { + "epoch": 1.1358310779846261, + "grad_norm": 2.8351196452900247, + "learning_rate": 2.364756623688282e-06, + "loss": 0.03652496337890625, + "step": 131360 + }, + { + "epoch": 1.1358743115061694, + "grad_norm": 20.692794242217598, + "learning_rate": 2.3645575117661522e-06, + "loss": 0.06052093505859375, + "step": 131365 + }, + { + "epoch": 1.1359175450277128, + "grad_norm": 1.5257203523497838, + "learning_rate": 2.3643584027746263e-06, + "loss": 0.015696334838867187, + "step": 131370 + }, + { + "epoch": 1.135960778549256, + "grad_norm": 3.8219612128526204, + "learning_rate": 2.3641592967146225e-06, + "loss": 0.07277069091796876, + "step": 131375 + }, + { + "epoch": 1.1360040120707993, + "grad_norm": 20.032602588190997, + "learning_rate": 2.3639601935870612e-06, + "loss": 0.06627120971679687, + "step": 131380 + }, + { + "epoch": 1.1360472455923425, + "grad_norm": 0.6338898763304054, + "learning_rate": 2.3637610933928593e-06, + "loss": 0.031869125366210935, + "step": 131385 + }, + { + "epoch": 1.1360904791138857, + "grad_norm": 12.710885845970774, + "learning_rate": 2.363561996132935e-06, + "loss": 0.0456817626953125, + "step": 131390 + }, + { + "epoch": 1.136133712635429, + "grad_norm": 0.8999944163184158, + "learning_rate": 2.363362901808206e-06, + "loss": 0.16266565322875975, + "step": 131395 + }, + { + "epoch": 1.1361769461569722, + "grad_norm": 25.291206053461032, + "learning_rate": 2.3631638104195916e-06, + "loss": 0.16183013916015626, + "step": 131400 + }, + { + "epoch": 1.1362201796785156, + "grad_norm": 5.114187428440143, + "learning_rate": 2.3629647219680078e-06, + "loss": 0.04029006958007812, + "step": 131405 + }, + { + "epoch": 1.1362634132000589, + "grad_norm": 0.22366620010615806, + "learning_rate": 2.3627656364543763e-06, + "loss": 0.027614784240722657, + "step": 131410 + }, + { + "epoch": 1.136306646721602, + "grad_norm": 1.2393105972201006, + "learning_rate": 2.362566553879613e-06, + "loss": 0.054498291015625, + "step": 131415 + }, + { + "epoch": 1.1363498802431453, + "grad_norm": 1.6521635711897338, + "learning_rate": 2.3623674742446367e-06, + "loss": 0.13906784057617189, + "step": 131420 + }, + { + "epoch": 1.1363931137646885, + "grad_norm": 0.483675727561644, + "learning_rate": 2.3621683975503644e-06, + "loss": 0.05405712127685547, + "step": 131425 + }, + { + "epoch": 1.1364363472862318, + "grad_norm": 0.2040199687083298, + "learning_rate": 2.361969323797716e-06, + "loss": 0.06369781494140625, + "step": 131430 + }, + { + "epoch": 1.1364795808077752, + "grad_norm": 2.938781464581228, + "learning_rate": 2.361770252987607e-06, + "loss": 0.05979080200195312, + "step": 131435 + }, + { + "epoch": 1.1365228143293185, + "grad_norm": 1.1349812082521746, + "learning_rate": 2.361571185120959e-06, + "loss": 0.20696029663085938, + "step": 131440 + }, + { + "epoch": 1.1365660478508617, + "grad_norm": 0.35127658460306144, + "learning_rate": 2.3613721201986877e-06, + "loss": 0.046967697143554685, + "step": 131445 + }, + { + "epoch": 1.136609281372405, + "grad_norm": 1.8292242277499875, + "learning_rate": 2.361173058221712e-06, + "loss": 0.215667724609375, + "step": 131450 + }, + { + "epoch": 1.1366525148939481, + "grad_norm": 0.21354794728599724, + "learning_rate": 2.3609739991909493e-06, + "loss": 0.3252555847167969, + "step": 131455 + }, + { + "epoch": 1.1366957484154914, + "grad_norm": 0.45303849939261487, + "learning_rate": 2.3607749431073184e-06, + "loss": 0.07287406921386719, + "step": 131460 + }, + { + "epoch": 1.1367389819370346, + "grad_norm": 0.27429478991859163, + "learning_rate": 2.3605758899717374e-06, + "loss": 0.04975814819335937, + "step": 131465 + }, + { + "epoch": 1.136782215458578, + "grad_norm": 0.28368340122837177, + "learning_rate": 2.360376839785122e-06, + "loss": 0.152703857421875, + "step": 131470 + }, + { + "epoch": 1.1368254489801213, + "grad_norm": 1.7661290299963104, + "learning_rate": 2.3601777925483935e-06, + "loss": 0.020369338989257812, + "step": 131475 + }, + { + "epoch": 1.1368686825016645, + "grad_norm": 2.105245468803048, + "learning_rate": 2.3599787482624687e-06, + "loss": 0.03416366577148437, + "step": 131480 + }, + { + "epoch": 1.1369119160232077, + "grad_norm": 14.465258182364646, + "learning_rate": 2.359779706928265e-06, + "loss": 0.02902374267578125, + "step": 131485 + }, + { + "epoch": 1.136955149544751, + "grad_norm": 3.383301478873296, + "learning_rate": 2.359580668546701e-06, + "loss": 0.023180198669433594, + "step": 131490 + }, + { + "epoch": 1.1369983830662942, + "grad_norm": 1.8446176922198074, + "learning_rate": 2.3593816331186943e-06, + "loss": 0.01881561279296875, + "step": 131495 + }, + { + "epoch": 1.1370416165878376, + "grad_norm": 0.16071643827794582, + "learning_rate": 2.359182600645162e-06, + "loss": 0.02403411865234375, + "step": 131500 + }, + { + "epoch": 1.1370848501093809, + "grad_norm": 24.88862303881518, + "learning_rate": 2.3589835711270236e-06, + "loss": 0.028035736083984374, + "step": 131505 + }, + { + "epoch": 1.137128083630924, + "grad_norm": 0.163932818468759, + "learning_rate": 2.3587845445651967e-06, + "loss": 0.014350318908691406, + "step": 131510 + }, + { + "epoch": 1.1371713171524673, + "grad_norm": 36.35484884862121, + "learning_rate": 2.3585855209605978e-06, + "loss": 0.09383163452148438, + "step": 131515 + }, + { + "epoch": 1.1372145506740106, + "grad_norm": 3.006950976924512, + "learning_rate": 2.358386500314147e-06, + "loss": 0.08776931762695313, + "step": 131520 + }, + { + "epoch": 1.1372577841955538, + "grad_norm": 12.25290641461166, + "learning_rate": 2.358187482626761e-06, + "loss": 0.12194061279296875, + "step": 131525 + }, + { + "epoch": 1.137301017717097, + "grad_norm": 1.987941265493651, + "learning_rate": 2.357988467899357e-06, + "loss": 0.01693572998046875, + "step": 131530 + }, + { + "epoch": 1.1373442512386405, + "grad_norm": 1.9045981656311253, + "learning_rate": 2.357789456132853e-06, + "loss": 0.024407958984375, + "step": 131535 + }, + { + "epoch": 1.1373874847601837, + "grad_norm": 20.44802840063199, + "learning_rate": 2.357590447328168e-06, + "loss": 0.04453496932983399, + "step": 131540 + }, + { + "epoch": 1.137430718281727, + "grad_norm": 13.470860788826995, + "learning_rate": 2.3573914414862184e-06, + "loss": 0.1600208282470703, + "step": 131545 + }, + { + "epoch": 1.1374739518032702, + "grad_norm": 14.515093064306653, + "learning_rate": 2.3571924386079235e-06, + "loss": 0.154254150390625, + "step": 131550 + }, + { + "epoch": 1.1375171853248134, + "grad_norm": 4.50003432871451, + "learning_rate": 2.3569934386942004e-06, + "loss": 0.1608255386352539, + "step": 131555 + }, + { + "epoch": 1.1375604188463568, + "grad_norm": 10.362351998843614, + "learning_rate": 2.3567944417459668e-06, + "loss": 0.033487701416015626, + "step": 131560 + }, + { + "epoch": 1.1376036523679, + "grad_norm": 2.493325946975297, + "learning_rate": 2.3565954477641386e-06, + "loss": 0.03215484619140625, + "step": 131565 + }, + { + "epoch": 1.1376468858894433, + "grad_norm": 0.4782541988096018, + "learning_rate": 2.3563964567496363e-06, + "loss": 0.04547119140625, + "step": 131570 + }, + { + "epoch": 1.1376901194109865, + "grad_norm": 14.036552006316132, + "learning_rate": 2.3561974687033776e-06, + "loss": 0.07744483947753907, + "step": 131575 + }, + { + "epoch": 1.1377333529325298, + "grad_norm": 35.26913830491459, + "learning_rate": 2.355998483626279e-06, + "loss": 0.1161529541015625, + "step": 131580 + }, + { + "epoch": 1.137776586454073, + "grad_norm": 8.110807737271779, + "learning_rate": 2.3557995015192585e-06, + "loss": 0.0592681884765625, + "step": 131585 + }, + { + "epoch": 1.1378198199756162, + "grad_norm": 4.506325904294172, + "learning_rate": 2.355600522383233e-06, + "loss": 0.04264144897460938, + "step": 131590 + }, + { + "epoch": 1.1378630534971594, + "grad_norm": 50.8675979328958, + "learning_rate": 2.355401546219121e-06, + "loss": 0.1194091796875, + "step": 131595 + }, + { + "epoch": 1.1379062870187029, + "grad_norm": 0.1611452051004613, + "learning_rate": 2.35520257302784e-06, + "loss": 0.039841747283935545, + "step": 131600 + }, + { + "epoch": 1.1379495205402461, + "grad_norm": 3.9499397938489547, + "learning_rate": 2.3550036028103086e-06, + "loss": 0.036881637573242185, + "step": 131605 + }, + { + "epoch": 1.1379927540617893, + "grad_norm": 4.804103848296309, + "learning_rate": 2.3548046355674438e-06, + "loss": 0.016363525390625, + "step": 131610 + }, + { + "epoch": 1.1380359875833326, + "grad_norm": 2.2285539401587013, + "learning_rate": 2.354605671300163e-06, + "loss": 0.009479904174804687, + "step": 131615 + }, + { + "epoch": 1.1380792211048758, + "grad_norm": 1.0775553452652662, + "learning_rate": 2.354406710009383e-06, + "loss": 0.027819061279296876, + "step": 131620 + }, + { + "epoch": 1.1381224546264193, + "grad_norm": 0.48953388742055426, + "learning_rate": 2.3542077516960223e-06, + "loss": 0.034503936767578125, + "step": 131625 + }, + { + "epoch": 1.1381656881479625, + "grad_norm": 11.335586618907888, + "learning_rate": 2.3540087963609974e-06, + "loss": 0.09138069152832032, + "step": 131630 + }, + { + "epoch": 1.1382089216695057, + "grad_norm": 1.9378061511314995, + "learning_rate": 2.3538098440052287e-06, + "loss": 0.1017242431640625, + "step": 131635 + }, + { + "epoch": 1.138252155191049, + "grad_norm": 0.09650233202875826, + "learning_rate": 2.353610894629632e-06, + "loss": 0.0374267578125, + "step": 131640 + }, + { + "epoch": 1.1382953887125922, + "grad_norm": 6.0341707616252425, + "learning_rate": 2.353411948235124e-06, + "loss": 0.03749275207519531, + "step": 131645 + }, + { + "epoch": 1.1383386222341354, + "grad_norm": 14.218313016337062, + "learning_rate": 2.3532130048226227e-06, + "loss": 0.044797515869140624, + "step": 131650 + }, + { + "epoch": 1.1383818557556786, + "grad_norm": 1.1937333728906954, + "learning_rate": 2.353014064393046e-06, + "loss": 0.10294189453125, + "step": 131655 + }, + { + "epoch": 1.138425089277222, + "grad_norm": 11.053129042357622, + "learning_rate": 2.3528151269473104e-06, + "loss": 0.055517578125, + "step": 131660 + }, + { + "epoch": 1.1384683227987653, + "grad_norm": 0.9997159749263613, + "learning_rate": 2.352616192486336e-06, + "loss": 0.29033851623535156, + "step": 131665 + }, + { + "epoch": 1.1385115563203085, + "grad_norm": 0.4728490158838555, + "learning_rate": 2.352417261011038e-06, + "loss": 0.032487106323242185, + "step": 131670 + }, + { + "epoch": 1.1385547898418518, + "grad_norm": 3.727236766109988, + "learning_rate": 2.3522183325223344e-06, + "loss": 0.1785125732421875, + "step": 131675 + }, + { + "epoch": 1.138598023363395, + "grad_norm": 0.4403352371390667, + "learning_rate": 2.3520194070211414e-06, + "loss": 0.08198814392089844, + "step": 131680 + }, + { + "epoch": 1.1386412568849382, + "grad_norm": 0.6378902156079992, + "learning_rate": 2.3518204845083793e-06, + "loss": 0.2404632568359375, + "step": 131685 + }, + { + "epoch": 1.1386844904064817, + "grad_norm": 5.367881421593671, + "learning_rate": 2.351621564984962e-06, + "loss": 0.09812774658203124, + "step": 131690 + }, + { + "epoch": 1.138727723928025, + "grad_norm": 73.36849401403107, + "learning_rate": 2.3514226484518104e-06, + "loss": 0.14811172485351562, + "step": 131695 + }, + { + "epoch": 1.1387709574495681, + "grad_norm": 3.246369655833735, + "learning_rate": 2.35122373490984e-06, + "loss": 0.033782958984375, + "step": 131700 + }, + { + "epoch": 1.1388141909711114, + "grad_norm": 0.703505458823001, + "learning_rate": 2.351024824359968e-06, + "loss": 0.02684917449951172, + "step": 131705 + }, + { + "epoch": 1.1388574244926546, + "grad_norm": 4.929900022661867, + "learning_rate": 2.350825916803112e-06, + "loss": 0.05159816741943359, + "step": 131710 + }, + { + "epoch": 1.1389006580141978, + "grad_norm": 56.38909779897156, + "learning_rate": 2.3506270122401903e-06, + "loss": 0.47881927490234377, + "step": 131715 + }, + { + "epoch": 1.138943891535741, + "grad_norm": 0.7027758299702147, + "learning_rate": 2.3504281106721195e-06, + "loss": 0.17429046630859374, + "step": 131720 + }, + { + "epoch": 1.1389871250572845, + "grad_norm": 2.3209600468233567, + "learning_rate": 2.3502292120998153e-06, + "loss": 0.06875267028808593, + "step": 131725 + }, + { + "epoch": 1.1390303585788277, + "grad_norm": 0.9915645647235818, + "learning_rate": 2.3500303165241976e-06, + "loss": 0.2460481643676758, + "step": 131730 + }, + { + "epoch": 1.139073592100371, + "grad_norm": 0.5411032074569967, + "learning_rate": 2.349831423946183e-06, + "loss": 0.025603103637695312, + "step": 131735 + }, + { + "epoch": 1.1391168256219142, + "grad_norm": 3.269065482193243, + "learning_rate": 2.3496325343666877e-06, + "loss": 0.054369354248046876, + "step": 131740 + }, + { + "epoch": 1.1391600591434574, + "grad_norm": 12.131121965085194, + "learning_rate": 2.3494336477866303e-06, + "loss": 0.19011077880859376, + "step": 131745 + }, + { + "epoch": 1.1392032926650006, + "grad_norm": 2.964483984167602, + "learning_rate": 2.3492347642069274e-06, + "loss": 0.06822509765625, + "step": 131750 + }, + { + "epoch": 1.139246526186544, + "grad_norm": 12.252672906997958, + "learning_rate": 2.349035883628495e-06, + "loss": 0.08903961181640625, + "step": 131755 + }, + { + "epoch": 1.1392897597080873, + "grad_norm": 4.887285558552014, + "learning_rate": 2.348837006052253e-06, + "loss": 0.0291900634765625, + "step": 131760 + }, + { + "epoch": 1.1393329932296306, + "grad_norm": 0.20009043155079742, + "learning_rate": 2.348638131479117e-06, + "loss": 0.06040802001953125, + "step": 131765 + }, + { + "epoch": 1.1393762267511738, + "grad_norm": 1.5257100984813539, + "learning_rate": 2.348439259910004e-06, + "loss": 0.015200042724609375, + "step": 131770 + }, + { + "epoch": 1.139419460272717, + "grad_norm": 2.6658104288063442, + "learning_rate": 2.348240391345832e-06, + "loss": 0.05874176025390625, + "step": 131775 + }, + { + "epoch": 1.1394626937942602, + "grad_norm": 10.280206390234728, + "learning_rate": 2.3480415257875174e-06, + "loss": 0.05516357421875, + "step": 131780 + }, + { + "epoch": 1.1395059273158035, + "grad_norm": 2.627232168641464, + "learning_rate": 2.347842663235977e-06, + "loss": 0.07841148376464843, + "step": 131785 + }, + { + "epoch": 1.139549160837347, + "grad_norm": 0.3059027992053217, + "learning_rate": 2.3476438036921296e-06, + "loss": 0.0999267578125, + "step": 131790 + }, + { + "epoch": 1.1395923943588901, + "grad_norm": 5.011785131893396, + "learning_rate": 2.3474449471568907e-06, + "loss": 0.017681121826171875, + "step": 131795 + }, + { + "epoch": 1.1396356278804334, + "grad_norm": 7.300948349604102, + "learning_rate": 2.3472460936311784e-06, + "loss": 0.06030426025390625, + "step": 131800 + }, + { + "epoch": 1.1396788614019766, + "grad_norm": 0.6427425890971571, + "learning_rate": 2.34704724311591e-06, + "loss": 0.007668685913085937, + "step": 131805 + }, + { + "epoch": 1.1397220949235198, + "grad_norm": 1.3690114924902168, + "learning_rate": 2.346848395612002e-06, + "loss": 0.31987724304199217, + "step": 131810 + }, + { + "epoch": 1.1397653284450633, + "grad_norm": 3.0617241009179668, + "learning_rate": 2.34664955112037e-06, + "loss": 0.02482757568359375, + "step": 131815 + }, + { + "epoch": 1.1398085619666065, + "grad_norm": 9.079277425873348, + "learning_rate": 2.346450709641934e-06, + "loss": 0.0699127197265625, + "step": 131820 + }, + { + "epoch": 1.1398517954881497, + "grad_norm": 5.8123868706308, + "learning_rate": 2.3462518711776083e-06, + "loss": 0.045548439025878906, + "step": 131825 + }, + { + "epoch": 1.139895029009693, + "grad_norm": 3.403297024986454, + "learning_rate": 2.3460530357283122e-06, + "loss": 0.0227813720703125, + "step": 131830 + }, + { + "epoch": 1.1399382625312362, + "grad_norm": 1.8317001252005658, + "learning_rate": 2.345854203294962e-06, + "loss": 0.028020477294921874, + "step": 131835 + }, + { + "epoch": 1.1399814960527794, + "grad_norm": 66.76964027284153, + "learning_rate": 2.345655373878474e-06, + "loss": 0.30855712890625, + "step": 131840 + }, + { + "epoch": 1.1400247295743227, + "grad_norm": 5.768893197027222, + "learning_rate": 2.3454565474797652e-06, + "loss": 0.0597137451171875, + "step": 131845 + }, + { + "epoch": 1.1400679630958659, + "grad_norm": 1.352524725931117, + "learning_rate": 2.345257724099753e-06, + "loss": 0.0456634521484375, + "step": 131850 + }, + { + "epoch": 1.1401111966174093, + "grad_norm": 83.31596174619037, + "learning_rate": 2.345058903739354e-06, + "loss": 0.12911453247070312, + "step": 131855 + }, + { + "epoch": 1.1401544301389526, + "grad_norm": 0.7942584819489465, + "learning_rate": 2.3448600863994866e-06, + "loss": 0.010699462890625, + "step": 131860 + }, + { + "epoch": 1.1401976636604958, + "grad_norm": 59.71465811015962, + "learning_rate": 2.3446612720810664e-06, + "loss": 0.08753128051757812, + "step": 131865 + }, + { + "epoch": 1.140240897182039, + "grad_norm": 8.037511406256222, + "learning_rate": 2.3444624607850106e-06, + "loss": 0.05087413787841797, + "step": 131870 + }, + { + "epoch": 1.1402841307035823, + "grad_norm": 8.158106248886346, + "learning_rate": 2.344263652512235e-06, + "loss": 0.04684371948242187, + "step": 131875 + }, + { + "epoch": 1.1403273642251257, + "grad_norm": 5.156704883804914, + "learning_rate": 2.3440648472636587e-06, + "loss": 0.026627731323242188, + "step": 131880 + }, + { + "epoch": 1.140370597746669, + "grad_norm": 0.5957391499713061, + "learning_rate": 2.343866045040195e-06, + "loss": 0.19892425537109376, + "step": 131885 + }, + { + "epoch": 1.1404138312682122, + "grad_norm": 7.404204683282471, + "learning_rate": 2.3436672458427658e-06, + "loss": 0.04820098876953125, + "step": 131890 + }, + { + "epoch": 1.1404570647897554, + "grad_norm": 7.184213658290651, + "learning_rate": 2.3434684496722844e-06, + "loss": 0.06952781677246093, + "step": 131895 + }, + { + "epoch": 1.1405002983112986, + "grad_norm": 0.7210061774966808, + "learning_rate": 2.343269656529669e-06, + "loss": 0.1138143539428711, + "step": 131900 + }, + { + "epoch": 1.1405435318328419, + "grad_norm": 17.290038576011778, + "learning_rate": 2.3430708664158348e-06, + "loss": 0.0537872314453125, + "step": 131905 + }, + { + "epoch": 1.140586765354385, + "grad_norm": 19.364558110196796, + "learning_rate": 2.3428720793317003e-06, + "loss": 0.34813385009765624, + "step": 131910 + }, + { + "epoch": 1.1406299988759285, + "grad_norm": 39.56741751271782, + "learning_rate": 2.3426732952781807e-06, + "loss": 0.12197265625, + "step": 131915 + }, + { + "epoch": 1.1406732323974718, + "grad_norm": 36.19211419290911, + "learning_rate": 2.3424745142561954e-06, + "loss": 0.10318145751953126, + "step": 131920 + }, + { + "epoch": 1.140716465919015, + "grad_norm": 3.1402029524436426, + "learning_rate": 2.3422757362666588e-06, + "loss": 0.030594635009765624, + "step": 131925 + }, + { + "epoch": 1.1407596994405582, + "grad_norm": 0.1200923896066721, + "learning_rate": 2.3420769613104886e-06, + "loss": 0.025685691833496095, + "step": 131930 + }, + { + "epoch": 1.1408029329621014, + "grad_norm": 0.39751165236558805, + "learning_rate": 2.341878189388601e-06, + "loss": 0.014580535888671874, + "step": 131935 + }, + { + "epoch": 1.1408461664836447, + "grad_norm": 4.566354664614483, + "learning_rate": 2.3416794205019135e-06, + "loss": 0.02684783935546875, + "step": 131940 + }, + { + "epoch": 1.1408894000051881, + "grad_norm": 0.1457575962438163, + "learning_rate": 2.341480654651341e-06, + "loss": 0.025231170654296874, + "step": 131945 + }, + { + "epoch": 1.1409326335267314, + "grad_norm": 1.9250929533536296, + "learning_rate": 2.341281891837803e-06, + "loss": 0.2157867431640625, + "step": 131950 + }, + { + "epoch": 1.1409758670482746, + "grad_norm": 7.503154058823593, + "learning_rate": 2.3410831320622143e-06, + "loss": 0.13426704406738282, + "step": 131955 + }, + { + "epoch": 1.1410191005698178, + "grad_norm": 1.1162326721188116, + "learning_rate": 2.3408843753254924e-06, + "loss": 0.2690135955810547, + "step": 131960 + }, + { + "epoch": 1.141062334091361, + "grad_norm": 0.5310346970585036, + "learning_rate": 2.340685621628553e-06, + "loss": 0.06921806335449218, + "step": 131965 + }, + { + "epoch": 1.1411055676129043, + "grad_norm": 3.4045270074563296, + "learning_rate": 2.3404868709723132e-06, + "loss": 0.09123458862304687, + "step": 131970 + }, + { + "epoch": 1.1411488011344475, + "grad_norm": 0.16683748845872146, + "learning_rate": 2.3402881233576907e-06, + "loss": 0.03268909454345703, + "step": 131975 + }, + { + "epoch": 1.141192034655991, + "grad_norm": 4.517562970703091, + "learning_rate": 2.340089378785599e-06, + "loss": 0.2277740478515625, + "step": 131980 + }, + { + "epoch": 1.1412352681775342, + "grad_norm": 1.9341026625382698, + "learning_rate": 2.339890637256958e-06, + "loss": 0.02779083251953125, + "step": 131985 + }, + { + "epoch": 1.1412785016990774, + "grad_norm": 3.1255589230341934, + "learning_rate": 2.339691898772683e-06, + "loss": 0.05634727478027344, + "step": 131990 + }, + { + "epoch": 1.1413217352206206, + "grad_norm": 13.932944926606096, + "learning_rate": 2.339493163333691e-06, + "loss": 0.06933517456054687, + "step": 131995 + }, + { + "epoch": 1.1413649687421639, + "grad_norm": 24.686353156010693, + "learning_rate": 2.339294430940898e-06, + "loss": 0.17027130126953124, + "step": 132000 + }, + { + "epoch": 1.141408202263707, + "grad_norm": 2.6686172505726193, + "learning_rate": 2.339095701595221e-06, + "loss": 0.023446273803710938, + "step": 132005 + }, + { + "epoch": 1.1414514357852505, + "grad_norm": 13.941004813076699, + "learning_rate": 2.3388969752975744e-06, + "loss": 0.08509674072265624, + "step": 132010 + }, + { + "epoch": 1.1414946693067938, + "grad_norm": 1.248552719062131, + "learning_rate": 2.3386982520488786e-06, + "loss": 0.1733428955078125, + "step": 132015 + }, + { + "epoch": 1.141537902828337, + "grad_norm": 17.759740246830255, + "learning_rate": 2.338499531850047e-06, + "loss": 0.08356399536132812, + "step": 132020 + }, + { + "epoch": 1.1415811363498802, + "grad_norm": 0.4069768594844664, + "learning_rate": 2.3383008147019975e-06, + "loss": 0.3232166290283203, + "step": 132025 + }, + { + "epoch": 1.1416243698714235, + "grad_norm": 0.126489743157777, + "learning_rate": 2.3381021006056465e-06, + "loss": 0.14316253662109374, + "step": 132030 + }, + { + "epoch": 1.1416676033929667, + "grad_norm": 0.8427148021431667, + "learning_rate": 2.33790338956191e-06, + "loss": 0.28915252685546877, + "step": 132035 + }, + { + "epoch": 1.14171083691451, + "grad_norm": 1.066952807049968, + "learning_rate": 2.337704681571703e-06, + "loss": 0.05024986267089844, + "step": 132040 + }, + { + "epoch": 1.1417540704360534, + "grad_norm": 1.608976127353485, + "learning_rate": 2.3375059766359453e-06, + "loss": 0.06852874755859376, + "step": 132045 + }, + { + "epoch": 1.1417973039575966, + "grad_norm": 9.296666846828584, + "learning_rate": 2.3373072747555504e-06, + "loss": 0.054248809814453125, + "step": 132050 + }, + { + "epoch": 1.1418405374791398, + "grad_norm": 3.0992401076352487, + "learning_rate": 2.3371085759314367e-06, + "loss": 0.019681644439697266, + "step": 132055 + }, + { + "epoch": 1.141883771000683, + "grad_norm": 2.905652781462287, + "learning_rate": 2.3369098801645195e-06, + "loss": 0.11625518798828124, + "step": 132060 + }, + { + "epoch": 1.1419270045222263, + "grad_norm": 2.1131958266942146, + "learning_rate": 2.3367111874557156e-06, + "loss": 0.020537567138671876, + "step": 132065 + }, + { + "epoch": 1.1419702380437697, + "grad_norm": 11.093328452335191, + "learning_rate": 2.3365124978059403e-06, + "loss": 0.08855476379394531, + "step": 132070 + }, + { + "epoch": 1.142013471565313, + "grad_norm": 0.12254056215246033, + "learning_rate": 2.3363138112161105e-06, + "loss": 0.03002338409423828, + "step": 132075 + }, + { + "epoch": 1.1420567050868562, + "grad_norm": 9.150073306982133, + "learning_rate": 2.336115127687143e-06, + "loss": 0.092535400390625, + "step": 132080 + }, + { + "epoch": 1.1420999386083994, + "grad_norm": 2.081887961272878, + "learning_rate": 2.335916447219955e-06, + "loss": 0.04821205139160156, + "step": 132085 + }, + { + "epoch": 1.1421431721299427, + "grad_norm": 0.12989320645639552, + "learning_rate": 2.335717769815461e-06, + "loss": 0.11949596405029297, + "step": 132090 + }, + { + "epoch": 1.1421864056514859, + "grad_norm": 0.6921569921475984, + "learning_rate": 2.335519095474578e-06, + "loss": 0.046800994873046876, + "step": 132095 + }, + { + "epoch": 1.142229639173029, + "grad_norm": 0.2955059423416455, + "learning_rate": 2.335320424198222e-06, + "loss": 0.0184326171875, + "step": 132100 + }, + { + "epoch": 1.1422728726945726, + "grad_norm": 7.329276363369795, + "learning_rate": 2.335121755987309e-06, + "loss": 0.03849868774414063, + "step": 132105 + }, + { + "epoch": 1.1423161062161158, + "grad_norm": 29.061966845474913, + "learning_rate": 2.334923090842756e-06, + "loss": 0.06470489501953125, + "step": 132110 + }, + { + "epoch": 1.142359339737659, + "grad_norm": 41.93888815551783, + "learning_rate": 2.3347244287654793e-06, + "loss": 0.21863288879394532, + "step": 132115 + }, + { + "epoch": 1.1424025732592022, + "grad_norm": 13.482363033672932, + "learning_rate": 2.334525769756395e-06, + "loss": 0.11090202331542968, + "step": 132120 + }, + { + "epoch": 1.1424458067807455, + "grad_norm": 1.8284990392569402, + "learning_rate": 2.334327113816419e-06, + "loss": 0.02442779541015625, + "step": 132125 + }, + { + "epoch": 1.1424890403022887, + "grad_norm": 28.845968197199838, + "learning_rate": 2.334128460946467e-06, + "loss": 0.17679939270019532, + "step": 132130 + }, + { + "epoch": 1.1425322738238322, + "grad_norm": 18.148256958018855, + "learning_rate": 2.3339298111474554e-06, + "loss": 0.15983047485351562, + "step": 132135 + }, + { + "epoch": 1.1425755073453754, + "grad_norm": 23.86035135698442, + "learning_rate": 2.333731164420301e-06, + "loss": 0.11599788665771485, + "step": 132140 + }, + { + "epoch": 1.1426187408669186, + "grad_norm": 15.70876667143113, + "learning_rate": 2.33353252076592e-06, + "loss": 0.017132568359375, + "step": 132145 + }, + { + "epoch": 1.1426619743884618, + "grad_norm": 16.918028696645706, + "learning_rate": 2.333333880185228e-06, + "loss": 0.1183868408203125, + "step": 132150 + }, + { + "epoch": 1.142705207910005, + "grad_norm": 1.983600132174938, + "learning_rate": 2.3331352426791416e-06, + "loss": 0.0308502197265625, + "step": 132155 + }, + { + "epoch": 1.1427484414315483, + "grad_norm": 0.624553903039554, + "learning_rate": 2.3329366082485753e-06, + "loss": 0.11341285705566406, + "step": 132160 + }, + { + "epoch": 1.1427916749530915, + "grad_norm": 2.5746605603893022, + "learning_rate": 2.332737976894448e-06, + "loss": 0.034197998046875, + "step": 132165 + }, + { + "epoch": 1.142834908474635, + "grad_norm": 10.12861015395475, + "learning_rate": 2.3325393486176722e-06, + "loss": 0.0680572509765625, + "step": 132170 + }, + { + "epoch": 1.1428781419961782, + "grad_norm": 10.13544366818938, + "learning_rate": 2.3323407234191675e-06, + "loss": 0.14474029541015626, + "step": 132175 + }, + { + "epoch": 1.1429213755177214, + "grad_norm": 5.784227996588449, + "learning_rate": 2.332142101299848e-06, + "loss": 0.09511871337890625, + "step": 132180 + }, + { + "epoch": 1.1429646090392647, + "grad_norm": 0.12043546892628434, + "learning_rate": 2.3319434822606304e-06, + "loss": 0.047296905517578126, + "step": 132185 + }, + { + "epoch": 1.143007842560808, + "grad_norm": 5.824248131338271, + "learning_rate": 2.33174486630243e-06, + "loss": 0.0664764404296875, + "step": 132190 + }, + { + "epoch": 1.1430510760823511, + "grad_norm": 1.6586625907037187, + "learning_rate": 2.3315462534261635e-06, + "loss": 0.10140228271484375, + "step": 132195 + }, + { + "epoch": 1.1430943096038946, + "grad_norm": 0.9324059742098827, + "learning_rate": 2.3313476436327456e-06, + "loss": 0.0563812255859375, + "step": 132200 + }, + { + "epoch": 1.1431375431254378, + "grad_norm": 0.611741222114097, + "learning_rate": 2.3311490369230944e-06, + "loss": 0.12942962646484374, + "step": 132205 + }, + { + "epoch": 1.143180776646981, + "grad_norm": 37.549783669218215, + "learning_rate": 2.330950433298125e-06, + "loss": 0.215155029296875, + "step": 132210 + }, + { + "epoch": 1.1432240101685243, + "grad_norm": 2.6678999827720085, + "learning_rate": 2.330751832758752e-06, + "loss": 0.16790618896484374, + "step": 132215 + }, + { + "epoch": 1.1432672436900675, + "grad_norm": 0.81984222661096, + "learning_rate": 2.3305532353058933e-06, + "loss": 0.08229217529296876, + "step": 132220 + }, + { + "epoch": 1.1433104772116107, + "grad_norm": 3.5775742116283444, + "learning_rate": 2.330354640940464e-06, + "loss": 0.03140411376953125, + "step": 132225 + }, + { + "epoch": 1.143353710733154, + "grad_norm": 8.385975170536144, + "learning_rate": 2.33015604966338e-06, + "loss": 0.03816070556640625, + "step": 132230 + }, + { + "epoch": 1.1433969442546974, + "grad_norm": 4.868035997738729, + "learning_rate": 2.3299574614755555e-06, + "loss": 0.04813079833984375, + "step": 132235 + }, + { + "epoch": 1.1434401777762406, + "grad_norm": 0.7536921664021295, + "learning_rate": 2.3297588763779097e-06, + "loss": 0.1616352081298828, + "step": 132240 + }, + { + "epoch": 1.1434834112977839, + "grad_norm": 0.8889506999833389, + "learning_rate": 2.3295602943713556e-06, + "loss": 0.0076122283935546875, + "step": 132245 + }, + { + "epoch": 1.143526644819327, + "grad_norm": 1.1154586229098267, + "learning_rate": 2.3293617154568115e-06, + "loss": 0.05096893310546875, + "step": 132250 + }, + { + "epoch": 1.1435698783408703, + "grad_norm": 0.9342151387886091, + "learning_rate": 2.329163139635191e-06, + "loss": 0.08322334289550781, + "step": 132255 + }, + { + "epoch": 1.1436131118624135, + "grad_norm": 6.185518026660055, + "learning_rate": 2.3289645669074116e-06, + "loss": 0.13446884155273436, + "step": 132260 + }, + { + "epoch": 1.143656345383957, + "grad_norm": 5.860326051176558, + "learning_rate": 2.328765997274387e-06, + "loss": 0.1314085006713867, + "step": 132265 + }, + { + "epoch": 1.1436995789055002, + "grad_norm": 3.8256659435346627, + "learning_rate": 2.328567430737035e-06, + "loss": 0.26264152526855467, + "step": 132270 + }, + { + "epoch": 1.1437428124270435, + "grad_norm": 1.3966809843695684, + "learning_rate": 2.3283688672962704e-06, + "loss": 0.09163856506347656, + "step": 132275 + }, + { + "epoch": 1.1437860459485867, + "grad_norm": 0.14508211415482533, + "learning_rate": 2.32817030695301e-06, + "loss": 0.06276016235351563, + "step": 132280 + }, + { + "epoch": 1.14382927947013, + "grad_norm": 14.178843099564235, + "learning_rate": 2.3279717497081686e-06, + "loss": 0.09682884216308593, + "step": 132285 + }, + { + "epoch": 1.1438725129916731, + "grad_norm": 7.251196699528755, + "learning_rate": 2.3277731955626622e-06, + "loss": 0.032916259765625, + "step": 132290 + }, + { + "epoch": 1.1439157465132164, + "grad_norm": 0.4046497966397983, + "learning_rate": 2.327574644517405e-06, + "loss": 0.053165435791015625, + "step": 132295 + }, + { + "epoch": 1.1439589800347598, + "grad_norm": 2.8450765144029857, + "learning_rate": 2.3273760965733156e-06, + "loss": 0.014209556579589843, + "step": 132300 + }, + { + "epoch": 1.144002213556303, + "grad_norm": 2.8659083538124204, + "learning_rate": 2.3271775517313074e-06, + "loss": 0.08796234130859375, + "step": 132305 + }, + { + "epoch": 1.1440454470778463, + "grad_norm": 3.069090817509363, + "learning_rate": 2.3269790099922977e-06, + "loss": 0.10037994384765625, + "step": 132310 + }, + { + "epoch": 1.1440886805993895, + "grad_norm": 3.363273485588189, + "learning_rate": 2.326780471357201e-06, + "loss": 0.019079208374023438, + "step": 132315 + }, + { + "epoch": 1.1441319141209327, + "grad_norm": 7.359561602040257, + "learning_rate": 2.3265819358269334e-06, + "loss": 0.13347396850585938, + "step": 132320 + }, + { + "epoch": 1.1441751476424762, + "grad_norm": 0.14407561866932378, + "learning_rate": 2.32638340340241e-06, + "loss": 0.2113971710205078, + "step": 132325 + }, + { + "epoch": 1.1442183811640194, + "grad_norm": 10.364636469343179, + "learning_rate": 2.326184874084547e-06, + "loss": 0.3213104248046875, + "step": 132330 + }, + { + "epoch": 1.1442616146855626, + "grad_norm": 4.088260555568375, + "learning_rate": 2.325986347874259e-06, + "loss": 0.0430450439453125, + "step": 132335 + }, + { + "epoch": 1.1443048482071059, + "grad_norm": 0.6160359528937627, + "learning_rate": 2.3257878247724637e-06, + "loss": 0.027095794677734375, + "step": 132340 + }, + { + "epoch": 1.144348081728649, + "grad_norm": 17.661240140430085, + "learning_rate": 2.3255893047800754e-06, + "loss": 0.057000350952148435, + "step": 132345 + }, + { + "epoch": 1.1443913152501923, + "grad_norm": 2.6799318054790993, + "learning_rate": 2.325390787898009e-06, + "loss": 0.09862823486328125, + "step": 132350 + }, + { + "epoch": 1.1444345487717356, + "grad_norm": 12.101426568516693, + "learning_rate": 2.325192274127181e-06, + "loss": 0.05948486328125, + "step": 132355 + }, + { + "epoch": 1.144477782293279, + "grad_norm": 2.7998633290056123, + "learning_rate": 2.324993763468506e-06, + "loss": 0.06672897338867187, + "step": 132360 + }, + { + "epoch": 1.1445210158148222, + "grad_norm": 43.63569840612949, + "learning_rate": 2.3247952559229e-06, + "loss": 0.20640220642089843, + "step": 132365 + }, + { + "epoch": 1.1445642493363655, + "grad_norm": 0.2931240712740814, + "learning_rate": 2.32459675149128e-06, + "loss": 0.06991844177246094, + "step": 132370 + }, + { + "epoch": 1.1446074828579087, + "grad_norm": 11.94752509628735, + "learning_rate": 2.3243982501745594e-06, + "loss": 0.03588409423828125, + "step": 132375 + }, + { + "epoch": 1.144650716379452, + "grad_norm": 20.002633374032474, + "learning_rate": 2.3241997519736544e-06, + "loss": 0.06905527114868164, + "step": 132380 + }, + { + "epoch": 1.1446939499009952, + "grad_norm": 0.41607326179679566, + "learning_rate": 2.32400125688948e-06, + "loss": 0.12971343994140624, + "step": 132385 + }, + { + "epoch": 1.1447371834225386, + "grad_norm": 5.723234490789878, + "learning_rate": 2.323802764922951e-06, + "loss": 0.12187118530273437, + "step": 132390 + }, + { + "epoch": 1.1447804169440818, + "grad_norm": 0.11851904165784684, + "learning_rate": 2.323604276074986e-06, + "loss": 0.011939239501953126, + "step": 132395 + }, + { + "epoch": 1.144823650465625, + "grad_norm": 3.5959225667088117, + "learning_rate": 2.3234057903464976e-06, + "loss": 0.09557838439941406, + "step": 132400 + }, + { + "epoch": 1.1448668839871683, + "grad_norm": 27.464968396766793, + "learning_rate": 2.3232073077384022e-06, + "loss": 0.12448978424072266, + "step": 132405 + }, + { + "epoch": 1.1449101175087115, + "grad_norm": 0.75467022355832, + "learning_rate": 2.3230088282516152e-06, + "loss": 0.029706573486328124, + "step": 132410 + }, + { + "epoch": 1.1449533510302548, + "grad_norm": 2.0510004097488754, + "learning_rate": 2.3228103518870506e-06, + "loss": 0.14856338500976562, + "step": 132415 + }, + { + "epoch": 1.144996584551798, + "grad_norm": 2.3869914632297493, + "learning_rate": 2.3226118786456256e-06, + "loss": 0.014017009735107422, + "step": 132420 + }, + { + "epoch": 1.1450398180733414, + "grad_norm": 3.2467159397134786, + "learning_rate": 2.3224134085282533e-06, + "loss": 0.03288726806640625, + "step": 132425 + }, + { + "epoch": 1.1450830515948847, + "grad_norm": 0.25197977056546234, + "learning_rate": 2.322214941535852e-06, + "loss": 0.016278076171875, + "step": 132430 + }, + { + "epoch": 1.1451262851164279, + "grad_norm": 5.319266440820934, + "learning_rate": 2.322016477669335e-06, + "loss": 0.03200836181640625, + "step": 132435 + }, + { + "epoch": 1.1451695186379711, + "grad_norm": 0.23795189134701053, + "learning_rate": 2.3218180169296183e-06, + "loss": 0.034942626953125, + "step": 132440 + }, + { + "epoch": 1.1452127521595143, + "grad_norm": 3.285421398366531, + "learning_rate": 2.321619559317617e-06, + "loss": 0.015667724609375, + "step": 132445 + }, + { + "epoch": 1.1452559856810576, + "grad_norm": 12.409938468067738, + "learning_rate": 2.3214211048342464e-06, + "loss": 0.14974288940429686, + "step": 132450 + }, + { + "epoch": 1.145299219202601, + "grad_norm": 2.2868995280307423, + "learning_rate": 2.321222653480421e-06, + "loss": 0.066192626953125, + "step": 132455 + }, + { + "epoch": 1.1453424527241443, + "grad_norm": 0.7276421450641427, + "learning_rate": 2.3210242052570575e-06, + "loss": 0.18455047607421876, + "step": 132460 + }, + { + "epoch": 1.1453856862456875, + "grad_norm": 0.4369066146699882, + "learning_rate": 2.3208257601650703e-06, + "loss": 0.07032318115234375, + "step": 132465 + }, + { + "epoch": 1.1454289197672307, + "grad_norm": 0.3944566459375056, + "learning_rate": 2.320627318205374e-06, + "loss": 0.03746013641357422, + "step": 132470 + }, + { + "epoch": 1.145472153288774, + "grad_norm": 1.3109608766611438, + "learning_rate": 2.3204288793788856e-06, + "loss": 0.11169471740722656, + "step": 132475 + }, + { + "epoch": 1.1455153868103172, + "grad_norm": 0.30789045867106307, + "learning_rate": 2.3202304436865188e-06, + "loss": 0.025113964080810548, + "step": 132480 + }, + { + "epoch": 1.1455586203318604, + "grad_norm": 3.146261400530783, + "learning_rate": 2.3200320111291878e-06, + "loss": 0.027811431884765626, + "step": 132485 + }, + { + "epoch": 1.1456018538534039, + "grad_norm": 9.897685566497497, + "learning_rate": 2.3198335817078108e-06, + "loss": 0.12119903564453124, + "step": 132490 + }, + { + "epoch": 1.145645087374947, + "grad_norm": 3.7313515930140033, + "learning_rate": 2.319635155423301e-06, + "loss": 0.007334136962890625, + "step": 132495 + }, + { + "epoch": 1.1456883208964903, + "grad_norm": 36.787850437530665, + "learning_rate": 2.319436732276573e-06, + "loss": 0.13136520385742187, + "step": 132500 + }, + { + "epoch": 1.1457315544180335, + "grad_norm": 21.290720210069168, + "learning_rate": 2.319238312268543e-06, + "loss": 0.10370101928710937, + "step": 132505 + }, + { + "epoch": 1.1457747879395768, + "grad_norm": 0.35502112169382755, + "learning_rate": 2.319039895400126e-06, + "loss": 0.049560546875, + "step": 132510 + }, + { + "epoch": 1.1458180214611202, + "grad_norm": 0.17087819892315464, + "learning_rate": 2.318841481672237e-06, + "loss": 0.027793121337890626, + "step": 132515 + }, + { + "epoch": 1.1458612549826634, + "grad_norm": 6.42785483577708, + "learning_rate": 2.3186430710857894e-06, + "loss": 0.043098831176757814, + "step": 132520 + }, + { + "epoch": 1.1459044885042067, + "grad_norm": 15.572916582381676, + "learning_rate": 2.318444663641701e-06, + "loss": 0.036304473876953125, + "step": 132525 + }, + { + "epoch": 1.14594772202575, + "grad_norm": 5.348142791278029, + "learning_rate": 2.318246259340885e-06, + "loss": 0.16366958618164062, + "step": 132530 + }, + { + "epoch": 1.1459909555472931, + "grad_norm": 0.7112357732970624, + "learning_rate": 2.3180478581842583e-06, + "loss": 0.14745521545410156, + "step": 132535 + }, + { + "epoch": 1.1460341890688364, + "grad_norm": 3.8114732104715836, + "learning_rate": 2.3178494601727336e-06, + "loss": 0.012875175476074219, + "step": 132540 + }, + { + "epoch": 1.1460774225903796, + "grad_norm": 39.50264008161394, + "learning_rate": 2.3176510653072274e-06, + "loss": 0.41714019775390626, + "step": 132545 + }, + { + "epoch": 1.1461206561119228, + "grad_norm": 0.22366348893207724, + "learning_rate": 2.317452673588653e-06, + "loss": 0.01565093994140625, + "step": 132550 + }, + { + "epoch": 1.1461638896334663, + "grad_norm": 5.137546652275699, + "learning_rate": 2.3172542850179278e-06, + "loss": 0.12431869506835938, + "step": 132555 + }, + { + "epoch": 1.1462071231550095, + "grad_norm": 0.5856687204979139, + "learning_rate": 2.3170558995959648e-06, + "loss": 0.06631202697753906, + "step": 132560 + }, + { + "epoch": 1.1462503566765527, + "grad_norm": 12.495007491366186, + "learning_rate": 2.3168575173236802e-06, + "loss": 0.02622222900390625, + "step": 132565 + }, + { + "epoch": 1.146293590198096, + "grad_norm": 0.2747640800669744, + "learning_rate": 2.3166591382019885e-06, + "loss": 0.04324531555175781, + "step": 132570 + }, + { + "epoch": 1.1463368237196392, + "grad_norm": 2.4436807374514844, + "learning_rate": 2.3164607622318045e-06, + "loss": 0.014983749389648438, + "step": 132575 + }, + { + "epoch": 1.1463800572411826, + "grad_norm": 5.142809030436608, + "learning_rate": 2.316262389414042e-06, + "loss": 0.05091876983642578, + "step": 132580 + }, + { + "epoch": 1.1464232907627259, + "grad_norm": 1.0707511568753576, + "learning_rate": 2.3160640197496176e-06, + "loss": 0.2269451141357422, + "step": 132585 + }, + { + "epoch": 1.146466524284269, + "grad_norm": 1.5671171428998791, + "learning_rate": 2.315865653239445e-06, + "loss": 0.0095550537109375, + "step": 132590 + }, + { + "epoch": 1.1465097578058123, + "grad_norm": 4.921127364463691, + "learning_rate": 2.315667289884441e-06, + "loss": 0.0316981315612793, + "step": 132595 + }, + { + "epoch": 1.1465529913273556, + "grad_norm": 0.4483669365529113, + "learning_rate": 2.315468929685518e-06, + "loss": 0.06520843505859375, + "step": 132600 + }, + { + "epoch": 1.1465962248488988, + "grad_norm": 1.49823408800781, + "learning_rate": 2.315270572643592e-06, + "loss": 0.07666435241699218, + "step": 132605 + }, + { + "epoch": 1.146639458370442, + "grad_norm": 7.673605569623957, + "learning_rate": 2.3150722187595775e-06, + "loss": 0.042400360107421875, + "step": 132610 + }, + { + "epoch": 1.1466826918919855, + "grad_norm": 0.9097280937100114, + "learning_rate": 2.314873868034388e-06, + "loss": 0.029929351806640626, + "step": 132615 + }, + { + "epoch": 1.1467259254135287, + "grad_norm": 16.11679142231975, + "learning_rate": 2.3146755204689413e-06, + "loss": 0.18683090209960937, + "step": 132620 + }, + { + "epoch": 1.146769158935072, + "grad_norm": 2.254398112826822, + "learning_rate": 2.314477176064151e-06, + "loss": 0.07907943725585938, + "step": 132625 + }, + { + "epoch": 1.1468123924566151, + "grad_norm": 0.25844475170455394, + "learning_rate": 2.314278834820931e-06, + "loss": 0.02177085876464844, + "step": 132630 + }, + { + "epoch": 1.1468556259781584, + "grad_norm": 1.6319418840696347, + "learning_rate": 2.3140804967401967e-06, + "loss": 0.007771492004394531, + "step": 132635 + }, + { + "epoch": 1.1468988594997016, + "grad_norm": 1.0568664664468574, + "learning_rate": 2.313882161822861e-06, + "loss": 0.0914642333984375, + "step": 132640 + }, + { + "epoch": 1.146942093021245, + "grad_norm": 13.114692359605158, + "learning_rate": 2.3136838300698404e-06, + "loss": 0.033490467071533206, + "step": 132645 + }, + { + "epoch": 1.1469853265427883, + "grad_norm": 0.9239563305292574, + "learning_rate": 2.3134855014820505e-06, + "loss": 0.022174072265625, + "step": 132650 + }, + { + "epoch": 1.1470285600643315, + "grad_norm": 19.36278666809074, + "learning_rate": 2.3132871760604045e-06, + "loss": 0.07418231964111328, + "step": 132655 + }, + { + "epoch": 1.1470717935858747, + "grad_norm": 0.5664702503471727, + "learning_rate": 2.3130888538058176e-06, + "loss": 0.07680015563964844, + "step": 132660 + }, + { + "epoch": 1.147115027107418, + "grad_norm": 0.33880633522966896, + "learning_rate": 2.3128905347192035e-06, + "loss": 0.25200881958007815, + "step": 132665 + }, + { + "epoch": 1.1471582606289612, + "grad_norm": 0.2221141496249116, + "learning_rate": 2.3126922188014785e-06, + "loss": 0.026847076416015626, + "step": 132670 + }, + { + "epoch": 1.1472014941505044, + "grad_norm": 7.883159418042088, + "learning_rate": 2.3124939060535558e-06, + "loss": 0.07073135375976562, + "step": 132675 + }, + { + "epoch": 1.1472447276720479, + "grad_norm": 6.21789820582131, + "learning_rate": 2.3122955964763493e-06, + "loss": 0.05676822662353516, + "step": 132680 + }, + { + "epoch": 1.147287961193591, + "grad_norm": 0.82924644837988, + "learning_rate": 2.312097290070776e-06, + "loss": 0.0106781005859375, + "step": 132685 + }, + { + "epoch": 1.1473311947151343, + "grad_norm": 1.2221045006127904, + "learning_rate": 2.3118989868377492e-06, + "loss": 0.0118133544921875, + "step": 132690 + }, + { + "epoch": 1.1473744282366776, + "grad_norm": 3.0996075870668123, + "learning_rate": 2.311700686778183e-06, + "loss": 0.01837615966796875, + "step": 132695 + }, + { + "epoch": 1.1474176617582208, + "grad_norm": 0.5395752908172257, + "learning_rate": 2.311502389892993e-06, + "loss": 0.0293182373046875, + "step": 132700 + }, + { + "epoch": 1.147460895279764, + "grad_norm": 2.291980293252784, + "learning_rate": 2.311304096183093e-06, + "loss": 0.2191162109375, + "step": 132705 + }, + { + "epoch": 1.1475041288013075, + "grad_norm": 0.5963097645515264, + "learning_rate": 2.3111058056493964e-06, + "loss": 0.17423629760742188, + "step": 132710 + }, + { + "epoch": 1.1475473623228507, + "grad_norm": 1.9757165226643643, + "learning_rate": 2.3109075182928203e-06, + "loss": 0.03837795257568359, + "step": 132715 + }, + { + "epoch": 1.147590595844394, + "grad_norm": 1.0549508368605118, + "learning_rate": 2.310709234114278e-06, + "loss": 0.09235076904296875, + "step": 132720 + }, + { + "epoch": 1.1476338293659372, + "grad_norm": 9.106328398958997, + "learning_rate": 2.3105109531146827e-06, + "loss": 0.05371437072753906, + "step": 132725 + }, + { + "epoch": 1.1476770628874804, + "grad_norm": 6.998564723612894, + "learning_rate": 2.310312675294951e-06, + "loss": 0.31464385986328125, + "step": 132730 + }, + { + "epoch": 1.1477202964090236, + "grad_norm": 3.387975887998858, + "learning_rate": 2.310114400655996e-06, + "loss": 0.02510986328125, + "step": 132735 + }, + { + "epoch": 1.1477635299305669, + "grad_norm": 3.84023160008755, + "learning_rate": 2.3099161291987314e-06, + "loss": 0.0731414794921875, + "step": 132740 + }, + { + "epoch": 1.1478067634521103, + "grad_norm": 40.908287024368754, + "learning_rate": 2.309717860924074e-06, + "loss": 0.04113960266113281, + "step": 132745 + }, + { + "epoch": 1.1478499969736535, + "grad_norm": 2.5703468773739027, + "learning_rate": 2.3095195958329363e-06, + "loss": 0.03082275390625, + "step": 132750 + }, + { + "epoch": 1.1478932304951968, + "grad_norm": 0.4881716037036255, + "learning_rate": 2.3093213339262333e-06, + "loss": 0.015995025634765625, + "step": 132755 + }, + { + "epoch": 1.14793646401674, + "grad_norm": 3.6146468963029275, + "learning_rate": 2.3091230752048795e-06, + "loss": 0.04105033874511719, + "step": 132760 + }, + { + "epoch": 1.1479796975382832, + "grad_norm": 22.00041105149109, + "learning_rate": 2.308924819669789e-06, + "loss": 0.13140535354614258, + "step": 132765 + }, + { + "epoch": 1.1480229310598267, + "grad_norm": 9.32110145990334, + "learning_rate": 2.3087265673218763e-06, + "loss": 0.019817352294921875, + "step": 132770 + }, + { + "epoch": 1.14806616458137, + "grad_norm": 3.731961392457858, + "learning_rate": 2.3085283181620544e-06, + "loss": 0.0640472412109375, + "step": 132775 + }, + { + "epoch": 1.1481093981029131, + "grad_norm": 6.670856929328023, + "learning_rate": 2.30833007219124e-06, + "loss": 0.049854278564453125, + "step": 132780 + }, + { + "epoch": 1.1481526316244564, + "grad_norm": 16.314205565918893, + "learning_rate": 2.3081318294103452e-06, + "loss": 0.11662063598632813, + "step": 132785 + }, + { + "epoch": 1.1481958651459996, + "grad_norm": 0.1483416825192271, + "learning_rate": 2.3079335898202863e-06, + "loss": 0.020154571533203124, + "step": 132790 + }, + { + "epoch": 1.1482390986675428, + "grad_norm": 1.2370774116024272, + "learning_rate": 2.3077353534219767e-06, + "loss": 0.021487045288085937, + "step": 132795 + }, + { + "epoch": 1.148282332189086, + "grad_norm": 9.376823093343901, + "learning_rate": 2.30753712021633e-06, + "loss": 0.24368972778320314, + "step": 132800 + }, + { + "epoch": 1.1483255657106293, + "grad_norm": 6.176078025637095, + "learning_rate": 2.30733889020426e-06, + "loss": 0.3185894012451172, + "step": 132805 + }, + { + "epoch": 1.1483687992321727, + "grad_norm": 56.85700369926598, + "learning_rate": 2.307140663386683e-06, + "loss": 0.36419525146484377, + "step": 132810 + }, + { + "epoch": 1.148412032753716, + "grad_norm": 1.4558853138047851, + "learning_rate": 2.306942439764511e-06, + "loss": 0.37977752685546873, + "step": 132815 + }, + { + "epoch": 1.1484552662752592, + "grad_norm": 3.7979874526210597, + "learning_rate": 2.3067442193386606e-06, + "loss": 0.10733642578125, + "step": 132820 + }, + { + "epoch": 1.1484984997968024, + "grad_norm": 6.9166943632907065, + "learning_rate": 2.3065460021100445e-06, + "loss": 0.10954971313476562, + "step": 132825 + }, + { + "epoch": 1.1485417333183456, + "grad_norm": 1.0782454727675737, + "learning_rate": 2.306347788079576e-06, + "loss": 0.011186981201171875, + "step": 132830 + }, + { + "epoch": 1.148584966839889, + "grad_norm": 1.3369862697734822, + "learning_rate": 2.3061495772481705e-06, + "loss": 0.023215484619140626, + "step": 132835 + }, + { + "epoch": 1.1486282003614323, + "grad_norm": 12.196293908993074, + "learning_rate": 2.3059513696167413e-06, + "loss": 0.08823432922363281, + "step": 132840 + }, + { + "epoch": 1.1486714338829755, + "grad_norm": 14.328123474576039, + "learning_rate": 2.305753165186204e-06, + "loss": 0.06780471801757812, + "step": 132845 + }, + { + "epoch": 1.1487146674045188, + "grad_norm": 1.8355828821067697, + "learning_rate": 2.305554963957472e-06, + "loss": 0.08373756408691406, + "step": 132850 + }, + { + "epoch": 1.148757900926062, + "grad_norm": 5.882972989824316, + "learning_rate": 2.3053567659314586e-06, + "loss": 0.29782752990722655, + "step": 132855 + }, + { + "epoch": 1.1488011344476052, + "grad_norm": 6.839868184910067, + "learning_rate": 2.305158571109078e-06, + "loss": 0.07361640930175781, + "step": 132860 + }, + { + "epoch": 1.1488443679691485, + "grad_norm": 25.99825566482242, + "learning_rate": 2.3049603794912456e-06, + "loss": 0.037713623046875, + "step": 132865 + }, + { + "epoch": 1.148887601490692, + "grad_norm": 4.7282566645085655, + "learning_rate": 2.304762191078873e-06, + "loss": 0.025835418701171876, + "step": 132870 + }, + { + "epoch": 1.1489308350122351, + "grad_norm": 9.010734102411245, + "learning_rate": 2.304564005872877e-06, + "loss": 0.1348602294921875, + "step": 132875 + }, + { + "epoch": 1.1489740685337784, + "grad_norm": 0.7082926235773365, + "learning_rate": 2.3043658238741703e-06, + "loss": 0.007019233703613281, + "step": 132880 + }, + { + "epoch": 1.1490173020553216, + "grad_norm": 1.3602006842382455, + "learning_rate": 2.304167645083667e-06, + "loss": 0.7966176986694335, + "step": 132885 + }, + { + "epoch": 1.1490605355768648, + "grad_norm": 0.6520731839490952, + "learning_rate": 2.3039694695022808e-06, + "loss": 0.13927803039550782, + "step": 132890 + }, + { + "epoch": 1.149103769098408, + "grad_norm": 3.2116147392140153, + "learning_rate": 2.303771297130926e-06, + "loss": 0.031719970703125, + "step": 132895 + }, + { + "epoch": 1.1491470026199515, + "grad_norm": 2.7530520544603445, + "learning_rate": 2.3035731279705154e-06, + "loss": 0.024140357971191406, + "step": 132900 + }, + { + "epoch": 1.1491902361414947, + "grad_norm": 0.7573046338921291, + "learning_rate": 2.303374962021965e-06, + "loss": 0.129693603515625, + "step": 132905 + }, + { + "epoch": 1.149233469663038, + "grad_norm": 39.71652415657658, + "learning_rate": 2.3031767992861883e-06, + "loss": 0.1303731918334961, + "step": 132910 + }, + { + "epoch": 1.1492767031845812, + "grad_norm": 1.0988382419871217, + "learning_rate": 2.302978639764098e-06, + "loss": 0.08121795654296875, + "step": 132915 + }, + { + "epoch": 1.1493199367061244, + "grad_norm": 0.15332150181803617, + "learning_rate": 2.3027804834566086e-06, + "loss": 0.013747787475585938, + "step": 132920 + }, + { + "epoch": 1.1493631702276677, + "grad_norm": 0.6557806206634396, + "learning_rate": 2.3025823303646344e-06, + "loss": 0.028410720825195312, + "step": 132925 + }, + { + "epoch": 1.1494064037492109, + "grad_norm": 2.821112210991767, + "learning_rate": 2.3023841804890887e-06, + "loss": 0.061865234375, + "step": 132930 + }, + { + "epoch": 1.1494496372707543, + "grad_norm": 1.2237106361327474, + "learning_rate": 2.3021860338308845e-06, + "loss": 0.05271568298339844, + "step": 132935 + }, + { + "epoch": 1.1494928707922976, + "grad_norm": 9.190207059085465, + "learning_rate": 2.3019878903909375e-06, + "loss": 0.09175949096679688, + "step": 132940 + }, + { + "epoch": 1.1495361043138408, + "grad_norm": 3.7436132736800363, + "learning_rate": 2.3017897501701613e-06, + "loss": 0.0696197509765625, + "step": 132945 + }, + { + "epoch": 1.149579337835384, + "grad_norm": 3.162489546480364, + "learning_rate": 2.301591613169468e-06, + "loss": 0.04222869873046875, + "step": 132950 + }, + { + "epoch": 1.1496225713569272, + "grad_norm": 0.2896850193163668, + "learning_rate": 2.3013934793897735e-06, + "loss": 0.004755783081054688, + "step": 132955 + }, + { + "epoch": 1.1496658048784705, + "grad_norm": 3.216650798396178, + "learning_rate": 2.30119534883199e-06, + "loss": 0.024567413330078124, + "step": 132960 + }, + { + "epoch": 1.149709038400014, + "grad_norm": 4.408810298556773, + "learning_rate": 2.300997221497031e-06, + "loss": 0.2860382080078125, + "step": 132965 + }, + { + "epoch": 1.1497522719215572, + "grad_norm": 3.352223419266581, + "learning_rate": 2.3007990973858117e-06, + "loss": 0.09656448364257812, + "step": 132970 + }, + { + "epoch": 1.1497955054431004, + "grad_norm": 0.3141326750007739, + "learning_rate": 2.3006009764992457e-06, + "loss": 0.026001358032226564, + "step": 132975 + }, + { + "epoch": 1.1498387389646436, + "grad_norm": 2.1207069664853995, + "learning_rate": 2.3004028588382454e-06, + "loss": 0.06267356872558594, + "step": 132980 + }, + { + "epoch": 1.1498819724861868, + "grad_norm": 2.740753196008145, + "learning_rate": 2.300204744403726e-06, + "loss": 0.0312896728515625, + "step": 132985 + }, + { + "epoch": 1.14992520600773, + "grad_norm": 5.367029818022226, + "learning_rate": 2.3000066331966004e-06, + "loss": 0.01777820587158203, + "step": 132990 + }, + { + "epoch": 1.1499684395292733, + "grad_norm": 27.105223711602946, + "learning_rate": 2.299808525217781e-06, + "loss": 0.0798797607421875, + "step": 132995 + }, + { + "epoch": 1.1500116730508168, + "grad_norm": 1.0613147784038581, + "learning_rate": 2.2996104204681844e-06, + "loss": 0.093048095703125, + "step": 133000 + }, + { + "epoch": 1.15005490657236, + "grad_norm": 1.612437616860537, + "learning_rate": 2.2994123189487226e-06, + "loss": 0.021540069580078126, + "step": 133005 + }, + { + "epoch": 1.1500981400939032, + "grad_norm": 7.315323547729403, + "learning_rate": 2.299214220660308e-06, + "loss": 0.022550201416015624, + "step": 133010 + }, + { + "epoch": 1.1501413736154464, + "grad_norm": 44.192248189185705, + "learning_rate": 2.299016125603857e-06, + "loss": 0.19102783203125, + "step": 133015 + }, + { + "epoch": 1.1501846071369897, + "grad_norm": 0.16185095704449376, + "learning_rate": 2.2988180337802812e-06, + "loss": 0.04844169616699219, + "step": 133020 + }, + { + "epoch": 1.1502278406585331, + "grad_norm": 0.5278446058498001, + "learning_rate": 2.298619945190495e-06, + "loss": 0.017859649658203126, + "step": 133025 + }, + { + "epoch": 1.1502710741800763, + "grad_norm": 0.49821790024653595, + "learning_rate": 2.29842185983541e-06, + "loss": 0.14062576293945311, + "step": 133030 + }, + { + "epoch": 1.1503143077016196, + "grad_norm": 0.5186702649406162, + "learning_rate": 2.2982237777159428e-06, + "loss": 0.08519210815429687, + "step": 133035 + }, + { + "epoch": 1.1503575412231628, + "grad_norm": 0.21657058169735743, + "learning_rate": 2.298025698833005e-06, + "loss": 0.0316619873046875, + "step": 133040 + }, + { + "epoch": 1.150400774744706, + "grad_norm": 4.3579520594723045, + "learning_rate": 2.2978276231875113e-06, + "loss": 0.05365142822265625, + "step": 133045 + }, + { + "epoch": 1.1504440082662493, + "grad_norm": 0.3108189935244079, + "learning_rate": 2.297629550780374e-06, + "loss": 0.0794769287109375, + "step": 133050 + }, + { + "epoch": 1.1504872417877925, + "grad_norm": 0.17047913230109968, + "learning_rate": 2.297431481612508e-06, + "loss": 0.4125476837158203, + "step": 133055 + }, + { + "epoch": 1.150530475309336, + "grad_norm": 0.1131392446686247, + "learning_rate": 2.297233415684824e-06, + "loss": 0.0192474365234375, + "step": 133060 + }, + { + "epoch": 1.1505737088308792, + "grad_norm": 1.1044251156184826, + "learning_rate": 2.2970353529982386e-06, + "loss": 0.024825286865234376, + "step": 133065 + }, + { + "epoch": 1.1506169423524224, + "grad_norm": 4.873686893721923, + "learning_rate": 2.296837293553664e-06, + "loss": 0.026702880859375, + "step": 133070 + }, + { + "epoch": 1.1506601758739656, + "grad_norm": 0.7931959620675313, + "learning_rate": 2.2966392373520143e-06, + "loss": 0.008977508544921875, + "step": 133075 + }, + { + "epoch": 1.1507034093955089, + "grad_norm": 2.2129237370459705, + "learning_rate": 2.296441184394202e-06, + "loss": 0.0575592041015625, + "step": 133080 + }, + { + "epoch": 1.150746642917052, + "grad_norm": 1.1108623102566395, + "learning_rate": 2.2962431346811404e-06, + "loss": 0.48976669311523435, + "step": 133085 + }, + { + "epoch": 1.1507898764385955, + "grad_norm": 13.94407250747163, + "learning_rate": 2.296045088213743e-06, + "loss": 0.14289016723632814, + "step": 133090 + }, + { + "epoch": 1.1508331099601388, + "grad_norm": 0.15444943700148242, + "learning_rate": 2.2958470449929234e-06, + "loss": 0.04130573272705078, + "step": 133095 + }, + { + "epoch": 1.150876343481682, + "grad_norm": 10.077283168326396, + "learning_rate": 2.295649005019596e-06, + "loss": 0.03555984497070312, + "step": 133100 + }, + { + "epoch": 1.1509195770032252, + "grad_norm": 0.5667760320965894, + "learning_rate": 2.2954509682946725e-06, + "loss": 0.03134307861328125, + "step": 133105 + }, + { + "epoch": 1.1509628105247685, + "grad_norm": 0.4303989151289835, + "learning_rate": 2.2952529348190673e-06, + "loss": 0.030675888061523438, + "step": 133110 + }, + { + "epoch": 1.1510060440463117, + "grad_norm": 0.8655018937988556, + "learning_rate": 2.2950549045936928e-06, + "loss": 0.008742523193359376, + "step": 133115 + }, + { + "epoch": 1.151049277567855, + "grad_norm": 0.5975164518392412, + "learning_rate": 2.2948568776194636e-06, + "loss": 0.0771453857421875, + "step": 133120 + }, + { + "epoch": 1.1510925110893984, + "grad_norm": 8.406906212636505, + "learning_rate": 2.2946588538972903e-06, + "loss": 0.05542449951171875, + "step": 133125 + }, + { + "epoch": 1.1511357446109416, + "grad_norm": 15.33004555915908, + "learning_rate": 2.2944608334280895e-06, + "loss": 0.049512481689453124, + "step": 133130 + }, + { + "epoch": 1.1511789781324848, + "grad_norm": 0.2082639782725825, + "learning_rate": 2.2942628162127734e-06, + "loss": 0.011987686157226562, + "step": 133135 + }, + { + "epoch": 1.151222211654028, + "grad_norm": 4.311394970991838, + "learning_rate": 2.2940648022522542e-06, + "loss": 0.028443145751953124, + "step": 133140 + }, + { + "epoch": 1.1512654451755713, + "grad_norm": 2.130011519692306, + "learning_rate": 2.2938667915474455e-06, + "loss": 0.025562286376953125, + "step": 133145 + }, + { + "epoch": 1.1513086786971145, + "grad_norm": 5.688784194466287, + "learning_rate": 2.2936687840992613e-06, + "loss": 0.01635589599609375, + "step": 133150 + }, + { + "epoch": 1.151351912218658, + "grad_norm": 1.7731363787761945, + "learning_rate": 2.293470779908613e-06, + "loss": 0.11892623901367187, + "step": 133155 + }, + { + "epoch": 1.1513951457402012, + "grad_norm": 7.157753337496547, + "learning_rate": 2.2932727789764162e-06, + "loss": 0.4181732177734375, + "step": 133160 + }, + { + "epoch": 1.1514383792617444, + "grad_norm": 2.0238904212514424, + "learning_rate": 2.293074781303583e-06, + "loss": 0.07853775024414063, + "step": 133165 + }, + { + "epoch": 1.1514816127832876, + "grad_norm": 0.4711923033775116, + "learning_rate": 2.292876786891026e-06, + "loss": 0.09212646484375, + "step": 133170 + }, + { + "epoch": 1.1515248463048309, + "grad_norm": 4.210003529629832, + "learning_rate": 2.292678795739659e-06, + "loss": 0.070025634765625, + "step": 133175 + }, + { + "epoch": 1.151568079826374, + "grad_norm": 20.593801687855972, + "learning_rate": 2.2924808078503953e-06, + "loss": 0.05062942504882813, + "step": 133180 + }, + { + "epoch": 1.1516113133479173, + "grad_norm": 9.114253822224725, + "learning_rate": 2.2922828232241475e-06, + "loss": 0.21260833740234375, + "step": 133185 + }, + { + "epoch": 1.1516545468694608, + "grad_norm": 8.584386648311563, + "learning_rate": 2.2920848418618272e-06, + "loss": 0.06417388916015625, + "step": 133190 + }, + { + "epoch": 1.151697780391004, + "grad_norm": 2.677384651642117, + "learning_rate": 2.2918868637643506e-06, + "loss": 0.042939376831054685, + "step": 133195 + }, + { + "epoch": 1.1517410139125472, + "grad_norm": 3.3330302936457668, + "learning_rate": 2.2916888889326294e-06, + "loss": 0.2870819091796875, + "step": 133200 + }, + { + "epoch": 1.1517842474340905, + "grad_norm": 2.1935895903498093, + "learning_rate": 2.2914909173675754e-06, + "loss": 0.06695938110351562, + "step": 133205 + }, + { + "epoch": 1.1518274809556337, + "grad_norm": 20.809967451151948, + "learning_rate": 2.2912929490701035e-06, + "loss": 0.1608795166015625, + "step": 133210 + }, + { + "epoch": 1.151870714477177, + "grad_norm": 24.69606230059597, + "learning_rate": 2.291094984041126e-06, + "loss": 0.07593307495117188, + "step": 133215 + }, + { + "epoch": 1.1519139479987204, + "grad_norm": 43.42000137769734, + "learning_rate": 2.2908970222815545e-06, + "loss": 0.127880859375, + "step": 133220 + }, + { + "epoch": 1.1519571815202636, + "grad_norm": 3.513044678009496, + "learning_rate": 2.290699063792304e-06, + "loss": 0.00974273681640625, + "step": 133225 + }, + { + "epoch": 1.1520004150418068, + "grad_norm": 8.70298574336247, + "learning_rate": 2.2905011085742876e-06, + "loss": 0.07599868774414062, + "step": 133230 + }, + { + "epoch": 1.15204364856335, + "grad_norm": 5.318609056064911, + "learning_rate": 2.290303156628416e-06, + "loss": 0.04983673095703125, + "step": 133235 + }, + { + "epoch": 1.1520868820848933, + "grad_norm": 0.11721957741534579, + "learning_rate": 2.2901052079556046e-06, + "loss": 0.03460769653320313, + "step": 133240 + }, + { + "epoch": 1.1521301156064365, + "grad_norm": 5.1250004689157445, + "learning_rate": 2.2899072625567655e-06, + "loss": 0.037885284423828124, + "step": 133245 + }, + { + "epoch": 1.1521733491279798, + "grad_norm": 0.519044420471116, + "learning_rate": 2.28970932043281e-06, + "loss": 0.042919921875, + "step": 133250 + }, + { + "epoch": 1.1522165826495232, + "grad_norm": 39.53261248707634, + "learning_rate": 2.289511381584653e-06, + "loss": 0.40022125244140627, + "step": 133255 + }, + { + "epoch": 1.1522598161710664, + "grad_norm": 14.438097840765916, + "learning_rate": 2.2893134460132066e-06, + "loss": 0.15410957336425782, + "step": 133260 + }, + { + "epoch": 1.1523030496926097, + "grad_norm": 25.58536645565639, + "learning_rate": 2.2891155137193843e-06, + "loss": 0.06847286224365234, + "step": 133265 + }, + { + "epoch": 1.1523462832141529, + "grad_norm": 4.420101646777669, + "learning_rate": 2.2889175847040987e-06, + "loss": 0.12948532104492189, + "step": 133270 + }, + { + "epoch": 1.1523895167356961, + "grad_norm": 2.7156477967436286, + "learning_rate": 2.288719658968262e-06, + "loss": 0.1974761962890625, + "step": 133275 + }, + { + "epoch": 1.1524327502572396, + "grad_norm": 0.6463583053204526, + "learning_rate": 2.2885217365127877e-06, + "loss": 0.303851318359375, + "step": 133280 + }, + { + "epoch": 1.1524759837787828, + "grad_norm": 26.028923781066332, + "learning_rate": 2.2883238173385867e-06, + "loss": 0.0677154541015625, + "step": 133285 + }, + { + "epoch": 1.152519217300326, + "grad_norm": 6.532573459467697, + "learning_rate": 2.288125901446574e-06, + "loss": 0.052945709228515624, + "step": 133290 + }, + { + "epoch": 1.1525624508218693, + "grad_norm": 5.063304135954993, + "learning_rate": 2.2879279888376627e-06, + "loss": 0.029395294189453126, + "step": 133295 + }, + { + "epoch": 1.1526056843434125, + "grad_norm": 0.2714012555012003, + "learning_rate": 2.2877300795127646e-06, + "loss": 0.003974151611328125, + "step": 133300 + }, + { + "epoch": 1.1526489178649557, + "grad_norm": 9.663130195784673, + "learning_rate": 2.287532173472792e-06, + "loss": 0.03246002197265625, + "step": 133305 + }, + { + "epoch": 1.152692151386499, + "grad_norm": 2.9442936290729977, + "learning_rate": 2.2873342707186576e-06, + "loss": 0.19929351806640624, + "step": 133310 + }, + { + "epoch": 1.1527353849080424, + "grad_norm": 6.963213289134046, + "learning_rate": 2.287136371251275e-06, + "loss": 0.027936553955078124, + "step": 133315 + }, + { + "epoch": 1.1527786184295856, + "grad_norm": 2.1055309794552635, + "learning_rate": 2.2869384750715556e-06, + "loss": 0.0787689208984375, + "step": 133320 + }, + { + "epoch": 1.1528218519511289, + "grad_norm": 4.556189896232125, + "learning_rate": 2.2867405821804143e-06, + "loss": 0.032862091064453126, + "step": 133325 + }, + { + "epoch": 1.152865085472672, + "grad_norm": 24.38004294878569, + "learning_rate": 2.2865426925787623e-06, + "loss": 0.2028076171875, + "step": 133330 + }, + { + "epoch": 1.1529083189942153, + "grad_norm": 0.8768537769166725, + "learning_rate": 2.286344806267512e-06, + "loss": 0.03723983764648438, + "step": 133335 + }, + { + "epoch": 1.1529515525157585, + "grad_norm": 4.624622116115057, + "learning_rate": 2.2861469232475764e-06, + "loss": 0.050191497802734374, + "step": 133340 + }, + { + "epoch": 1.152994786037302, + "grad_norm": 6.754840225343011, + "learning_rate": 2.2859490435198675e-06, + "loss": 0.07665328979492188, + "step": 133345 + }, + { + "epoch": 1.1530380195588452, + "grad_norm": 2.970660935013993, + "learning_rate": 2.285751167085299e-06, + "loss": 0.039347267150878905, + "step": 133350 + }, + { + "epoch": 1.1530812530803884, + "grad_norm": 3.1579040900878437, + "learning_rate": 2.285553293944783e-06, + "loss": 0.12161636352539062, + "step": 133355 + }, + { + "epoch": 1.1531244866019317, + "grad_norm": 7.344201468470769, + "learning_rate": 2.2853554240992327e-06, + "loss": 0.019295883178710938, + "step": 133360 + }, + { + "epoch": 1.153167720123475, + "grad_norm": 2.96677123238546, + "learning_rate": 2.28515755754956e-06, + "loss": 0.04650115966796875, + "step": 133365 + }, + { + "epoch": 1.1532109536450181, + "grad_norm": 1.7783802470528536, + "learning_rate": 2.284959694296676e-06, + "loss": 0.022202301025390624, + "step": 133370 + }, + { + "epoch": 1.1532541871665614, + "grad_norm": 1.3245059522077185, + "learning_rate": 2.284761834341496e-06, + "loss": 0.04473114013671875, + "step": 133375 + }, + { + "epoch": 1.1532974206881048, + "grad_norm": 5.617903065210842, + "learning_rate": 2.28456397768493e-06, + "loss": 0.1925069808959961, + "step": 133380 + }, + { + "epoch": 1.153340654209648, + "grad_norm": 1.4055137003630274, + "learning_rate": 2.2843661243278933e-06, + "loss": 0.17812004089355468, + "step": 133385 + }, + { + "epoch": 1.1533838877311913, + "grad_norm": 2.773298774248741, + "learning_rate": 2.284168274271296e-06, + "loss": 0.25687408447265625, + "step": 133390 + }, + { + "epoch": 1.1534271212527345, + "grad_norm": 3.902638597537801, + "learning_rate": 2.283970427516052e-06, + "loss": 0.2743980407714844, + "step": 133395 + }, + { + "epoch": 1.1534703547742777, + "grad_norm": 0.6556673631990191, + "learning_rate": 2.2837725840630723e-06, + "loss": 0.01732025146484375, + "step": 133400 + }, + { + "epoch": 1.153513588295821, + "grad_norm": 17.3631777585206, + "learning_rate": 2.283574743913271e-06, + "loss": 0.13487014770507813, + "step": 133405 + }, + { + "epoch": 1.1535568218173644, + "grad_norm": 1.9782555737236809, + "learning_rate": 2.2833769070675583e-06, + "loss": 0.035243988037109375, + "step": 133410 + }, + { + "epoch": 1.1536000553389076, + "grad_norm": 3.9661030143766838, + "learning_rate": 2.283179073526849e-06, + "loss": 0.036467361450195315, + "step": 133415 + }, + { + "epoch": 1.1536432888604509, + "grad_norm": 34.60213516823001, + "learning_rate": 2.2829812432920547e-06, + "loss": 0.1287353515625, + "step": 133420 + }, + { + "epoch": 1.153686522381994, + "grad_norm": 1.3019005882152177, + "learning_rate": 2.2827834163640873e-06, + "loss": 0.04193801879882812, + "step": 133425 + }, + { + "epoch": 1.1537297559035373, + "grad_norm": 6.462695375610554, + "learning_rate": 2.282585592743859e-06, + "loss": 0.11549739837646485, + "step": 133430 + }, + { + "epoch": 1.1537729894250806, + "grad_norm": 0.4643428739048805, + "learning_rate": 2.2823877724322833e-06, + "loss": 0.02983245849609375, + "step": 133435 + }, + { + "epoch": 1.1538162229466238, + "grad_norm": 9.656491395605492, + "learning_rate": 2.28218995543027e-06, + "loss": 0.1073822021484375, + "step": 133440 + }, + { + "epoch": 1.1538594564681672, + "grad_norm": 0.04765018371023662, + "learning_rate": 2.281992141738735e-06, + "loss": 0.029818344116210937, + "step": 133445 + }, + { + "epoch": 1.1539026899897105, + "grad_norm": 8.235827558035426, + "learning_rate": 2.281794331358588e-06, + "loss": 0.06643829345703126, + "step": 133450 + }, + { + "epoch": 1.1539459235112537, + "grad_norm": 8.695767774154557, + "learning_rate": 2.281596524290743e-06, + "loss": 0.02590808868408203, + "step": 133455 + }, + { + "epoch": 1.153989157032797, + "grad_norm": 35.029836171674276, + "learning_rate": 2.2813987205361103e-06, + "loss": 0.11340179443359374, + "step": 133460 + }, + { + "epoch": 1.1540323905543401, + "grad_norm": 7.242002799294441, + "learning_rate": 2.2812009200956044e-06, + "loss": 0.03209419250488281, + "step": 133465 + }, + { + "epoch": 1.1540756240758836, + "grad_norm": 0.7051169677892093, + "learning_rate": 2.281003122970136e-06, + "loss": 0.048996353149414064, + "step": 133470 + }, + { + "epoch": 1.1541188575974268, + "grad_norm": 2.1446085043446046, + "learning_rate": 2.280805329160616e-06, + "loss": 0.07130661010742187, + "step": 133475 + }, + { + "epoch": 1.15416209111897, + "grad_norm": 1.62124237012239, + "learning_rate": 2.28060753866796e-06, + "loss": 0.1868194580078125, + "step": 133480 + }, + { + "epoch": 1.1542053246405133, + "grad_norm": 1.3923260227601386, + "learning_rate": 2.2804097514930775e-06, + "loss": 0.0121368408203125, + "step": 133485 + }, + { + "epoch": 1.1542485581620565, + "grad_norm": 0.35816084951730665, + "learning_rate": 2.280211967636882e-06, + "loss": 0.013489913940429688, + "step": 133490 + }, + { + "epoch": 1.1542917916835997, + "grad_norm": 14.883598581548323, + "learning_rate": 2.280014187100286e-06, + "loss": 0.094384765625, + "step": 133495 + }, + { + "epoch": 1.154335025205143, + "grad_norm": 7.81683382198366, + "learning_rate": 2.279816409884201e-06, + "loss": 0.03819122314453125, + "step": 133500 + }, + { + "epoch": 1.1543782587266862, + "grad_norm": 4.101930984311541, + "learning_rate": 2.2796186359895375e-06, + "loss": 0.1471405029296875, + "step": 133505 + }, + { + "epoch": 1.1544214922482297, + "grad_norm": 1.6878312302954113, + "learning_rate": 2.2794208654172102e-06, + "loss": 0.0669342041015625, + "step": 133510 + }, + { + "epoch": 1.1544647257697729, + "grad_norm": 8.191319617828718, + "learning_rate": 2.2792230981681295e-06, + "loss": 0.08760604858398438, + "step": 133515 + }, + { + "epoch": 1.154507959291316, + "grad_norm": 0.42456859958919296, + "learning_rate": 2.2790253342432093e-06, + "loss": 0.05368194580078125, + "step": 133520 + }, + { + "epoch": 1.1545511928128593, + "grad_norm": 21.954439568604524, + "learning_rate": 2.2788275736433606e-06, + "loss": 0.09207115173339844, + "step": 133525 + }, + { + "epoch": 1.1545944263344026, + "grad_norm": 6.175026536435297, + "learning_rate": 2.278629816369495e-06, + "loss": 0.04440231323242187, + "step": 133530 + }, + { + "epoch": 1.154637659855946, + "grad_norm": 35.76486466163445, + "learning_rate": 2.2784320624225247e-06, + "loss": 0.2220602035522461, + "step": 133535 + }, + { + "epoch": 1.1546808933774892, + "grad_norm": 0.6937071603038512, + "learning_rate": 2.2782343118033617e-06, + "loss": 0.0230377197265625, + "step": 133540 + }, + { + "epoch": 1.1547241268990325, + "grad_norm": 1.3937218084099092, + "learning_rate": 2.2780365645129184e-06, + "loss": 0.025376129150390624, + "step": 133545 + }, + { + "epoch": 1.1547673604205757, + "grad_norm": 8.308009574015276, + "learning_rate": 2.2778388205521073e-06, + "loss": 0.08209075927734374, + "step": 133550 + }, + { + "epoch": 1.154810593942119, + "grad_norm": 3.9866171840884763, + "learning_rate": 2.2776410799218396e-06, + "loss": 0.079132080078125, + "step": 133555 + }, + { + "epoch": 1.1548538274636622, + "grad_norm": 40.152871383931625, + "learning_rate": 2.2774433426230278e-06, + "loss": 0.09819564819335938, + "step": 133560 + }, + { + "epoch": 1.1548970609852054, + "grad_norm": 0.15046891293632783, + "learning_rate": 2.2772456086565825e-06, + "loss": 0.10331382751464843, + "step": 133565 + }, + { + "epoch": 1.1549402945067488, + "grad_norm": 3.3269765100511854, + "learning_rate": 2.2770478780234164e-06, + "loss": 0.02723236083984375, + "step": 133570 + }, + { + "epoch": 1.154983528028292, + "grad_norm": 7.609600822771012, + "learning_rate": 2.276850150724442e-06, + "loss": 0.0224365234375, + "step": 133575 + }, + { + "epoch": 1.1550267615498353, + "grad_norm": 2.084493891628164, + "learning_rate": 2.2766524267605714e-06, + "loss": 0.05703544616699219, + "step": 133580 + }, + { + "epoch": 1.1550699950713785, + "grad_norm": 1.6238052421817386, + "learning_rate": 2.2764547061327156e-06, + "loss": 0.05396881103515625, + "step": 133585 + }, + { + "epoch": 1.1551132285929218, + "grad_norm": 1.6958149406070204, + "learning_rate": 2.276256988841787e-06, + "loss": 0.07879180908203125, + "step": 133590 + }, + { + "epoch": 1.155156462114465, + "grad_norm": 3.425347293116939, + "learning_rate": 2.2760592748886968e-06, + "loss": 0.06844024658203125, + "step": 133595 + }, + { + "epoch": 1.1551996956360084, + "grad_norm": 2.3564193295104348, + "learning_rate": 2.2758615642743565e-06, + "loss": 0.016841888427734375, + "step": 133600 + }, + { + "epoch": 1.1552429291575517, + "grad_norm": 1.5716918970500868, + "learning_rate": 2.275663856999679e-06, + "loss": 0.0220245361328125, + "step": 133605 + }, + { + "epoch": 1.155286162679095, + "grad_norm": 16.686377291590134, + "learning_rate": 2.275466153065577e-06, + "loss": 0.07471160888671875, + "step": 133610 + }, + { + "epoch": 1.1553293962006381, + "grad_norm": 4.896176176870397, + "learning_rate": 2.2752684524729607e-06, + "loss": 0.1590496063232422, + "step": 133615 + }, + { + "epoch": 1.1553726297221814, + "grad_norm": 1.9568767675569139, + "learning_rate": 2.275070755222742e-06, + "loss": 0.17035980224609376, + "step": 133620 + }, + { + "epoch": 1.1554158632437246, + "grad_norm": 0.18215646773995756, + "learning_rate": 2.2748730613158328e-06, + "loss": 0.06443595886230469, + "step": 133625 + }, + { + "epoch": 1.1554590967652678, + "grad_norm": 0.7577339820198005, + "learning_rate": 2.274675370753145e-06, + "loss": 0.10046844482421875, + "step": 133630 + }, + { + "epoch": 1.1555023302868113, + "grad_norm": 12.813902085856137, + "learning_rate": 2.274477683535589e-06, + "loss": 0.1617218017578125, + "step": 133635 + }, + { + "epoch": 1.1555455638083545, + "grad_norm": 6.30459485044766, + "learning_rate": 2.2742799996640797e-06, + "loss": 0.08319892883300781, + "step": 133640 + }, + { + "epoch": 1.1555887973298977, + "grad_norm": 1.962744511432475, + "learning_rate": 2.2740823191395265e-06, + "loss": 0.0554443359375, + "step": 133645 + }, + { + "epoch": 1.155632030851441, + "grad_norm": 0.2515305365133615, + "learning_rate": 2.273884641962842e-06, + "loss": 0.1600311279296875, + "step": 133650 + }, + { + "epoch": 1.1556752643729842, + "grad_norm": 2.8730036042362577, + "learning_rate": 2.273686968134936e-06, + "loss": 0.438140869140625, + "step": 133655 + }, + { + "epoch": 1.1557184978945274, + "grad_norm": 1.689963251445996, + "learning_rate": 2.2734892976567224e-06, + "loss": 0.02655792236328125, + "step": 133660 + }, + { + "epoch": 1.1557617314160709, + "grad_norm": 0.10667419144847853, + "learning_rate": 2.2732916305291112e-06, + "loss": 0.055119657516479494, + "step": 133665 + }, + { + "epoch": 1.155804964937614, + "grad_norm": 1.9717339067822612, + "learning_rate": 2.2730939667530155e-06, + "loss": 0.0881134033203125, + "step": 133670 + }, + { + "epoch": 1.1558481984591573, + "grad_norm": 2.099964971179677, + "learning_rate": 2.2728963063293466e-06, + "loss": 0.081219482421875, + "step": 133675 + }, + { + "epoch": 1.1558914319807005, + "grad_norm": 1.7711954409456672, + "learning_rate": 2.272698649259015e-06, + "loss": 0.019237518310546875, + "step": 133680 + }, + { + "epoch": 1.1559346655022438, + "grad_norm": 22.32069269456446, + "learning_rate": 2.272500995542933e-06, + "loss": 0.06953887939453125, + "step": 133685 + }, + { + "epoch": 1.155977899023787, + "grad_norm": 58.263080556590424, + "learning_rate": 2.2723033451820126e-06, + "loss": 0.21615104675292968, + "step": 133690 + }, + { + "epoch": 1.1560211325453302, + "grad_norm": 1.5778276295174705, + "learning_rate": 2.2721056981771637e-06, + "loss": 0.033222389221191403, + "step": 133695 + }, + { + "epoch": 1.1560643660668737, + "grad_norm": 6.326188475609238, + "learning_rate": 2.2719080545293e-06, + "loss": 0.11496849060058593, + "step": 133700 + }, + { + "epoch": 1.156107599588417, + "grad_norm": 6.793361761970731, + "learning_rate": 2.271710414239332e-06, + "loss": 0.07210311889648438, + "step": 133705 + }, + { + "epoch": 1.1561508331099601, + "grad_norm": 1.0716161296575692, + "learning_rate": 2.2715127773081708e-06, + "loss": 0.020539093017578124, + "step": 133710 + }, + { + "epoch": 1.1561940666315034, + "grad_norm": 6.714092183889296, + "learning_rate": 2.271315143736729e-06, + "loss": 0.03129730224609375, + "step": 133715 + }, + { + "epoch": 1.1562373001530466, + "grad_norm": 2.834235539359804, + "learning_rate": 2.2711175135259173e-06, + "loss": 0.104150390625, + "step": 133720 + }, + { + "epoch": 1.15628053367459, + "grad_norm": 0.3580949124290805, + "learning_rate": 2.2709198866766473e-06, + "loss": 0.0156280517578125, + "step": 133725 + }, + { + "epoch": 1.1563237671961333, + "grad_norm": 5.701344284153593, + "learning_rate": 2.2707222631898287e-06, + "loss": 0.1511138916015625, + "step": 133730 + }, + { + "epoch": 1.1563670007176765, + "grad_norm": 1.6106052574913396, + "learning_rate": 2.270524643066376e-06, + "loss": 0.06678390502929688, + "step": 133735 + }, + { + "epoch": 1.1564102342392197, + "grad_norm": 16.570961763774918, + "learning_rate": 2.2703270263071986e-06, + "loss": 0.10121612548828125, + "step": 133740 + }, + { + "epoch": 1.156453467760763, + "grad_norm": 0.41758954535399123, + "learning_rate": 2.270129412913209e-06, + "loss": 0.0124237060546875, + "step": 133745 + }, + { + "epoch": 1.1564967012823062, + "grad_norm": 9.158952886606262, + "learning_rate": 2.2699318028853184e-06, + "loss": 0.029180145263671874, + "step": 133750 + }, + { + "epoch": 1.1565399348038494, + "grad_norm": 27.134811398256655, + "learning_rate": 2.2697341962244375e-06, + "loss": 0.04898223876953125, + "step": 133755 + }, + { + "epoch": 1.1565831683253927, + "grad_norm": 0.15958730648252306, + "learning_rate": 2.269536592931477e-06, + "loss": 0.1096710205078125, + "step": 133760 + }, + { + "epoch": 1.156626401846936, + "grad_norm": 17.257821444175534, + "learning_rate": 2.2693389930073505e-06, + "loss": 0.150958251953125, + "step": 133765 + }, + { + "epoch": 1.1566696353684793, + "grad_norm": 2.0466994298600447, + "learning_rate": 2.2691413964529668e-06, + "loss": 0.055254364013671876, + "step": 133770 + }, + { + "epoch": 1.1567128688900226, + "grad_norm": 6.373401143496399, + "learning_rate": 2.2689438032692393e-06, + "loss": 0.2199859619140625, + "step": 133775 + }, + { + "epoch": 1.1567561024115658, + "grad_norm": 0.387129275313566, + "learning_rate": 2.2687462134570787e-06, + "loss": 0.014337921142578125, + "step": 133780 + }, + { + "epoch": 1.156799335933109, + "grad_norm": 21.30892547327635, + "learning_rate": 2.268548627017396e-06, + "loss": 0.16899032592773439, + "step": 133785 + }, + { + "epoch": 1.1568425694546525, + "grad_norm": 2.2553026261346476, + "learning_rate": 2.2683510439511004e-06, + "loss": 0.28838043212890624, + "step": 133790 + }, + { + "epoch": 1.1568858029761957, + "grad_norm": 3.823482578315964, + "learning_rate": 2.268153464259107e-06, + "loss": 0.02581939697265625, + "step": 133795 + }, + { + "epoch": 1.156929036497739, + "grad_norm": 5.639098052422422, + "learning_rate": 2.2679558879423245e-06, + "loss": 0.114361572265625, + "step": 133800 + }, + { + "epoch": 1.1569722700192822, + "grad_norm": 0.3029178083548912, + "learning_rate": 2.267758315001665e-06, + "loss": 0.02944488525390625, + "step": 133805 + }, + { + "epoch": 1.1570155035408254, + "grad_norm": 2.8202813454913285, + "learning_rate": 2.2675607454380396e-06, + "loss": 0.01835289001464844, + "step": 133810 + }, + { + "epoch": 1.1570587370623686, + "grad_norm": 5.942529918119508, + "learning_rate": 2.26736317925236e-06, + "loss": 0.079888916015625, + "step": 133815 + }, + { + "epoch": 1.1571019705839118, + "grad_norm": 29.652756351240182, + "learning_rate": 2.2671656164455354e-06, + "loss": 0.0947052001953125, + "step": 133820 + }, + { + "epoch": 1.1571452041054553, + "grad_norm": 40.30471260556235, + "learning_rate": 2.266968057018478e-06, + "loss": 0.0875946044921875, + "step": 133825 + }, + { + "epoch": 1.1571884376269985, + "grad_norm": 2.192009648769222, + "learning_rate": 2.2667705009720997e-06, + "loss": 0.01610107421875, + "step": 133830 + }, + { + "epoch": 1.1572316711485418, + "grad_norm": 1.3563357288278124, + "learning_rate": 2.2665729483073113e-06, + "loss": 0.09295883178710937, + "step": 133835 + }, + { + "epoch": 1.157274904670085, + "grad_norm": 2.7494152476407976, + "learning_rate": 2.2663753990250237e-06, + "loss": 0.06201381683349609, + "step": 133840 + }, + { + "epoch": 1.1573181381916282, + "grad_norm": 2.299293605883273, + "learning_rate": 2.2661778531261482e-06, + "loss": 0.01560821533203125, + "step": 133845 + }, + { + "epoch": 1.1573613717131714, + "grad_norm": 1.533252331967905, + "learning_rate": 2.2659803106115946e-06, + "loss": 0.08483963012695313, + "step": 133850 + }, + { + "epoch": 1.157404605234715, + "grad_norm": 6.749673138842369, + "learning_rate": 2.265782771482275e-06, + "loss": 0.0852081298828125, + "step": 133855 + }, + { + "epoch": 1.1574478387562581, + "grad_norm": 9.532436642633318, + "learning_rate": 2.265585235739101e-06, + "loss": 0.018894195556640625, + "step": 133860 + }, + { + "epoch": 1.1574910722778013, + "grad_norm": 1.4399859582139565, + "learning_rate": 2.265387703382983e-06, + "loss": 0.12791519165039061, + "step": 133865 + }, + { + "epoch": 1.1575343057993446, + "grad_norm": 25.40323272218827, + "learning_rate": 2.2651901744148323e-06, + "loss": 0.20026016235351562, + "step": 133870 + }, + { + "epoch": 1.1575775393208878, + "grad_norm": 19.415636396868873, + "learning_rate": 2.2649926488355594e-06, + "loss": 0.12335205078125, + "step": 133875 + }, + { + "epoch": 1.157620772842431, + "grad_norm": 2.416599308763879, + "learning_rate": 2.2647951266460746e-06, + "loss": 0.04187469482421875, + "step": 133880 + }, + { + "epoch": 1.1576640063639743, + "grad_norm": 5.855834194079354, + "learning_rate": 2.264597607847291e-06, + "loss": 0.03190193176269531, + "step": 133885 + }, + { + "epoch": 1.1577072398855177, + "grad_norm": 30.633952635639208, + "learning_rate": 2.264400092440117e-06, + "loss": 0.16215858459472657, + "step": 133890 + }, + { + "epoch": 1.157750473407061, + "grad_norm": 11.223356582800456, + "learning_rate": 2.264202580425465e-06, + "loss": 0.15989189147949218, + "step": 133895 + }, + { + "epoch": 1.1577937069286042, + "grad_norm": 51.01092746966548, + "learning_rate": 2.2640050718042465e-06, + "loss": 0.08603515625, + "step": 133900 + }, + { + "epoch": 1.1578369404501474, + "grad_norm": 11.232999637778185, + "learning_rate": 2.2638075665773715e-06, + "loss": 0.0715576171875, + "step": 133905 + }, + { + "epoch": 1.1578801739716906, + "grad_norm": 5.48242769063063, + "learning_rate": 2.26361006474575e-06, + "loss": 0.07428569793701172, + "step": 133910 + }, + { + "epoch": 1.1579234074932339, + "grad_norm": 7.157772276951832, + "learning_rate": 2.263412566310295e-06, + "loss": 0.1029815673828125, + "step": 133915 + }, + { + "epoch": 1.1579666410147773, + "grad_norm": 0.2530554351620788, + "learning_rate": 2.2632150712719144e-06, + "loss": 0.06466217041015625, + "step": 133920 + }, + { + "epoch": 1.1580098745363205, + "grad_norm": 2.092557059686923, + "learning_rate": 2.2630175796315225e-06, + "loss": 0.104736328125, + "step": 133925 + }, + { + "epoch": 1.1580531080578638, + "grad_norm": 0.30910189945404365, + "learning_rate": 2.262820091390028e-06, + "loss": 0.11269989013671874, + "step": 133930 + }, + { + "epoch": 1.158096341579407, + "grad_norm": 58.608704155085775, + "learning_rate": 2.2626226065483414e-06, + "loss": 0.165869140625, + "step": 133935 + }, + { + "epoch": 1.1581395751009502, + "grad_norm": 10.42185768136698, + "learning_rate": 2.262425125107375e-06, + "loss": 0.02159576416015625, + "step": 133940 + }, + { + "epoch": 1.1581828086224935, + "grad_norm": 6.961259686073282, + "learning_rate": 2.262227647068039e-06, + "loss": 0.08582763671875, + "step": 133945 + }, + { + "epoch": 1.1582260421440367, + "grad_norm": 4.570618130656307, + "learning_rate": 2.262030172431242e-06, + "loss": 0.10697402954101562, + "step": 133950 + }, + { + "epoch": 1.1582692756655801, + "grad_norm": 1.2510093005679255, + "learning_rate": 2.2618327011978987e-06, + "loss": 0.02993621826171875, + "step": 133955 + }, + { + "epoch": 1.1583125091871234, + "grad_norm": 42.087076236123295, + "learning_rate": 2.261635233368917e-06, + "loss": 0.22702484130859374, + "step": 133960 + }, + { + "epoch": 1.1583557427086666, + "grad_norm": 6.635902743572, + "learning_rate": 2.261437768945208e-06, + "loss": 0.0937225341796875, + "step": 133965 + }, + { + "epoch": 1.1583989762302098, + "grad_norm": 0.4990105366828453, + "learning_rate": 2.261240307927684e-06, + "loss": 0.02244434356689453, + "step": 133970 + }, + { + "epoch": 1.158442209751753, + "grad_norm": 11.276556883411335, + "learning_rate": 2.2610428503172535e-06, + "loss": 0.09365596771240234, + "step": 133975 + }, + { + "epoch": 1.1584854432732965, + "grad_norm": 0.45874511041046767, + "learning_rate": 2.260845396114829e-06, + "loss": 0.00360870361328125, + "step": 133980 + }, + { + "epoch": 1.1585286767948397, + "grad_norm": 15.82211943448202, + "learning_rate": 2.260647945321318e-06, + "loss": 0.05277099609375, + "step": 133985 + }, + { + "epoch": 1.158571910316383, + "grad_norm": 31.180534674141164, + "learning_rate": 2.260450497937635e-06, + "loss": 0.24921875, + "step": 133990 + }, + { + "epoch": 1.1586151438379262, + "grad_norm": 1.2050170405422405, + "learning_rate": 2.2602530539646884e-06, + "loss": 0.09048652648925781, + "step": 133995 + }, + { + "epoch": 1.1586583773594694, + "grad_norm": 1.7791095194143165, + "learning_rate": 2.26005561340339e-06, + "loss": 0.03601760864257812, + "step": 134000 + }, + { + "epoch": 1.1587016108810126, + "grad_norm": 6.992236453381495, + "learning_rate": 2.2598581762546495e-06, + "loss": 0.051943206787109376, + "step": 134005 + }, + { + "epoch": 1.1587448444025559, + "grad_norm": 79.06406306149874, + "learning_rate": 2.259660742519378e-06, + "loss": 0.045934295654296874, + "step": 134010 + }, + { + "epoch": 1.1587880779240993, + "grad_norm": 3.1515947588588475, + "learning_rate": 2.2594633121984844e-06, + "loss": 0.02334747314453125, + "step": 134015 + }, + { + "epoch": 1.1588313114456426, + "grad_norm": 4.204501214674971, + "learning_rate": 2.259265885292882e-06, + "loss": 0.03329391479492187, + "step": 134020 + }, + { + "epoch": 1.1588745449671858, + "grad_norm": 6.665529667446314, + "learning_rate": 2.259068461803479e-06, + "loss": 0.05425872802734375, + "step": 134025 + }, + { + "epoch": 1.158917778488729, + "grad_norm": 16.64593572881307, + "learning_rate": 2.258871041731187e-06, + "loss": 0.03234100341796875, + "step": 134030 + }, + { + "epoch": 1.1589610120102722, + "grad_norm": 2.530155081969253, + "learning_rate": 2.258673625076917e-06, + "loss": 0.14989910125732422, + "step": 134035 + }, + { + "epoch": 1.1590042455318155, + "grad_norm": 0.7099026993920547, + "learning_rate": 2.258476211841578e-06, + "loss": 0.1185638427734375, + "step": 134040 + }, + { + "epoch": 1.159047479053359, + "grad_norm": 0.5675441355989223, + "learning_rate": 2.2582788020260806e-06, + "loss": 0.24899139404296874, + "step": 134045 + }, + { + "epoch": 1.1590907125749021, + "grad_norm": 9.079082225982834, + "learning_rate": 2.258081395631337e-06, + "loss": 0.17288742065429688, + "step": 134050 + }, + { + "epoch": 1.1591339460964454, + "grad_norm": 4.412149594350276, + "learning_rate": 2.2578839926582555e-06, + "loss": 0.1032012939453125, + "step": 134055 + }, + { + "epoch": 1.1591771796179886, + "grad_norm": 0.07450179650238097, + "learning_rate": 2.257686593107748e-06, + "loss": 0.06752300262451172, + "step": 134060 + }, + { + "epoch": 1.1592204131395318, + "grad_norm": 0.2635764475327676, + "learning_rate": 2.2574891969807246e-06, + "loss": 0.029701614379882814, + "step": 134065 + }, + { + "epoch": 1.159263646661075, + "grad_norm": 15.295979950889711, + "learning_rate": 2.2572918042780956e-06, + "loss": 0.1370269775390625, + "step": 134070 + }, + { + "epoch": 1.1593068801826183, + "grad_norm": 1.0690755078080116, + "learning_rate": 2.2570944150007705e-06, + "loss": 0.018491363525390624, + "step": 134075 + }, + { + "epoch": 1.1593501137041617, + "grad_norm": 9.493675883201442, + "learning_rate": 2.25689702914966e-06, + "loss": 0.17321128845214845, + "step": 134080 + }, + { + "epoch": 1.159393347225705, + "grad_norm": 3.9139266271845727, + "learning_rate": 2.256699646725675e-06, + "loss": 0.17313499450683595, + "step": 134085 + }, + { + "epoch": 1.1594365807472482, + "grad_norm": 7.194115329031797, + "learning_rate": 2.256502267729726e-06, + "loss": 0.1210174560546875, + "step": 134090 + }, + { + "epoch": 1.1594798142687914, + "grad_norm": 2.2231325429831488, + "learning_rate": 2.256304892162723e-06, + "loss": 0.15592727661132813, + "step": 134095 + }, + { + "epoch": 1.1595230477903347, + "grad_norm": 2.7080058019412996, + "learning_rate": 2.256107520025576e-06, + "loss": 0.07067184448242188, + "step": 134100 + }, + { + "epoch": 1.1595662813118779, + "grad_norm": 1.3171689057121363, + "learning_rate": 2.2559101513191946e-06, + "loss": 0.03203401565551758, + "step": 134105 + }, + { + "epoch": 1.1596095148334213, + "grad_norm": 1.5304259942484344, + "learning_rate": 2.2557127860444894e-06, + "loss": 0.02929229736328125, + "step": 134110 + }, + { + "epoch": 1.1596527483549646, + "grad_norm": 6.541379540918804, + "learning_rate": 2.2555154242023726e-06, + "loss": 0.052009963989257814, + "step": 134115 + }, + { + "epoch": 1.1596959818765078, + "grad_norm": 47.73030008387264, + "learning_rate": 2.2553180657937525e-06, + "loss": 0.058101654052734375, + "step": 134120 + }, + { + "epoch": 1.159739215398051, + "grad_norm": 20.947245995091183, + "learning_rate": 2.2551207108195397e-06, + "loss": 0.235345458984375, + "step": 134125 + }, + { + "epoch": 1.1597824489195943, + "grad_norm": 1.0415567316114374, + "learning_rate": 2.254923359280644e-06, + "loss": 0.09542236328125, + "step": 134130 + }, + { + "epoch": 1.1598256824411375, + "grad_norm": 27.498459108124116, + "learning_rate": 2.2547260111779762e-06, + "loss": 0.0780059814453125, + "step": 134135 + }, + { + "epoch": 1.1598689159626807, + "grad_norm": 36.239132602156346, + "learning_rate": 2.2545286665124467e-06, + "loss": 0.16152687072753907, + "step": 134140 + }, + { + "epoch": 1.1599121494842242, + "grad_norm": 34.439828536293156, + "learning_rate": 2.2543313252849637e-06, + "loss": 0.13512115478515624, + "step": 134145 + }, + { + "epoch": 1.1599553830057674, + "grad_norm": 16.33569747058649, + "learning_rate": 2.2541339874964395e-06, + "loss": 0.32981719970703127, + "step": 134150 + }, + { + "epoch": 1.1599986165273106, + "grad_norm": 10.257216588569568, + "learning_rate": 2.253936653147784e-06, + "loss": 0.02365875244140625, + "step": 134155 + }, + { + "epoch": 1.1600418500488539, + "grad_norm": 15.502860822532424, + "learning_rate": 2.2537393222399054e-06, + "loss": 0.06431999206542968, + "step": 134160 + }, + { + "epoch": 1.160085083570397, + "grad_norm": 4.731999629466489, + "learning_rate": 2.2535419947737165e-06, + "loss": 0.01789989471435547, + "step": 134165 + }, + { + "epoch": 1.1601283170919405, + "grad_norm": 0.2400620569278615, + "learning_rate": 2.2533446707501257e-06, + "loss": 0.06652297973632812, + "step": 134170 + }, + { + "epoch": 1.1601715506134838, + "grad_norm": 1.0630396713506147, + "learning_rate": 2.253147350170042e-06, + "loss": 0.0519287109375, + "step": 134175 + }, + { + "epoch": 1.160214784135027, + "grad_norm": 5.1657919391607345, + "learning_rate": 2.252950033034378e-06, + "loss": 0.04139919281005859, + "step": 134180 + }, + { + "epoch": 1.1602580176565702, + "grad_norm": 25.856214889919745, + "learning_rate": 2.2527527193440423e-06, + "loss": 0.08375244140625, + "step": 134185 + }, + { + "epoch": 1.1603012511781134, + "grad_norm": 5.629958289585152, + "learning_rate": 2.2525554090999446e-06, + "loss": 0.05551424026489258, + "step": 134190 + }, + { + "epoch": 1.1603444846996567, + "grad_norm": 0.5556069449571588, + "learning_rate": 2.252358102302996e-06, + "loss": 0.031103515625, + "step": 134195 + }, + { + "epoch": 1.1603877182212, + "grad_norm": 5.37973241829324, + "learning_rate": 2.2521607989541056e-06, + "loss": 0.015468788146972657, + "step": 134200 + }, + { + "epoch": 1.1604309517427431, + "grad_norm": 4.4090596443999495, + "learning_rate": 2.251963499054182e-06, + "loss": 0.07113113403320312, + "step": 134205 + }, + { + "epoch": 1.1604741852642866, + "grad_norm": 1.8291288875417044, + "learning_rate": 2.2517662026041384e-06, + "loss": 0.35985870361328126, + "step": 134210 + }, + { + "epoch": 1.1605174187858298, + "grad_norm": 29.04964355435419, + "learning_rate": 2.2515689096048827e-06, + "loss": 0.2092376708984375, + "step": 134215 + }, + { + "epoch": 1.160560652307373, + "grad_norm": 6.834346655568883, + "learning_rate": 2.2513716200573248e-06, + "loss": 0.02182159423828125, + "step": 134220 + }, + { + "epoch": 1.1606038858289163, + "grad_norm": 1.42336536906255, + "learning_rate": 2.2511743339623752e-06, + "loss": 0.02062091827392578, + "step": 134225 + }, + { + "epoch": 1.1606471193504595, + "grad_norm": 0.8786884996282535, + "learning_rate": 2.2509770513209433e-06, + "loss": 0.04801788330078125, + "step": 134230 + }, + { + "epoch": 1.160690352872003, + "grad_norm": 3.329121167565472, + "learning_rate": 2.250779772133939e-06, + "loss": 0.037641143798828124, + "step": 134235 + }, + { + "epoch": 1.1607335863935462, + "grad_norm": 5.162021519479797, + "learning_rate": 2.250582496402271e-06, + "loss": 0.42841854095458987, + "step": 134240 + }, + { + "epoch": 1.1607768199150894, + "grad_norm": 46.96739067960662, + "learning_rate": 2.2503852241268515e-06, + "loss": 0.20804290771484374, + "step": 134245 + }, + { + "epoch": 1.1608200534366326, + "grad_norm": 21.41670362118157, + "learning_rate": 2.2501879553085886e-06, + "loss": 0.23464202880859375, + "step": 134250 + }, + { + "epoch": 1.1608632869581759, + "grad_norm": 4.395104380008601, + "learning_rate": 2.249990689948393e-06, + "loss": 0.04432830810546875, + "step": 134255 + }, + { + "epoch": 1.160906520479719, + "grad_norm": 20.491367687351296, + "learning_rate": 2.249793428047175e-06, + "loss": 0.16364307403564454, + "step": 134260 + }, + { + "epoch": 1.1609497540012623, + "grad_norm": 1.7072281343449858, + "learning_rate": 2.2495961696058424e-06, + "loss": 0.1724628448486328, + "step": 134265 + }, + { + "epoch": 1.1609929875228058, + "grad_norm": 13.627482886768547, + "learning_rate": 2.249398914625305e-06, + "loss": 0.1355813980102539, + "step": 134270 + }, + { + "epoch": 1.161036221044349, + "grad_norm": 6.062845916612386, + "learning_rate": 2.249201663106475e-06, + "loss": 0.06520462036132812, + "step": 134275 + }, + { + "epoch": 1.1610794545658922, + "grad_norm": 27.61635604941095, + "learning_rate": 2.2490044150502596e-06, + "loss": 0.18558502197265625, + "step": 134280 + }, + { + "epoch": 1.1611226880874355, + "grad_norm": 5.2525870205845715, + "learning_rate": 2.24880717045757e-06, + "loss": 0.061254119873046874, + "step": 134285 + }, + { + "epoch": 1.1611659216089787, + "grad_norm": 1.2228668955851798, + "learning_rate": 2.2486099293293157e-06, + "loss": 0.028879165649414062, + "step": 134290 + }, + { + "epoch": 1.161209155130522, + "grad_norm": 0.46293813012943696, + "learning_rate": 2.2484126916664056e-06, + "loss": 0.022118473052978517, + "step": 134295 + }, + { + "epoch": 1.1612523886520654, + "grad_norm": 2.8941254261980585, + "learning_rate": 2.248215457469749e-06, + "loss": 0.04986352920532226, + "step": 134300 + }, + { + "epoch": 1.1612956221736086, + "grad_norm": 7.709160062766305, + "learning_rate": 2.2480182267402574e-06, + "loss": 0.119525146484375, + "step": 134305 + }, + { + "epoch": 1.1613388556951518, + "grad_norm": 0.16627322534021993, + "learning_rate": 2.2478209994788383e-06, + "loss": 0.037799072265625, + "step": 134310 + }, + { + "epoch": 1.161382089216695, + "grad_norm": 5.134723234393926, + "learning_rate": 2.247623775686403e-06, + "loss": 0.035483169555664065, + "step": 134315 + }, + { + "epoch": 1.1614253227382383, + "grad_norm": 5.971679973022363, + "learning_rate": 2.247426555363861e-06, + "loss": 0.09680004119873047, + "step": 134320 + }, + { + "epoch": 1.1614685562597815, + "grad_norm": 1.5459402332760237, + "learning_rate": 2.247229338512121e-06, + "loss": 0.012604713439941406, + "step": 134325 + }, + { + "epoch": 1.1615117897813247, + "grad_norm": 2.3335341034697015, + "learning_rate": 2.2470321251320916e-06, + "loss": 0.01416168212890625, + "step": 134330 + }, + { + "epoch": 1.1615550233028682, + "grad_norm": 0.20496892504863154, + "learning_rate": 2.246834915224683e-06, + "loss": 0.01716270446777344, + "step": 134335 + }, + { + "epoch": 1.1615982568244114, + "grad_norm": 0.39927508307194054, + "learning_rate": 2.246637708790807e-06, + "loss": 0.015299224853515625, + "step": 134340 + }, + { + "epoch": 1.1616414903459547, + "grad_norm": 28.303426110205997, + "learning_rate": 2.246440505831371e-06, + "loss": 0.059772491455078125, + "step": 134345 + }, + { + "epoch": 1.1616847238674979, + "grad_norm": 0.05225016540662398, + "learning_rate": 2.2462433063472847e-06, + "loss": 0.09785995483398438, + "step": 134350 + }, + { + "epoch": 1.161727957389041, + "grad_norm": 0.22285092851155663, + "learning_rate": 2.2460461103394573e-06, + "loss": 0.01853523254394531, + "step": 134355 + }, + { + "epoch": 1.1617711909105843, + "grad_norm": 12.704446304119266, + "learning_rate": 2.2458489178087988e-06, + "loss": 0.11764450073242187, + "step": 134360 + }, + { + "epoch": 1.1618144244321278, + "grad_norm": 12.05327024099846, + "learning_rate": 2.2456517287562176e-06, + "loss": 0.0679290771484375, + "step": 134365 + }, + { + "epoch": 1.161857657953671, + "grad_norm": 1.4614787482649731, + "learning_rate": 2.245454543182625e-06, + "loss": 0.12588958740234374, + "step": 134370 + }, + { + "epoch": 1.1619008914752142, + "grad_norm": 3.668824854102762, + "learning_rate": 2.2452573610889297e-06, + "loss": 0.19322509765625, + "step": 134375 + }, + { + "epoch": 1.1619441249967575, + "grad_norm": 0.14333356433827565, + "learning_rate": 2.2450601824760407e-06, + "loss": 0.07650909423828126, + "step": 134380 + }, + { + "epoch": 1.1619873585183007, + "grad_norm": 40.86201916048913, + "learning_rate": 2.2448630073448666e-06, + "loss": 0.16491546630859374, + "step": 134385 + }, + { + "epoch": 1.162030592039844, + "grad_norm": 13.672213703551586, + "learning_rate": 2.2446658356963184e-06, + "loss": 0.12009468078613281, + "step": 134390 + }, + { + "epoch": 1.1620738255613872, + "grad_norm": 4.2342886138870925, + "learning_rate": 2.244468667531303e-06, + "loss": 0.09096603393554688, + "step": 134395 + }, + { + "epoch": 1.1621170590829306, + "grad_norm": 2.248531197652303, + "learning_rate": 2.2442715028507332e-06, + "loss": 0.014521026611328125, + "step": 134400 + }, + { + "epoch": 1.1621602926044738, + "grad_norm": 2.158063908510092, + "learning_rate": 2.2440743416555156e-06, + "loss": 0.045309829711914065, + "step": 134405 + }, + { + "epoch": 1.162203526126017, + "grad_norm": 1.0708150480966074, + "learning_rate": 2.2438771839465607e-06, + "loss": 0.18060379028320311, + "step": 134410 + }, + { + "epoch": 1.1622467596475603, + "grad_norm": 6.5420392800461995, + "learning_rate": 2.2436800297247774e-06, + "loss": 0.02018890380859375, + "step": 134415 + }, + { + "epoch": 1.1622899931691035, + "grad_norm": 6.195346545967367, + "learning_rate": 2.243482878991075e-06, + "loss": 0.104736328125, + "step": 134420 + }, + { + "epoch": 1.162333226690647, + "grad_norm": 0.8200216322570147, + "learning_rate": 2.243285731746363e-06, + "loss": 0.015525054931640626, + "step": 134425 + }, + { + "epoch": 1.1623764602121902, + "grad_norm": 2.7409451306551573, + "learning_rate": 2.2430885879915486e-06, + "loss": 0.0684539794921875, + "step": 134430 + }, + { + "epoch": 1.1624196937337334, + "grad_norm": 1.0059139543931084, + "learning_rate": 2.2428914477275447e-06, + "loss": 0.014942550659179687, + "step": 134435 + }, + { + "epoch": 1.1624629272552767, + "grad_norm": 0.07797216723946593, + "learning_rate": 2.242694310955258e-06, + "loss": 0.029413414001464844, + "step": 134440 + }, + { + "epoch": 1.16250616077682, + "grad_norm": 6.742145617901043, + "learning_rate": 2.2424971776755977e-06, + "loss": 0.043994140625, + "step": 134445 + }, + { + "epoch": 1.1625493942983631, + "grad_norm": 0.07576349026304403, + "learning_rate": 2.242300047889474e-06, + "loss": 0.03033313751220703, + "step": 134450 + }, + { + "epoch": 1.1625926278199064, + "grad_norm": 0.6483701224682931, + "learning_rate": 2.242102921597796e-06, + "loss": 0.036936187744140626, + "step": 134455 + }, + { + "epoch": 1.1626358613414496, + "grad_norm": 7.990677303530469, + "learning_rate": 2.2419057988014708e-06, + "loss": 0.16938629150390624, + "step": 134460 + }, + { + "epoch": 1.162679094862993, + "grad_norm": 0.892700106796687, + "learning_rate": 2.2417086795014104e-06, + "loss": 0.055621337890625, + "step": 134465 + }, + { + "epoch": 1.1627223283845363, + "grad_norm": 4.455274920949106, + "learning_rate": 2.2415115636985224e-06, + "loss": 0.08957138061523437, + "step": 134470 + }, + { + "epoch": 1.1627655619060795, + "grad_norm": 0.2345228180497521, + "learning_rate": 2.2413144513937154e-06, + "loss": 0.12932891845703126, + "step": 134475 + }, + { + "epoch": 1.1628087954276227, + "grad_norm": 1.8044300195091585, + "learning_rate": 2.2411173425879e-06, + "loss": 0.046868515014648435, + "step": 134480 + }, + { + "epoch": 1.162852028949166, + "grad_norm": 2.256003223590363, + "learning_rate": 2.240920237281984e-06, + "loss": 0.0563812255859375, + "step": 134485 + }, + { + "epoch": 1.1628952624707094, + "grad_norm": 1.9526239041290594, + "learning_rate": 2.240723135476877e-06, + "loss": 0.01860504150390625, + "step": 134490 + }, + { + "epoch": 1.1629384959922526, + "grad_norm": 0.20741821151013676, + "learning_rate": 2.2405260371734866e-06, + "loss": 0.11746826171875, + "step": 134495 + }, + { + "epoch": 1.1629817295137959, + "grad_norm": 4.594300427355353, + "learning_rate": 2.2403289423727243e-06, + "loss": 0.07764739990234375, + "step": 134500 + }, + { + "epoch": 1.163024963035339, + "grad_norm": 0.6544963455311675, + "learning_rate": 2.240131851075497e-06, + "loss": 0.11034927368164063, + "step": 134505 + }, + { + "epoch": 1.1630681965568823, + "grad_norm": 16.49306197179081, + "learning_rate": 2.2399347632827154e-06, + "loss": 0.04330902099609375, + "step": 134510 + }, + { + "epoch": 1.1631114300784255, + "grad_norm": 36.573944636214684, + "learning_rate": 2.239737678995287e-06, + "loss": 0.06246604919433594, + "step": 134515 + }, + { + "epoch": 1.1631546635999688, + "grad_norm": 2.3683416231520953, + "learning_rate": 2.239540598214122e-06, + "loss": 0.007585906982421875, + "step": 134520 + }, + { + "epoch": 1.1631978971215122, + "grad_norm": 1.7090028537475614, + "learning_rate": 2.2393435209401267e-06, + "loss": 0.07345848083496094, + "step": 134525 + }, + { + "epoch": 1.1632411306430555, + "grad_norm": 34.272521380370755, + "learning_rate": 2.2391464471742126e-06, + "loss": 0.1302520751953125, + "step": 134530 + }, + { + "epoch": 1.1632843641645987, + "grad_norm": 0.5105252497018308, + "learning_rate": 2.2389493769172882e-06, + "loss": 0.030199432373046876, + "step": 134535 + }, + { + "epoch": 1.163327597686142, + "grad_norm": 13.834683240457016, + "learning_rate": 2.2387523101702626e-06, + "loss": 0.09534111022949218, + "step": 134540 + }, + { + "epoch": 1.1633708312076851, + "grad_norm": 4.100352488275142, + "learning_rate": 2.2385552469340437e-06, + "loss": 0.027423095703125, + "step": 134545 + }, + { + "epoch": 1.1634140647292284, + "grad_norm": 3.314721739671922, + "learning_rate": 2.2383581872095408e-06, + "loss": 0.07559356689453126, + "step": 134550 + }, + { + "epoch": 1.1634572982507718, + "grad_norm": 19.288051929314594, + "learning_rate": 2.2381611309976615e-06, + "loss": 0.07042465209960938, + "step": 134555 + }, + { + "epoch": 1.163500531772315, + "grad_norm": 0.6523857696086718, + "learning_rate": 2.2379640782993164e-06, + "loss": 0.047673797607421874, + "step": 134560 + }, + { + "epoch": 1.1635437652938583, + "grad_norm": 2.240403715152893, + "learning_rate": 2.2377670291154143e-06, + "loss": 0.03320121765136719, + "step": 134565 + }, + { + "epoch": 1.1635869988154015, + "grad_norm": 6.793034791542887, + "learning_rate": 2.237569983446863e-06, + "loss": 0.04875755310058594, + "step": 134570 + }, + { + "epoch": 1.1636302323369447, + "grad_norm": 2.8264692333407555, + "learning_rate": 2.2373729412945718e-06, + "loss": 0.0399169921875, + "step": 134575 + }, + { + "epoch": 1.163673465858488, + "grad_norm": 4.134791883046251, + "learning_rate": 2.2371759026594486e-06, + "loss": 0.01280364990234375, + "step": 134580 + }, + { + "epoch": 1.1637166993800312, + "grad_norm": 7.58963884300767, + "learning_rate": 2.2369788675424035e-06, + "loss": 0.11256179809570313, + "step": 134585 + }, + { + "epoch": 1.1637599329015746, + "grad_norm": 3.2066356055969796, + "learning_rate": 2.2367818359443424e-06, + "loss": 0.03774566650390625, + "step": 134590 + }, + { + "epoch": 1.1638031664231179, + "grad_norm": 50.079651738964166, + "learning_rate": 2.2365848078661784e-06, + "loss": 0.07159671783447266, + "step": 134595 + }, + { + "epoch": 1.163846399944661, + "grad_norm": 1.0116835189937199, + "learning_rate": 2.236387783308817e-06, + "loss": 0.03564529418945313, + "step": 134600 + }, + { + "epoch": 1.1638896334662043, + "grad_norm": 17.823255256950876, + "learning_rate": 2.236190762273168e-06, + "loss": 0.35109634399414064, + "step": 134605 + }, + { + "epoch": 1.1639328669877476, + "grad_norm": 0.39107203422362824, + "learning_rate": 2.235993744760139e-06, + "loss": 0.07100067138671876, + "step": 134610 + }, + { + "epoch": 1.1639761005092908, + "grad_norm": 0.5502122035441627, + "learning_rate": 2.23579673077064e-06, + "loss": 0.024794769287109376, + "step": 134615 + }, + { + "epoch": 1.1640193340308342, + "grad_norm": 6.481086137554232, + "learning_rate": 2.235599720305578e-06, + "loss": 0.06053009033203125, + "step": 134620 + }, + { + "epoch": 1.1640625675523775, + "grad_norm": 41.31244927766083, + "learning_rate": 2.235402713365863e-06, + "loss": 0.45039520263671873, + "step": 134625 + }, + { + "epoch": 1.1641058010739207, + "grad_norm": 4.592561117375793, + "learning_rate": 2.235205709952404e-06, + "loss": 0.061502838134765626, + "step": 134630 + }, + { + "epoch": 1.164149034595464, + "grad_norm": 1.7996359031430156, + "learning_rate": 2.235008710066108e-06, + "loss": 0.2354074478149414, + "step": 134635 + }, + { + "epoch": 1.1641922681170072, + "grad_norm": 5.154368195471226, + "learning_rate": 2.234811713707884e-06, + "loss": 0.06398849487304688, + "step": 134640 + }, + { + "epoch": 1.1642355016385504, + "grad_norm": 1.3629073112995678, + "learning_rate": 2.2346147208786415e-06, + "loss": 0.030533599853515624, + "step": 134645 + }, + { + "epoch": 1.1642787351600936, + "grad_norm": 3.15234172807544, + "learning_rate": 2.2344177315792865e-06, + "loss": 0.03219757080078125, + "step": 134650 + }, + { + "epoch": 1.164321968681637, + "grad_norm": 2.9109692942112315, + "learning_rate": 2.234220745810731e-06, + "loss": 0.09611587524414063, + "step": 134655 + }, + { + "epoch": 1.1643652022031803, + "grad_norm": 1.5967345626210951, + "learning_rate": 2.234023763573882e-06, + "loss": 0.11495895385742187, + "step": 134660 + }, + { + "epoch": 1.1644084357247235, + "grad_norm": 0.5076048747234777, + "learning_rate": 2.233826784869647e-06, + "loss": 0.06411094665527343, + "step": 134665 + }, + { + "epoch": 1.1644516692462668, + "grad_norm": 40.385163290301584, + "learning_rate": 2.2336298096989353e-06, + "loss": 0.12347869873046875, + "step": 134670 + }, + { + "epoch": 1.16449490276781, + "grad_norm": 1.8046914219940364, + "learning_rate": 2.233432838062655e-06, + "loss": 0.188116455078125, + "step": 134675 + }, + { + "epoch": 1.1645381362893534, + "grad_norm": 10.9009944920034, + "learning_rate": 2.2332358699617153e-06, + "loss": 0.0989410400390625, + "step": 134680 + }, + { + "epoch": 1.1645813698108967, + "grad_norm": 50.090722980112545, + "learning_rate": 2.2330389053970226e-06, + "loss": 0.23023834228515624, + "step": 134685 + }, + { + "epoch": 1.16462460333244, + "grad_norm": 2.642813774271068, + "learning_rate": 2.2328419443694878e-06, + "loss": 0.09209365844726562, + "step": 134690 + }, + { + "epoch": 1.1646678368539831, + "grad_norm": 1.560833778999369, + "learning_rate": 2.2326449868800185e-06, + "loss": 0.01753864288330078, + "step": 134695 + }, + { + "epoch": 1.1647110703755263, + "grad_norm": 0.613461379919321, + "learning_rate": 2.232448032929522e-06, + "loss": 0.01988983154296875, + "step": 134700 + }, + { + "epoch": 1.1647543038970696, + "grad_norm": 0.4630783304835401, + "learning_rate": 2.2322510825189076e-06, + "loss": 0.17957687377929688, + "step": 134705 + }, + { + "epoch": 1.1647975374186128, + "grad_norm": 2.5883997973738673, + "learning_rate": 2.2320541356490834e-06, + "loss": 0.00990753173828125, + "step": 134710 + }, + { + "epoch": 1.1648407709401563, + "grad_norm": 3.672755973025246, + "learning_rate": 2.2318571923209564e-06, + "loss": 0.013779830932617188, + "step": 134715 + }, + { + "epoch": 1.1648840044616995, + "grad_norm": 0.6407305260555205, + "learning_rate": 2.231660252535438e-06, + "loss": 0.006665420532226562, + "step": 134720 + }, + { + "epoch": 1.1649272379832427, + "grad_norm": 2.6654382005746933, + "learning_rate": 2.2314633162934343e-06, + "loss": 0.12904052734375, + "step": 134725 + }, + { + "epoch": 1.164970471504786, + "grad_norm": 0.33686839701380045, + "learning_rate": 2.2312663835958532e-06, + "loss": 0.01639404296875, + "step": 134730 + }, + { + "epoch": 1.1650137050263292, + "grad_norm": 0.6086090696864543, + "learning_rate": 2.2310694544436043e-06, + "loss": 0.10285530090332032, + "step": 134735 + }, + { + "epoch": 1.1650569385478724, + "grad_norm": 2.9584817007590836, + "learning_rate": 2.2308725288375953e-06, + "loss": 0.021906280517578126, + "step": 134740 + }, + { + "epoch": 1.1651001720694159, + "grad_norm": 0.22579772945712184, + "learning_rate": 2.2306756067787328e-06, + "loss": 0.034992408752441403, + "step": 134745 + }, + { + "epoch": 1.165143405590959, + "grad_norm": 0.07889632858415709, + "learning_rate": 2.2304786882679274e-06, + "loss": 0.11852874755859374, + "step": 134750 + }, + { + "epoch": 1.1651866391125023, + "grad_norm": 0.36892531870499434, + "learning_rate": 2.2302817733060863e-06, + "loss": 0.018634796142578125, + "step": 134755 + }, + { + "epoch": 1.1652298726340455, + "grad_norm": 12.873585036531736, + "learning_rate": 2.2300848618941175e-06, + "loss": 0.042899131774902344, + "step": 134760 + }, + { + "epoch": 1.1652731061555888, + "grad_norm": 66.12658494913627, + "learning_rate": 2.22988795403293e-06, + "loss": 0.1279266357421875, + "step": 134765 + }, + { + "epoch": 1.165316339677132, + "grad_norm": 3.751129049066998, + "learning_rate": 2.2296910497234306e-06, + "loss": 0.03802680969238281, + "step": 134770 + }, + { + "epoch": 1.1653595731986752, + "grad_norm": 0.05453286322288938, + "learning_rate": 2.2294941489665285e-06, + "loss": 0.036122703552246095, + "step": 134775 + }, + { + "epoch": 1.1654028067202187, + "grad_norm": 12.4200788939797, + "learning_rate": 2.2292972517631295e-06, + "loss": 0.2465892791748047, + "step": 134780 + }, + { + "epoch": 1.165446040241762, + "grad_norm": 0.6186649871085349, + "learning_rate": 2.2291003581141447e-06, + "loss": 0.08001480102539063, + "step": 134785 + }, + { + "epoch": 1.1654892737633051, + "grad_norm": 2.708279835151894, + "learning_rate": 2.228903468020481e-06, + "loss": 0.07984085083007812, + "step": 134790 + }, + { + "epoch": 1.1655325072848484, + "grad_norm": 6.701186450970692, + "learning_rate": 2.2287065814830468e-06, + "loss": 0.37801513671875, + "step": 134795 + }, + { + "epoch": 1.1655757408063916, + "grad_norm": 3.5543973357862804, + "learning_rate": 2.22850969850275e-06, + "loss": 0.03680267333984375, + "step": 134800 + }, + { + "epoch": 1.1656189743279348, + "grad_norm": 1.188079243988627, + "learning_rate": 2.228312819080497e-06, + "loss": 0.13039932250976563, + "step": 134805 + }, + { + "epoch": 1.1656622078494783, + "grad_norm": 0.4191663297919348, + "learning_rate": 2.2281159432171977e-06, + "loss": 0.049546432495117185, + "step": 134810 + }, + { + "epoch": 1.1657054413710215, + "grad_norm": 9.996859149417594, + "learning_rate": 2.2279190709137586e-06, + "loss": 0.0379608154296875, + "step": 134815 + }, + { + "epoch": 1.1657486748925647, + "grad_norm": 1.5652200548348396, + "learning_rate": 2.22772220217109e-06, + "loss": 0.016646575927734376, + "step": 134820 + }, + { + "epoch": 1.165791908414108, + "grad_norm": 3.2174237057477293, + "learning_rate": 2.227525336990098e-06, + "loss": 0.23635406494140626, + "step": 134825 + }, + { + "epoch": 1.1658351419356512, + "grad_norm": 20.509179996093952, + "learning_rate": 2.2273284753716907e-06, + "loss": 0.040904998779296875, + "step": 134830 + }, + { + "epoch": 1.1658783754571944, + "grad_norm": 1.0922758379099415, + "learning_rate": 2.227131617316776e-06, + "loss": 0.10801906585693359, + "step": 134835 + }, + { + "epoch": 1.1659216089787376, + "grad_norm": 9.15353803630008, + "learning_rate": 2.2269347628262623e-06, + "loss": 0.04020214080810547, + "step": 134840 + }, + { + "epoch": 1.165964842500281, + "grad_norm": 0.7131762784165008, + "learning_rate": 2.2267379119010556e-06, + "loss": 0.11994094848632812, + "step": 134845 + }, + { + "epoch": 1.1660080760218243, + "grad_norm": 2.624252024461212, + "learning_rate": 2.2265410645420673e-06, + "loss": 0.024005126953125, + "step": 134850 + }, + { + "epoch": 1.1660513095433676, + "grad_norm": 11.384120311930298, + "learning_rate": 2.2263442207502028e-06, + "loss": 0.040138626098632814, + "step": 134855 + }, + { + "epoch": 1.1660945430649108, + "grad_norm": 49.52887022233461, + "learning_rate": 2.226147380526371e-06, + "loss": 0.09749889373779297, + "step": 134860 + }, + { + "epoch": 1.166137776586454, + "grad_norm": 1.042028857590928, + "learning_rate": 2.2259505438714777e-06, + "loss": 0.03686027526855469, + "step": 134865 + }, + { + "epoch": 1.1661810101079972, + "grad_norm": 79.14944533889937, + "learning_rate": 2.225753710786433e-06, + "loss": 0.18653640747070313, + "step": 134870 + }, + { + "epoch": 1.1662242436295407, + "grad_norm": 1.2264332120186447, + "learning_rate": 2.2255568812721425e-06, + "loss": 0.09314346313476562, + "step": 134875 + }, + { + "epoch": 1.166267477151084, + "grad_norm": 0.061478632367170515, + "learning_rate": 2.225360055329517e-06, + "loss": 0.07412033081054688, + "step": 134880 + }, + { + "epoch": 1.1663107106726271, + "grad_norm": 0.0523766701451406, + "learning_rate": 2.225163232959462e-06, + "loss": 0.14949264526367187, + "step": 134885 + }, + { + "epoch": 1.1663539441941704, + "grad_norm": 5.031309815368348, + "learning_rate": 2.2249664141628855e-06, + "loss": 0.028147506713867187, + "step": 134890 + }, + { + "epoch": 1.1663971777157136, + "grad_norm": 2.665098030747026, + "learning_rate": 2.2247695989406948e-06, + "loss": 0.04410686492919922, + "step": 134895 + }, + { + "epoch": 1.1664404112372568, + "grad_norm": 34.971756923485955, + "learning_rate": 2.2245727872937992e-06, + "loss": 0.09122161865234375, + "step": 134900 + }, + { + "epoch": 1.1664836447588, + "grad_norm": 15.009375455233855, + "learning_rate": 2.224375979223104e-06, + "loss": 0.039493751525878903, + "step": 134905 + }, + { + "epoch": 1.1665268782803435, + "grad_norm": 24.343404561626322, + "learning_rate": 2.2241791747295193e-06, + "loss": 0.1259429931640625, + "step": 134910 + }, + { + "epoch": 1.1665701118018867, + "grad_norm": 16.05960506602766, + "learning_rate": 2.223982373813952e-06, + "loss": 0.19701614379882812, + "step": 134915 + }, + { + "epoch": 1.16661334532343, + "grad_norm": 7.830209151780092, + "learning_rate": 2.2237855764773094e-06, + "loss": 0.0993011474609375, + "step": 134920 + }, + { + "epoch": 1.1666565788449732, + "grad_norm": 10.348844380017196, + "learning_rate": 2.2235887827204985e-06, + "loss": 0.05659027099609375, + "step": 134925 + }, + { + "epoch": 1.1666998123665164, + "grad_norm": 5.277352387100172, + "learning_rate": 2.2233919925444282e-06, + "loss": 0.017481231689453126, + "step": 134930 + }, + { + "epoch": 1.1667430458880599, + "grad_norm": 1.6704138543624933, + "learning_rate": 2.2231952059500055e-06, + "loss": 0.13240203857421876, + "step": 134935 + }, + { + "epoch": 1.166786279409603, + "grad_norm": 0.11825584475035152, + "learning_rate": 2.222998422938136e-06, + "loss": 0.012115097045898438, + "step": 134940 + }, + { + "epoch": 1.1668295129311463, + "grad_norm": 0.03143648290839498, + "learning_rate": 2.2228016435097307e-06, + "loss": 0.2736686706542969, + "step": 134945 + }, + { + "epoch": 1.1668727464526896, + "grad_norm": 9.972083218179197, + "learning_rate": 2.2226048676656956e-06, + "loss": 0.017679595947265626, + "step": 134950 + }, + { + "epoch": 1.1669159799742328, + "grad_norm": 0.6015685915643704, + "learning_rate": 2.2224080954069374e-06, + "loss": 0.07438507080078124, + "step": 134955 + }, + { + "epoch": 1.166959213495776, + "grad_norm": 10.491270058945538, + "learning_rate": 2.2222113267343656e-06, + "loss": 0.0611663818359375, + "step": 134960 + }, + { + "epoch": 1.1670024470173193, + "grad_norm": 4.538584708749273, + "learning_rate": 2.222014561648886e-06, + "loss": 0.036116409301757815, + "step": 134965 + }, + { + "epoch": 1.1670456805388627, + "grad_norm": 3.52451764070769, + "learning_rate": 2.221817800151405e-06, + "loss": 0.057757568359375, + "step": 134970 + }, + { + "epoch": 1.167088914060406, + "grad_norm": 1.346294713168966, + "learning_rate": 2.221621042242833e-06, + "loss": 0.164105224609375, + "step": 134975 + }, + { + "epoch": 1.1671321475819492, + "grad_norm": 2.289759016142265, + "learning_rate": 2.221424287924075e-06, + "loss": 0.0788330078125, + "step": 134980 + }, + { + "epoch": 1.1671753811034924, + "grad_norm": 8.06399343029766, + "learning_rate": 2.2212275371960405e-06, + "loss": 0.08787155151367188, + "step": 134985 + }, + { + "epoch": 1.1672186146250356, + "grad_norm": 1.9989874359882072, + "learning_rate": 2.221030790059636e-06, + "loss": 0.04577484130859375, + "step": 134990 + }, + { + "epoch": 1.1672618481465789, + "grad_norm": 5.519577630993498, + "learning_rate": 2.220834046515768e-06, + "loss": 0.03157310485839844, + "step": 134995 + }, + { + "epoch": 1.1673050816681223, + "grad_norm": 0.29653191731546996, + "learning_rate": 2.2206373065653436e-06, + "loss": 0.043661308288574216, + "step": 135000 + }, + { + "epoch": 1.1673483151896655, + "grad_norm": 8.90781281460662, + "learning_rate": 2.2204405702092726e-06, + "loss": 0.07531051635742188, + "step": 135005 + }, + { + "epoch": 1.1673915487112088, + "grad_norm": 3.7773538458419003, + "learning_rate": 2.2202438374484595e-06, + "loss": 0.1903594970703125, + "step": 135010 + }, + { + "epoch": 1.167434782232752, + "grad_norm": 3.8232598630792642, + "learning_rate": 2.220047108283814e-06, + "loss": 0.12020721435546874, + "step": 135015 + }, + { + "epoch": 1.1674780157542952, + "grad_norm": 17.772501462328247, + "learning_rate": 2.2198503827162425e-06, + "loss": 0.06385650634765624, + "step": 135020 + }, + { + "epoch": 1.1675212492758384, + "grad_norm": 19.145399554411814, + "learning_rate": 2.2196536607466516e-06, + "loss": 0.07235336303710938, + "step": 135025 + }, + { + "epoch": 1.1675644827973817, + "grad_norm": 2.659041638986902, + "learning_rate": 2.2194569423759488e-06, + "loss": 0.017928314208984376, + "step": 135030 + }, + { + "epoch": 1.1676077163189251, + "grad_norm": 3.4866307672184105, + "learning_rate": 2.2192602276050415e-06, + "loss": 0.08513832092285156, + "step": 135035 + }, + { + "epoch": 1.1676509498404684, + "grad_norm": 0.23619893832711766, + "learning_rate": 2.2190635164348373e-06, + "loss": 0.09208450317382813, + "step": 135040 + }, + { + "epoch": 1.1676941833620116, + "grad_norm": 10.117043697993008, + "learning_rate": 2.2188668088662436e-06, + "loss": 0.04802570343017578, + "step": 135045 + }, + { + "epoch": 1.1677374168835548, + "grad_norm": 2.328709825847701, + "learning_rate": 2.218670104900167e-06, + "loss": 0.03897171020507813, + "step": 135050 + }, + { + "epoch": 1.167780650405098, + "grad_norm": 2.2939482087695935, + "learning_rate": 2.2184734045375145e-06, + "loss": 0.056109619140625, + "step": 135055 + }, + { + "epoch": 1.1678238839266413, + "grad_norm": 0.19913880572771833, + "learning_rate": 2.218276707779194e-06, + "loss": 0.04937591552734375, + "step": 135060 + }, + { + "epoch": 1.1678671174481847, + "grad_norm": 4.801959153937354, + "learning_rate": 2.2180800146261116e-06, + "loss": 0.01998291015625, + "step": 135065 + }, + { + "epoch": 1.167910350969728, + "grad_norm": 1.6838541687092372, + "learning_rate": 2.2178833250791747e-06, + "loss": 0.21771163940429689, + "step": 135070 + }, + { + "epoch": 1.1679535844912712, + "grad_norm": 0.10089833502228918, + "learning_rate": 2.2176866391392923e-06, + "loss": 0.06021537780761719, + "step": 135075 + }, + { + "epoch": 1.1679968180128144, + "grad_norm": 15.964135357134394, + "learning_rate": 2.2174899568073693e-06, + "loss": 0.18336563110351561, + "step": 135080 + }, + { + "epoch": 1.1680400515343576, + "grad_norm": 0.7473366574818405, + "learning_rate": 2.2172932780843136e-06, + "loss": 0.22418441772460937, + "step": 135085 + }, + { + "epoch": 1.1680832850559009, + "grad_norm": 9.459267738953221, + "learning_rate": 2.217096602971032e-06, + "loss": 0.24194107055664063, + "step": 135090 + }, + { + "epoch": 1.168126518577444, + "grad_norm": 17.90983921999759, + "learning_rate": 2.2168999314684314e-06, + "loss": 0.058124542236328125, + "step": 135095 + }, + { + "epoch": 1.1681697520989875, + "grad_norm": 1.0083689284418689, + "learning_rate": 2.2167032635774185e-06, + "loss": 0.02467041015625, + "step": 135100 + }, + { + "epoch": 1.1682129856205308, + "grad_norm": 5.097860463343778, + "learning_rate": 2.2165065992989025e-06, + "loss": 0.13041763305664061, + "step": 135105 + }, + { + "epoch": 1.168256219142074, + "grad_norm": 4.14341027717458, + "learning_rate": 2.2163099386337883e-06, + "loss": 0.0517822265625, + "step": 135110 + }, + { + "epoch": 1.1682994526636172, + "grad_norm": 0.877193475633764, + "learning_rate": 2.216113281582984e-06, + "loss": 0.0619354248046875, + "step": 135115 + }, + { + "epoch": 1.1683426861851605, + "grad_norm": 4.688531552422696, + "learning_rate": 2.2159166281473943e-06, + "loss": 0.07720832824707032, + "step": 135120 + }, + { + "epoch": 1.168385919706704, + "grad_norm": 14.915353646995548, + "learning_rate": 2.2157199783279293e-06, + "loss": 0.04845504760742188, + "step": 135125 + }, + { + "epoch": 1.1684291532282471, + "grad_norm": 12.261311935745805, + "learning_rate": 2.2155233321254927e-06, + "loss": 0.037996673583984376, + "step": 135130 + }, + { + "epoch": 1.1684723867497904, + "grad_norm": 0.755651548763714, + "learning_rate": 2.215326689540995e-06, + "loss": 0.04114837646484375, + "step": 135135 + }, + { + "epoch": 1.1685156202713336, + "grad_norm": 13.760612984966912, + "learning_rate": 2.215130050575341e-06, + "loss": 0.07426910400390625, + "step": 135140 + }, + { + "epoch": 1.1685588537928768, + "grad_norm": 6.459271717591311, + "learning_rate": 2.214933415229438e-06, + "loss": 0.10554294586181641, + "step": 135145 + }, + { + "epoch": 1.16860208731442, + "grad_norm": 0.44235541306093046, + "learning_rate": 2.214736783504192e-06, + "loss": 0.11172065734863282, + "step": 135150 + }, + { + "epoch": 1.1686453208359633, + "grad_norm": 0.9624781830014674, + "learning_rate": 2.214540155400511e-06, + "loss": 0.05018692016601563, + "step": 135155 + }, + { + "epoch": 1.1686885543575065, + "grad_norm": 1.7646608593881896, + "learning_rate": 2.2143435309193004e-06, + "loss": 0.09041290283203125, + "step": 135160 + }, + { + "epoch": 1.16873178787905, + "grad_norm": 5.47618865973983, + "learning_rate": 2.214146910061469e-06, + "loss": 0.08641357421875, + "step": 135165 + }, + { + "epoch": 1.1687750214005932, + "grad_norm": 13.53330432565681, + "learning_rate": 2.2139502928279226e-06, + "loss": 0.12753143310546874, + "step": 135170 + }, + { + "epoch": 1.1688182549221364, + "grad_norm": 1.7158161243748735, + "learning_rate": 2.2137536792195675e-06, + "loss": 0.015428924560546875, + "step": 135175 + }, + { + "epoch": 1.1688614884436797, + "grad_norm": 1.3891113971424283, + "learning_rate": 2.2135570692373114e-06, + "loss": 0.07568931579589844, + "step": 135180 + }, + { + "epoch": 1.1689047219652229, + "grad_norm": 0.8270812027397502, + "learning_rate": 2.2133604628820614e-06, + "loss": 0.01897430419921875, + "step": 135185 + }, + { + "epoch": 1.1689479554867663, + "grad_norm": 2.630237541680456, + "learning_rate": 2.2131638601547225e-06, + "loss": 0.23862152099609374, + "step": 135190 + }, + { + "epoch": 1.1689911890083096, + "grad_norm": 5.640778721141565, + "learning_rate": 2.2129672610562014e-06, + "loss": 0.04095916748046875, + "step": 135195 + }, + { + "epoch": 1.1690344225298528, + "grad_norm": 4.3668694706492, + "learning_rate": 2.212770665587407e-06, + "loss": 0.0709014892578125, + "step": 135200 + }, + { + "epoch": 1.169077656051396, + "grad_norm": 5.182629146720832, + "learning_rate": 2.2125740737492443e-06, + "loss": 0.04513702392578125, + "step": 135205 + }, + { + "epoch": 1.1691208895729392, + "grad_norm": 1.7346212349313475, + "learning_rate": 2.212377485542621e-06, + "loss": 0.1774810791015625, + "step": 135210 + }, + { + "epoch": 1.1691641230944825, + "grad_norm": 2.9742930554813913, + "learning_rate": 2.2121809009684428e-06, + "loss": 0.03406982421875, + "step": 135215 + }, + { + "epoch": 1.1692073566160257, + "grad_norm": 0.16451804542679258, + "learning_rate": 2.211984320027617e-06, + "loss": 0.009560394287109374, + "step": 135220 + }, + { + "epoch": 1.1692505901375692, + "grad_norm": 9.838784562912426, + "learning_rate": 2.2117877427210486e-06, + "loss": 0.12433319091796875, + "step": 135225 + }, + { + "epoch": 1.1692938236591124, + "grad_norm": 31.1489530583462, + "learning_rate": 2.2115911690496465e-06, + "loss": 0.20289535522460939, + "step": 135230 + }, + { + "epoch": 1.1693370571806556, + "grad_norm": 2.80252887046646, + "learning_rate": 2.211394599014316e-06, + "loss": 0.04393768310546875, + "step": 135235 + }, + { + "epoch": 1.1693802907021988, + "grad_norm": 4.381786714639045, + "learning_rate": 2.211198032615964e-06, + "loss": 0.030385971069335938, + "step": 135240 + }, + { + "epoch": 1.169423524223742, + "grad_norm": 0.29390091627083664, + "learning_rate": 2.211001469855497e-06, + "loss": 0.07831039428710937, + "step": 135245 + }, + { + "epoch": 1.1694667577452853, + "grad_norm": 0.03448566552666318, + "learning_rate": 2.210804910733822e-06, + "loss": 0.03169927597045898, + "step": 135250 + }, + { + "epoch": 1.1695099912668288, + "grad_norm": 21.141632267660146, + "learning_rate": 2.2106083552518436e-06, + "loss": 0.0977813720703125, + "step": 135255 + }, + { + "epoch": 1.169553224788372, + "grad_norm": 28.624189106462843, + "learning_rate": 2.210411803410471e-06, + "loss": 0.13356876373291016, + "step": 135260 + }, + { + "epoch": 1.1695964583099152, + "grad_norm": 0.3732006252025714, + "learning_rate": 2.210215255210609e-06, + "loss": 0.045270538330078124, + "step": 135265 + }, + { + "epoch": 1.1696396918314584, + "grad_norm": 0.8157761892945088, + "learning_rate": 2.210018710653165e-06, + "loss": 0.5588897705078125, + "step": 135270 + }, + { + "epoch": 1.1696829253530017, + "grad_norm": 1.036911587273681, + "learning_rate": 2.2098221697390446e-06, + "loss": 0.028939056396484374, + "step": 135275 + }, + { + "epoch": 1.169726158874545, + "grad_norm": 0.5451869209101543, + "learning_rate": 2.2096256324691544e-06, + "loss": 0.03900604248046875, + "step": 135280 + }, + { + "epoch": 1.1697693923960881, + "grad_norm": 12.977916403514275, + "learning_rate": 2.2094290988444006e-06, + "loss": 0.07445602416992188, + "step": 135285 + }, + { + "epoch": 1.1698126259176316, + "grad_norm": 2.762952678311178, + "learning_rate": 2.2092325688656896e-06, + "loss": 0.0529510498046875, + "step": 135290 + }, + { + "epoch": 1.1698558594391748, + "grad_norm": 1.0037116325766462, + "learning_rate": 2.2090360425339285e-06, + "loss": 0.15511474609375, + "step": 135295 + }, + { + "epoch": 1.169899092960718, + "grad_norm": 21.52110618199366, + "learning_rate": 2.2088395198500236e-06, + "loss": 0.09952468872070312, + "step": 135300 + }, + { + "epoch": 1.1699423264822613, + "grad_norm": 1.1769254809516219, + "learning_rate": 2.2086430008148813e-06, + "loss": 0.1950927734375, + "step": 135305 + }, + { + "epoch": 1.1699855600038045, + "grad_norm": 10.581574983348826, + "learning_rate": 2.208446485429408e-06, + "loss": 0.1689136505126953, + "step": 135310 + }, + { + "epoch": 1.1700287935253477, + "grad_norm": 0.13076555550929733, + "learning_rate": 2.208249973694508e-06, + "loss": 0.023144912719726563, + "step": 135315 + }, + { + "epoch": 1.1700720270468912, + "grad_norm": 0.5670024289674439, + "learning_rate": 2.208053465611089e-06, + "loss": 0.16415252685546874, + "step": 135320 + }, + { + "epoch": 1.1701152605684344, + "grad_norm": 21.177132631043268, + "learning_rate": 2.207856961180058e-06, + "loss": 0.2908050537109375, + "step": 135325 + }, + { + "epoch": 1.1701584940899776, + "grad_norm": 11.288158321931437, + "learning_rate": 2.2076604604023214e-06, + "loss": 0.1029052734375, + "step": 135330 + }, + { + "epoch": 1.1702017276115209, + "grad_norm": 0.7020223273407831, + "learning_rate": 2.207463963278785e-06, + "loss": 0.01620330810546875, + "step": 135335 + }, + { + "epoch": 1.170244961133064, + "grad_norm": 1.941472868462906, + "learning_rate": 2.207267469810354e-06, + "loss": 0.018190765380859376, + "step": 135340 + }, + { + "epoch": 1.1702881946546073, + "grad_norm": 0.19409428323447175, + "learning_rate": 2.2070709799979356e-06, + "loss": 0.05478935241699219, + "step": 135345 + }, + { + "epoch": 1.1703314281761505, + "grad_norm": 4.95024032185644, + "learning_rate": 2.206874493842435e-06, + "loss": 0.021622848510742188, + "step": 135350 + }, + { + "epoch": 1.170374661697694, + "grad_norm": 6.843690381842323, + "learning_rate": 2.2066780113447597e-06, + "loss": 0.013451766967773438, + "step": 135355 + }, + { + "epoch": 1.1704178952192372, + "grad_norm": 5.891976721334907, + "learning_rate": 2.2064815325058158e-06, + "loss": 0.06725425720214843, + "step": 135360 + }, + { + "epoch": 1.1704611287407805, + "grad_norm": 16.527384840883645, + "learning_rate": 2.206285057326508e-06, + "loss": 0.1841888427734375, + "step": 135365 + }, + { + "epoch": 1.1705043622623237, + "grad_norm": 0.15135147269816862, + "learning_rate": 2.206088585807745e-06, + "loss": 0.05767974853515625, + "step": 135370 + }, + { + "epoch": 1.170547595783867, + "grad_norm": 16.7727507735024, + "learning_rate": 2.20589211795043e-06, + "loss": 0.104443359375, + "step": 135375 + }, + { + "epoch": 1.1705908293054104, + "grad_norm": 0.6721267597916655, + "learning_rate": 2.2056956537554705e-06, + "loss": 0.07919387817382813, + "step": 135380 + }, + { + "epoch": 1.1706340628269536, + "grad_norm": 0.19993483355253225, + "learning_rate": 2.205499193223772e-06, + "loss": 0.051652717590332034, + "step": 135385 + }, + { + "epoch": 1.1706772963484968, + "grad_norm": 9.995289110706457, + "learning_rate": 2.2053027363562418e-06, + "loss": 0.040309906005859375, + "step": 135390 + }, + { + "epoch": 1.17072052987004, + "grad_norm": 6.3922102606064835, + "learning_rate": 2.205106283153785e-06, + "loss": 0.26829833984375, + "step": 135395 + }, + { + "epoch": 1.1707637633915833, + "grad_norm": 0.848889817474031, + "learning_rate": 2.2049098336173073e-06, + "loss": 0.02147369384765625, + "step": 135400 + }, + { + "epoch": 1.1708069969131265, + "grad_norm": 39.650253561616104, + "learning_rate": 2.204713387747716e-06, + "loss": 0.17079572677612304, + "step": 135405 + }, + { + "epoch": 1.1708502304346697, + "grad_norm": 0.7947392454074348, + "learning_rate": 2.2045169455459163e-06, + "loss": 0.0300506591796875, + "step": 135410 + }, + { + "epoch": 1.170893463956213, + "grad_norm": 1.458330715174079, + "learning_rate": 2.204320507012813e-06, + "loss": 0.12550048828125, + "step": 135415 + }, + { + "epoch": 1.1709366974777564, + "grad_norm": 2.0104848674137754, + "learning_rate": 2.204124072149314e-06, + "loss": 0.0192657470703125, + "step": 135420 + }, + { + "epoch": 1.1709799309992996, + "grad_norm": 3.4250863591547946, + "learning_rate": 2.203927640956325e-06, + "loss": 0.023164749145507812, + "step": 135425 + }, + { + "epoch": 1.1710231645208429, + "grad_norm": 23.90844491021141, + "learning_rate": 2.2037312134347505e-06, + "loss": 0.24563217163085938, + "step": 135430 + }, + { + "epoch": 1.171066398042386, + "grad_norm": 16.14939493646493, + "learning_rate": 2.2035347895854976e-06, + "loss": 0.055657958984375, + "step": 135435 + }, + { + "epoch": 1.1711096315639293, + "grad_norm": 0.7005309662118506, + "learning_rate": 2.203338369409472e-06, + "loss": 0.08491058349609375, + "step": 135440 + }, + { + "epoch": 1.1711528650854728, + "grad_norm": 0.4867225222536298, + "learning_rate": 2.2031419529075783e-06, + "loss": 0.040020751953125, + "step": 135445 + }, + { + "epoch": 1.171196098607016, + "grad_norm": 6.330491872943669, + "learning_rate": 2.202945540080725e-06, + "loss": 0.126373291015625, + "step": 135450 + }, + { + "epoch": 1.1712393321285592, + "grad_norm": 1.2338190553084658, + "learning_rate": 2.2027491309298163e-06, + "loss": 0.14784011840820313, + "step": 135455 + }, + { + "epoch": 1.1712825656501025, + "grad_norm": 1.2381569108952166, + "learning_rate": 2.2025527254557573e-06, + "loss": 0.02611846923828125, + "step": 135460 + }, + { + "epoch": 1.1713257991716457, + "grad_norm": 4.134113738533587, + "learning_rate": 2.2023563236594557e-06, + "loss": 0.14800233840942384, + "step": 135465 + }, + { + "epoch": 1.171369032693189, + "grad_norm": 0.2098507463684043, + "learning_rate": 2.202159925541816e-06, + "loss": 0.0700531005859375, + "step": 135470 + }, + { + "epoch": 1.1714122662147322, + "grad_norm": 66.36484893861702, + "learning_rate": 2.2019635311037444e-06, + "loss": 0.22484912872314453, + "step": 135475 + }, + { + "epoch": 1.1714554997362756, + "grad_norm": 7.47457299935105, + "learning_rate": 2.201767140346145e-06, + "loss": 0.15092620849609376, + "step": 135480 + }, + { + "epoch": 1.1714987332578188, + "grad_norm": 8.9251630588575, + "learning_rate": 2.2015707532699264e-06, + "loss": 0.22509231567382812, + "step": 135485 + }, + { + "epoch": 1.171541966779362, + "grad_norm": 4.301135499576143, + "learning_rate": 2.2013743698759925e-06, + "loss": 0.1538726806640625, + "step": 135490 + }, + { + "epoch": 1.1715852003009053, + "grad_norm": 0.4414152896836997, + "learning_rate": 2.2011779901652495e-06, + "loss": 0.14150772094726563, + "step": 135495 + }, + { + "epoch": 1.1716284338224485, + "grad_norm": 15.675639406403914, + "learning_rate": 2.2009816141386036e-06, + "loss": 0.03289108276367188, + "step": 135500 + }, + { + "epoch": 1.1716716673439918, + "grad_norm": 10.30209375023644, + "learning_rate": 2.2007852417969596e-06, + "loss": 0.11135025024414062, + "step": 135505 + }, + { + "epoch": 1.1717149008655352, + "grad_norm": 16.63723981374186, + "learning_rate": 2.2005888731412223e-06, + "loss": 0.11665992736816407, + "step": 135510 + }, + { + "epoch": 1.1717581343870784, + "grad_norm": 9.045459274651284, + "learning_rate": 2.2003925081722997e-06, + "loss": 0.0648101806640625, + "step": 135515 + }, + { + "epoch": 1.1718013679086217, + "grad_norm": 0.2601389188621445, + "learning_rate": 2.2001961468910955e-06, + "loss": 0.171392822265625, + "step": 135520 + }, + { + "epoch": 1.171844601430165, + "grad_norm": 0.39825926885202667, + "learning_rate": 2.1999997892985165e-06, + "loss": 0.010726356506347656, + "step": 135525 + }, + { + "epoch": 1.1718878349517081, + "grad_norm": 0.17146589329190193, + "learning_rate": 2.199803435395468e-06, + "loss": 0.06682167053222657, + "step": 135530 + }, + { + "epoch": 1.1719310684732513, + "grad_norm": 19.323609137266068, + "learning_rate": 2.199607085182855e-06, + "loss": 0.03624725341796875, + "step": 135535 + }, + { + "epoch": 1.1719743019947946, + "grad_norm": 5.308618029017753, + "learning_rate": 2.1994107386615834e-06, + "loss": 0.04222869873046875, + "step": 135540 + }, + { + "epoch": 1.172017535516338, + "grad_norm": 5.379194265409374, + "learning_rate": 2.1992143958325583e-06, + "loss": 0.0556060791015625, + "step": 135545 + }, + { + "epoch": 1.1720607690378813, + "grad_norm": 5.049657268172443, + "learning_rate": 2.199018056696686e-06, + "loss": 0.019141006469726562, + "step": 135550 + }, + { + "epoch": 1.1721040025594245, + "grad_norm": 1.1345484916074453, + "learning_rate": 2.1988217212548717e-06, + "loss": 0.3303852081298828, + "step": 135555 + }, + { + "epoch": 1.1721472360809677, + "grad_norm": 0.173903193849041, + "learning_rate": 2.1986253895080217e-06, + "loss": 0.07509307861328125, + "step": 135560 + }, + { + "epoch": 1.172190469602511, + "grad_norm": 0.8038488229988133, + "learning_rate": 2.1984290614570398e-06, + "loss": 0.04715576171875, + "step": 135565 + }, + { + "epoch": 1.1722337031240542, + "grad_norm": 8.945190604248944, + "learning_rate": 2.1982327371028315e-06, + "loss": 0.081134033203125, + "step": 135570 + }, + { + "epoch": 1.1722769366455976, + "grad_norm": 55.44099361994988, + "learning_rate": 2.198036416446303e-06, + "loss": 0.1900054931640625, + "step": 135575 + }, + { + "epoch": 1.1723201701671409, + "grad_norm": 0.028382314583803695, + "learning_rate": 2.197840099488361e-06, + "loss": 0.01731529235839844, + "step": 135580 + }, + { + "epoch": 1.172363403688684, + "grad_norm": 1.443333614497193, + "learning_rate": 2.197643786229909e-06, + "loss": 0.07301292419433594, + "step": 135585 + }, + { + "epoch": 1.1724066372102273, + "grad_norm": 0.7135766094109041, + "learning_rate": 2.197447476671853e-06, + "loss": 0.016898345947265626, + "step": 135590 + }, + { + "epoch": 1.1724498707317705, + "grad_norm": 0.34790083664098304, + "learning_rate": 2.1972511708150988e-06, + "loss": 0.014599990844726563, + "step": 135595 + }, + { + "epoch": 1.1724931042533138, + "grad_norm": 0.1943976890582348, + "learning_rate": 2.1970548686605505e-06, + "loss": 0.017541885375976562, + "step": 135600 + }, + { + "epoch": 1.172536337774857, + "grad_norm": 0.276689818528895, + "learning_rate": 2.1968585702091133e-06, + "loss": 0.02801647186279297, + "step": 135605 + }, + { + "epoch": 1.1725795712964004, + "grad_norm": 0.5117976843874993, + "learning_rate": 2.196662275461695e-06, + "loss": 0.01919097900390625, + "step": 135610 + }, + { + "epoch": 1.1726228048179437, + "grad_norm": 0.36894440020413716, + "learning_rate": 2.1964659844191986e-06, + "loss": 0.03032073974609375, + "step": 135615 + }, + { + "epoch": 1.172666038339487, + "grad_norm": 1.5327192323361618, + "learning_rate": 2.1962696970825308e-06, + "loss": 0.0162933349609375, + "step": 135620 + }, + { + "epoch": 1.1727092718610301, + "grad_norm": 4.009642781410487, + "learning_rate": 2.196073413452595e-06, + "loss": 0.04615707397460937, + "step": 135625 + }, + { + "epoch": 1.1727525053825734, + "grad_norm": 1.1419098903354123, + "learning_rate": 2.195877133530299e-06, + "loss": 0.20460567474365235, + "step": 135630 + }, + { + "epoch": 1.1727957389041168, + "grad_norm": 0.6021469663139059, + "learning_rate": 2.195680857316546e-06, + "loss": 0.024811553955078124, + "step": 135635 + }, + { + "epoch": 1.17283897242566, + "grad_norm": 55.05054365448391, + "learning_rate": 2.1954845848122406e-06, + "loss": 0.16132965087890624, + "step": 135640 + }, + { + "epoch": 1.1728822059472033, + "grad_norm": 0.12681970744711465, + "learning_rate": 2.1952883160182908e-06, + "loss": 0.11283721923828124, + "step": 135645 + }, + { + "epoch": 1.1729254394687465, + "grad_norm": 0.12957789870633887, + "learning_rate": 2.1950920509355993e-06, + "loss": 0.074267578125, + "step": 135650 + }, + { + "epoch": 1.1729686729902897, + "grad_norm": 3.1523713911547167, + "learning_rate": 2.194895789565072e-06, + "loss": 0.13565750122070314, + "step": 135655 + }, + { + "epoch": 1.173011906511833, + "grad_norm": 4.1098784901612175, + "learning_rate": 2.1946995319076153e-06, + "loss": 0.1263641357421875, + "step": 135660 + }, + { + "epoch": 1.1730551400333762, + "grad_norm": 62.1988671271223, + "learning_rate": 2.194503277964133e-06, + "loss": 0.10404548645019532, + "step": 135665 + }, + { + "epoch": 1.1730983735549196, + "grad_norm": 1.794507739199354, + "learning_rate": 2.1943070277355288e-06, + "loss": 0.04521636962890625, + "step": 135670 + }, + { + "epoch": 1.1731416070764629, + "grad_norm": 4.172874987800248, + "learning_rate": 2.1941107812227107e-06, + "loss": 0.0852264404296875, + "step": 135675 + }, + { + "epoch": 1.173184840598006, + "grad_norm": 4.224593955938796, + "learning_rate": 2.193914538426583e-06, + "loss": 0.026727294921875, + "step": 135680 + }, + { + "epoch": 1.1732280741195493, + "grad_norm": 8.755286096943518, + "learning_rate": 2.1937182993480486e-06, + "loss": 0.04284820556640625, + "step": 135685 + }, + { + "epoch": 1.1732713076410926, + "grad_norm": 6.098971789099543, + "learning_rate": 2.1935220639880156e-06, + "loss": 0.0375762939453125, + "step": 135690 + }, + { + "epoch": 1.1733145411626358, + "grad_norm": 1.75531170772763, + "learning_rate": 2.1933258323473876e-06, + "loss": 0.05203857421875, + "step": 135695 + }, + { + "epoch": 1.1733577746841792, + "grad_norm": 1.3231188709875408, + "learning_rate": 2.1931296044270677e-06, + "loss": 0.12398605346679688, + "step": 135700 + }, + { + "epoch": 1.1734010082057225, + "grad_norm": 0.19322577792941392, + "learning_rate": 2.1929333802279643e-06, + "loss": 0.048903465270996094, + "step": 135705 + }, + { + "epoch": 1.1734442417272657, + "grad_norm": 2.180283069208195, + "learning_rate": 2.1927371597509815e-06, + "loss": 0.07793426513671875, + "step": 135710 + }, + { + "epoch": 1.173487475248809, + "grad_norm": 0.6638845766958363, + "learning_rate": 2.192540942997022e-06, + "loss": 0.084088134765625, + "step": 135715 + }, + { + "epoch": 1.1735307087703521, + "grad_norm": 2.825852406200356, + "learning_rate": 2.1923447299669933e-06, + "loss": 0.029866409301757813, + "step": 135720 + }, + { + "epoch": 1.1735739422918954, + "grad_norm": 33.42054172300076, + "learning_rate": 2.1921485206617995e-06, + "loss": 0.064385986328125, + "step": 135725 + }, + { + "epoch": 1.1736171758134386, + "grad_norm": 4.506620272270105, + "learning_rate": 2.1919523150823454e-06, + "loss": 0.12593650817871094, + "step": 135730 + }, + { + "epoch": 1.173660409334982, + "grad_norm": 0.27711560838708355, + "learning_rate": 2.191756113229534e-06, + "loss": 0.12541465759277343, + "step": 135735 + }, + { + "epoch": 1.1737036428565253, + "grad_norm": 15.04941635853294, + "learning_rate": 2.191559915104274e-06, + "loss": 0.05955924987792969, + "step": 135740 + }, + { + "epoch": 1.1737468763780685, + "grad_norm": 1.505665762739486, + "learning_rate": 2.191363720707467e-06, + "loss": 0.07113876342773437, + "step": 135745 + }, + { + "epoch": 1.1737901098996117, + "grad_norm": 3.6761239474492213, + "learning_rate": 2.19116753004002e-06, + "loss": 0.01864776611328125, + "step": 135750 + }, + { + "epoch": 1.173833343421155, + "grad_norm": 20.710835482899626, + "learning_rate": 2.1909713431028368e-06, + "loss": 0.25810890197753905, + "step": 135755 + }, + { + "epoch": 1.1738765769426982, + "grad_norm": 2.936523025256353, + "learning_rate": 2.190775159896822e-06, + "loss": 0.02280597686767578, + "step": 135760 + }, + { + "epoch": 1.1739198104642417, + "grad_norm": 15.876880250555917, + "learning_rate": 2.19057898042288e-06, + "loss": 0.14376373291015626, + "step": 135765 + }, + { + "epoch": 1.1739630439857849, + "grad_norm": 4.73298017441132, + "learning_rate": 2.190382804681917e-06, + "loss": 0.03303260803222656, + "step": 135770 + }, + { + "epoch": 1.174006277507328, + "grad_norm": 0.4340513441053416, + "learning_rate": 2.1901866326748367e-06, + "loss": 0.0060977935791015625, + "step": 135775 + }, + { + "epoch": 1.1740495110288713, + "grad_norm": 0.6943813577274006, + "learning_rate": 2.1899904644025443e-06, + "loss": 0.03716259002685547, + "step": 135780 + }, + { + "epoch": 1.1740927445504146, + "grad_norm": 3.113259358745574, + "learning_rate": 2.1897942998659446e-06, + "loss": 0.06585693359375, + "step": 135785 + }, + { + "epoch": 1.1741359780719578, + "grad_norm": 2.9323231351207557, + "learning_rate": 2.1895981390659422e-06, + "loss": 0.0482574462890625, + "step": 135790 + }, + { + "epoch": 1.174179211593501, + "grad_norm": 7.52017968913391, + "learning_rate": 2.1894019820034406e-06, + "loss": 0.10164527893066407, + "step": 135795 + }, + { + "epoch": 1.1742224451150445, + "grad_norm": 1.2105824446372526, + "learning_rate": 2.189205828679345e-06, + "loss": 0.17501258850097656, + "step": 135800 + }, + { + "epoch": 1.1742656786365877, + "grad_norm": 0.5624743748917692, + "learning_rate": 2.1890096790945617e-06, + "loss": 0.033751487731933594, + "step": 135805 + }, + { + "epoch": 1.174308912158131, + "grad_norm": 0.8135641770206271, + "learning_rate": 2.188813533249994e-06, + "loss": 0.22757492065429688, + "step": 135810 + }, + { + "epoch": 1.1743521456796742, + "grad_norm": 18.698221229569608, + "learning_rate": 2.188617391146546e-06, + "loss": 0.11321754455566406, + "step": 135815 + }, + { + "epoch": 1.1743953792012174, + "grad_norm": 18.40958374921003, + "learning_rate": 2.188421252785124e-06, + "loss": 0.08443317413330079, + "step": 135820 + }, + { + "epoch": 1.1744386127227608, + "grad_norm": 1.6868558939501657, + "learning_rate": 2.1882251181666304e-06, + "loss": 0.040782928466796875, + "step": 135825 + }, + { + "epoch": 1.174481846244304, + "grad_norm": 0.8873887481826415, + "learning_rate": 2.1880289872919704e-06, + "loss": 0.08815460205078125, + "step": 135830 + }, + { + "epoch": 1.1745250797658473, + "grad_norm": 4.008349609313991, + "learning_rate": 2.1878328601620495e-06, + "loss": 0.023082733154296875, + "step": 135835 + }, + { + "epoch": 1.1745683132873905, + "grad_norm": 4.354264586366272, + "learning_rate": 2.1876367367777726e-06, + "loss": 0.09662628173828125, + "step": 135840 + }, + { + "epoch": 1.1746115468089338, + "grad_norm": 9.32465849935487, + "learning_rate": 2.1874406171400428e-06, + "loss": 0.0544921875, + "step": 135845 + }, + { + "epoch": 1.174654780330477, + "grad_norm": 14.054626712562834, + "learning_rate": 2.1872445012497646e-06, + "loss": 0.2196176528930664, + "step": 135850 + }, + { + "epoch": 1.1746980138520202, + "grad_norm": 2.537919667553755, + "learning_rate": 2.1870483891078435e-06, + "loss": 0.026933670043945312, + "step": 135855 + }, + { + "epoch": 1.1747412473735634, + "grad_norm": 2.3794460628996124, + "learning_rate": 2.186852280715182e-06, + "loss": 0.024271392822265626, + "step": 135860 + }, + { + "epoch": 1.174784480895107, + "grad_norm": 3.2737583914206247, + "learning_rate": 2.1866561760726873e-06, + "loss": 0.022529411315917968, + "step": 135865 + }, + { + "epoch": 1.1748277144166501, + "grad_norm": 2.6476649725579082, + "learning_rate": 2.1864600751812628e-06, + "loss": 0.15770111083984376, + "step": 135870 + }, + { + "epoch": 1.1748709479381934, + "grad_norm": 1.0795566078198513, + "learning_rate": 2.186263978041812e-06, + "loss": 0.057476806640625, + "step": 135875 + }, + { + "epoch": 1.1749141814597366, + "grad_norm": 0.7483751356653224, + "learning_rate": 2.1860678846552394e-06, + "loss": 0.0561279296875, + "step": 135880 + }, + { + "epoch": 1.1749574149812798, + "grad_norm": 13.77108316153658, + "learning_rate": 2.18587179502245e-06, + "loss": 0.03892784118652344, + "step": 135885 + }, + { + "epoch": 1.1750006485028233, + "grad_norm": 0.756713289968042, + "learning_rate": 2.1856757091443488e-06, + "loss": 0.09635391235351562, + "step": 135890 + }, + { + "epoch": 1.1750438820243665, + "grad_norm": 5.951638575622559, + "learning_rate": 2.1854796270218373e-06, + "loss": 0.09505348205566407, + "step": 135895 + }, + { + "epoch": 1.1750871155459097, + "grad_norm": 0.13007138484243413, + "learning_rate": 2.1852835486558234e-06, + "loss": 0.008919906616210938, + "step": 135900 + }, + { + "epoch": 1.175130349067453, + "grad_norm": 10.60814309210817, + "learning_rate": 2.18508747404721e-06, + "loss": 0.1432039260864258, + "step": 135905 + }, + { + "epoch": 1.1751735825889962, + "grad_norm": 0.42818084226029174, + "learning_rate": 2.1848914031969e-06, + "loss": 0.01872882843017578, + "step": 135910 + }, + { + "epoch": 1.1752168161105394, + "grad_norm": 0.2786694914387243, + "learning_rate": 2.1846953361057995e-06, + "loss": 0.0762237548828125, + "step": 135915 + }, + { + "epoch": 1.1752600496320826, + "grad_norm": 10.948334139020945, + "learning_rate": 2.1844992727748127e-06, + "loss": 0.0734466552734375, + "step": 135920 + }, + { + "epoch": 1.175303283153626, + "grad_norm": 48.93428042528682, + "learning_rate": 2.1843032132048415e-06, + "loss": 0.174078369140625, + "step": 135925 + }, + { + "epoch": 1.1753465166751693, + "grad_norm": 35.38112731343612, + "learning_rate": 2.184107157396793e-06, + "loss": 0.11788597106933593, + "step": 135930 + }, + { + "epoch": 1.1753897501967125, + "grad_norm": 0.4714948819949132, + "learning_rate": 2.1839111053515702e-06, + "loss": 0.020959091186523438, + "step": 135935 + }, + { + "epoch": 1.1754329837182558, + "grad_norm": 0.7600230009902272, + "learning_rate": 2.183715057070077e-06, + "loss": 0.09001312255859376, + "step": 135940 + }, + { + "epoch": 1.175476217239799, + "grad_norm": 1.5121379943376572, + "learning_rate": 2.183519012553218e-06, + "loss": 0.029108428955078126, + "step": 135945 + }, + { + "epoch": 1.1755194507613422, + "grad_norm": 0.45230072712361086, + "learning_rate": 2.1833229718018974e-06, + "loss": 0.06507720947265624, + "step": 135950 + }, + { + "epoch": 1.1755626842828857, + "grad_norm": 4.252426943929874, + "learning_rate": 2.1831269348170175e-06, + "loss": 0.026696014404296874, + "step": 135955 + }, + { + "epoch": 1.175605917804429, + "grad_norm": 4.0112549405435125, + "learning_rate": 2.1829309015994855e-06, + "loss": 0.06100616455078125, + "step": 135960 + }, + { + "epoch": 1.1756491513259721, + "grad_norm": 25.748050866909175, + "learning_rate": 2.182734872150204e-06, + "loss": 0.10779857635498047, + "step": 135965 + }, + { + "epoch": 1.1756923848475154, + "grad_norm": 1.6209487926674504, + "learning_rate": 2.182538846470077e-06, + "loss": 0.057513427734375, + "step": 135970 + }, + { + "epoch": 1.1757356183690586, + "grad_norm": 0.3585909563098068, + "learning_rate": 2.1823428245600084e-06, + "loss": 0.0079742431640625, + "step": 135975 + }, + { + "epoch": 1.1757788518906018, + "grad_norm": 2.3084330396184103, + "learning_rate": 2.1821468064209025e-06, + "loss": 0.009328079223632813, + "step": 135980 + }, + { + "epoch": 1.175822085412145, + "grad_norm": 0.1508253747698342, + "learning_rate": 2.1819507920536636e-06, + "loss": 0.09381303787231446, + "step": 135985 + }, + { + "epoch": 1.1758653189336885, + "grad_norm": 0.13379599492325286, + "learning_rate": 2.1817547814591937e-06, + "loss": 0.05394611358642578, + "step": 135990 + }, + { + "epoch": 1.1759085524552317, + "grad_norm": 22.33777687206901, + "learning_rate": 2.1815587746384004e-06, + "loss": 0.07233963012695313, + "step": 135995 + }, + { + "epoch": 1.175951785976775, + "grad_norm": 1.0874525528714303, + "learning_rate": 2.1813627715921846e-06, + "loss": 0.03777618408203125, + "step": 136000 + }, + { + "epoch": 1.1759950194983182, + "grad_norm": 12.630824018202821, + "learning_rate": 2.181166772321452e-06, + "loss": 0.02577037811279297, + "step": 136005 + }, + { + "epoch": 1.1760382530198614, + "grad_norm": 1.8030581340173926, + "learning_rate": 2.180970776827106e-06, + "loss": 0.01457672119140625, + "step": 136010 + }, + { + "epoch": 1.1760814865414047, + "grad_norm": 0.4882717083587967, + "learning_rate": 2.18077478511005e-06, + "loss": 0.06643753051757813, + "step": 136015 + }, + { + "epoch": 1.176124720062948, + "grad_norm": 2.7900838004270687, + "learning_rate": 2.180578797171188e-06, + "loss": 0.016504669189453126, + "step": 136020 + }, + { + "epoch": 1.1761679535844913, + "grad_norm": 3.030039831366421, + "learning_rate": 2.180382813011424e-06, + "loss": 0.019738006591796874, + "step": 136025 + }, + { + "epoch": 1.1762111871060346, + "grad_norm": 12.561278391985697, + "learning_rate": 2.180186832631663e-06, + "loss": 0.123046875, + "step": 136030 + }, + { + "epoch": 1.1762544206275778, + "grad_norm": 0.4022469778093203, + "learning_rate": 2.179990856032808e-06, + "loss": 0.088470458984375, + "step": 136035 + }, + { + "epoch": 1.176297654149121, + "grad_norm": 9.981491307195459, + "learning_rate": 2.179794883215762e-06, + "loss": 0.11034698486328125, + "step": 136040 + }, + { + "epoch": 1.1763408876706642, + "grad_norm": 19.517441399806227, + "learning_rate": 2.1795989141814293e-06, + "loss": 0.04881591796875, + "step": 136045 + }, + { + "epoch": 1.1763841211922075, + "grad_norm": 0.4154829730719481, + "learning_rate": 2.1794029489307142e-06, + "loss": 0.04733734130859375, + "step": 136050 + }, + { + "epoch": 1.176427354713751, + "grad_norm": 2.0621132968449363, + "learning_rate": 2.1792069874645204e-06, + "loss": 0.04402084350585937, + "step": 136055 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 4.973889636740747, + "learning_rate": 2.1790110297837514e-06, + "loss": 0.24368438720703126, + "step": 136060 + }, + { + "epoch": 1.1765138217568374, + "grad_norm": 3.5767821054365636, + "learning_rate": 2.1788150758893113e-06, + "loss": 0.05855846405029297, + "step": 136065 + }, + { + "epoch": 1.1765570552783806, + "grad_norm": 33.83807938733083, + "learning_rate": 2.1786191257821032e-06, + "loss": 0.11605186462402343, + "step": 136070 + }, + { + "epoch": 1.1766002887999238, + "grad_norm": 20.416738616865345, + "learning_rate": 2.178423179463031e-06, + "loss": 0.26545028686523436, + "step": 136075 + }, + { + "epoch": 1.1766435223214673, + "grad_norm": 1.2190980082533962, + "learning_rate": 2.178227236932999e-06, + "loss": 0.087713623046875, + "step": 136080 + }, + { + "epoch": 1.1766867558430105, + "grad_norm": 43.02351208948724, + "learning_rate": 2.1780312981929088e-06, + "loss": 0.08278121948242187, + "step": 136085 + }, + { + "epoch": 1.1767299893645538, + "grad_norm": 47.74692511544248, + "learning_rate": 2.177835363243667e-06, + "loss": 0.28122482299804685, + "step": 136090 + }, + { + "epoch": 1.176773222886097, + "grad_norm": 2.2684733621139173, + "learning_rate": 2.1776394320861763e-06, + "loss": 0.1711639404296875, + "step": 136095 + }, + { + "epoch": 1.1768164564076402, + "grad_norm": 1.504459312048285, + "learning_rate": 2.1774435047213396e-06, + "loss": 0.02997283935546875, + "step": 136100 + }, + { + "epoch": 1.1768596899291834, + "grad_norm": 7.0846962544670795, + "learning_rate": 2.1772475811500597e-06, + "loss": 0.046613311767578124, + "step": 136105 + }, + { + "epoch": 1.1769029234507267, + "grad_norm": 22.646445451723295, + "learning_rate": 2.1770516613732423e-06, + "loss": 0.06699638366699219, + "step": 136110 + }, + { + "epoch": 1.17694615697227, + "grad_norm": 4.016611041607673, + "learning_rate": 2.176855745391789e-06, + "loss": 0.052245712280273436, + "step": 136115 + }, + { + "epoch": 1.1769893904938133, + "grad_norm": 25.498414037848494, + "learning_rate": 2.1766598332066046e-06, + "loss": 0.1215301513671875, + "step": 136120 + }, + { + "epoch": 1.1770326240153566, + "grad_norm": 1.4499967095601618, + "learning_rate": 2.1764639248185932e-06, + "loss": 0.101220703125, + "step": 136125 + }, + { + "epoch": 1.1770758575368998, + "grad_norm": 1.0180116150754142, + "learning_rate": 2.176268020228657e-06, + "loss": 0.0460906982421875, + "step": 136130 + }, + { + "epoch": 1.177119091058443, + "grad_norm": 2.879821324638065, + "learning_rate": 2.176072119437699e-06, + "loss": 0.028746414184570312, + "step": 136135 + }, + { + "epoch": 1.1771623245799863, + "grad_norm": 0.9736554387878877, + "learning_rate": 2.1758762224466245e-06, + "loss": 0.023383331298828126, + "step": 136140 + }, + { + "epoch": 1.1772055581015297, + "grad_norm": 6.002274093528516, + "learning_rate": 2.175680329256336e-06, + "loss": 0.03519439697265625, + "step": 136145 + }, + { + "epoch": 1.177248791623073, + "grad_norm": 1.314894491607286, + "learning_rate": 2.175484439867736e-06, + "loss": 0.06360969543457032, + "step": 136150 + }, + { + "epoch": 1.1772920251446162, + "grad_norm": 0.6147371805877917, + "learning_rate": 2.17528855428173e-06, + "loss": 0.0916229248046875, + "step": 136155 + }, + { + "epoch": 1.1773352586661594, + "grad_norm": 11.76682706396422, + "learning_rate": 2.1750926724992203e-06, + "loss": 0.05531005859375, + "step": 136160 + }, + { + "epoch": 1.1773784921877026, + "grad_norm": 2.913378892437486, + "learning_rate": 2.1748967945211092e-06, + "loss": 0.0222442626953125, + "step": 136165 + }, + { + "epoch": 1.1774217257092459, + "grad_norm": 24.665159085014892, + "learning_rate": 2.1747009203483026e-06, + "loss": 0.08992538452148438, + "step": 136170 + }, + { + "epoch": 1.177464959230789, + "grad_norm": 0.13647082873060393, + "learning_rate": 2.1745050499817024e-06, + "loss": 0.2740201950073242, + "step": 136175 + }, + { + "epoch": 1.1775081927523325, + "grad_norm": 0.45648441344323354, + "learning_rate": 2.17430918342221e-06, + "loss": 0.12256698608398438, + "step": 136180 + }, + { + "epoch": 1.1775514262738758, + "grad_norm": 9.314471152928594, + "learning_rate": 2.174113320670732e-06, + "loss": 0.08104953765869141, + "step": 136185 + }, + { + "epoch": 1.177594659795419, + "grad_norm": 14.763741507315183, + "learning_rate": 2.173917461728171e-06, + "loss": 0.06793670654296875, + "step": 136190 + }, + { + "epoch": 1.1776378933169622, + "grad_norm": 1.4497437112865734, + "learning_rate": 2.173721606595429e-06, + "loss": 0.10117225646972657, + "step": 136195 + }, + { + "epoch": 1.1776811268385055, + "grad_norm": 0.08290094657166688, + "learning_rate": 2.1735257552734103e-06, + "loss": 0.05637283325195312, + "step": 136200 + }, + { + "epoch": 1.1777243603600487, + "grad_norm": 8.353450175061976, + "learning_rate": 2.173329907763018e-06, + "loss": 0.0358428955078125, + "step": 136205 + }, + { + "epoch": 1.1777675938815921, + "grad_norm": 0.30048327790291185, + "learning_rate": 2.1731340640651537e-06, + "loss": 0.3773059844970703, + "step": 136210 + }, + { + "epoch": 1.1778108274031354, + "grad_norm": 2.5494829579143277, + "learning_rate": 2.172938224180723e-06, + "loss": 0.0333221435546875, + "step": 136215 + }, + { + "epoch": 1.1778540609246786, + "grad_norm": 0.26415592281051214, + "learning_rate": 2.172742388110629e-06, + "loss": 0.10937843322753907, + "step": 136220 + }, + { + "epoch": 1.1778972944462218, + "grad_norm": 47.37464280794261, + "learning_rate": 2.1725465558557726e-06, + "loss": 0.20207901000976564, + "step": 136225 + }, + { + "epoch": 1.177940527967765, + "grad_norm": 7.167886858164615, + "learning_rate": 2.172350727417059e-06, + "loss": 0.04692916870117188, + "step": 136230 + }, + { + "epoch": 1.1779837614893083, + "grad_norm": 4.183395875100449, + "learning_rate": 2.1721549027953904e-06, + "loss": 0.04485626220703125, + "step": 136235 + }, + { + "epoch": 1.1780269950108515, + "grad_norm": 1.0700643524714175, + "learning_rate": 2.171959081991671e-06, + "loss": 0.05039901733398437, + "step": 136240 + }, + { + "epoch": 1.178070228532395, + "grad_norm": 17.80574578934326, + "learning_rate": 2.171763265006802e-06, + "loss": 0.05625, + "step": 136245 + }, + { + "epoch": 1.1781134620539382, + "grad_norm": 15.781963427456196, + "learning_rate": 2.1715674518416876e-06, + "loss": 0.19627685546875, + "step": 136250 + }, + { + "epoch": 1.1781566955754814, + "grad_norm": 1.8958954486221489, + "learning_rate": 2.171371642497232e-06, + "loss": 0.0388519287109375, + "step": 136255 + }, + { + "epoch": 1.1781999290970246, + "grad_norm": 0.819697125615935, + "learning_rate": 2.171175836974337e-06, + "loss": 0.023729324340820312, + "step": 136260 + }, + { + "epoch": 1.1782431626185679, + "grad_norm": 0.37785044776699356, + "learning_rate": 2.1709800352739056e-06, + "loss": 0.1805126190185547, + "step": 136265 + }, + { + "epoch": 1.178286396140111, + "grad_norm": 4.794270089593496, + "learning_rate": 2.170784237396841e-06, + "loss": 0.05611724853515625, + "step": 136270 + }, + { + "epoch": 1.1783296296616546, + "grad_norm": 4.829358527427721, + "learning_rate": 2.1705884433440454e-06, + "loss": 0.11441688537597657, + "step": 136275 + }, + { + "epoch": 1.1783728631831978, + "grad_norm": 6.189892080282801, + "learning_rate": 2.1703926531164233e-06, + "loss": 0.03274688720703125, + "step": 136280 + }, + { + "epoch": 1.178416096704741, + "grad_norm": 14.425999940738748, + "learning_rate": 2.170196866714877e-06, + "loss": 0.028591156005859375, + "step": 136285 + }, + { + "epoch": 1.1784593302262842, + "grad_norm": 3.772347140608787, + "learning_rate": 2.1700010841403103e-06, + "loss": 0.0946319580078125, + "step": 136290 + }, + { + "epoch": 1.1785025637478275, + "grad_norm": 3.2090213811632364, + "learning_rate": 2.1698053053936247e-06, + "loss": 0.04427032470703125, + "step": 136295 + }, + { + "epoch": 1.1785457972693707, + "grad_norm": 1.415495550839214, + "learning_rate": 2.169609530475723e-06, + "loss": 0.07074661254882812, + "step": 136300 + }, + { + "epoch": 1.178589030790914, + "grad_norm": 14.894859903969861, + "learning_rate": 2.1694137593875087e-06, + "loss": 0.11403212547302247, + "step": 136305 + }, + { + "epoch": 1.1786322643124574, + "grad_norm": 4.9506147924818915, + "learning_rate": 2.169217992129885e-06, + "loss": 0.1072906494140625, + "step": 136310 + }, + { + "epoch": 1.1786754978340006, + "grad_norm": 6.464680680384122, + "learning_rate": 2.169022228703755e-06, + "loss": 0.16910324096679688, + "step": 136315 + }, + { + "epoch": 1.1787187313555438, + "grad_norm": 4.944719728653225, + "learning_rate": 2.1688264691100208e-06, + "loss": 0.1239837646484375, + "step": 136320 + }, + { + "epoch": 1.178761964877087, + "grad_norm": 4.94528892508745, + "learning_rate": 2.168630713349586e-06, + "loss": 0.024346542358398438, + "step": 136325 + }, + { + "epoch": 1.1788051983986303, + "grad_norm": 5.7430786951544315, + "learning_rate": 2.168434961423352e-06, + "loss": 0.048276519775390624, + "step": 136330 + }, + { + "epoch": 1.1788484319201737, + "grad_norm": 0.5725401079166434, + "learning_rate": 2.168239213332223e-06, + "loss": 0.23799095153808594, + "step": 136335 + }, + { + "epoch": 1.178891665441717, + "grad_norm": 2.365016127486399, + "learning_rate": 2.1680434690771e-06, + "loss": 0.15163040161132812, + "step": 136340 + }, + { + "epoch": 1.1789348989632602, + "grad_norm": 8.076197720771543, + "learning_rate": 2.167847728658889e-06, + "loss": 0.1154815673828125, + "step": 136345 + }, + { + "epoch": 1.1789781324848034, + "grad_norm": 1.7928178568333342, + "learning_rate": 2.16765199207849e-06, + "loss": 0.20245819091796874, + "step": 136350 + }, + { + "epoch": 1.1790213660063467, + "grad_norm": 6.8355753561049415, + "learning_rate": 2.1674562593368064e-06, + "loss": 0.03519134521484375, + "step": 136355 + }, + { + "epoch": 1.17906459952789, + "grad_norm": 8.361481837803556, + "learning_rate": 2.1672605304347407e-06, + "loss": 0.03843374252319336, + "step": 136360 + }, + { + "epoch": 1.1791078330494331, + "grad_norm": 0.2988396290167496, + "learning_rate": 2.1670648053731965e-06, + "loss": 0.09163379669189453, + "step": 136365 + }, + { + "epoch": 1.1791510665709766, + "grad_norm": 6.994795559157086, + "learning_rate": 2.166869084153074e-06, + "loss": 0.01724395751953125, + "step": 136370 + }, + { + "epoch": 1.1791943000925198, + "grad_norm": 22.776326565231123, + "learning_rate": 2.1666733667752796e-06, + "loss": 0.07335205078125, + "step": 136375 + }, + { + "epoch": 1.179237533614063, + "grad_norm": 1.7053860016971154, + "learning_rate": 2.1664776532407137e-06, + "loss": 0.03681488037109375, + "step": 136380 + }, + { + "epoch": 1.1792807671356063, + "grad_norm": 32.63999328582743, + "learning_rate": 2.1662819435502793e-06, + "loss": 0.15687332153320313, + "step": 136385 + }, + { + "epoch": 1.1793240006571495, + "grad_norm": 58.01424832048083, + "learning_rate": 2.166086237704878e-06, + "loss": 0.17108001708984374, + "step": 136390 + }, + { + "epoch": 1.1793672341786927, + "grad_norm": 1.7166228743893697, + "learning_rate": 2.165890535705414e-06, + "loss": 0.0530975341796875, + "step": 136395 + }, + { + "epoch": 1.1794104677002362, + "grad_norm": 0.13507068061889582, + "learning_rate": 2.165694837552788e-06, + "loss": 0.172320556640625, + "step": 136400 + }, + { + "epoch": 1.1794537012217794, + "grad_norm": 22.08091228846898, + "learning_rate": 2.165499143247905e-06, + "loss": 0.058489227294921876, + "step": 136405 + }, + { + "epoch": 1.1794969347433226, + "grad_norm": 0.10809712032963739, + "learning_rate": 2.1653034527916663e-06, + "loss": 0.019968032836914062, + "step": 136410 + }, + { + "epoch": 1.1795401682648659, + "grad_norm": 3.8723652686131445, + "learning_rate": 2.165107766184974e-06, + "loss": 0.08487014770507813, + "step": 136415 + }, + { + "epoch": 1.179583401786409, + "grad_norm": 3.8286902882546996, + "learning_rate": 2.1649120834287305e-06, + "loss": 0.03329315185546875, + "step": 136420 + }, + { + "epoch": 1.1796266353079523, + "grad_norm": 0.9243040697779685, + "learning_rate": 2.1647164045238395e-06, + "loss": 0.117962646484375, + "step": 136425 + }, + { + "epoch": 1.1796698688294955, + "grad_norm": 21.02748355668387, + "learning_rate": 2.164520729471202e-06, + "loss": 0.31047782897949217, + "step": 136430 + }, + { + "epoch": 1.179713102351039, + "grad_norm": 1.0796102992355192, + "learning_rate": 2.1643250582717204e-06, + "loss": 0.0076732635498046875, + "step": 136435 + }, + { + "epoch": 1.1797563358725822, + "grad_norm": 0.4926092170257662, + "learning_rate": 2.164129390926299e-06, + "loss": 0.22415733337402344, + "step": 136440 + }, + { + "epoch": 1.1797995693941254, + "grad_norm": 41.19633531608077, + "learning_rate": 2.1639337274358385e-06, + "loss": 0.19427108764648438, + "step": 136445 + }, + { + "epoch": 1.1798428029156687, + "grad_norm": 15.221336666075835, + "learning_rate": 2.163738067801242e-06, + "loss": 0.05867900848388672, + "step": 136450 + }, + { + "epoch": 1.179886036437212, + "grad_norm": 39.05702616857593, + "learning_rate": 2.163542412023412e-06, + "loss": 0.2541961669921875, + "step": 136455 + }, + { + "epoch": 1.1799292699587551, + "grad_norm": 4.306777506727566, + "learning_rate": 2.1633467601032503e-06, + "loss": 0.0284881591796875, + "step": 136460 + }, + { + "epoch": 1.1799725034802986, + "grad_norm": 33.6133963766939, + "learning_rate": 2.163151112041658e-06, + "loss": 0.17840423583984374, + "step": 136465 + }, + { + "epoch": 1.1800157370018418, + "grad_norm": 0.6408600879098452, + "learning_rate": 2.1629554678395403e-06, + "loss": 0.008979034423828126, + "step": 136470 + }, + { + "epoch": 1.180058970523385, + "grad_norm": 1.7242754273818715, + "learning_rate": 2.162759827497797e-06, + "loss": 0.14636878967285155, + "step": 136475 + }, + { + "epoch": 1.1801022040449283, + "grad_norm": 2.346632508866449, + "learning_rate": 2.1625641910173325e-06, + "loss": 0.034088134765625, + "step": 136480 + }, + { + "epoch": 1.1801454375664715, + "grad_norm": 3.2632050368505428, + "learning_rate": 2.1623685583990477e-06, + "loss": 0.02151641845703125, + "step": 136485 + }, + { + "epoch": 1.1801886710880147, + "grad_norm": 3.2433192491089975, + "learning_rate": 2.162172929643845e-06, + "loss": 0.0394927978515625, + "step": 136490 + }, + { + "epoch": 1.180231904609558, + "grad_norm": 0.6447582001114774, + "learning_rate": 2.1619773047526265e-06, + "loss": 0.04859027862548828, + "step": 136495 + }, + { + "epoch": 1.1802751381311014, + "grad_norm": 4.691412630271948, + "learning_rate": 2.1617816837262943e-06, + "loss": 0.24203052520751953, + "step": 136500 + }, + { + "epoch": 1.1803183716526446, + "grad_norm": 0.7024666180898387, + "learning_rate": 2.161586066565751e-06, + "loss": 0.1169778823852539, + "step": 136505 + }, + { + "epoch": 1.1803616051741879, + "grad_norm": 1.3364598192601187, + "learning_rate": 2.161390453271899e-06, + "loss": 0.017483997344970702, + "step": 136510 + }, + { + "epoch": 1.180404838695731, + "grad_norm": 0.9287818055722843, + "learning_rate": 2.1611948438456406e-06, + "loss": 0.054282760620117186, + "step": 136515 + }, + { + "epoch": 1.1804480722172743, + "grad_norm": 0.7875222552620307, + "learning_rate": 2.1609992382878774e-06, + "loss": 0.14098892211914063, + "step": 136520 + }, + { + "epoch": 1.1804913057388176, + "grad_norm": 6.350215030434358, + "learning_rate": 2.1608036365995106e-06, + "loss": 0.031023406982421876, + "step": 136525 + }, + { + "epoch": 1.180534539260361, + "grad_norm": 0.8538071823617713, + "learning_rate": 2.1606080387814436e-06, + "loss": 0.06092414855957031, + "step": 136530 + }, + { + "epoch": 1.1805777727819042, + "grad_norm": 6.16239614645737, + "learning_rate": 2.160412444834578e-06, + "loss": 0.020355033874511718, + "step": 136535 + }, + { + "epoch": 1.1806210063034475, + "grad_norm": 0.15409054977267939, + "learning_rate": 2.160216854759817e-06, + "loss": 0.025872039794921874, + "step": 136540 + }, + { + "epoch": 1.1806642398249907, + "grad_norm": 11.921061946181487, + "learning_rate": 2.1600212685580614e-06, + "loss": 0.21059112548828124, + "step": 136545 + }, + { + "epoch": 1.180707473346534, + "grad_norm": 3.122732493246429, + "learning_rate": 2.1598256862302137e-06, + "loss": 0.01569643020629883, + "step": 136550 + }, + { + "epoch": 1.1807507068680771, + "grad_norm": 0.17874679685379632, + "learning_rate": 2.159630107777175e-06, + "loss": 0.009689712524414062, + "step": 136555 + }, + { + "epoch": 1.1807939403896204, + "grad_norm": 1.0013778338974413, + "learning_rate": 2.1594345331998475e-06, + "loss": 0.149957275390625, + "step": 136560 + }, + { + "epoch": 1.1808371739111638, + "grad_norm": 13.438023514070018, + "learning_rate": 2.1592389624991337e-06, + "loss": 0.09857101440429687, + "step": 136565 + }, + { + "epoch": 1.180880407432707, + "grad_norm": 0.09198660544051841, + "learning_rate": 2.159043395675937e-06, + "loss": 0.011076927185058594, + "step": 136570 + }, + { + "epoch": 1.1809236409542503, + "grad_norm": 5.728634838407282, + "learning_rate": 2.1588478327311566e-06, + "loss": 0.020623397827148438, + "step": 136575 + }, + { + "epoch": 1.1809668744757935, + "grad_norm": 23.186073643239936, + "learning_rate": 2.158652273665697e-06, + "loss": 0.139208984375, + "step": 136580 + }, + { + "epoch": 1.1810101079973367, + "grad_norm": 11.194280322779594, + "learning_rate": 2.158456718480457e-06, + "loss": 0.08724517822265625, + "step": 136585 + }, + { + "epoch": 1.1810533415188802, + "grad_norm": 3.8127969401995845, + "learning_rate": 2.1582611671763417e-06, + "loss": 0.01722869873046875, + "step": 136590 + }, + { + "epoch": 1.1810965750404234, + "grad_norm": 0.448675272006371, + "learning_rate": 2.15806561975425e-06, + "loss": 0.012467765808105468, + "step": 136595 + }, + { + "epoch": 1.1811398085619667, + "grad_norm": 1.5196566644836917, + "learning_rate": 2.1578700762150862e-06, + "loss": 0.03192596435546875, + "step": 136600 + }, + { + "epoch": 1.1811830420835099, + "grad_norm": 0.6439247360219893, + "learning_rate": 2.157674536559751e-06, + "loss": 0.0767303466796875, + "step": 136605 + }, + { + "epoch": 1.181226275605053, + "grad_norm": 2.509082288910682, + "learning_rate": 2.1574790007891473e-06, + "loss": 0.4013633728027344, + "step": 136610 + }, + { + "epoch": 1.1812695091265963, + "grad_norm": 1.4093857398616267, + "learning_rate": 2.157283468904175e-06, + "loss": 0.10367279052734375, + "step": 136615 + }, + { + "epoch": 1.1813127426481396, + "grad_norm": 1.0467779480200208, + "learning_rate": 2.157087940905737e-06, + "loss": 0.05435447692871094, + "step": 136620 + }, + { + "epoch": 1.181355976169683, + "grad_norm": 0.7418903728325985, + "learning_rate": 2.1568924167947347e-06, + "loss": 0.11711063385009765, + "step": 136625 + }, + { + "epoch": 1.1813992096912262, + "grad_norm": 1.7192485428626283, + "learning_rate": 2.15669689657207e-06, + "loss": 0.04070415496826172, + "step": 136630 + }, + { + "epoch": 1.1814424432127695, + "grad_norm": 0.5037516877223672, + "learning_rate": 2.1565013802386455e-06, + "loss": 0.01109771728515625, + "step": 136635 + }, + { + "epoch": 1.1814856767343127, + "grad_norm": 51.33801394407994, + "learning_rate": 2.1563058677953625e-06, + "loss": 0.14928741455078126, + "step": 136640 + }, + { + "epoch": 1.181528910255856, + "grad_norm": 0.2956216859819918, + "learning_rate": 2.156110359243121e-06, + "loss": 0.039961624145507815, + "step": 136645 + }, + { + "epoch": 1.1815721437773992, + "grad_norm": 1.7752786313147568, + "learning_rate": 2.155914854582825e-06, + "loss": 0.0517486572265625, + "step": 136650 + }, + { + "epoch": 1.1816153772989426, + "grad_norm": 0.6238602011566858, + "learning_rate": 2.1557193538153736e-06, + "loss": 0.04334716796875, + "step": 136655 + }, + { + "epoch": 1.1816586108204858, + "grad_norm": 17.169954610452276, + "learning_rate": 2.155523856941671e-06, + "loss": 0.03969020843505859, + "step": 136660 + }, + { + "epoch": 1.181701844342029, + "grad_norm": 1.1441389570697666, + "learning_rate": 2.1553283639626186e-06, + "loss": 0.09718399047851563, + "step": 136665 + }, + { + "epoch": 1.1817450778635723, + "grad_norm": 12.561684338331917, + "learning_rate": 2.1551328748791155e-06, + "loss": 0.13990325927734376, + "step": 136670 + }, + { + "epoch": 1.1817883113851155, + "grad_norm": 2.9715852894651067, + "learning_rate": 2.154937389692066e-06, + "loss": 0.06011772155761719, + "step": 136675 + }, + { + "epoch": 1.1818315449066588, + "grad_norm": 4.542533967450781, + "learning_rate": 2.1547419084023706e-06, + "loss": 0.03621330261230469, + "step": 136680 + }, + { + "epoch": 1.181874778428202, + "grad_norm": 0.01654486415961691, + "learning_rate": 2.154546431010931e-06, + "loss": 0.04509658813476562, + "step": 136685 + }, + { + "epoch": 1.1819180119497454, + "grad_norm": 0.1330640944807108, + "learning_rate": 2.154350957518647e-06, + "loss": 0.024353790283203124, + "step": 136690 + }, + { + "epoch": 1.1819612454712887, + "grad_norm": 1.3106228547287895, + "learning_rate": 2.154155487926423e-06, + "loss": 0.0321929931640625, + "step": 136695 + }, + { + "epoch": 1.182004478992832, + "grad_norm": 4.5320644462674915, + "learning_rate": 2.1539600222351584e-06, + "loss": 0.024754714965820313, + "step": 136700 + }, + { + "epoch": 1.1820477125143751, + "grad_norm": 14.695968314009654, + "learning_rate": 2.153764560445756e-06, + "loss": 0.0462432861328125, + "step": 136705 + }, + { + "epoch": 1.1820909460359184, + "grad_norm": 0.05581606702105669, + "learning_rate": 2.153569102559117e-06, + "loss": 0.2228445053100586, + "step": 136710 + }, + { + "epoch": 1.1821341795574616, + "grad_norm": 0.7148735019513953, + "learning_rate": 2.153373648576142e-06, + "loss": 0.003436279296875, + "step": 136715 + }, + { + "epoch": 1.182177413079005, + "grad_norm": 1.134272987813662, + "learning_rate": 2.1531781984977325e-06, + "loss": 0.03503265380859375, + "step": 136720 + }, + { + "epoch": 1.1822206466005483, + "grad_norm": 5.8024899827275975, + "learning_rate": 2.152982752324791e-06, + "loss": 0.032043838500976564, + "step": 136725 + }, + { + "epoch": 1.1822638801220915, + "grad_norm": 4.993229992555818, + "learning_rate": 2.1527873100582177e-06, + "loss": 0.01417388916015625, + "step": 136730 + }, + { + "epoch": 1.1823071136436347, + "grad_norm": 4.365967059117303, + "learning_rate": 2.152591871698915e-06, + "loss": 0.02764739990234375, + "step": 136735 + }, + { + "epoch": 1.182350347165178, + "grad_norm": 4.549407938944957, + "learning_rate": 2.152396437247784e-06, + "loss": 0.010919189453125, + "step": 136740 + }, + { + "epoch": 1.1823935806867212, + "grad_norm": 5.429082206625261, + "learning_rate": 2.1522010067057254e-06, + "loss": 0.17424774169921875, + "step": 136745 + }, + { + "epoch": 1.1824368142082644, + "grad_norm": 0.8633833831523025, + "learning_rate": 2.1520055800736396e-06, + "loss": 0.04385986328125, + "step": 136750 + }, + { + "epoch": 1.1824800477298079, + "grad_norm": 0.5267796314893158, + "learning_rate": 2.1518101573524307e-06, + "loss": 0.02272186279296875, + "step": 136755 + }, + { + "epoch": 1.182523281251351, + "grad_norm": 5.774368946545794, + "learning_rate": 2.1516147385429974e-06, + "loss": 0.04107513427734375, + "step": 136760 + }, + { + "epoch": 1.1825665147728943, + "grad_norm": 3.4930524697867016, + "learning_rate": 2.1514193236462427e-06, + "loss": 0.07230300903320312, + "step": 136765 + }, + { + "epoch": 1.1826097482944375, + "grad_norm": 4.535774997993125, + "learning_rate": 2.1512239126630673e-06, + "loss": 0.0854248046875, + "step": 136770 + }, + { + "epoch": 1.1826529818159808, + "grad_norm": 1.0910291656980322, + "learning_rate": 2.151028505594372e-06, + "loss": 0.06272029876708984, + "step": 136775 + }, + { + "epoch": 1.1826962153375242, + "grad_norm": 23.598828045018063, + "learning_rate": 2.1508331024410575e-06, + "loss": 0.2534446716308594, + "step": 136780 + }, + { + "epoch": 1.1827394488590675, + "grad_norm": 0.3004969819017617, + "learning_rate": 2.150637703204026e-06, + "loss": 0.02914276123046875, + "step": 136785 + }, + { + "epoch": 1.1827826823806107, + "grad_norm": 1.401243686675638, + "learning_rate": 2.1504423078841783e-06, + "loss": 0.168536376953125, + "step": 136790 + }, + { + "epoch": 1.182825915902154, + "grad_norm": 10.600951279395586, + "learning_rate": 2.1502469164824163e-06, + "loss": 0.09454765319824218, + "step": 136795 + }, + { + "epoch": 1.1828691494236971, + "grad_norm": 6.41799146336082, + "learning_rate": 2.1500515289996402e-06, + "loss": 0.0608245849609375, + "step": 136800 + }, + { + "epoch": 1.1829123829452404, + "grad_norm": 0.4679313764987, + "learning_rate": 2.1498561454367514e-06, + "loss": 0.056102943420410153, + "step": 136805 + }, + { + "epoch": 1.1829556164667836, + "grad_norm": 2.134738201665819, + "learning_rate": 2.14966076579465e-06, + "loss": 0.21866073608398437, + "step": 136810 + }, + { + "epoch": 1.1829988499883268, + "grad_norm": 27.641912817160215, + "learning_rate": 2.1494653900742386e-06, + "loss": 0.20996551513671874, + "step": 136815 + }, + { + "epoch": 1.1830420835098703, + "grad_norm": 1.6678879293692537, + "learning_rate": 2.1492700182764175e-06, + "loss": 0.017317962646484376, + "step": 136820 + }, + { + "epoch": 1.1830853170314135, + "grad_norm": 16.059714501227123, + "learning_rate": 2.1490746504020883e-06, + "loss": 0.06313629150390625, + "step": 136825 + }, + { + "epoch": 1.1831285505529567, + "grad_norm": 3.627317986401713, + "learning_rate": 2.148879286452152e-06, + "loss": 0.019886016845703125, + "step": 136830 + }, + { + "epoch": 1.1831717840745, + "grad_norm": 2.7366785499228183, + "learning_rate": 2.1486839264275087e-06, + "loss": 0.143280029296875, + "step": 136835 + }, + { + "epoch": 1.1832150175960432, + "grad_norm": 1.058094685390683, + "learning_rate": 2.1484885703290597e-06, + "loss": 0.00976104736328125, + "step": 136840 + }, + { + "epoch": 1.1832582511175866, + "grad_norm": 0.10475571319044151, + "learning_rate": 2.148293218157707e-06, + "loss": 0.011181259155273437, + "step": 136845 + }, + { + "epoch": 1.1833014846391299, + "grad_norm": 0.3854035802545965, + "learning_rate": 2.148097869914349e-06, + "loss": 0.0686197280883789, + "step": 136850 + }, + { + "epoch": 1.183344718160673, + "grad_norm": 1.7278234968419726, + "learning_rate": 2.14790252559989e-06, + "loss": 0.06700286865234376, + "step": 136855 + }, + { + "epoch": 1.1833879516822163, + "grad_norm": 4.930793047777584, + "learning_rate": 2.147707185215229e-06, + "loss": 0.31807861328125, + "step": 136860 + }, + { + "epoch": 1.1834311852037596, + "grad_norm": 30.474588321031835, + "learning_rate": 2.147511848761267e-06, + "loss": 0.18791236877441406, + "step": 136865 + }, + { + "epoch": 1.1834744187253028, + "grad_norm": 1.281094465024427, + "learning_rate": 2.1473165162389047e-06, + "loss": 0.04571609497070313, + "step": 136870 + }, + { + "epoch": 1.183517652246846, + "grad_norm": 2.8049229901552133, + "learning_rate": 2.1471211876490433e-06, + "loss": 0.152480411529541, + "step": 136875 + }, + { + "epoch": 1.1835608857683895, + "grad_norm": 4.349119801372064, + "learning_rate": 2.146925862992583e-06, + "loss": 0.025677490234375, + "step": 136880 + }, + { + "epoch": 1.1836041192899327, + "grad_norm": 3.1469440558950295, + "learning_rate": 2.146730542270426e-06, + "loss": 0.022141265869140624, + "step": 136885 + }, + { + "epoch": 1.183647352811476, + "grad_norm": 14.7683109342428, + "learning_rate": 2.1465352254834727e-06, + "loss": 0.14005889892578124, + "step": 136890 + }, + { + "epoch": 1.1836905863330192, + "grad_norm": 0.07186595986544017, + "learning_rate": 2.146339912632623e-06, + "loss": 0.03420944213867187, + "step": 136895 + }, + { + "epoch": 1.1837338198545624, + "grad_norm": 5.427289632426695, + "learning_rate": 2.146144603718779e-06, + "loss": 0.023936080932617187, + "step": 136900 + }, + { + "epoch": 1.1837770533761056, + "grad_norm": 3.566298711550155, + "learning_rate": 2.1459492987428402e-06, + "loss": 0.015008544921875, + "step": 136905 + }, + { + "epoch": 1.183820286897649, + "grad_norm": 34.81004933421178, + "learning_rate": 2.1457539977057065e-06, + "loss": 0.09953155517578124, + "step": 136910 + }, + { + "epoch": 1.1838635204191923, + "grad_norm": 1.4187471257433026, + "learning_rate": 2.1455587006082814e-06, + "loss": 0.0395172119140625, + "step": 136915 + }, + { + "epoch": 1.1839067539407355, + "grad_norm": 27.884990071661168, + "learning_rate": 2.145363407451464e-06, + "loss": 0.5713644981384277, + "step": 136920 + }, + { + "epoch": 1.1839499874622788, + "grad_norm": 44.69511348364024, + "learning_rate": 2.1451681182361545e-06, + "loss": 0.3834503173828125, + "step": 136925 + }, + { + "epoch": 1.183993220983822, + "grad_norm": 1.2595529280880788, + "learning_rate": 2.1449728329632547e-06, + "loss": 0.04056625366210938, + "step": 136930 + }, + { + "epoch": 1.1840364545053652, + "grad_norm": 0.08993612357632798, + "learning_rate": 2.1447775516336647e-06, + "loss": 0.09247512817382812, + "step": 136935 + }, + { + "epoch": 1.1840796880269084, + "grad_norm": 0.25118987635355866, + "learning_rate": 2.1445822742482845e-06, + "loss": 0.012154388427734374, + "step": 136940 + }, + { + "epoch": 1.184122921548452, + "grad_norm": 4.83892200395154, + "learning_rate": 2.1443870008080144e-06, + "loss": 0.023288726806640625, + "step": 136945 + }, + { + "epoch": 1.1841661550699951, + "grad_norm": 6.93943063402155, + "learning_rate": 2.1441917313137574e-06, + "loss": 0.015662765502929686, + "step": 136950 + }, + { + "epoch": 1.1842093885915383, + "grad_norm": 2.2652473656777405, + "learning_rate": 2.1439964657664114e-06, + "loss": 0.05804443359375, + "step": 136955 + }, + { + "epoch": 1.1842526221130816, + "grad_norm": 1.94578336112189, + "learning_rate": 2.143801204166879e-06, + "loss": 0.004749870300292969, + "step": 136960 + }, + { + "epoch": 1.1842958556346248, + "grad_norm": 4.336595122702513, + "learning_rate": 2.1436059465160598e-06, + "loss": 0.05752887725830078, + "step": 136965 + }, + { + "epoch": 1.184339089156168, + "grad_norm": 17.32503747907069, + "learning_rate": 2.143410692814854e-06, + "loss": 0.1298431396484375, + "step": 136970 + }, + { + "epoch": 1.1843823226777115, + "grad_norm": 5.601687467896195, + "learning_rate": 2.1432154430641614e-06, + "loss": 0.07754898071289062, + "step": 136975 + }, + { + "epoch": 1.1844255561992547, + "grad_norm": 8.321931031195396, + "learning_rate": 2.143020197264885e-06, + "loss": 0.12086772918701172, + "step": 136980 + }, + { + "epoch": 1.184468789720798, + "grad_norm": 3.034973701795758, + "learning_rate": 2.1428249554179227e-06, + "loss": 0.27593460083007815, + "step": 136985 + }, + { + "epoch": 1.1845120232423412, + "grad_norm": 11.10600293486134, + "learning_rate": 2.1426297175241768e-06, + "loss": 0.04179229736328125, + "step": 136990 + }, + { + "epoch": 1.1845552567638844, + "grad_norm": 2.387834538669062, + "learning_rate": 2.1424344835845466e-06, + "loss": 0.020232391357421876, + "step": 136995 + }, + { + "epoch": 1.1845984902854276, + "grad_norm": 0.18426787624535765, + "learning_rate": 2.1422392535999327e-06, + "loss": 0.014127349853515625, + "step": 137000 + }, + { + "epoch": 1.1846417238069709, + "grad_norm": 0.2949835790538636, + "learning_rate": 2.142044027571235e-06, + "loss": 0.06693592071533203, + "step": 137005 + }, + { + "epoch": 1.1846849573285143, + "grad_norm": 0.681064687118617, + "learning_rate": 2.141848805499355e-06, + "loss": 0.038551712036132814, + "step": 137010 + }, + { + "epoch": 1.1847281908500575, + "grad_norm": 12.03628534710598, + "learning_rate": 2.141653587385192e-06, + "loss": 0.2519031524658203, + "step": 137015 + }, + { + "epoch": 1.1847714243716008, + "grad_norm": 0.16074680263808322, + "learning_rate": 2.1414583732296474e-06, + "loss": 0.0064280986785888675, + "step": 137020 + }, + { + "epoch": 1.184814657893144, + "grad_norm": 2.28329464300501, + "learning_rate": 2.1412631630336213e-06, + "loss": 0.018253326416015625, + "step": 137025 + }, + { + "epoch": 1.1848578914146872, + "grad_norm": 7.124391979004687, + "learning_rate": 2.141067956798013e-06, + "loss": 0.06789360046386719, + "step": 137030 + }, + { + "epoch": 1.1849011249362307, + "grad_norm": 1.825086565274103, + "learning_rate": 2.140872754523723e-06, + "loss": 0.08690032958984376, + "step": 137035 + }, + { + "epoch": 1.184944358457774, + "grad_norm": 12.079539176984746, + "learning_rate": 2.1406775562116524e-06, + "loss": 0.05164642333984375, + "step": 137040 + }, + { + "epoch": 1.1849875919793171, + "grad_norm": 27.74030985528184, + "learning_rate": 2.140482361862701e-06, + "loss": 0.386260986328125, + "step": 137045 + }, + { + "epoch": 1.1850308255008604, + "grad_norm": 23.413681044676377, + "learning_rate": 2.140287171477769e-06, + "loss": 0.053958988189697264, + "step": 137050 + }, + { + "epoch": 1.1850740590224036, + "grad_norm": 90.0075955542109, + "learning_rate": 2.1400919850577567e-06, + "loss": 0.3309051513671875, + "step": 137055 + }, + { + "epoch": 1.1851172925439468, + "grad_norm": 4.205869312346468, + "learning_rate": 2.1398968026035644e-06, + "loss": 0.057592010498046874, + "step": 137060 + }, + { + "epoch": 1.18516052606549, + "grad_norm": 0.34937137342334146, + "learning_rate": 2.1397016241160914e-06, + "loss": 0.016500091552734374, + "step": 137065 + }, + { + "epoch": 1.1852037595870333, + "grad_norm": 52.61697691560923, + "learning_rate": 2.139506449596238e-06, + "loss": 0.10601329803466797, + "step": 137070 + }, + { + "epoch": 1.1852469931085767, + "grad_norm": 2.422968634254911, + "learning_rate": 2.139311279044906e-06, + "loss": 0.02037811279296875, + "step": 137075 + }, + { + "epoch": 1.18529022663012, + "grad_norm": 1.5529352181438902, + "learning_rate": 2.139116112462994e-06, + "loss": 0.08039741516113282, + "step": 137080 + }, + { + "epoch": 1.1853334601516632, + "grad_norm": 6.141964724701772, + "learning_rate": 2.138920949851403e-06, + "loss": 0.02557220458984375, + "step": 137085 + }, + { + "epoch": 1.1853766936732064, + "grad_norm": 8.26204797203064, + "learning_rate": 2.138725791211032e-06, + "loss": 0.05674285888671875, + "step": 137090 + }, + { + "epoch": 1.1854199271947496, + "grad_norm": 0.8160646529746266, + "learning_rate": 2.138530636542781e-06, + "loss": 0.035889053344726564, + "step": 137095 + }, + { + "epoch": 1.185463160716293, + "grad_norm": 32.202677007971154, + "learning_rate": 2.138335485847551e-06, + "loss": 0.15253639221191406, + "step": 137100 + }, + { + "epoch": 1.1855063942378363, + "grad_norm": 40.8388507069326, + "learning_rate": 2.1381403391262408e-06, + "loss": 0.3540149688720703, + "step": 137105 + }, + { + "epoch": 1.1855496277593796, + "grad_norm": 4.689151459306824, + "learning_rate": 2.1379451963797523e-06, + "loss": 0.057331085205078125, + "step": 137110 + }, + { + "epoch": 1.1855928612809228, + "grad_norm": 31.953709160263326, + "learning_rate": 2.137750057608984e-06, + "loss": 0.09087944030761719, + "step": 137115 + }, + { + "epoch": 1.185636094802466, + "grad_norm": 52.52071470330433, + "learning_rate": 2.137554922814836e-06, + "loss": 0.08621063232421874, + "step": 137120 + }, + { + "epoch": 1.1856793283240092, + "grad_norm": 5.746046987277402, + "learning_rate": 2.1373597919982093e-06, + "loss": 0.033138275146484375, + "step": 137125 + }, + { + "epoch": 1.1857225618455525, + "grad_norm": 9.934616066147195, + "learning_rate": 2.137164665160003e-06, + "loss": 0.046882247924804686, + "step": 137130 + }, + { + "epoch": 1.185765795367096, + "grad_norm": 0.5739962658291007, + "learning_rate": 2.1369695423011154e-06, + "loss": 0.10555953979492187, + "step": 137135 + }, + { + "epoch": 1.1858090288886391, + "grad_norm": 20.500942423273422, + "learning_rate": 2.1367744234224493e-06, + "loss": 0.04649810791015625, + "step": 137140 + }, + { + "epoch": 1.1858522624101824, + "grad_norm": 4.0434219076073905, + "learning_rate": 2.136579308524904e-06, + "loss": 0.12581634521484375, + "step": 137145 + }, + { + "epoch": 1.1858954959317256, + "grad_norm": 0.6537438579118504, + "learning_rate": 2.1363841976093773e-06, + "loss": 0.02999725341796875, + "step": 137150 + }, + { + "epoch": 1.1859387294532688, + "grad_norm": 2.431038969408564, + "learning_rate": 2.1361890906767713e-06, + "loss": 0.23019866943359374, + "step": 137155 + }, + { + "epoch": 1.185981962974812, + "grad_norm": 1.2624927994481459, + "learning_rate": 2.1359939877279844e-06, + "loss": 0.02892303466796875, + "step": 137160 + }, + { + "epoch": 1.1860251964963555, + "grad_norm": 25.984582247814885, + "learning_rate": 2.1357988887639163e-06, + "loss": 0.3334201812744141, + "step": 137165 + }, + { + "epoch": 1.1860684300178987, + "grad_norm": 3.3293495148850223, + "learning_rate": 2.1356037937854677e-06, + "loss": 0.05373382568359375, + "step": 137170 + }, + { + "epoch": 1.186111663539442, + "grad_norm": 0.8688116739717535, + "learning_rate": 2.135408702793539e-06, + "loss": 0.041123580932617185, + "step": 137175 + }, + { + "epoch": 1.1861548970609852, + "grad_norm": 0.26916002250309307, + "learning_rate": 2.1352136157890276e-06, + "loss": 0.18870849609375, + "step": 137180 + }, + { + "epoch": 1.1861981305825284, + "grad_norm": 2.8161999145366337, + "learning_rate": 2.1350185327728356e-06, + "loss": 0.013839340209960938, + "step": 137185 + }, + { + "epoch": 1.1862413641040717, + "grad_norm": 4.688389928547216, + "learning_rate": 2.1348234537458614e-06, + "loss": 0.07870330810546874, + "step": 137190 + }, + { + "epoch": 1.186284597625615, + "grad_norm": 1.406456205064578, + "learning_rate": 2.1346283787090056e-06, + "loss": 0.10125274658203125, + "step": 137195 + }, + { + "epoch": 1.1863278311471583, + "grad_norm": 21.04081026199601, + "learning_rate": 2.134433307663166e-06, + "loss": 0.07392120361328125, + "step": 137200 + }, + { + "epoch": 1.1863710646687016, + "grad_norm": 5.095990247888941, + "learning_rate": 2.134238240609244e-06, + "loss": 0.07143478393554688, + "step": 137205 + }, + { + "epoch": 1.1864142981902448, + "grad_norm": 2.281301145029493, + "learning_rate": 2.1340431775481384e-06, + "loss": 0.13701171875, + "step": 137210 + }, + { + "epoch": 1.186457531711788, + "grad_norm": 7.997680148938295, + "learning_rate": 2.13384811848075e-06, + "loss": 0.3999660491943359, + "step": 137215 + }, + { + "epoch": 1.1865007652333313, + "grad_norm": 0.88916598718675, + "learning_rate": 2.1336530634079773e-06, + "loss": 0.144207763671875, + "step": 137220 + }, + { + "epoch": 1.1865439987548745, + "grad_norm": 0.980024169717198, + "learning_rate": 2.13345801233072e-06, + "loss": 0.18268928527832032, + "step": 137225 + }, + { + "epoch": 1.186587232276418, + "grad_norm": 12.81127941236662, + "learning_rate": 2.133262965249877e-06, + "loss": 0.02723979949951172, + "step": 137230 + }, + { + "epoch": 1.1866304657979612, + "grad_norm": 1.1363314446256756, + "learning_rate": 2.1330679221663495e-06, + "loss": 0.10653457641601563, + "step": 137235 + }, + { + "epoch": 1.1866736993195044, + "grad_norm": 40.916804692203364, + "learning_rate": 2.132872883081036e-06, + "loss": 0.3139739990234375, + "step": 137240 + }, + { + "epoch": 1.1867169328410476, + "grad_norm": 0.34906966183314253, + "learning_rate": 2.132677847994836e-06, + "loss": 0.03871917724609375, + "step": 137245 + }, + { + "epoch": 1.1867601663625909, + "grad_norm": 30.81020284477104, + "learning_rate": 2.1324828169086498e-06, + "loss": 0.23165283203125, + "step": 137250 + }, + { + "epoch": 1.186803399884134, + "grad_norm": 0.5994872972146069, + "learning_rate": 2.1322877898233757e-06, + "loss": 0.05431289672851562, + "step": 137255 + }, + { + "epoch": 1.1868466334056773, + "grad_norm": 30.172131850356045, + "learning_rate": 2.132092766739913e-06, + "loss": 0.25528945922851565, + "step": 137260 + }, + { + "epoch": 1.1868898669272208, + "grad_norm": 5.126251235050765, + "learning_rate": 2.1318977476591626e-06, + "loss": 0.170977783203125, + "step": 137265 + }, + { + "epoch": 1.186933100448764, + "grad_norm": 6.9828761256617415, + "learning_rate": 2.1317027325820226e-06, + "loss": 0.012158584594726563, + "step": 137270 + }, + { + "epoch": 1.1869763339703072, + "grad_norm": 3.9805356151091518, + "learning_rate": 2.131507721509393e-06, + "loss": 0.1148468017578125, + "step": 137275 + }, + { + "epoch": 1.1870195674918504, + "grad_norm": 11.50148335775541, + "learning_rate": 2.1313127144421736e-06, + "loss": 0.04836330413818359, + "step": 137280 + }, + { + "epoch": 1.1870628010133937, + "grad_norm": 21.35317038921864, + "learning_rate": 2.1311177113812633e-06, + "loss": 0.1897064208984375, + "step": 137285 + }, + { + "epoch": 1.1871060345349371, + "grad_norm": 2.731122215148192, + "learning_rate": 2.1309227123275604e-06, + "loss": 0.05495147705078125, + "step": 137290 + }, + { + "epoch": 1.1871492680564804, + "grad_norm": 0.4564344002786457, + "learning_rate": 2.1307277172819644e-06, + "loss": 0.12155914306640625, + "step": 137295 + }, + { + "epoch": 1.1871925015780236, + "grad_norm": 22.19226150576236, + "learning_rate": 2.130532726245377e-06, + "loss": 0.12317581176757812, + "step": 137300 + }, + { + "epoch": 1.1872357350995668, + "grad_norm": 2.1490726570370073, + "learning_rate": 2.1303377392186954e-06, + "loss": 0.008603668212890625, + "step": 137305 + }, + { + "epoch": 1.18727896862111, + "grad_norm": 0.25270295623098693, + "learning_rate": 2.1301427562028193e-06, + "loss": 0.049347305297851564, + "step": 137310 + }, + { + "epoch": 1.1873222021426533, + "grad_norm": 0.22954838807689326, + "learning_rate": 2.129947777198648e-06, + "loss": 0.06608295440673828, + "step": 137315 + }, + { + "epoch": 1.1873654356641965, + "grad_norm": 23.02829829538801, + "learning_rate": 2.1297528022070805e-06, + "loss": 0.036035919189453126, + "step": 137320 + }, + { + "epoch": 1.18740866918574, + "grad_norm": 1.6055136361769535, + "learning_rate": 2.1295578312290156e-06, + "loss": 0.01204376220703125, + "step": 137325 + }, + { + "epoch": 1.1874519027072832, + "grad_norm": 4.242230152408065, + "learning_rate": 2.129362864265354e-06, + "loss": 0.047855377197265625, + "step": 137330 + }, + { + "epoch": 1.1874951362288264, + "grad_norm": 2.6348707881144486, + "learning_rate": 2.129167901316994e-06, + "loss": 0.06351280212402344, + "step": 137335 + }, + { + "epoch": 1.1875383697503696, + "grad_norm": 0.8399208592501526, + "learning_rate": 2.1289729423848348e-06, + "loss": 0.1732147216796875, + "step": 137340 + }, + { + "epoch": 1.1875816032719129, + "grad_norm": 10.106270888605858, + "learning_rate": 2.1287779874697744e-06, + "loss": 0.31241455078125, + "step": 137345 + }, + { + "epoch": 1.187624836793456, + "grad_norm": 6.809666576282685, + "learning_rate": 2.128583036572714e-06, + "loss": 0.057390785217285155, + "step": 137350 + }, + { + "epoch": 1.1876680703149995, + "grad_norm": 3.3559958419268194, + "learning_rate": 2.12838808969455e-06, + "loss": 0.0499969482421875, + "step": 137355 + }, + { + "epoch": 1.1877113038365428, + "grad_norm": 0.23957578852413727, + "learning_rate": 2.1281931468361843e-06, + "loss": 0.0247039794921875, + "step": 137360 + }, + { + "epoch": 1.187754537358086, + "grad_norm": 3.2936734990273693, + "learning_rate": 2.1279982079985152e-06, + "loss": 0.16567535400390626, + "step": 137365 + }, + { + "epoch": 1.1877977708796292, + "grad_norm": 2.562703982614621, + "learning_rate": 2.1278032731824413e-06, + "loss": 0.031397247314453126, + "step": 137370 + }, + { + "epoch": 1.1878410044011725, + "grad_norm": 0.3758339278626724, + "learning_rate": 2.1276083423888607e-06, + "loss": 0.05019874572753906, + "step": 137375 + }, + { + "epoch": 1.1878842379227157, + "grad_norm": 57.89182730928034, + "learning_rate": 2.1274134156186744e-06, + "loss": 0.21787528991699218, + "step": 137380 + }, + { + "epoch": 1.187927471444259, + "grad_norm": 20.807236438984056, + "learning_rate": 2.12721849287278e-06, + "loss": 0.16295814514160156, + "step": 137385 + }, + { + "epoch": 1.1879707049658024, + "grad_norm": 5.903407770234463, + "learning_rate": 2.1270235741520753e-06, + "loss": 0.130255126953125, + "step": 137390 + }, + { + "epoch": 1.1880139384873456, + "grad_norm": 41.61700767129111, + "learning_rate": 2.1268286594574626e-06, + "loss": 0.24345645904541016, + "step": 137395 + }, + { + "epoch": 1.1880571720088888, + "grad_norm": 7.948458103355531, + "learning_rate": 2.1266337487898384e-06, + "loss": 0.09929656982421875, + "step": 137400 + }, + { + "epoch": 1.188100405530432, + "grad_norm": 4.174400382761302, + "learning_rate": 2.126438842150102e-06, + "loss": 0.026245880126953124, + "step": 137405 + }, + { + "epoch": 1.1881436390519753, + "grad_norm": 13.43988807382409, + "learning_rate": 2.126243939539153e-06, + "loss": 0.09514389038085938, + "step": 137410 + }, + { + "epoch": 1.1881868725735185, + "grad_norm": 2.3288979360159288, + "learning_rate": 2.1260490409578893e-06, + "loss": 0.08062477111816406, + "step": 137415 + }, + { + "epoch": 1.188230106095062, + "grad_norm": 14.36111616245285, + "learning_rate": 2.125854146407209e-06, + "loss": 0.11404571533203126, + "step": 137420 + }, + { + "epoch": 1.1882733396166052, + "grad_norm": 46.652270027239794, + "learning_rate": 2.125659255888014e-06, + "loss": 0.20543212890625, + "step": 137425 + }, + { + "epoch": 1.1883165731381484, + "grad_norm": 5.521395092084674, + "learning_rate": 2.125464369401201e-06, + "loss": 0.05611572265625, + "step": 137430 + }, + { + "epoch": 1.1883598066596917, + "grad_norm": 0.2504743021896586, + "learning_rate": 2.1252694869476684e-06, + "loss": 0.05844268798828125, + "step": 137435 + }, + { + "epoch": 1.1884030401812349, + "grad_norm": 0.16859954588531187, + "learning_rate": 2.125074608528316e-06, + "loss": 0.047338104248046874, + "step": 137440 + }, + { + "epoch": 1.188446273702778, + "grad_norm": 4.780997661181056, + "learning_rate": 2.1248797341440423e-06, + "loss": 0.3883644104003906, + "step": 137445 + }, + { + "epoch": 1.1884895072243213, + "grad_norm": 1.2198136732149059, + "learning_rate": 2.1246848637957464e-06, + "loss": 0.09355506896972657, + "step": 137450 + }, + { + "epoch": 1.1885327407458648, + "grad_norm": 8.731268679148892, + "learning_rate": 2.1244899974843247e-06, + "loss": 0.10717315673828125, + "step": 137455 + }, + { + "epoch": 1.188575974267408, + "grad_norm": 82.01736336868254, + "learning_rate": 2.124295135210679e-06, + "loss": 0.23993301391601562, + "step": 137460 + }, + { + "epoch": 1.1886192077889512, + "grad_norm": 1.2913544774544639, + "learning_rate": 2.124100276975707e-06, + "loss": 0.020749664306640624, + "step": 137465 + }, + { + "epoch": 1.1886624413104945, + "grad_norm": 0.28118340465706676, + "learning_rate": 2.1239054227803067e-06, + "loss": 0.0559600830078125, + "step": 137470 + }, + { + "epoch": 1.1887056748320377, + "grad_norm": 11.081870406176645, + "learning_rate": 2.123710572625378e-06, + "loss": 0.17809295654296875, + "step": 137475 + }, + { + "epoch": 1.1887489083535812, + "grad_norm": 7.117760755958318, + "learning_rate": 2.123515726511818e-06, + "loss": 0.03301925659179687, + "step": 137480 + }, + { + "epoch": 1.1887921418751244, + "grad_norm": 14.325210970483273, + "learning_rate": 2.1233208844405258e-06, + "loss": 0.21500053405761718, + "step": 137485 + }, + { + "epoch": 1.1888353753966676, + "grad_norm": 1.7875278354113646, + "learning_rate": 2.1231260464124003e-06, + "loss": 0.1370269775390625, + "step": 137490 + }, + { + "epoch": 1.1888786089182108, + "grad_norm": 0.11060527183946078, + "learning_rate": 2.12293121242834e-06, + "loss": 0.03408107757568359, + "step": 137495 + }, + { + "epoch": 1.188921842439754, + "grad_norm": 6.08927363897772, + "learning_rate": 2.122736382489244e-06, + "loss": 0.11429901123046875, + "step": 137500 + }, + { + "epoch": 1.1889650759612973, + "grad_norm": 12.598851247215304, + "learning_rate": 2.12254155659601e-06, + "loss": 0.12692832946777344, + "step": 137505 + }, + { + "epoch": 1.1890083094828405, + "grad_norm": 0.22496756478251384, + "learning_rate": 2.1223467347495372e-06, + "loss": 0.07457389831542968, + "step": 137510 + }, + { + "epoch": 1.1890515430043838, + "grad_norm": 0.12129762585457404, + "learning_rate": 2.122151916950722e-06, + "loss": 0.03363800048828125, + "step": 137515 + }, + { + "epoch": 1.1890947765259272, + "grad_norm": 2.164925192829945, + "learning_rate": 2.1219571032004657e-06, + "loss": 0.22584686279296876, + "step": 137520 + }, + { + "epoch": 1.1891380100474704, + "grad_norm": 1.550909313149649, + "learning_rate": 2.121762293499666e-06, + "loss": 0.1312580108642578, + "step": 137525 + }, + { + "epoch": 1.1891812435690137, + "grad_norm": 19.825026108780357, + "learning_rate": 2.1215674878492213e-06, + "loss": 0.18934345245361328, + "step": 137530 + }, + { + "epoch": 1.189224477090557, + "grad_norm": 17.499321771216525, + "learning_rate": 2.1213726862500292e-06, + "loss": 0.10388412475585937, + "step": 137535 + }, + { + "epoch": 1.1892677106121001, + "grad_norm": 2.419091613543521, + "learning_rate": 2.1211778887029886e-06, + "loss": 0.03184089660644531, + "step": 137540 + }, + { + "epoch": 1.1893109441336436, + "grad_norm": 0.4084604796397496, + "learning_rate": 2.120983095208998e-06, + "loss": 0.026990509033203124, + "step": 137545 + }, + { + "epoch": 1.1893541776551868, + "grad_norm": 6.222174300463918, + "learning_rate": 2.1207883057689546e-06, + "loss": 0.17747955322265624, + "step": 137550 + }, + { + "epoch": 1.18939741117673, + "grad_norm": 6.333259464462372, + "learning_rate": 2.1205935203837596e-06, + "loss": 0.09151573181152343, + "step": 137555 + }, + { + "epoch": 1.1894406446982733, + "grad_norm": 2.828350467896455, + "learning_rate": 2.120398739054309e-06, + "loss": 0.08280305862426758, + "step": 137560 + }, + { + "epoch": 1.1894838782198165, + "grad_norm": 1.5067547221801298, + "learning_rate": 2.1202039617815017e-06, + "loss": 0.07875709533691407, + "step": 137565 + }, + { + "epoch": 1.1895271117413597, + "grad_norm": 15.734736719250641, + "learning_rate": 2.1200091885662355e-06, + "loss": 0.12082901000976562, + "step": 137570 + }, + { + "epoch": 1.189570345262903, + "grad_norm": 0.5476125396026765, + "learning_rate": 2.1198144194094103e-06, + "loss": 0.08797683715820312, + "step": 137575 + }, + { + "epoch": 1.1896135787844464, + "grad_norm": 0.3418596421656417, + "learning_rate": 2.119619654311921e-06, + "loss": 0.0260040283203125, + "step": 137580 + }, + { + "epoch": 1.1896568123059896, + "grad_norm": 35.855786522940825, + "learning_rate": 2.11942489327467e-06, + "loss": 0.10571937561035157, + "step": 137585 + }, + { + "epoch": 1.1897000458275329, + "grad_norm": 61.87890273516517, + "learning_rate": 2.1192301362985533e-06, + "loss": 0.14497604370117187, + "step": 137590 + }, + { + "epoch": 1.189743279349076, + "grad_norm": 0.14575877658429642, + "learning_rate": 2.1190353833844696e-06, + "loss": 0.02748565673828125, + "step": 137595 + }, + { + "epoch": 1.1897865128706193, + "grad_norm": 0.7885369593488748, + "learning_rate": 2.118840634533316e-06, + "loss": 0.02067985534667969, + "step": 137600 + }, + { + "epoch": 1.1898297463921625, + "grad_norm": 0.7353016498471289, + "learning_rate": 2.1186458897459923e-06, + "loss": 0.10811424255371094, + "step": 137605 + }, + { + "epoch": 1.189872979913706, + "grad_norm": 7.614877512805711, + "learning_rate": 2.1184511490233945e-06, + "loss": 0.07537689208984374, + "step": 137610 + }, + { + "epoch": 1.1899162134352492, + "grad_norm": 0.9869098566035494, + "learning_rate": 2.118256412366424e-06, + "loss": 0.0317840576171875, + "step": 137615 + }, + { + "epoch": 1.1899594469567925, + "grad_norm": 2.548830150869308, + "learning_rate": 2.1180616797759763e-06, + "loss": 0.152825927734375, + "step": 137620 + }, + { + "epoch": 1.1900026804783357, + "grad_norm": 0.13846603729841386, + "learning_rate": 2.1178669512529505e-06, + "loss": 0.01729583740234375, + "step": 137625 + }, + { + "epoch": 1.190045913999879, + "grad_norm": 1.708779816299705, + "learning_rate": 2.1176722267982438e-06, + "loss": 0.01761016845703125, + "step": 137630 + }, + { + "epoch": 1.1900891475214221, + "grad_norm": 0.09204880629319573, + "learning_rate": 2.1174775064127553e-06, + "loss": 0.1507843017578125, + "step": 137635 + }, + { + "epoch": 1.1901323810429654, + "grad_norm": 2.5535933198542566, + "learning_rate": 2.117282790097382e-06, + "loss": 0.09757156372070312, + "step": 137640 + }, + { + "epoch": 1.1901756145645088, + "grad_norm": 5.507273124430331, + "learning_rate": 2.117088077853022e-06, + "loss": 0.034100341796875, + "step": 137645 + }, + { + "epoch": 1.190218848086052, + "grad_norm": 6.71374906471874, + "learning_rate": 2.1168933696805744e-06, + "loss": 0.036106109619140625, + "step": 137650 + }, + { + "epoch": 1.1902620816075953, + "grad_norm": 14.79099220022775, + "learning_rate": 2.1166986655809377e-06, + "loss": 0.03944854736328125, + "step": 137655 + }, + { + "epoch": 1.1903053151291385, + "grad_norm": 0.30377494979857106, + "learning_rate": 2.116503965555007e-06, + "loss": 0.0188690185546875, + "step": 137660 + }, + { + "epoch": 1.1903485486506817, + "grad_norm": 13.02830776181758, + "learning_rate": 2.116309269603683e-06, + "loss": 0.12111167907714844, + "step": 137665 + }, + { + "epoch": 1.190391782172225, + "grad_norm": 15.51413138196862, + "learning_rate": 2.1161145777278624e-06, + "loss": 0.05136871337890625, + "step": 137670 + }, + { + "epoch": 1.1904350156937684, + "grad_norm": 26.60662491613819, + "learning_rate": 2.1159198899284417e-06, + "loss": 0.17907371520996093, + "step": 137675 + }, + { + "epoch": 1.1904782492153116, + "grad_norm": 11.365465894192079, + "learning_rate": 2.1157252062063225e-06, + "loss": 0.07342605590820313, + "step": 137680 + }, + { + "epoch": 1.1905214827368549, + "grad_norm": 0.1917140491558968, + "learning_rate": 2.1155305265623997e-06, + "loss": 0.1084197998046875, + "step": 137685 + }, + { + "epoch": 1.190564716258398, + "grad_norm": 0.11998106378482247, + "learning_rate": 2.1153358509975716e-06, + "loss": 0.052997589111328125, + "step": 137690 + }, + { + "epoch": 1.1906079497799413, + "grad_norm": 0.1574867808296858, + "learning_rate": 2.115141179512737e-06, + "loss": 0.08243789672851562, + "step": 137695 + }, + { + "epoch": 1.1906511833014846, + "grad_norm": 0.4413787498025961, + "learning_rate": 2.114946512108793e-06, + "loss": 0.1404582977294922, + "step": 137700 + }, + { + "epoch": 1.1906944168230278, + "grad_norm": 0.11858392228554977, + "learning_rate": 2.1147518487866364e-06, + "loss": 0.017664718627929687, + "step": 137705 + }, + { + "epoch": 1.1907376503445712, + "grad_norm": 16.937275296150165, + "learning_rate": 2.1145571895471673e-06, + "loss": 0.07893295288085937, + "step": 137710 + }, + { + "epoch": 1.1907808838661145, + "grad_norm": 48.12533470964757, + "learning_rate": 2.1143625343912817e-06, + "loss": 0.10647964477539062, + "step": 137715 + }, + { + "epoch": 1.1908241173876577, + "grad_norm": 4.356749958869818, + "learning_rate": 2.1141678833198783e-06, + "loss": 0.0968048095703125, + "step": 137720 + }, + { + "epoch": 1.190867350909201, + "grad_norm": 16.05790242187009, + "learning_rate": 2.1139732363338546e-06, + "loss": 0.0633819580078125, + "step": 137725 + }, + { + "epoch": 1.1909105844307442, + "grad_norm": 12.10972311601907, + "learning_rate": 2.113778593434107e-06, + "loss": 0.05055656433105469, + "step": 137730 + }, + { + "epoch": 1.1909538179522876, + "grad_norm": 2.404158742265544, + "learning_rate": 2.113583954621535e-06, + "loss": 0.010098648071289063, + "step": 137735 + }, + { + "epoch": 1.1909970514738308, + "grad_norm": 1.6752891124394274, + "learning_rate": 2.1133893198970344e-06, + "loss": 0.24563999176025392, + "step": 137740 + }, + { + "epoch": 1.191040284995374, + "grad_norm": 6.382591956062064, + "learning_rate": 2.1131946892615045e-06, + "loss": 0.3754158020019531, + "step": 137745 + }, + { + "epoch": 1.1910835185169173, + "grad_norm": 0.47357986769278, + "learning_rate": 2.113000062715843e-06, + "loss": 0.0501495361328125, + "step": 137750 + }, + { + "epoch": 1.1911267520384605, + "grad_norm": 0.9277473808029624, + "learning_rate": 2.1128054402609465e-06, + "loss": 0.11808242797851562, + "step": 137755 + }, + { + "epoch": 1.1911699855600038, + "grad_norm": 51.725840170632836, + "learning_rate": 2.112610821897713e-06, + "loss": 0.13841094970703124, + "step": 137760 + }, + { + "epoch": 1.191213219081547, + "grad_norm": 6.024746063996118, + "learning_rate": 2.1124162076270398e-06, + "loss": 0.053800201416015624, + "step": 137765 + }, + { + "epoch": 1.1912564526030902, + "grad_norm": 7.498135079542265, + "learning_rate": 2.112221597449824e-06, + "loss": 0.02793464660644531, + "step": 137770 + }, + { + "epoch": 1.1912996861246337, + "grad_norm": 4.24425435474748, + "learning_rate": 2.112026991366964e-06, + "loss": 0.0856292724609375, + "step": 137775 + }, + { + "epoch": 1.191342919646177, + "grad_norm": 4.339584737339486, + "learning_rate": 2.111832389379358e-06, + "loss": 0.019499588012695312, + "step": 137780 + }, + { + "epoch": 1.1913861531677201, + "grad_norm": 4.140186289529695, + "learning_rate": 2.1116377914879018e-06, + "loss": 0.12003974914550782, + "step": 137785 + }, + { + "epoch": 1.1914293866892633, + "grad_norm": 6.269459448643126, + "learning_rate": 2.1114431976934944e-06, + "loss": 0.03525390625, + "step": 137790 + }, + { + "epoch": 1.1914726202108066, + "grad_norm": 1.3422257979118062, + "learning_rate": 2.111248607997031e-06, + "loss": 0.08025836944580078, + "step": 137795 + }, + { + "epoch": 1.19151585373235, + "grad_norm": 5.847138439243364, + "learning_rate": 2.111054022399412e-06, + "loss": 0.016106414794921874, + "step": 137800 + }, + { + "epoch": 1.1915590872538933, + "grad_norm": 1.7052932416333735, + "learning_rate": 2.1108594409015313e-06, + "loss": 0.099176025390625, + "step": 137805 + }, + { + "epoch": 1.1916023207754365, + "grad_norm": 0.7249524029075066, + "learning_rate": 2.1106648635042896e-06, + "loss": 0.06364383697509765, + "step": 137810 + }, + { + "epoch": 1.1916455542969797, + "grad_norm": 9.213152323462928, + "learning_rate": 2.1104702902085835e-06, + "loss": 0.06638107299804688, + "step": 137815 + }, + { + "epoch": 1.191688787818523, + "grad_norm": 11.498910800985753, + "learning_rate": 2.1102757210153095e-06, + "loss": 0.0813375473022461, + "step": 137820 + }, + { + "epoch": 1.1917320213400662, + "grad_norm": 0.22831961198252695, + "learning_rate": 2.1100811559253647e-06, + "loss": 0.0709136962890625, + "step": 137825 + }, + { + "epoch": 1.1917752548616094, + "grad_norm": 1.6069270245299494, + "learning_rate": 2.1098865949396477e-06, + "loss": 0.40871992111206057, + "step": 137830 + }, + { + "epoch": 1.1918184883831529, + "grad_norm": 0.23595335978444293, + "learning_rate": 2.1096920380590535e-06, + "loss": 0.034456825256347655, + "step": 137835 + }, + { + "epoch": 1.191861721904696, + "grad_norm": 3.7479101921971822, + "learning_rate": 2.1094974852844823e-06, + "loss": 0.057379150390625, + "step": 137840 + }, + { + "epoch": 1.1919049554262393, + "grad_norm": 23.104569805806676, + "learning_rate": 2.1093029366168305e-06, + "loss": 0.08448486328125, + "step": 137845 + }, + { + "epoch": 1.1919481889477825, + "grad_norm": 7.59316562675714, + "learning_rate": 2.109108392056994e-06, + "loss": 0.06657791137695312, + "step": 137850 + }, + { + "epoch": 1.1919914224693258, + "grad_norm": 0.09555783638906098, + "learning_rate": 2.1089138516058714e-06, + "loss": 0.07741851806640625, + "step": 137855 + }, + { + "epoch": 1.192034655990869, + "grad_norm": 63.03501882601356, + "learning_rate": 2.108719315264359e-06, + "loss": 0.1252227783203125, + "step": 137860 + }, + { + "epoch": 1.1920778895124124, + "grad_norm": 7.944073934128864, + "learning_rate": 2.1085247830333532e-06, + "loss": 0.030339813232421874, + "step": 137865 + }, + { + "epoch": 1.1921211230339557, + "grad_norm": 6.724615050063124, + "learning_rate": 2.1083302549137538e-06, + "loss": 0.0276031494140625, + "step": 137870 + }, + { + "epoch": 1.192164356555499, + "grad_norm": 15.36303601889355, + "learning_rate": 2.1081357309064565e-06, + "loss": 0.056256103515625, + "step": 137875 + }, + { + "epoch": 1.1922075900770421, + "grad_norm": 0.1951698192847201, + "learning_rate": 2.107941211012359e-06, + "loss": 0.052915191650390624, + "step": 137880 + }, + { + "epoch": 1.1922508235985854, + "grad_norm": 0.6757887819315873, + "learning_rate": 2.107746695232356e-06, + "loss": 0.029254150390625, + "step": 137885 + }, + { + "epoch": 1.1922940571201286, + "grad_norm": 12.962988295819047, + "learning_rate": 2.1075521835673476e-06, + "loss": 0.04498138427734375, + "step": 137890 + }, + { + "epoch": 1.1923372906416718, + "grad_norm": 8.01304031507938, + "learning_rate": 2.10735767601823e-06, + "loss": 0.08148155212402344, + "step": 137895 + }, + { + "epoch": 1.1923805241632153, + "grad_norm": 35.910615293426346, + "learning_rate": 2.1071631725858983e-06, + "loss": 0.23514404296875, + "step": 137900 + }, + { + "epoch": 1.1924237576847585, + "grad_norm": 4.232832150468731, + "learning_rate": 2.1069686732712522e-06, + "loss": 0.03245849609375, + "step": 137905 + }, + { + "epoch": 1.1924669912063017, + "grad_norm": 0.6886658640231189, + "learning_rate": 2.106774178075188e-06, + "loss": 0.034474945068359374, + "step": 137910 + }, + { + "epoch": 1.192510224727845, + "grad_norm": 5.524143130255841, + "learning_rate": 2.1065796869986015e-06, + "loss": 0.0257568359375, + "step": 137915 + }, + { + "epoch": 1.1925534582493882, + "grad_norm": 2.5167090471038867, + "learning_rate": 2.106385200042392e-06, + "loss": 0.013330841064453125, + "step": 137920 + }, + { + "epoch": 1.1925966917709314, + "grad_norm": 0.3197912940264087, + "learning_rate": 2.106190717207454e-06, + "loss": 0.15560836791992189, + "step": 137925 + }, + { + "epoch": 1.1926399252924749, + "grad_norm": 5.794103369455954, + "learning_rate": 2.1059962384946845e-06, + "loss": 0.07259521484375, + "step": 137930 + }, + { + "epoch": 1.192683158814018, + "grad_norm": 8.678584401931689, + "learning_rate": 2.1058017639049827e-06, + "loss": 0.08668975830078125, + "step": 137935 + }, + { + "epoch": 1.1927263923355613, + "grad_norm": 2.783810475409061, + "learning_rate": 2.1056072934392432e-06, + "loss": 0.03922119140625, + "step": 137940 + }, + { + "epoch": 1.1927696258571046, + "grad_norm": 16.47827753362577, + "learning_rate": 2.105412827098365e-06, + "loss": 0.19488601684570311, + "step": 137945 + }, + { + "epoch": 1.1928128593786478, + "grad_norm": 6.837107092744151, + "learning_rate": 2.1052183648832435e-06, + "loss": 0.01691093444824219, + "step": 137950 + }, + { + "epoch": 1.192856092900191, + "grad_norm": 3.798798218069593, + "learning_rate": 2.105023906794776e-06, + "loss": 0.03571243286132812, + "step": 137955 + }, + { + "epoch": 1.1928993264217342, + "grad_norm": 0.9102629171838861, + "learning_rate": 2.104829452833857e-06, + "loss": 0.33994140625, + "step": 137960 + }, + { + "epoch": 1.1929425599432777, + "grad_norm": 3.1748335170986555, + "learning_rate": 2.1046350030013876e-06, + "loss": 0.024411773681640624, + "step": 137965 + }, + { + "epoch": 1.192985793464821, + "grad_norm": 0.44478171929132043, + "learning_rate": 2.104440557298262e-06, + "loss": 0.07269706726074218, + "step": 137970 + }, + { + "epoch": 1.1930290269863641, + "grad_norm": 0.6498312361355235, + "learning_rate": 2.1042461157253776e-06, + "loss": 0.007224273681640625, + "step": 137975 + }, + { + "epoch": 1.1930722605079074, + "grad_norm": 1.4739204814972127, + "learning_rate": 2.1040516782836313e-06, + "loss": 0.05400161743164063, + "step": 137980 + }, + { + "epoch": 1.1931154940294506, + "grad_norm": 6.482035943159915, + "learning_rate": 2.103857244973919e-06, + "loss": 0.018175506591796876, + "step": 137985 + }, + { + "epoch": 1.193158727550994, + "grad_norm": 0.9923257687229898, + "learning_rate": 2.103662815797138e-06, + "loss": 0.08499832153320312, + "step": 137990 + }, + { + "epoch": 1.1932019610725373, + "grad_norm": 4.027999553003646, + "learning_rate": 2.103468390754184e-06, + "loss": 0.08896560668945312, + "step": 137995 + }, + { + "epoch": 1.1932451945940805, + "grad_norm": 14.37700842191297, + "learning_rate": 2.1032739698459553e-06, + "loss": 0.066796875, + "step": 138000 + }, + { + "epoch": 1.1932884281156237, + "grad_norm": 3.449792700406486, + "learning_rate": 2.1030795530733476e-06, + "loss": 0.02721405029296875, + "step": 138005 + }, + { + "epoch": 1.193331661637167, + "grad_norm": 8.303652423978894, + "learning_rate": 2.102885140437259e-06, + "loss": 0.057745361328125, + "step": 138010 + }, + { + "epoch": 1.1933748951587102, + "grad_norm": 0.8810503000678901, + "learning_rate": 2.102690731938584e-06, + "loss": 0.10467910766601562, + "step": 138015 + }, + { + "epoch": 1.1934181286802534, + "grad_norm": 3.102726624704454, + "learning_rate": 2.1024963275782195e-06, + "loss": 0.09535064697265624, + "step": 138020 + }, + { + "epoch": 1.1934613622017967, + "grad_norm": 18.451189754854177, + "learning_rate": 2.1023019273570626e-06, + "loss": 0.04011669158935547, + "step": 138025 + }, + { + "epoch": 1.1935045957233401, + "grad_norm": 3.8069738990875273, + "learning_rate": 2.10210753127601e-06, + "loss": 0.04205474853515625, + "step": 138030 + }, + { + "epoch": 1.1935478292448833, + "grad_norm": 4.158613887743789, + "learning_rate": 2.101913139335959e-06, + "loss": 0.017534255981445312, + "step": 138035 + }, + { + "epoch": 1.1935910627664266, + "grad_norm": 4.9363999087786645, + "learning_rate": 2.101718751537805e-06, + "loss": 0.064080810546875, + "step": 138040 + }, + { + "epoch": 1.1936342962879698, + "grad_norm": 1.1082326623880172, + "learning_rate": 2.1015243678824445e-06, + "loss": 0.041435623168945314, + "step": 138045 + }, + { + "epoch": 1.193677529809513, + "grad_norm": 10.00564356162984, + "learning_rate": 2.101329988370774e-06, + "loss": 0.083538818359375, + "step": 138050 + }, + { + "epoch": 1.1937207633310565, + "grad_norm": 0.09756205065592521, + "learning_rate": 2.1011356130036897e-06, + "loss": 0.017784881591796874, + "step": 138055 + }, + { + "epoch": 1.1937639968525997, + "grad_norm": 13.670370284813188, + "learning_rate": 2.1009412417820893e-06, + "loss": 0.05382347106933594, + "step": 138060 + }, + { + "epoch": 1.193807230374143, + "grad_norm": 12.450916040364744, + "learning_rate": 2.1007468747068685e-06, + "loss": 0.07175521850585938, + "step": 138065 + }, + { + "epoch": 1.1938504638956862, + "grad_norm": 2.3945761072124334, + "learning_rate": 2.1005525117789235e-06, + "loss": 0.17200965881347657, + "step": 138070 + }, + { + "epoch": 1.1938936974172294, + "grad_norm": 12.918399793220221, + "learning_rate": 2.1003581529991512e-06, + "loss": 0.041389083862304686, + "step": 138075 + }, + { + "epoch": 1.1939369309387726, + "grad_norm": 4.37245416102477, + "learning_rate": 2.1001637983684475e-06, + "loss": 0.0693603515625, + "step": 138080 + }, + { + "epoch": 1.1939801644603159, + "grad_norm": 4.325097501946686, + "learning_rate": 2.0999694478877084e-06, + "loss": 0.024981689453125, + "step": 138085 + }, + { + "epoch": 1.1940233979818593, + "grad_norm": 0.19362267207847159, + "learning_rate": 2.09977510155783e-06, + "loss": 0.0341949462890625, + "step": 138090 + }, + { + "epoch": 1.1940666315034025, + "grad_norm": 3.4869788755918507, + "learning_rate": 2.0995807593797104e-06, + "loss": 0.0405364990234375, + "step": 138095 + }, + { + "epoch": 1.1941098650249458, + "grad_norm": 2.985077493573906, + "learning_rate": 2.099386421354245e-06, + "loss": 0.01493072509765625, + "step": 138100 + }, + { + "epoch": 1.194153098546489, + "grad_norm": 25.10243619089974, + "learning_rate": 2.0991920874823297e-06, + "loss": 0.08132095336914062, + "step": 138105 + }, + { + "epoch": 1.1941963320680322, + "grad_norm": 0.8960202924663462, + "learning_rate": 2.0989977577648605e-06, + "loss": 0.022348403930664062, + "step": 138110 + }, + { + "epoch": 1.1942395655895754, + "grad_norm": 0.20051784912347667, + "learning_rate": 2.0988034322027342e-06, + "loss": 0.23114700317382814, + "step": 138115 + }, + { + "epoch": 1.194282799111119, + "grad_norm": 3.1331592597824, + "learning_rate": 2.0986091107968457e-06, + "loss": 0.14591445922851562, + "step": 138120 + }, + { + "epoch": 1.1943260326326621, + "grad_norm": 77.99370239935206, + "learning_rate": 2.098414793548094e-06, + "loss": 0.2564300537109375, + "step": 138125 + }, + { + "epoch": 1.1943692661542054, + "grad_norm": 0.03297569164901899, + "learning_rate": 2.0982204804573726e-06, + "loss": 0.031846237182617185, + "step": 138130 + }, + { + "epoch": 1.1944124996757486, + "grad_norm": 5.627899920601307, + "learning_rate": 2.09802617152558e-06, + "loss": 0.08205900192260743, + "step": 138135 + }, + { + "epoch": 1.1944557331972918, + "grad_norm": 11.433073976511029, + "learning_rate": 2.0978318667536093e-06, + "loss": 0.06284103393554688, + "step": 138140 + }, + { + "epoch": 1.194498966718835, + "grad_norm": 15.00347545546601, + "learning_rate": 2.0976375661423597e-06, + "loss": 0.17294387817382811, + "step": 138145 + }, + { + "epoch": 1.1945422002403783, + "grad_norm": 0.6872176131329228, + "learning_rate": 2.0974432696927253e-06, + "loss": 0.09503631591796875, + "step": 138150 + }, + { + "epoch": 1.1945854337619217, + "grad_norm": 16.41601683058593, + "learning_rate": 2.0972489774056023e-06, + "loss": 0.12457866668701172, + "step": 138155 + }, + { + "epoch": 1.194628667283465, + "grad_norm": 1.8956751509376206, + "learning_rate": 2.0970546892818876e-06, + "loss": 0.011481857299804688, + "step": 138160 + }, + { + "epoch": 1.1946719008050082, + "grad_norm": 19.606196051226775, + "learning_rate": 2.096860405322477e-06, + "loss": 0.23611602783203126, + "step": 138165 + }, + { + "epoch": 1.1947151343265514, + "grad_norm": 0.5676773126468059, + "learning_rate": 2.0966661255282665e-06, + "loss": 0.04676055908203125, + "step": 138170 + }, + { + "epoch": 1.1947583678480946, + "grad_norm": 0.532565004696452, + "learning_rate": 2.096471849900152e-06, + "loss": 0.03890151977539062, + "step": 138175 + }, + { + "epoch": 1.1948016013696379, + "grad_norm": 0.2233156898432003, + "learning_rate": 2.09627757843903e-06, + "loss": 0.030123138427734376, + "step": 138180 + }, + { + "epoch": 1.1948448348911813, + "grad_norm": 0.603490845304141, + "learning_rate": 2.0960833111457945e-06, + "loss": 0.06516952514648437, + "step": 138185 + }, + { + "epoch": 1.1948880684127245, + "grad_norm": 0.866246808704552, + "learning_rate": 2.0958890480213437e-06, + "loss": 0.243011474609375, + "step": 138190 + }, + { + "epoch": 1.1949313019342678, + "grad_norm": 1.5176137653337138, + "learning_rate": 2.0956947890665723e-06, + "loss": 0.03977622985839844, + "step": 138195 + }, + { + "epoch": 1.194974535455811, + "grad_norm": 32.13627928673331, + "learning_rate": 2.0955005342823776e-06, + "loss": 0.07114028930664062, + "step": 138200 + }, + { + "epoch": 1.1950177689773542, + "grad_norm": 2.3281974177493487, + "learning_rate": 2.095306283669654e-06, + "loss": 0.0669891357421875, + "step": 138205 + }, + { + "epoch": 1.1950610024988975, + "grad_norm": 15.041649565497732, + "learning_rate": 2.0951120372292986e-06, + "loss": 0.024398040771484376, + "step": 138210 + }, + { + "epoch": 1.1951042360204407, + "grad_norm": 3.2722610638281604, + "learning_rate": 2.094917794962205e-06, + "loss": 0.07773609161376953, + "step": 138215 + }, + { + "epoch": 1.1951474695419841, + "grad_norm": 0.6250416258276534, + "learning_rate": 2.0947235568692713e-06, + "loss": 0.02699432373046875, + "step": 138220 + }, + { + "epoch": 1.1951907030635274, + "grad_norm": 5.616437475266587, + "learning_rate": 2.0945293229513924e-06, + "loss": 0.14965591430664063, + "step": 138225 + }, + { + "epoch": 1.1952339365850706, + "grad_norm": 0.25336318973926364, + "learning_rate": 2.094335093209465e-06, + "loss": 0.018896484375, + "step": 138230 + }, + { + "epoch": 1.1952771701066138, + "grad_norm": 6.575807013066722, + "learning_rate": 2.094140867644384e-06, + "loss": 0.037738800048828125, + "step": 138235 + }, + { + "epoch": 1.195320403628157, + "grad_norm": 0.5562916260954754, + "learning_rate": 2.093946646257045e-06, + "loss": 0.045664215087890626, + "step": 138240 + }, + { + "epoch": 1.1953636371497005, + "grad_norm": 10.905499649229931, + "learning_rate": 2.0937524290483434e-06, + "loss": 0.0689697265625, + "step": 138245 + }, + { + "epoch": 1.1954068706712437, + "grad_norm": 0.690670586527416, + "learning_rate": 2.0935582160191758e-06, + "loss": 0.030800628662109374, + "step": 138250 + }, + { + "epoch": 1.195450104192787, + "grad_norm": 2.2914956769024166, + "learning_rate": 2.0933640071704376e-06, + "loss": 0.03530864715576172, + "step": 138255 + }, + { + "epoch": 1.1954933377143302, + "grad_norm": 0.8726024331113922, + "learning_rate": 2.093169802503025e-06, + "loss": 0.054721832275390625, + "step": 138260 + }, + { + "epoch": 1.1955365712358734, + "grad_norm": 0.4004979133893406, + "learning_rate": 2.092975602017833e-06, + "loss": 0.050618553161621095, + "step": 138265 + }, + { + "epoch": 1.1955798047574167, + "grad_norm": 4.145566692148898, + "learning_rate": 2.0927814057157575e-06, + "loss": 0.028289031982421876, + "step": 138270 + }, + { + "epoch": 1.1956230382789599, + "grad_norm": 0.734158376898886, + "learning_rate": 2.0925872135976937e-06, + "loss": 0.04007415771484375, + "step": 138275 + }, + { + "epoch": 1.1956662718005033, + "grad_norm": 0.07871884423728934, + "learning_rate": 2.092393025664537e-06, + "loss": 0.025353240966796874, + "step": 138280 + }, + { + "epoch": 1.1957095053220466, + "grad_norm": 0.49123508931981236, + "learning_rate": 2.0921988419171838e-06, + "loss": 0.07340621948242188, + "step": 138285 + }, + { + "epoch": 1.1957527388435898, + "grad_norm": 15.326411016921698, + "learning_rate": 2.0920046623565295e-06, + "loss": 0.11606254577636718, + "step": 138290 + }, + { + "epoch": 1.195795972365133, + "grad_norm": 6.5118513461586405, + "learning_rate": 2.0918104869834702e-06, + "loss": 0.10066070556640624, + "step": 138295 + }, + { + "epoch": 1.1958392058866762, + "grad_norm": 0.8271208741345119, + "learning_rate": 2.0916163157989003e-06, + "loss": 0.013069725036621094, + "step": 138300 + }, + { + "epoch": 1.1958824394082195, + "grad_norm": 33.115033398521305, + "learning_rate": 2.091422148803715e-06, + "loss": 0.04830474853515625, + "step": 138305 + }, + { + "epoch": 1.195925672929763, + "grad_norm": 4.411261011670754, + "learning_rate": 2.0912279859988104e-06, + "loss": 0.29379119873046877, + "step": 138310 + }, + { + "epoch": 1.1959689064513062, + "grad_norm": 4.683466670791225, + "learning_rate": 2.091033827385082e-06, + "loss": 0.033917999267578124, + "step": 138315 + }, + { + "epoch": 1.1960121399728494, + "grad_norm": 4.505914669316596, + "learning_rate": 2.090839672963426e-06, + "loss": 0.08066825866699219, + "step": 138320 + }, + { + "epoch": 1.1960553734943926, + "grad_norm": 1.0009859855106404, + "learning_rate": 2.0906455227347372e-06, + "loss": 0.017730712890625, + "step": 138325 + }, + { + "epoch": 1.1960986070159358, + "grad_norm": 0.8981141544217036, + "learning_rate": 2.090451376699911e-06, + "loss": 0.03164834976196289, + "step": 138330 + }, + { + "epoch": 1.196141840537479, + "grad_norm": 0.7285847061860797, + "learning_rate": 2.090257234859842e-06, + "loss": 0.0679168701171875, + "step": 138335 + }, + { + "epoch": 1.1961850740590223, + "grad_norm": 0.08264759272752045, + "learning_rate": 2.0900630972154267e-06, + "loss": 0.026834869384765626, + "step": 138340 + }, + { + "epoch": 1.1962283075805658, + "grad_norm": 27.49370624787061, + "learning_rate": 2.0898689637675587e-06, + "loss": 0.16729812622070311, + "step": 138345 + }, + { + "epoch": 1.196271541102109, + "grad_norm": 1.8873153155922928, + "learning_rate": 2.0896748345171357e-06, + "loss": 0.051966094970703126, + "step": 138350 + }, + { + "epoch": 1.1963147746236522, + "grad_norm": 0.09556188105289044, + "learning_rate": 2.0894807094650523e-06, + "loss": 0.03498077392578125, + "step": 138355 + }, + { + "epoch": 1.1963580081451954, + "grad_norm": 0.9075238121539583, + "learning_rate": 2.0892865886122033e-06, + "loss": 0.09491729736328125, + "step": 138360 + }, + { + "epoch": 1.1964012416667387, + "grad_norm": 5.127958259075319, + "learning_rate": 2.0890924719594833e-06, + "loss": 0.017502593994140624, + "step": 138365 + }, + { + "epoch": 1.196444475188282, + "grad_norm": 1.341783036614073, + "learning_rate": 2.088898359507789e-06, + "loss": 0.19848098754882812, + "step": 138370 + }, + { + "epoch": 1.1964877087098253, + "grad_norm": 1.4998091717999034, + "learning_rate": 2.088704251258014e-06, + "loss": 0.06176300048828125, + "step": 138375 + }, + { + "epoch": 1.1965309422313686, + "grad_norm": 8.406292240125703, + "learning_rate": 2.0885101472110557e-06, + "loss": 0.08324594497680664, + "step": 138380 + }, + { + "epoch": 1.1965741757529118, + "grad_norm": 2.9612974159191494, + "learning_rate": 2.088316047367808e-06, + "loss": 0.06438713073730469, + "step": 138385 + }, + { + "epoch": 1.196617409274455, + "grad_norm": 2.0111788588812463, + "learning_rate": 2.088121951729165e-06, + "loss": 0.192657470703125, + "step": 138390 + }, + { + "epoch": 1.1966606427959983, + "grad_norm": 3.7032780701818844, + "learning_rate": 2.0879278602960243e-06, + "loss": 0.05791397094726562, + "step": 138395 + }, + { + "epoch": 1.1967038763175415, + "grad_norm": 0.5256806085004351, + "learning_rate": 2.087733773069279e-06, + "loss": 0.10604705810546874, + "step": 138400 + }, + { + "epoch": 1.1967471098390847, + "grad_norm": 4.728646848791551, + "learning_rate": 2.0875396900498247e-06, + "loss": 0.016472625732421874, + "step": 138405 + }, + { + "epoch": 1.1967903433606282, + "grad_norm": 6.041676201483051, + "learning_rate": 2.0873456112385574e-06, + "loss": 0.14843978881835937, + "step": 138410 + }, + { + "epoch": 1.1968335768821714, + "grad_norm": 4.517720288641565, + "learning_rate": 2.0871515366363713e-06, + "loss": 0.15311660766601562, + "step": 138415 + }, + { + "epoch": 1.1968768104037146, + "grad_norm": 1.8318378670988997, + "learning_rate": 2.0869574662441612e-06, + "loss": 0.20757293701171875, + "step": 138420 + }, + { + "epoch": 1.1969200439252579, + "grad_norm": 1.4447967437051255, + "learning_rate": 2.086763400062823e-06, + "loss": 0.034782028198242186, + "step": 138425 + }, + { + "epoch": 1.196963277446801, + "grad_norm": 9.209066195681386, + "learning_rate": 2.086569338093252e-06, + "loss": 0.23064498901367186, + "step": 138430 + }, + { + "epoch": 1.1970065109683445, + "grad_norm": 2.2492318567024983, + "learning_rate": 2.0863752803363417e-06, + "loss": 0.0356170654296875, + "step": 138435 + }, + { + "epoch": 1.1970497444898878, + "grad_norm": 0.5704659612423328, + "learning_rate": 2.086181226792987e-06, + "loss": 0.021524810791015626, + "step": 138440 + }, + { + "epoch": 1.197092978011431, + "grad_norm": 17.56426291594204, + "learning_rate": 2.085987177464085e-06, + "loss": 0.11206893920898438, + "step": 138445 + }, + { + "epoch": 1.1971362115329742, + "grad_norm": 4.292062985375581, + "learning_rate": 2.0857931323505286e-06, + "loss": 0.10605392456054688, + "step": 138450 + }, + { + "epoch": 1.1971794450545175, + "grad_norm": 9.946993763955577, + "learning_rate": 2.0855990914532145e-06, + "loss": 0.02379150390625, + "step": 138455 + }, + { + "epoch": 1.1972226785760607, + "grad_norm": 36.92619633594002, + "learning_rate": 2.085405054773036e-06, + "loss": 0.1119476318359375, + "step": 138460 + }, + { + "epoch": 1.197265912097604, + "grad_norm": 43.26817775059952, + "learning_rate": 2.085211022310889e-06, + "loss": 0.2081939697265625, + "step": 138465 + }, + { + "epoch": 1.1973091456191471, + "grad_norm": 9.564128066200288, + "learning_rate": 2.0850169940676665e-06, + "loss": 0.03826217651367188, + "step": 138470 + }, + { + "epoch": 1.1973523791406906, + "grad_norm": 7.5604547065769925, + "learning_rate": 2.084822970044266e-06, + "loss": 0.05338134765625, + "step": 138475 + }, + { + "epoch": 1.1973956126622338, + "grad_norm": 27.730173085136496, + "learning_rate": 2.0846289502415803e-06, + "loss": 0.09583206176757812, + "step": 138480 + }, + { + "epoch": 1.197438846183777, + "grad_norm": 1.827268996821012, + "learning_rate": 2.084434934660506e-06, + "loss": 0.1669635772705078, + "step": 138485 + }, + { + "epoch": 1.1974820797053203, + "grad_norm": 50.56727758190179, + "learning_rate": 2.084240923301937e-06, + "loss": 0.15257911682128905, + "step": 138490 + }, + { + "epoch": 1.1975253132268635, + "grad_norm": 9.510304679810035, + "learning_rate": 2.0840469161667675e-06, + "loss": 0.03919029235839844, + "step": 138495 + }, + { + "epoch": 1.197568546748407, + "grad_norm": 6.7504831684726225, + "learning_rate": 2.0838529132558923e-06, + "loss": 0.07259521484375, + "step": 138500 + }, + { + "epoch": 1.1976117802699502, + "grad_norm": 35.74554289496441, + "learning_rate": 2.0836589145702064e-06, + "loss": 0.14317779541015624, + "step": 138505 + }, + { + "epoch": 1.1976550137914934, + "grad_norm": 71.0981210174626, + "learning_rate": 2.0834649201106046e-06, + "loss": 0.1505096435546875, + "step": 138510 + }, + { + "epoch": 1.1976982473130366, + "grad_norm": 8.141586778641734, + "learning_rate": 2.0832709298779826e-06, + "loss": 0.10998973846435547, + "step": 138515 + }, + { + "epoch": 1.1977414808345799, + "grad_norm": 0.8397990715300293, + "learning_rate": 2.0830769438732337e-06, + "loss": 0.06506233215332032, + "step": 138520 + }, + { + "epoch": 1.197784714356123, + "grad_norm": 2.516117626181883, + "learning_rate": 2.082882962097253e-06, + "loss": 0.02259063720703125, + "step": 138525 + }, + { + "epoch": 1.1978279478776663, + "grad_norm": 28.814858547259202, + "learning_rate": 2.082688984550934e-06, + "loss": 0.1390361785888672, + "step": 138530 + }, + { + "epoch": 1.1978711813992098, + "grad_norm": 3.9423053839557243, + "learning_rate": 2.0824950112351734e-06, + "loss": 0.05373077392578125, + "step": 138535 + }, + { + "epoch": 1.197914414920753, + "grad_norm": 9.87058988623747, + "learning_rate": 2.082301042150864e-06, + "loss": 0.05084991455078125, + "step": 138540 + }, + { + "epoch": 1.1979576484422962, + "grad_norm": 0.11337187061878826, + "learning_rate": 2.0821070772989015e-06, + "loss": 0.036787796020507815, + "step": 138545 + }, + { + "epoch": 1.1980008819638395, + "grad_norm": 1.3365535222723552, + "learning_rate": 2.0819131166801807e-06, + "loss": 0.03709831237792969, + "step": 138550 + }, + { + "epoch": 1.1980441154853827, + "grad_norm": 67.7189786953445, + "learning_rate": 2.0817191602955945e-06, + "loss": 0.43768310546875, + "step": 138555 + }, + { + "epoch": 1.198087349006926, + "grad_norm": 3.7296577321991866, + "learning_rate": 2.081525208146039e-06, + "loss": 0.06714668273925781, + "step": 138560 + }, + { + "epoch": 1.1981305825284694, + "grad_norm": 0.45790687364074134, + "learning_rate": 2.0813312602324066e-06, + "loss": 0.03677520751953125, + "step": 138565 + }, + { + "epoch": 1.1981738160500126, + "grad_norm": 14.78608568838092, + "learning_rate": 2.081137316555595e-06, + "loss": 0.04607086181640625, + "step": 138570 + }, + { + "epoch": 1.1982170495715558, + "grad_norm": 1.7386733189715495, + "learning_rate": 2.0809433771164966e-06, + "loss": 0.03914794921875, + "step": 138575 + }, + { + "epoch": 1.198260283093099, + "grad_norm": 0.9108891661201046, + "learning_rate": 2.080749441916006e-06, + "loss": 0.051531219482421876, + "step": 138580 + }, + { + "epoch": 1.1983035166146423, + "grad_norm": 0.415713540779652, + "learning_rate": 2.0805555109550176e-06, + "loss": 0.08670806884765625, + "step": 138585 + }, + { + "epoch": 1.1983467501361855, + "grad_norm": 1.9285563098838598, + "learning_rate": 2.080361584234426e-06, + "loss": 0.017218017578125, + "step": 138590 + }, + { + "epoch": 1.1983899836577288, + "grad_norm": 9.374548362792027, + "learning_rate": 2.0801676617551264e-06, + "loss": 0.17249679565429688, + "step": 138595 + }, + { + "epoch": 1.1984332171792722, + "grad_norm": 0.06294607625877498, + "learning_rate": 2.0799737435180103e-06, + "loss": 0.04450492858886719, + "step": 138600 + }, + { + "epoch": 1.1984764507008154, + "grad_norm": 0.12932393928566646, + "learning_rate": 2.0797798295239754e-06, + "loss": 0.09863109588623047, + "step": 138605 + }, + { + "epoch": 1.1985196842223587, + "grad_norm": 22.05418790855996, + "learning_rate": 2.0795859197739143e-06, + "loss": 0.14305534362792968, + "step": 138610 + }, + { + "epoch": 1.198562917743902, + "grad_norm": 0.6775201561490614, + "learning_rate": 2.079392014268721e-06, + "loss": 0.07960662841796876, + "step": 138615 + }, + { + "epoch": 1.1986061512654451, + "grad_norm": 4.784914825187814, + "learning_rate": 2.0791981130092917e-06, + "loss": 0.0273162841796875, + "step": 138620 + }, + { + "epoch": 1.1986493847869883, + "grad_norm": 12.80148028787467, + "learning_rate": 2.079004215996519e-06, + "loss": 0.08386878967285157, + "step": 138625 + }, + { + "epoch": 1.1986926183085318, + "grad_norm": 22.3033440283415, + "learning_rate": 2.078810323231296e-06, + "loss": 0.2934417724609375, + "step": 138630 + }, + { + "epoch": 1.198735851830075, + "grad_norm": 18.775963522755152, + "learning_rate": 2.07861643471452e-06, + "loss": 0.04985198974609375, + "step": 138635 + }, + { + "epoch": 1.1987790853516183, + "grad_norm": 1.2537140107650424, + "learning_rate": 2.078422550447083e-06, + "loss": 0.019592857360839842, + "step": 138640 + }, + { + "epoch": 1.1988223188731615, + "grad_norm": 1.168938058567609, + "learning_rate": 2.0782286704298797e-06, + "loss": 0.25589447021484374, + "step": 138645 + }, + { + "epoch": 1.1988655523947047, + "grad_norm": 1.0992447393138978, + "learning_rate": 2.0780347946638054e-06, + "loss": 0.042428207397460935, + "step": 138650 + }, + { + "epoch": 1.198908785916248, + "grad_norm": 2.5876202929846217, + "learning_rate": 2.077840923149752e-06, + "loss": 0.049239730834960936, + "step": 138655 + }, + { + "epoch": 1.1989520194377912, + "grad_norm": 9.020595921074108, + "learning_rate": 2.077647055888614e-06, + "loss": 0.13937034606933593, + "step": 138660 + }, + { + "epoch": 1.1989952529593346, + "grad_norm": 13.89041991723094, + "learning_rate": 2.0774531928812883e-06, + "loss": 0.18696937561035157, + "step": 138665 + }, + { + "epoch": 1.1990384864808779, + "grad_norm": 8.683198077958954, + "learning_rate": 2.0772593341286657e-06, + "loss": 0.14476165771484376, + "step": 138670 + }, + { + "epoch": 1.199081720002421, + "grad_norm": 13.467176245224667, + "learning_rate": 2.0770654796316416e-06, + "loss": 0.27303314208984375, + "step": 138675 + }, + { + "epoch": 1.1991249535239643, + "grad_norm": 2.088386348079561, + "learning_rate": 2.07687162939111e-06, + "loss": 0.0765869140625, + "step": 138680 + }, + { + "epoch": 1.1991681870455075, + "grad_norm": 7.81511845007021, + "learning_rate": 2.0766777834079657e-06, + "loss": 0.03342704772949219, + "step": 138685 + }, + { + "epoch": 1.199211420567051, + "grad_norm": 1.2743632266740106, + "learning_rate": 2.0764839416831016e-06, + "loss": 0.03330078125, + "step": 138690 + }, + { + "epoch": 1.1992546540885942, + "grad_norm": 7.98463285767262, + "learning_rate": 2.0762901042174105e-06, + "loss": 0.030873870849609374, + "step": 138695 + }, + { + "epoch": 1.1992978876101374, + "grad_norm": 1.4992717359860193, + "learning_rate": 2.076096271011789e-06, + "loss": 0.027114105224609376, + "step": 138700 + }, + { + "epoch": 1.1993411211316807, + "grad_norm": 2.4379750002412948, + "learning_rate": 2.0759024420671295e-06, + "loss": 0.013969039916992188, + "step": 138705 + }, + { + "epoch": 1.199384354653224, + "grad_norm": 11.8963290696172, + "learning_rate": 2.075708617384327e-06, + "loss": 0.0284149169921875, + "step": 138710 + }, + { + "epoch": 1.1994275881747671, + "grad_norm": 8.176094294988113, + "learning_rate": 2.0755147969642743e-06, + "loss": 0.0224365234375, + "step": 138715 + }, + { + "epoch": 1.1994708216963104, + "grad_norm": 7.024796922103817, + "learning_rate": 2.075320980807866e-06, + "loss": 0.06451568603515626, + "step": 138720 + }, + { + "epoch": 1.1995140552178536, + "grad_norm": 3.3746191015709472, + "learning_rate": 2.0751271689159943e-06, + "loss": 0.0557159423828125, + "step": 138725 + }, + { + "epoch": 1.199557288739397, + "grad_norm": 0.3084159253440839, + "learning_rate": 2.0749333612895557e-06, + "loss": 0.06317825317382812, + "step": 138730 + }, + { + "epoch": 1.1996005222609403, + "grad_norm": 2.3190123573699664, + "learning_rate": 2.0747395579294417e-06, + "loss": 0.03651351928710937, + "step": 138735 + }, + { + "epoch": 1.1996437557824835, + "grad_norm": 55.521320898157995, + "learning_rate": 2.074545758836548e-06, + "loss": 0.11585311889648438, + "step": 138740 + }, + { + "epoch": 1.1996869893040267, + "grad_norm": 3.1322996100004765, + "learning_rate": 2.0743519640117672e-06, + "loss": 0.015975761413574218, + "step": 138745 + }, + { + "epoch": 1.19973022282557, + "grad_norm": 1.5099279593775283, + "learning_rate": 2.0741581734559936e-06, + "loss": 0.07155303955078125, + "step": 138750 + }, + { + "epoch": 1.1997734563471134, + "grad_norm": 11.690420210197294, + "learning_rate": 2.0739643871701203e-06, + "loss": 0.03268280029296875, + "step": 138755 + }, + { + "epoch": 1.1998166898686566, + "grad_norm": 3.8831104302547357, + "learning_rate": 2.0737706051550413e-06, + "loss": 0.06447219848632812, + "step": 138760 + }, + { + "epoch": 1.1998599233901999, + "grad_norm": 1.2889312348557873, + "learning_rate": 2.07357682741165e-06, + "loss": 0.01617774963378906, + "step": 138765 + }, + { + "epoch": 1.199903156911743, + "grad_norm": 1.1070346069598263, + "learning_rate": 2.0733830539408416e-06, + "loss": 0.07914047241210938, + "step": 138770 + }, + { + "epoch": 1.1999463904332863, + "grad_norm": 0.23433750585399524, + "learning_rate": 2.0731892847435086e-06, + "loss": 0.26970348358154295, + "step": 138775 + }, + { + "epoch": 1.1999896239548296, + "grad_norm": 1.3177997147309488, + "learning_rate": 2.072995519820545e-06, + "loss": 0.043627357482910155, + "step": 138780 + }, + { + "epoch": 1.2000328574763728, + "grad_norm": 0.8714877254375202, + "learning_rate": 2.072801759172843e-06, + "loss": 0.03763885498046875, + "step": 138785 + }, + { + "epoch": 1.2000760909979162, + "grad_norm": 4.530243170505061, + "learning_rate": 2.0726080028012964e-06, + "loss": 0.022182464599609375, + "step": 138790 + }, + { + "epoch": 1.2001193245194595, + "grad_norm": 7.081251939475107, + "learning_rate": 2.0724142507068016e-06, + "loss": 0.07630233764648438, + "step": 138795 + }, + { + "epoch": 1.2001625580410027, + "grad_norm": 1.6196915358730715, + "learning_rate": 2.0722205028902506e-06, + "loss": 0.04997940063476562, + "step": 138800 + }, + { + "epoch": 1.200205791562546, + "grad_norm": 6.590157563503012, + "learning_rate": 2.072026759352536e-06, + "loss": 0.046916961669921875, + "step": 138805 + }, + { + "epoch": 1.2002490250840891, + "grad_norm": 17.206188846768665, + "learning_rate": 2.0718330200945515e-06, + "loss": 0.035660076141357425, + "step": 138810 + }, + { + "epoch": 1.2002922586056324, + "grad_norm": 16.695945331128982, + "learning_rate": 2.071639285117192e-06, + "loss": 0.10666007995605468, + "step": 138815 + }, + { + "epoch": 1.2003354921271758, + "grad_norm": 0.15093806809762003, + "learning_rate": 2.071445554421349e-06, + "loss": 0.031043243408203126, + "step": 138820 + }, + { + "epoch": 1.200378725648719, + "grad_norm": 5.287174530362721, + "learning_rate": 2.0712518280079178e-06, + "loss": 0.03523674011230469, + "step": 138825 + }, + { + "epoch": 1.2004219591702623, + "grad_norm": 1.0031024828327668, + "learning_rate": 2.071058105877791e-06, + "loss": 0.06273345947265625, + "step": 138830 + }, + { + "epoch": 1.2004651926918055, + "grad_norm": 10.144336758910134, + "learning_rate": 2.0708643880318628e-06, + "loss": 0.0472625732421875, + "step": 138835 + }, + { + "epoch": 1.2005084262133487, + "grad_norm": 4.084581263196554, + "learning_rate": 2.070670674471025e-06, + "loss": 0.14051971435546876, + "step": 138840 + }, + { + "epoch": 1.200551659734892, + "grad_norm": 0.2471104377034229, + "learning_rate": 2.0704769651961725e-06, + "loss": 0.016692733764648436, + "step": 138845 + }, + { + "epoch": 1.2005948932564352, + "grad_norm": 1.5234147025540095, + "learning_rate": 2.0702832602081983e-06, + "loss": 0.01384592056274414, + "step": 138850 + }, + { + "epoch": 1.2006381267779787, + "grad_norm": 6.596572318643876, + "learning_rate": 2.070089559507994e-06, + "loss": 0.052633476257324216, + "step": 138855 + }, + { + "epoch": 1.2006813602995219, + "grad_norm": 3.5855446289003683, + "learning_rate": 2.0698958630964557e-06, + "loss": 0.0594146728515625, + "step": 138860 + }, + { + "epoch": 1.2007245938210651, + "grad_norm": 4.703663656205993, + "learning_rate": 2.0697021709744757e-06, + "loss": 0.07279052734375, + "step": 138865 + }, + { + "epoch": 1.2007678273426083, + "grad_norm": 32.31271162725099, + "learning_rate": 2.0695084831429463e-06, + "loss": 0.19767417907714843, + "step": 138870 + }, + { + "epoch": 1.2008110608641516, + "grad_norm": 2.2609101563958305, + "learning_rate": 2.0693147996027624e-06, + "loss": 0.02229766845703125, + "step": 138875 + }, + { + "epoch": 1.2008542943856948, + "grad_norm": 1.229664002573435, + "learning_rate": 2.0691211203548164e-06, + "loss": 0.18943939208984376, + "step": 138880 + }, + { + "epoch": 1.2008975279072382, + "grad_norm": 4.5833890150033465, + "learning_rate": 2.0689274454e-06, + "loss": 0.03517608642578125, + "step": 138885 + }, + { + "epoch": 1.2009407614287815, + "grad_norm": 0.5071037120265555, + "learning_rate": 2.0687337747392096e-06, + "loss": 0.28788909912109373, + "step": 138890 + }, + { + "epoch": 1.2009839949503247, + "grad_norm": 9.824257484779148, + "learning_rate": 2.0685401083733365e-06, + "loss": 0.10642318725585938, + "step": 138895 + }, + { + "epoch": 1.201027228471868, + "grad_norm": 1.526308707674228, + "learning_rate": 2.0683464463032737e-06, + "loss": 0.09889678955078125, + "step": 138900 + }, + { + "epoch": 1.2010704619934112, + "grad_norm": 0.026319510569815772, + "learning_rate": 2.068152788529915e-06, + "loss": 0.031116485595703125, + "step": 138905 + }, + { + "epoch": 1.2011136955149544, + "grad_norm": 3.175098195274948, + "learning_rate": 2.0679591350541534e-06, + "loss": 0.11084346771240235, + "step": 138910 + }, + { + "epoch": 1.2011569290364976, + "grad_norm": 0.14045639493853057, + "learning_rate": 2.067765485876881e-06, + "loss": 0.05997772216796875, + "step": 138915 + }, + { + "epoch": 1.201200162558041, + "grad_norm": 47.435288563079666, + "learning_rate": 2.0675718409989926e-06, + "loss": 0.2546714782714844, + "step": 138920 + }, + { + "epoch": 1.2012433960795843, + "grad_norm": 1.2770914222114067, + "learning_rate": 2.067378200421381e-06, + "loss": 0.04309234619140625, + "step": 138925 + }, + { + "epoch": 1.2012866296011275, + "grad_norm": 1.0167417575734998, + "learning_rate": 2.0671845641449377e-06, + "loss": 0.0735321044921875, + "step": 138930 + }, + { + "epoch": 1.2013298631226708, + "grad_norm": 1.1072408190719893, + "learning_rate": 2.0669909321705577e-06, + "loss": 0.010124588012695312, + "step": 138935 + }, + { + "epoch": 1.201373096644214, + "grad_norm": 1.713064002724102, + "learning_rate": 2.0667973044991326e-06, + "loss": 0.06317615509033203, + "step": 138940 + }, + { + "epoch": 1.2014163301657574, + "grad_norm": 1.655250656785791, + "learning_rate": 2.0666036811315564e-06, + "loss": 0.029586029052734376, + "step": 138945 + }, + { + "epoch": 1.2014595636873007, + "grad_norm": 0.574065553782038, + "learning_rate": 2.06641006206872e-06, + "loss": 0.018476104736328124, + "step": 138950 + }, + { + "epoch": 1.201502797208844, + "grad_norm": 16.688370767650454, + "learning_rate": 2.066216447311519e-06, + "loss": 0.1478952407836914, + "step": 138955 + }, + { + "epoch": 1.2015460307303871, + "grad_norm": 0.7208191375106024, + "learning_rate": 2.066022836860845e-06, + "loss": 0.03370933532714844, + "step": 138960 + }, + { + "epoch": 1.2015892642519304, + "grad_norm": 1.8760587539129094, + "learning_rate": 2.0658292307175916e-06, + "loss": 0.06501312255859375, + "step": 138965 + }, + { + "epoch": 1.2016324977734736, + "grad_norm": 3.409713864557175, + "learning_rate": 2.0656356288826514e-06, + "loss": 0.08510150909423828, + "step": 138970 + }, + { + "epoch": 1.2016757312950168, + "grad_norm": 29.48817719305051, + "learning_rate": 2.0654420313569167e-06, + "loss": 0.18905487060546874, + "step": 138975 + }, + { + "epoch": 1.2017189648165603, + "grad_norm": 7.862188708265569, + "learning_rate": 2.06524843814128e-06, + "loss": 0.2007542610168457, + "step": 138980 + }, + { + "epoch": 1.2017621983381035, + "grad_norm": 7.154959308415255, + "learning_rate": 2.065054849236635e-06, + "loss": 0.06446304321289062, + "step": 138985 + }, + { + "epoch": 1.2018054318596467, + "grad_norm": 38.32972070072011, + "learning_rate": 2.0648612646438755e-06, + "loss": 0.20425338745117189, + "step": 138990 + }, + { + "epoch": 1.20184866538119, + "grad_norm": 30.72693862140157, + "learning_rate": 2.064667684363893e-06, + "loss": 0.19514598846435546, + "step": 138995 + }, + { + "epoch": 1.2018918989027332, + "grad_norm": 10.10904973123971, + "learning_rate": 2.0644741083975803e-06, + "loss": 0.24425086975097657, + "step": 139000 + }, + { + "epoch": 1.2019351324242764, + "grad_norm": 12.448394970279669, + "learning_rate": 2.0642805367458307e-06, + "loss": 0.035228919982910153, + "step": 139005 + }, + { + "epoch": 1.2019783659458199, + "grad_norm": 40.15899021204217, + "learning_rate": 2.0640869694095357e-06, + "loss": 0.23628158569335939, + "step": 139010 + }, + { + "epoch": 1.202021599467363, + "grad_norm": 5.222547706434134, + "learning_rate": 2.063893406389589e-06, + "loss": 0.30140533447265627, + "step": 139015 + }, + { + "epoch": 1.2020648329889063, + "grad_norm": 0.6908431385555702, + "learning_rate": 2.063699847686884e-06, + "loss": 0.04617919921875, + "step": 139020 + }, + { + "epoch": 1.2021080665104495, + "grad_norm": 6.191427941370251, + "learning_rate": 2.063506293302312e-06, + "loss": 0.1102996826171875, + "step": 139025 + }, + { + "epoch": 1.2021513000319928, + "grad_norm": 2.7212380962606675, + "learning_rate": 2.0633127432367673e-06, + "loss": 0.015325164794921875, + "step": 139030 + }, + { + "epoch": 1.202194533553536, + "grad_norm": 0.5054274624079205, + "learning_rate": 2.06311919749114e-06, + "loss": 0.06713676452636719, + "step": 139035 + }, + { + "epoch": 1.2022377670750792, + "grad_norm": 7.989931209125742, + "learning_rate": 2.062925656066325e-06, + "loss": 0.07756500244140625, + "step": 139040 + }, + { + "epoch": 1.2022810005966227, + "grad_norm": 0.9381644003970205, + "learning_rate": 2.062732118963213e-06, + "loss": 0.059373092651367185, + "step": 139045 + }, + { + "epoch": 1.202324234118166, + "grad_norm": 2.5393431865737837, + "learning_rate": 2.0625385861826984e-06, + "loss": 0.03130340576171875, + "step": 139050 + }, + { + "epoch": 1.2023674676397091, + "grad_norm": 0.0522676794436516, + "learning_rate": 2.0623450577256735e-06, + "loss": 0.08855857849121093, + "step": 139055 + }, + { + "epoch": 1.2024107011612524, + "grad_norm": 4.128083844236364, + "learning_rate": 2.0621515335930304e-06, + "loss": 0.15209503173828126, + "step": 139060 + }, + { + "epoch": 1.2024539346827956, + "grad_norm": 2.032670549529516, + "learning_rate": 2.0619580137856607e-06, + "loss": 0.01489410400390625, + "step": 139065 + }, + { + "epoch": 1.2024971682043388, + "grad_norm": 4.392331570377542, + "learning_rate": 2.061764498304458e-06, + "loss": 0.060703086853027347, + "step": 139070 + }, + { + "epoch": 1.2025404017258823, + "grad_norm": 0.866786958100705, + "learning_rate": 2.061570987150314e-06, + "loss": 0.18980712890625, + "step": 139075 + }, + { + "epoch": 1.2025836352474255, + "grad_norm": 0.35827667330501733, + "learning_rate": 2.0613774803241226e-06, + "loss": 0.24125595092773439, + "step": 139080 + }, + { + "epoch": 1.2026268687689687, + "grad_norm": 19.12933264766434, + "learning_rate": 2.0611839778267754e-06, + "loss": 0.059391021728515625, + "step": 139085 + }, + { + "epoch": 1.202670102290512, + "grad_norm": 1.291647036254134, + "learning_rate": 2.0609904796591646e-06, + "loss": 0.03344039916992188, + "step": 139090 + }, + { + "epoch": 1.2027133358120552, + "grad_norm": 0.7248510978672822, + "learning_rate": 2.060796985822182e-06, + "loss": 0.09651641845703125, + "step": 139095 + }, + { + "epoch": 1.2027565693335984, + "grad_norm": 2.5774074850390605, + "learning_rate": 2.060603496316722e-06, + "loss": 0.14166412353515626, + "step": 139100 + }, + { + "epoch": 1.2027998028551417, + "grad_norm": 0.3757501392729066, + "learning_rate": 2.060410011143675e-06, + "loss": 0.06319503784179688, + "step": 139105 + }, + { + "epoch": 1.202843036376685, + "grad_norm": 17.511871537923625, + "learning_rate": 2.0602165303039326e-06, + "loss": 0.099810791015625, + "step": 139110 + }, + { + "epoch": 1.2028862698982283, + "grad_norm": 11.753939995947341, + "learning_rate": 2.0600230537983902e-06, + "loss": 0.1826223373413086, + "step": 139115 + }, + { + "epoch": 1.2029295034197716, + "grad_norm": 1.8801444213318668, + "learning_rate": 2.0598295816279384e-06, + "loss": 0.11477890014648437, + "step": 139120 + }, + { + "epoch": 1.2029727369413148, + "grad_norm": 33.714345962868045, + "learning_rate": 2.059636113793469e-06, + "loss": 0.1781597137451172, + "step": 139125 + }, + { + "epoch": 1.203015970462858, + "grad_norm": 21.371722574056626, + "learning_rate": 2.059442650295875e-06, + "loss": 0.041020774841308595, + "step": 139130 + }, + { + "epoch": 1.2030592039844012, + "grad_norm": 0.6930301058336009, + "learning_rate": 2.0592491911360487e-06, + "loss": 0.1900482177734375, + "step": 139135 + }, + { + "epoch": 1.2031024375059447, + "grad_norm": 0.6959321196484762, + "learning_rate": 2.0590557363148805e-06, + "loss": 0.02346038818359375, + "step": 139140 + }, + { + "epoch": 1.203145671027488, + "grad_norm": 40.427223433558694, + "learning_rate": 2.0588622858332655e-06, + "loss": 0.21114501953125, + "step": 139145 + }, + { + "epoch": 1.2031889045490312, + "grad_norm": 0.42941728352611047, + "learning_rate": 2.0586688396920945e-06, + "loss": 0.01255340576171875, + "step": 139150 + }, + { + "epoch": 1.2032321380705744, + "grad_norm": 5.306556435127014, + "learning_rate": 2.058475397892259e-06, + "loss": 0.0505584716796875, + "step": 139155 + }, + { + "epoch": 1.2032753715921176, + "grad_norm": 1.6957211185970598, + "learning_rate": 2.0582819604346525e-06, + "loss": 0.017661285400390626, + "step": 139160 + }, + { + "epoch": 1.2033186051136608, + "grad_norm": 5.342441807876128, + "learning_rate": 2.058088527320166e-06, + "loss": 0.07253150939941407, + "step": 139165 + }, + { + "epoch": 1.203361838635204, + "grad_norm": 2.4293014582084425, + "learning_rate": 2.057895098549691e-06, + "loss": 0.012178421020507812, + "step": 139170 + }, + { + "epoch": 1.2034050721567475, + "grad_norm": 0.6350918640938813, + "learning_rate": 2.057701674124122e-06, + "loss": 0.025646209716796875, + "step": 139175 + }, + { + "epoch": 1.2034483056782908, + "grad_norm": 1.8635669830979515, + "learning_rate": 2.0575082540443492e-06, + "loss": 0.110919189453125, + "step": 139180 + }, + { + "epoch": 1.203491539199834, + "grad_norm": 6.2798055750294575, + "learning_rate": 2.0573148383112647e-06, + "loss": 0.027515411376953125, + "step": 139185 + }, + { + "epoch": 1.2035347727213772, + "grad_norm": 0.4858795682611497, + "learning_rate": 2.0571214269257615e-06, + "loss": 0.018305206298828126, + "step": 139190 + }, + { + "epoch": 1.2035780062429204, + "grad_norm": 10.660147415242976, + "learning_rate": 2.056928019888731e-06, + "loss": 0.1860107421875, + "step": 139195 + }, + { + "epoch": 1.203621239764464, + "grad_norm": 2.010544736890455, + "learning_rate": 2.056734617201065e-06, + "loss": 0.111004638671875, + "step": 139200 + }, + { + "epoch": 1.2036644732860071, + "grad_norm": 4.556341545277373, + "learning_rate": 2.056541218863654e-06, + "loss": 0.14046249389648438, + "step": 139205 + }, + { + "epoch": 1.2037077068075503, + "grad_norm": 3.610608576086802, + "learning_rate": 2.056347824877393e-06, + "loss": 0.03601150512695313, + "step": 139210 + }, + { + "epoch": 1.2037509403290936, + "grad_norm": 0.054369869930672166, + "learning_rate": 2.0561544352431725e-06, + "loss": 0.028932571411132812, + "step": 139215 + }, + { + "epoch": 1.2037941738506368, + "grad_norm": 0.9337348688176222, + "learning_rate": 2.0559610499618846e-06, + "loss": 0.1479715347290039, + "step": 139220 + }, + { + "epoch": 1.20383740737218, + "grad_norm": 5.793630355947047, + "learning_rate": 2.0557676690344207e-06, + "loss": 0.05323333740234375, + "step": 139225 + }, + { + "epoch": 1.2038806408937233, + "grad_norm": 5.46968446446485, + "learning_rate": 2.0555742924616725e-06, + "loss": 0.05544853210449219, + "step": 139230 + }, + { + "epoch": 1.2039238744152667, + "grad_norm": 27.343578929476994, + "learning_rate": 2.055380920244532e-06, + "loss": 0.2606483459472656, + "step": 139235 + }, + { + "epoch": 1.20396710793681, + "grad_norm": 7.02436336738699, + "learning_rate": 2.055187552383891e-06, + "loss": 0.18802490234375, + "step": 139240 + }, + { + "epoch": 1.2040103414583532, + "grad_norm": 14.310067929977285, + "learning_rate": 2.0549941888806427e-06, + "loss": 0.0507354736328125, + "step": 139245 + }, + { + "epoch": 1.2040535749798964, + "grad_norm": 25.438000341488017, + "learning_rate": 2.054800829735677e-06, + "loss": 0.09795866012573243, + "step": 139250 + }, + { + "epoch": 1.2040968085014396, + "grad_norm": 0.11173212455017006, + "learning_rate": 2.054607474949887e-06, + "loss": 0.03194427490234375, + "step": 139255 + }, + { + "epoch": 1.2041400420229829, + "grad_norm": 1.0148909795309906, + "learning_rate": 2.054414124524163e-06, + "loss": 0.18052902221679687, + "step": 139260 + }, + { + "epoch": 1.2041832755445263, + "grad_norm": 0.9557777543041679, + "learning_rate": 2.0542207784593976e-06, + "loss": 0.13628616333007812, + "step": 139265 + }, + { + "epoch": 1.2042265090660695, + "grad_norm": 3.921521599223851, + "learning_rate": 2.054027436756482e-06, + "loss": 0.10600337982177735, + "step": 139270 + }, + { + "epoch": 1.2042697425876128, + "grad_norm": 4.018145095557568, + "learning_rate": 2.053834099416309e-06, + "loss": 0.02635498046875, + "step": 139275 + }, + { + "epoch": 1.204312976109156, + "grad_norm": 56.990347488530304, + "learning_rate": 2.0536407664397697e-06, + "loss": 0.39272308349609375, + "step": 139280 + }, + { + "epoch": 1.2043562096306992, + "grad_norm": 0.5416865124326556, + "learning_rate": 2.053447437827756e-06, + "loss": 0.018795013427734375, + "step": 139285 + }, + { + "epoch": 1.2043994431522425, + "grad_norm": 2.7778366812444033, + "learning_rate": 2.053254113581158e-06, + "loss": 0.11019287109375, + "step": 139290 + }, + { + "epoch": 1.2044426766737857, + "grad_norm": 3.31331005962144, + "learning_rate": 2.0530607937008684e-06, + "loss": 0.0163818359375, + "step": 139295 + }, + { + "epoch": 1.2044859101953291, + "grad_norm": 1.3895563510177944, + "learning_rate": 2.0528674781877786e-06, + "loss": 0.05475616455078125, + "step": 139300 + }, + { + "epoch": 1.2045291437168724, + "grad_norm": 4.994873169130787, + "learning_rate": 2.052674167042781e-06, + "loss": 0.009003829956054688, + "step": 139305 + }, + { + "epoch": 1.2045723772384156, + "grad_norm": 16.861257034281927, + "learning_rate": 2.052480860266766e-06, + "loss": 0.08458328247070312, + "step": 139310 + }, + { + "epoch": 1.2046156107599588, + "grad_norm": 1.3131697231537902, + "learning_rate": 2.0522875578606264e-06, + "loss": 0.08985786437988282, + "step": 139315 + }, + { + "epoch": 1.204658844281502, + "grad_norm": 1.2805051575142714, + "learning_rate": 2.0520942598252518e-06, + "loss": 0.14641380310058594, + "step": 139320 + }, + { + "epoch": 1.2047020778030453, + "grad_norm": 42.44522590090933, + "learning_rate": 2.0519009661615352e-06, + "loss": 0.12601165771484374, + "step": 139325 + }, + { + "epoch": 1.2047453113245887, + "grad_norm": 0.08546782925337697, + "learning_rate": 2.0517076768703667e-06, + "loss": 0.021690750122070314, + "step": 139330 + }, + { + "epoch": 1.204788544846132, + "grad_norm": 5.253749183851321, + "learning_rate": 2.0515143919526397e-06, + "loss": 0.0674044132232666, + "step": 139335 + }, + { + "epoch": 1.2048317783676752, + "grad_norm": 19.45502126205231, + "learning_rate": 2.0513211114092446e-06, + "loss": 0.12439842224121093, + "step": 139340 + }, + { + "epoch": 1.2048750118892184, + "grad_norm": 3.714974243993426, + "learning_rate": 2.0511278352410724e-06, + "loss": 0.048846435546875, + "step": 139345 + }, + { + "epoch": 1.2049182454107616, + "grad_norm": 3.4483158892067327, + "learning_rate": 2.050934563449014e-06, + "loss": 0.15435791015625, + "step": 139350 + }, + { + "epoch": 1.2049614789323049, + "grad_norm": 2.7875867731884143, + "learning_rate": 2.0507412960339626e-06, + "loss": 0.020399856567382812, + "step": 139355 + }, + { + "epoch": 1.205004712453848, + "grad_norm": 30.195368425730724, + "learning_rate": 2.0505480329968073e-06, + "loss": 0.2744121551513672, + "step": 139360 + }, + { + "epoch": 1.2050479459753916, + "grad_norm": 1.8679556293893982, + "learning_rate": 2.0503547743384417e-06, + "loss": 0.153253173828125, + "step": 139365 + }, + { + "epoch": 1.2050911794969348, + "grad_norm": 1.4251560074162741, + "learning_rate": 2.0501615200597557e-06, + "loss": 0.025920581817626954, + "step": 139370 + }, + { + "epoch": 1.205134413018478, + "grad_norm": 15.035326038725234, + "learning_rate": 2.049968270161641e-06, + "loss": 0.059661865234375, + "step": 139375 + }, + { + "epoch": 1.2051776465400212, + "grad_norm": 0.249002286483663, + "learning_rate": 2.049775024644988e-06, + "loss": 0.020980072021484376, + "step": 139380 + }, + { + "epoch": 1.2052208800615645, + "grad_norm": 2.9291305775328507, + "learning_rate": 2.0495817835106896e-06, + "loss": 0.06386260986328125, + "step": 139385 + }, + { + "epoch": 1.205264113583108, + "grad_norm": 0.09913777279858105, + "learning_rate": 2.0493885467596356e-06, + "loss": 0.03094329833984375, + "step": 139390 + }, + { + "epoch": 1.2053073471046512, + "grad_norm": 1.4803144967074513, + "learning_rate": 2.0491953143927166e-06, + "loss": 0.0105499267578125, + "step": 139395 + }, + { + "epoch": 1.2053505806261944, + "grad_norm": 7.2842023246948235, + "learning_rate": 2.049002086410826e-06, + "loss": 0.11195068359375, + "step": 139400 + }, + { + "epoch": 1.2053938141477376, + "grad_norm": 2.4642549809159138, + "learning_rate": 2.0488088628148537e-06, + "loss": 0.01859130859375, + "step": 139405 + }, + { + "epoch": 1.2054370476692808, + "grad_norm": 0.7063541286123396, + "learning_rate": 2.0486156436056902e-06, + "loss": 0.008090782165527343, + "step": 139410 + }, + { + "epoch": 1.205480281190824, + "grad_norm": 9.285075276757246, + "learning_rate": 2.048422428784228e-06, + "loss": 0.03944816589355469, + "step": 139415 + }, + { + "epoch": 1.2055235147123673, + "grad_norm": 3.362958954629544, + "learning_rate": 2.0482292183513578e-06, + "loss": 0.05256462097167969, + "step": 139420 + }, + { + "epoch": 1.2055667482339105, + "grad_norm": 8.540883677221512, + "learning_rate": 2.048036012307969e-06, + "loss": 0.06324176788330078, + "step": 139425 + }, + { + "epoch": 1.205609981755454, + "grad_norm": 2.908737424795161, + "learning_rate": 2.047842810654955e-06, + "loss": 0.04590187072753906, + "step": 139430 + }, + { + "epoch": 1.2056532152769972, + "grad_norm": 1.0895706747366238, + "learning_rate": 2.047649613393205e-06, + "loss": 0.010724258422851563, + "step": 139435 + }, + { + "epoch": 1.2056964487985404, + "grad_norm": 3.8280220485451912, + "learning_rate": 2.0474564205236114e-06, + "loss": 0.038720703125, + "step": 139440 + }, + { + "epoch": 1.2057396823200837, + "grad_norm": 10.945638315710006, + "learning_rate": 2.0472632320470653e-06, + "loss": 0.03608551025390625, + "step": 139445 + }, + { + "epoch": 1.205782915841627, + "grad_norm": 3.1576678041021737, + "learning_rate": 2.0470700479644564e-06, + "loss": 0.0512908935546875, + "step": 139450 + }, + { + "epoch": 1.2058261493631703, + "grad_norm": 0.7715339430546068, + "learning_rate": 2.0468768682766755e-06, + "loss": 0.05257492065429688, + "step": 139455 + }, + { + "epoch": 1.2058693828847136, + "grad_norm": 5.29944290090136, + "learning_rate": 2.046683692984615e-06, + "loss": 0.04202423095703125, + "step": 139460 + }, + { + "epoch": 1.2059126164062568, + "grad_norm": 2.1249736161324106, + "learning_rate": 2.046490522089164e-06, + "loss": 0.04300117492675781, + "step": 139465 + }, + { + "epoch": 1.2059558499278, + "grad_norm": 4.130695047413964, + "learning_rate": 2.0462973555912163e-06, + "loss": 0.0297393798828125, + "step": 139470 + }, + { + "epoch": 1.2059990834493433, + "grad_norm": 0.7527503026227602, + "learning_rate": 2.0461041934916603e-06, + "loss": 0.015238189697265625, + "step": 139475 + }, + { + "epoch": 1.2060423169708865, + "grad_norm": 1.013630334093553, + "learning_rate": 2.0459110357913876e-06, + "loss": 0.11386489868164062, + "step": 139480 + }, + { + "epoch": 1.2060855504924297, + "grad_norm": 0.652919747277603, + "learning_rate": 2.0457178824912885e-06, + "loss": 0.010098838806152343, + "step": 139485 + }, + { + "epoch": 1.2061287840139732, + "grad_norm": 1.6652672977803988, + "learning_rate": 2.0455247335922535e-06, + "loss": 0.021869087219238283, + "step": 139490 + }, + { + "epoch": 1.2061720175355164, + "grad_norm": 0.22591019644579582, + "learning_rate": 2.045331589095174e-06, + "loss": 0.09204444885253907, + "step": 139495 + }, + { + "epoch": 1.2062152510570596, + "grad_norm": 37.67362126180664, + "learning_rate": 2.0451384490009425e-06, + "loss": 0.11650238037109376, + "step": 139500 + }, + { + "epoch": 1.2062584845786029, + "grad_norm": 0.6314037103490787, + "learning_rate": 2.0449453133104474e-06, + "loss": 0.15914039611816405, + "step": 139505 + }, + { + "epoch": 1.206301718100146, + "grad_norm": 1.742963835705288, + "learning_rate": 2.0447521820245803e-06, + "loss": 0.05939407348632812, + "step": 139510 + }, + { + "epoch": 1.2063449516216893, + "grad_norm": 42.14444058066805, + "learning_rate": 2.0445590551442316e-06, + "loss": 0.17737960815429688, + "step": 139515 + }, + { + "epoch": 1.2063881851432328, + "grad_norm": 3.9616469572583877, + "learning_rate": 2.0443659326702914e-06, + "loss": 0.02843608856201172, + "step": 139520 + }, + { + "epoch": 1.206431418664776, + "grad_norm": 2.69748776241272, + "learning_rate": 2.0441728146036513e-06, + "loss": 0.0647216796875, + "step": 139525 + }, + { + "epoch": 1.2064746521863192, + "grad_norm": 1.6263718547537729, + "learning_rate": 2.0439797009452023e-06, + "loss": 0.0551727294921875, + "step": 139530 + }, + { + "epoch": 1.2065178857078624, + "grad_norm": 21.50283317220181, + "learning_rate": 2.0437865916958346e-06, + "loss": 0.16129608154296876, + "step": 139535 + }, + { + "epoch": 1.2065611192294057, + "grad_norm": 2.8805914228287204, + "learning_rate": 2.0435934868564385e-06, + "loss": 0.02361602783203125, + "step": 139540 + }, + { + "epoch": 1.206604352750949, + "grad_norm": 14.056432453287899, + "learning_rate": 2.043400386427904e-06, + "loss": 0.12866439819335937, + "step": 139545 + }, + { + "epoch": 1.2066475862724921, + "grad_norm": 0.34805912874822603, + "learning_rate": 2.043207290411123e-06, + "loss": 0.005389404296875, + "step": 139550 + }, + { + "epoch": 1.2066908197940356, + "grad_norm": 2.0285069246869507, + "learning_rate": 2.043014198806984e-06, + "loss": 0.03259716033935547, + "step": 139555 + }, + { + "epoch": 1.2067340533155788, + "grad_norm": 1.011370625784376, + "learning_rate": 2.042821111616381e-06, + "loss": 0.11862411499023437, + "step": 139560 + }, + { + "epoch": 1.206777286837122, + "grad_norm": 1.0360881157431732, + "learning_rate": 2.042628028840202e-06, + "loss": 0.09553031921386719, + "step": 139565 + }, + { + "epoch": 1.2068205203586653, + "grad_norm": 1.4337831678633857, + "learning_rate": 2.0424349504793377e-06, + "loss": 0.10296840667724609, + "step": 139570 + }, + { + "epoch": 1.2068637538802085, + "grad_norm": 25.02270432317581, + "learning_rate": 2.0422418765346785e-06, + "loss": 0.0569732666015625, + "step": 139575 + }, + { + "epoch": 1.2069069874017517, + "grad_norm": 1.7345847702469233, + "learning_rate": 2.0420488070071153e-06, + "loss": 0.181451416015625, + "step": 139580 + }, + { + "epoch": 1.2069502209232952, + "grad_norm": 33.785672674640146, + "learning_rate": 2.0418557418975383e-06, + "loss": 0.10498046875, + "step": 139585 + }, + { + "epoch": 1.2069934544448384, + "grad_norm": 2.350455511150478, + "learning_rate": 2.041662681206838e-06, + "loss": 0.10851860046386719, + "step": 139590 + }, + { + "epoch": 1.2070366879663816, + "grad_norm": 0.5320858895632081, + "learning_rate": 2.041469624935905e-06, + "loss": 0.11904258728027343, + "step": 139595 + }, + { + "epoch": 1.2070799214879249, + "grad_norm": 6.2268778470625, + "learning_rate": 2.0412765730856294e-06, + "loss": 0.13038253784179688, + "step": 139600 + }, + { + "epoch": 1.207123155009468, + "grad_norm": 1.0146629593092227, + "learning_rate": 2.0410835256569012e-06, + "loss": 0.34444427490234375, + "step": 139605 + }, + { + "epoch": 1.2071663885310113, + "grad_norm": 36.9294373144376, + "learning_rate": 2.0408904826506113e-06, + "loss": 0.21581611633300782, + "step": 139610 + }, + { + "epoch": 1.2072096220525546, + "grad_norm": 8.448793743442774, + "learning_rate": 2.0406974440676485e-06, + "loss": 0.024158763885498046, + "step": 139615 + }, + { + "epoch": 1.207252855574098, + "grad_norm": 0.12420131056500613, + "learning_rate": 2.0405044099089057e-06, + "loss": 0.027253246307373045, + "step": 139620 + }, + { + "epoch": 1.2072960890956412, + "grad_norm": 0.7529377688797733, + "learning_rate": 2.040311380175272e-06, + "loss": 0.08290481567382812, + "step": 139625 + }, + { + "epoch": 1.2073393226171845, + "grad_norm": 13.96020921929768, + "learning_rate": 2.040118354867637e-06, + "loss": 0.1175069808959961, + "step": 139630 + }, + { + "epoch": 1.2073825561387277, + "grad_norm": 1.7162568001154255, + "learning_rate": 2.0399253339868917e-06, + "loss": 0.02520904541015625, + "step": 139635 + }, + { + "epoch": 1.207425789660271, + "grad_norm": 0.18017791568840028, + "learning_rate": 2.0397323175339255e-06, + "loss": 0.08025665283203125, + "step": 139640 + }, + { + "epoch": 1.2074690231818144, + "grad_norm": 1.1261241254493584, + "learning_rate": 2.039539305509629e-06, + "loss": 0.07297210693359375, + "step": 139645 + }, + { + "epoch": 1.2075122567033576, + "grad_norm": 0.17728197463286488, + "learning_rate": 2.039346297914892e-06, + "loss": 0.017099761962890626, + "step": 139650 + }, + { + "epoch": 1.2075554902249008, + "grad_norm": 13.735681785864282, + "learning_rate": 2.039153294750605e-06, + "loss": 0.1351837158203125, + "step": 139655 + }, + { + "epoch": 1.207598723746444, + "grad_norm": 1.221940149558493, + "learning_rate": 2.0389602960176584e-06, + "loss": 0.040182876586914065, + "step": 139660 + }, + { + "epoch": 1.2076419572679873, + "grad_norm": 0.0841153619994259, + "learning_rate": 2.038767301716942e-06, + "loss": 0.20501861572265626, + "step": 139665 + }, + { + "epoch": 1.2076851907895305, + "grad_norm": 0.3882819599844353, + "learning_rate": 2.0385743118493466e-06, + "loss": 0.035833740234375, + "step": 139670 + }, + { + "epoch": 1.2077284243110737, + "grad_norm": 0.4002253655246514, + "learning_rate": 2.038381326415761e-06, + "loss": 0.044652557373046874, + "step": 139675 + }, + { + "epoch": 1.207771657832617, + "grad_norm": 0.2582203797354362, + "learning_rate": 2.0381883454170746e-06, + "loss": 0.030743408203125, + "step": 139680 + }, + { + "epoch": 1.2078148913541604, + "grad_norm": 11.781702713000456, + "learning_rate": 2.0379953688541797e-06, + "loss": 0.13493213653564454, + "step": 139685 + }, + { + "epoch": 1.2078581248757037, + "grad_norm": 3.9071061311516955, + "learning_rate": 2.0378023967279647e-06, + "loss": 0.33951416015625, + "step": 139690 + }, + { + "epoch": 1.2079013583972469, + "grad_norm": 0.5797469008463965, + "learning_rate": 2.037609429039321e-06, + "loss": 0.015479660034179688, + "step": 139695 + }, + { + "epoch": 1.2079445919187901, + "grad_norm": 4.23047793044427, + "learning_rate": 2.037416465789137e-06, + "loss": 0.009943771362304687, + "step": 139700 + }, + { + "epoch": 1.2079878254403333, + "grad_norm": 11.456451427577377, + "learning_rate": 2.0372235069783032e-06, + "loss": 0.06753768920898437, + "step": 139705 + }, + { + "epoch": 1.2080310589618768, + "grad_norm": 11.905771331265605, + "learning_rate": 2.0370305526077085e-06, + "loss": 0.06013507843017578, + "step": 139710 + }, + { + "epoch": 1.20807429248342, + "grad_norm": 5.667336073366325, + "learning_rate": 2.0368376026782455e-06, + "loss": 0.12918777465820314, + "step": 139715 + }, + { + "epoch": 1.2081175260049632, + "grad_norm": 0.9059056391861329, + "learning_rate": 2.0366446571908007e-06, + "loss": 0.0572601318359375, + "step": 139720 + }, + { + "epoch": 1.2081607595265065, + "grad_norm": 6.612498318692961, + "learning_rate": 2.036451716146267e-06, + "loss": 0.07929763793945313, + "step": 139725 + }, + { + "epoch": 1.2082039930480497, + "grad_norm": 1.1768635634297386, + "learning_rate": 2.036258779545533e-06, + "loss": 0.02359161376953125, + "step": 139730 + }, + { + "epoch": 1.208247226569593, + "grad_norm": 0.6867440160427659, + "learning_rate": 2.036065847389488e-06, + "loss": 0.018752288818359376, + "step": 139735 + }, + { + "epoch": 1.2082904600911362, + "grad_norm": 12.822357098407247, + "learning_rate": 2.0358729196790214e-06, + "loss": 0.2908821105957031, + "step": 139740 + }, + { + "epoch": 1.2083336936126796, + "grad_norm": 5.148481816252109, + "learning_rate": 2.035679996415024e-06, + "loss": 0.025402641296386717, + "step": 139745 + }, + { + "epoch": 1.2083769271342228, + "grad_norm": 21.539528955429347, + "learning_rate": 2.035487077598385e-06, + "loss": 0.10669975280761719, + "step": 139750 + }, + { + "epoch": 1.208420160655766, + "grad_norm": 4.373920172955343, + "learning_rate": 2.035294163229995e-06, + "loss": 0.0509857177734375, + "step": 139755 + }, + { + "epoch": 1.2084633941773093, + "grad_norm": 17.76037944012616, + "learning_rate": 2.035101253310743e-06, + "loss": 0.11336517333984375, + "step": 139760 + }, + { + "epoch": 1.2085066276988525, + "grad_norm": 3.194555838510667, + "learning_rate": 2.0349083478415193e-06, + "loss": 0.02049560546875, + "step": 139765 + }, + { + "epoch": 1.2085498612203958, + "grad_norm": 1.5655250310284758, + "learning_rate": 2.0347154468232115e-06, + "loss": 0.06271743774414062, + "step": 139770 + }, + { + "epoch": 1.2085930947419392, + "grad_norm": 0.32450200128898854, + "learning_rate": 2.0345225502567116e-06, + "loss": 0.021880340576171876, + "step": 139775 + }, + { + "epoch": 1.2086363282634824, + "grad_norm": 1.3565167559770759, + "learning_rate": 2.034329658142908e-06, + "loss": 0.18780364990234374, + "step": 139780 + }, + { + "epoch": 1.2086795617850257, + "grad_norm": 18.072938644104493, + "learning_rate": 2.0341367704826914e-06, + "loss": 0.1752786636352539, + "step": 139785 + }, + { + "epoch": 1.208722795306569, + "grad_norm": 1.4135827796598677, + "learning_rate": 2.0339438872769503e-06, + "loss": 0.15996856689453126, + "step": 139790 + }, + { + "epoch": 1.2087660288281121, + "grad_norm": 2.4510227550103654, + "learning_rate": 2.033751008526575e-06, + "loss": 0.058963775634765625, + "step": 139795 + }, + { + "epoch": 1.2088092623496554, + "grad_norm": 9.983784978169506, + "learning_rate": 2.033558134232454e-06, + "loss": 0.049652099609375, + "step": 139800 + }, + { + "epoch": 1.2088524958711986, + "grad_norm": 0.47551464243961367, + "learning_rate": 2.033365264395478e-06, + "loss": 0.01348114013671875, + "step": 139805 + }, + { + "epoch": 1.208895729392742, + "grad_norm": 0.2626725202723936, + "learning_rate": 2.033172399016535e-06, + "loss": 0.05123443603515625, + "step": 139810 + }, + { + "epoch": 1.2089389629142853, + "grad_norm": 2.126596786178463, + "learning_rate": 2.032979538096517e-06, + "loss": 0.13270339965820313, + "step": 139815 + }, + { + "epoch": 1.2089821964358285, + "grad_norm": 1.9324120459186036, + "learning_rate": 2.0327866816363116e-06, + "loss": 0.09095916748046876, + "step": 139820 + }, + { + "epoch": 1.2090254299573717, + "grad_norm": 0.38417144833485944, + "learning_rate": 2.0325938296368085e-06, + "loss": 0.17640151977539062, + "step": 139825 + }, + { + "epoch": 1.209068663478915, + "grad_norm": 9.493004403827257, + "learning_rate": 2.0324009820988964e-06, + "loss": 0.0954833984375, + "step": 139830 + }, + { + "epoch": 1.2091118970004582, + "grad_norm": 4.674357486313514, + "learning_rate": 2.032208139023466e-06, + "loss": 0.06851272583007813, + "step": 139835 + }, + { + "epoch": 1.2091551305220016, + "grad_norm": 4.976038107870917, + "learning_rate": 2.032015300411405e-06, + "loss": 0.10085372924804688, + "step": 139840 + }, + { + "epoch": 1.2091983640435449, + "grad_norm": 2.5466724400635123, + "learning_rate": 2.0318224662636053e-06, + "loss": 0.04265022277832031, + "step": 139845 + }, + { + "epoch": 1.209241597565088, + "grad_norm": 0.048478645791180296, + "learning_rate": 2.031629636580955e-06, + "loss": 0.09256515502929688, + "step": 139850 + }, + { + "epoch": 1.2092848310866313, + "grad_norm": 4.288291555613859, + "learning_rate": 2.0314368113643423e-06, + "loss": 0.0386016845703125, + "step": 139855 + }, + { + "epoch": 1.2093280646081745, + "grad_norm": 0.9797798977273501, + "learning_rate": 2.0312439906146578e-06, + "loss": 0.23282432556152344, + "step": 139860 + }, + { + "epoch": 1.2093712981297178, + "grad_norm": 1.2878387292759501, + "learning_rate": 2.031051174332791e-06, + "loss": 0.15465240478515624, + "step": 139865 + }, + { + "epoch": 1.209414531651261, + "grad_norm": 0.15031616729467315, + "learning_rate": 2.030858362519629e-06, + "loss": 0.1097900390625, + "step": 139870 + }, + { + "epoch": 1.2094577651728045, + "grad_norm": 11.602121687138766, + "learning_rate": 2.0306655551760642e-06, + "loss": 0.07202262878417968, + "step": 139875 + }, + { + "epoch": 1.2095009986943477, + "grad_norm": 14.060684229122735, + "learning_rate": 2.030472752302984e-06, + "loss": 0.16922836303710936, + "step": 139880 + }, + { + "epoch": 1.209544232215891, + "grad_norm": 21.195008171560495, + "learning_rate": 2.030279953901277e-06, + "loss": 0.067041015625, + "step": 139885 + }, + { + "epoch": 1.2095874657374341, + "grad_norm": 0.07591991960471828, + "learning_rate": 2.030087159971834e-06, + "loss": 0.024592208862304687, + "step": 139890 + }, + { + "epoch": 1.2096306992589774, + "grad_norm": 4.224053635813416, + "learning_rate": 2.029894370515543e-06, + "loss": 0.11344947814941406, + "step": 139895 + }, + { + "epoch": 1.2096739327805208, + "grad_norm": 15.200443484211348, + "learning_rate": 2.029701585533294e-06, + "loss": 0.0672271728515625, + "step": 139900 + }, + { + "epoch": 1.209717166302064, + "grad_norm": 0.8504616321669202, + "learning_rate": 2.029508805025974e-06, + "loss": 0.026032257080078124, + "step": 139905 + }, + { + "epoch": 1.2097603998236073, + "grad_norm": 0.25176745746191587, + "learning_rate": 2.0293160289944746e-06, + "loss": 0.038701629638671874, + "step": 139910 + }, + { + "epoch": 1.2098036333451505, + "grad_norm": 15.252964250956019, + "learning_rate": 2.029123257439684e-06, + "loss": 0.035770225524902347, + "step": 139915 + }, + { + "epoch": 1.2098468668666937, + "grad_norm": 0.3059590197995821, + "learning_rate": 2.028930490362491e-06, + "loss": 0.07645149230957031, + "step": 139920 + }, + { + "epoch": 1.209890100388237, + "grad_norm": 1.4653887638393863, + "learning_rate": 2.028737727763785e-06, + "loss": 0.05410614013671875, + "step": 139925 + }, + { + "epoch": 1.2099333339097802, + "grad_norm": 0.1366827138340963, + "learning_rate": 2.0285449696444554e-06, + "loss": 0.029379653930664062, + "step": 139930 + }, + { + "epoch": 1.2099765674313236, + "grad_norm": 0.7297903554592856, + "learning_rate": 2.0283522160053886e-06, + "loss": 0.07010040283203126, + "step": 139935 + }, + { + "epoch": 1.2100198009528669, + "grad_norm": 0.26422714658280694, + "learning_rate": 2.028159466847477e-06, + "loss": 0.12401924133300782, + "step": 139940 + }, + { + "epoch": 1.21006303447441, + "grad_norm": 0.3305619539152582, + "learning_rate": 2.0279667221716073e-06, + "loss": 0.0452301025390625, + "step": 139945 + }, + { + "epoch": 1.2101062679959533, + "grad_norm": 36.06707466811635, + "learning_rate": 2.0277739819786697e-06, + "loss": 0.10259323120117188, + "step": 139950 + }, + { + "epoch": 1.2101495015174966, + "grad_norm": 22.093982386294524, + "learning_rate": 2.027581246269553e-06, + "loss": 0.048557281494140625, + "step": 139955 + }, + { + "epoch": 1.2101927350390398, + "grad_norm": 26.037156003915452, + "learning_rate": 2.0273885150451454e-06, + "loss": 0.12795791625976563, + "step": 139960 + }, + { + "epoch": 1.2102359685605832, + "grad_norm": 0.9933265294102277, + "learning_rate": 2.0271957883063348e-06, + "loss": 0.10067901611328126, + "step": 139965 + }, + { + "epoch": 1.2102792020821265, + "grad_norm": 1.1539311866385595, + "learning_rate": 2.0270030660540125e-06, + "loss": 0.05173492431640625, + "step": 139970 + }, + { + "epoch": 1.2103224356036697, + "grad_norm": 20.732652570586588, + "learning_rate": 2.0268103482890657e-06, + "loss": 0.1119659423828125, + "step": 139975 + }, + { + "epoch": 1.210365669125213, + "grad_norm": 0.1676844032828477, + "learning_rate": 2.026617635012384e-06, + "loss": 0.024686813354492188, + "step": 139980 + }, + { + "epoch": 1.2104089026467562, + "grad_norm": 13.395597998779333, + "learning_rate": 2.0264249262248557e-06, + "loss": 0.051910400390625, + "step": 139985 + }, + { + "epoch": 1.2104521361682994, + "grad_norm": 0.3298458405831369, + "learning_rate": 2.02623222192737e-06, + "loss": 0.020125722885131835, + "step": 139990 + }, + { + "epoch": 1.2104953696898426, + "grad_norm": 2.8687611303266674, + "learning_rate": 2.026039522120814e-06, + "loss": 0.009920310974121094, + "step": 139995 + }, + { + "epoch": 1.210538603211386, + "grad_norm": 4.925465038426819, + "learning_rate": 2.025846826806078e-06, + "loss": 0.01595458984375, + "step": 140000 + }, + { + "epoch": 1.2105818367329293, + "grad_norm": 8.056770540700754, + "learning_rate": 2.0256541359840504e-06, + "loss": 0.036773681640625, + "step": 140005 + }, + { + "epoch": 1.2106250702544725, + "grad_norm": 0.735007858660802, + "learning_rate": 2.0254614496556206e-06, + "loss": 0.0482086181640625, + "step": 140010 + }, + { + "epoch": 1.2106683037760158, + "grad_norm": 0.5229254645056007, + "learning_rate": 2.0252687678216764e-06, + "loss": 0.16441192626953124, + "step": 140015 + }, + { + "epoch": 1.210711537297559, + "grad_norm": 0.5054654477393337, + "learning_rate": 2.0250760904831067e-06, + "loss": 0.029074478149414062, + "step": 140020 + }, + { + "epoch": 1.2107547708191022, + "grad_norm": 20.43704119124159, + "learning_rate": 2.0248834176407984e-06, + "loss": 0.12162551879882813, + "step": 140025 + }, + { + "epoch": 1.2107980043406457, + "grad_norm": 0.41474711825934024, + "learning_rate": 2.0246907492956417e-06, + "loss": 0.27179527282714844, + "step": 140030 + }, + { + "epoch": 1.210841237862189, + "grad_norm": 0.08860701642613113, + "learning_rate": 2.0244980854485264e-06, + "loss": 0.011606216430664062, + "step": 140035 + }, + { + "epoch": 1.2108844713837321, + "grad_norm": 3.2088505470768847, + "learning_rate": 2.0243054261003393e-06, + "loss": 0.10785846710205078, + "step": 140040 + }, + { + "epoch": 1.2109277049052753, + "grad_norm": 1.3122105282547787, + "learning_rate": 2.02411277125197e-06, + "loss": 0.14800186157226564, + "step": 140045 + }, + { + "epoch": 1.2109709384268186, + "grad_norm": 0.8060342532584741, + "learning_rate": 2.023920120904306e-06, + "loss": 0.013637542724609375, + "step": 140050 + }, + { + "epoch": 1.2110141719483618, + "grad_norm": 1.5110129320186803, + "learning_rate": 2.0237274750582357e-06, + "loss": 0.06529541015625, + "step": 140055 + }, + { + "epoch": 1.211057405469905, + "grad_norm": 17.775475951497587, + "learning_rate": 2.0235348337146483e-06, + "loss": 0.0378509521484375, + "step": 140060 + }, + { + "epoch": 1.2111006389914485, + "grad_norm": 1.3397224497972928, + "learning_rate": 2.0233421968744306e-06, + "loss": 0.021779632568359374, + "step": 140065 + }, + { + "epoch": 1.2111438725129917, + "grad_norm": 8.164723738289803, + "learning_rate": 2.023149564538474e-06, + "loss": 0.11655406951904297, + "step": 140070 + }, + { + "epoch": 1.211187106034535, + "grad_norm": 0.6569561078861365, + "learning_rate": 2.022956936707665e-06, + "loss": 0.02468414306640625, + "step": 140075 + }, + { + "epoch": 1.2112303395560782, + "grad_norm": 4.176216838755505, + "learning_rate": 2.0227643133828923e-06, + "loss": 0.054470062255859375, + "step": 140080 + }, + { + "epoch": 1.2112735730776214, + "grad_norm": 0.0987429422357778, + "learning_rate": 2.022571694565044e-06, + "loss": 0.027890777587890624, + "step": 140085 + }, + { + "epoch": 1.2113168065991649, + "grad_norm": 0.9115052143825515, + "learning_rate": 2.0223790802550097e-06, + "loss": 0.03125762939453125, + "step": 140090 + }, + { + "epoch": 1.211360040120708, + "grad_norm": 5.807628494364213, + "learning_rate": 2.0221864704536744e-06, + "loss": 0.031757354736328125, + "step": 140095 + }, + { + "epoch": 1.2114032736422513, + "grad_norm": 0.543267449670483, + "learning_rate": 2.02199386516193e-06, + "loss": 0.09321279525756836, + "step": 140100 + }, + { + "epoch": 1.2114465071637945, + "grad_norm": 0.28642928777722504, + "learning_rate": 2.0218012643806644e-06, + "loss": 0.02277717590332031, + "step": 140105 + }, + { + "epoch": 1.2114897406853378, + "grad_norm": 6.19992956981547, + "learning_rate": 2.021608668110764e-06, + "loss": 0.03958415985107422, + "step": 140110 + }, + { + "epoch": 1.211532974206881, + "grad_norm": 73.60117789314782, + "learning_rate": 2.021416076353118e-06, + "loss": 0.2743492126464844, + "step": 140115 + }, + { + "epoch": 1.2115762077284242, + "grad_norm": 1.5611149925076584, + "learning_rate": 2.0212234891086152e-06, + "loss": 0.02339591979980469, + "step": 140120 + }, + { + "epoch": 1.2116194412499675, + "grad_norm": 2.2771751803030007, + "learning_rate": 2.0210309063781412e-06, + "loss": 0.047454833984375, + "step": 140125 + }, + { + "epoch": 1.211662674771511, + "grad_norm": 0.2544447715490515, + "learning_rate": 2.0208383281625883e-06, + "loss": 0.05205726623535156, + "step": 140130 + }, + { + "epoch": 1.2117059082930541, + "grad_norm": 0.45601727571953743, + "learning_rate": 2.020645754462842e-06, + "loss": 0.169305419921875, + "step": 140135 + }, + { + "epoch": 1.2117491418145974, + "grad_norm": 13.759664723510317, + "learning_rate": 2.0204531852797905e-06, + "loss": 0.03459014892578125, + "step": 140140 + }, + { + "epoch": 1.2117923753361406, + "grad_norm": 0.19039581203845649, + "learning_rate": 2.020260620614323e-06, + "loss": 0.3806610107421875, + "step": 140145 + }, + { + "epoch": 1.2118356088576838, + "grad_norm": 0.5160610672413545, + "learning_rate": 2.020068060467327e-06, + "loss": 0.07267837524414063, + "step": 140150 + }, + { + "epoch": 1.2118788423792273, + "grad_norm": 0.3620082143976579, + "learning_rate": 2.0198755048396904e-06, + "loss": 0.16048507690429686, + "step": 140155 + }, + { + "epoch": 1.2119220759007705, + "grad_norm": 3.487039446887768, + "learning_rate": 2.0196829537323e-06, + "loss": 0.028572845458984374, + "step": 140160 + }, + { + "epoch": 1.2119653094223137, + "grad_norm": 4.314570890093095, + "learning_rate": 2.019490407146046e-06, + "loss": 0.025070953369140624, + "step": 140165 + }, + { + "epoch": 1.212008542943857, + "grad_norm": 0.1931163034122204, + "learning_rate": 2.019297865081816e-06, + "loss": 0.02334442138671875, + "step": 140170 + }, + { + "epoch": 1.2120517764654002, + "grad_norm": 1.133673469467243, + "learning_rate": 2.0191053275404975e-06, + "loss": 0.08493919372558593, + "step": 140175 + }, + { + "epoch": 1.2120950099869434, + "grad_norm": 3.2609125609082534, + "learning_rate": 2.0189127945229788e-06, + "loss": 0.1475383758544922, + "step": 140180 + }, + { + "epoch": 1.2121382435084866, + "grad_norm": 0.7775598074776742, + "learning_rate": 2.0187202660301475e-06, + "loss": 0.04136962890625, + "step": 140185 + }, + { + "epoch": 1.21218147703003, + "grad_norm": 4.383322721713326, + "learning_rate": 2.018527742062891e-06, + "loss": 0.08815765380859375, + "step": 140190 + }, + { + "epoch": 1.2122247105515733, + "grad_norm": 3.9323521763349887, + "learning_rate": 2.018335222622098e-06, + "loss": 0.137860107421875, + "step": 140195 + }, + { + "epoch": 1.2122679440731166, + "grad_norm": 0.8878368376615524, + "learning_rate": 2.0181427077086564e-06, + "loss": 0.025531005859375, + "step": 140200 + }, + { + "epoch": 1.2123111775946598, + "grad_norm": 0.7349908842680041, + "learning_rate": 2.017950197323454e-06, + "loss": 0.1511077880859375, + "step": 140205 + }, + { + "epoch": 1.212354411116203, + "grad_norm": 0.6868189211427367, + "learning_rate": 2.017757691467379e-06, + "loss": 0.09233102798461915, + "step": 140210 + }, + { + "epoch": 1.2123976446377462, + "grad_norm": 15.288484181176237, + "learning_rate": 2.0175651901413187e-06, + "loss": 0.24751777648925782, + "step": 140215 + }, + { + "epoch": 1.2124408781592897, + "grad_norm": 2.767399612783215, + "learning_rate": 2.0173726933461594e-06, + "loss": 0.0597930908203125, + "step": 140220 + }, + { + "epoch": 1.212484111680833, + "grad_norm": 0.37522012064823645, + "learning_rate": 2.0171802010827915e-06, + "loss": 0.05294647216796875, + "step": 140225 + }, + { + "epoch": 1.2125273452023762, + "grad_norm": 0.19576332500098542, + "learning_rate": 2.016987713352101e-06, + "loss": 0.08464813232421875, + "step": 140230 + }, + { + "epoch": 1.2125705787239194, + "grad_norm": 9.127169230221387, + "learning_rate": 2.0167952301549776e-06, + "loss": 0.03025360107421875, + "step": 140235 + }, + { + "epoch": 1.2126138122454626, + "grad_norm": 5.6463762376291875, + "learning_rate": 2.0166027514923072e-06, + "loss": 0.029411888122558592, + "step": 140240 + }, + { + "epoch": 1.2126570457670058, + "grad_norm": 63.17359862810207, + "learning_rate": 2.0164102773649782e-06, + "loss": 0.1848682403564453, + "step": 140245 + }, + { + "epoch": 1.212700279288549, + "grad_norm": 1.5398731983930085, + "learning_rate": 2.0162178077738774e-06, + "loss": 0.035726165771484374, + "step": 140250 + }, + { + "epoch": 1.2127435128100925, + "grad_norm": 3.481164132089516, + "learning_rate": 2.0160253427198925e-06, + "loss": 0.021065521240234374, + "step": 140255 + }, + { + "epoch": 1.2127867463316357, + "grad_norm": 1.7779612685989818, + "learning_rate": 2.015832882203913e-06, + "loss": 0.07366065979003907, + "step": 140260 + }, + { + "epoch": 1.212829979853179, + "grad_norm": 0.20027654148665378, + "learning_rate": 2.0156404262268254e-06, + "loss": 0.008084869384765625, + "step": 140265 + }, + { + "epoch": 1.2128732133747222, + "grad_norm": 0.25240793580355614, + "learning_rate": 2.015447974789517e-06, + "loss": 0.04245567321777344, + "step": 140270 + }, + { + "epoch": 1.2129164468962654, + "grad_norm": 5.3479597889101305, + "learning_rate": 2.0152555278928758e-06, + "loss": 0.019360733032226563, + "step": 140275 + }, + { + "epoch": 1.2129596804178087, + "grad_norm": 2.546025078136413, + "learning_rate": 2.0150630855377882e-06, + "loss": 0.013583660125732422, + "step": 140280 + }, + { + "epoch": 1.2130029139393521, + "grad_norm": 1.0414856322113772, + "learning_rate": 2.0148706477251423e-06, + "loss": 0.03056182861328125, + "step": 140285 + }, + { + "epoch": 1.2130461474608953, + "grad_norm": 1.672630795737818, + "learning_rate": 2.014678214455827e-06, + "loss": 0.17556915283203126, + "step": 140290 + }, + { + "epoch": 1.2130893809824386, + "grad_norm": 1.3318081863462032, + "learning_rate": 2.0144857857307285e-06, + "loss": 0.039811897277832034, + "step": 140295 + }, + { + "epoch": 1.2131326145039818, + "grad_norm": 4.007285066037913, + "learning_rate": 2.0142933615507348e-06, + "loss": 0.14546241760253906, + "step": 140300 + }, + { + "epoch": 1.213175848025525, + "grad_norm": 2.8328345334552694, + "learning_rate": 2.014100941916732e-06, + "loss": 0.020847320556640625, + "step": 140305 + }, + { + "epoch": 1.2132190815470683, + "grad_norm": 15.257200786457823, + "learning_rate": 2.0139085268296096e-06, + "loss": 0.03293819427490234, + "step": 140310 + }, + { + "epoch": 1.2132623150686115, + "grad_norm": 2.3884909831970225, + "learning_rate": 2.0137161162902523e-06, + "loss": 0.07324399948120117, + "step": 140315 + }, + { + "epoch": 1.213305548590155, + "grad_norm": 8.595202477497754, + "learning_rate": 2.0135237102995508e-06, + "loss": 0.11244354248046876, + "step": 140320 + }, + { + "epoch": 1.2133487821116982, + "grad_norm": 5.493477874671657, + "learning_rate": 2.0133313088583907e-06, + "loss": 0.11007766723632813, + "step": 140325 + }, + { + "epoch": 1.2133920156332414, + "grad_norm": 1.8511493181019836, + "learning_rate": 2.0131389119676594e-06, + "loss": 0.11615447998046875, + "step": 140330 + }, + { + "epoch": 1.2134352491547846, + "grad_norm": 5.097957862001608, + "learning_rate": 2.012946519628243e-06, + "loss": 0.14271697998046876, + "step": 140335 + }, + { + "epoch": 1.2134784826763279, + "grad_norm": 6.016849525942738, + "learning_rate": 2.0127541318410315e-06, + "loss": 0.036612319946289065, + "step": 140340 + }, + { + "epoch": 1.2135217161978713, + "grad_norm": 9.832056532594775, + "learning_rate": 2.0125617486069107e-06, + "loss": 0.16987380981445313, + "step": 140345 + }, + { + "epoch": 1.2135649497194145, + "grad_norm": 0.9820026219126622, + "learning_rate": 2.0123693699267656e-06, + "loss": 0.07818374633789063, + "step": 140350 + }, + { + "epoch": 1.2136081832409578, + "grad_norm": 76.52589446850574, + "learning_rate": 2.012176995801488e-06, + "loss": 0.15225868225097655, + "step": 140355 + }, + { + "epoch": 1.213651416762501, + "grad_norm": 0.09722349466059102, + "learning_rate": 2.0119846262319624e-06, + "loss": 0.33330307006835935, + "step": 140360 + }, + { + "epoch": 1.2136946502840442, + "grad_norm": 2.828652792674765, + "learning_rate": 2.011792261219076e-06, + "loss": 0.01505584716796875, + "step": 140365 + }, + { + "epoch": 1.2137378838055874, + "grad_norm": 3.5797469192327704, + "learning_rate": 2.0115999007637167e-06, + "loss": 0.0213165283203125, + "step": 140370 + }, + { + "epoch": 1.2137811173271307, + "grad_norm": 6.919719094540412, + "learning_rate": 2.0114075448667715e-06, + "loss": 0.10519447326660156, + "step": 140375 + }, + { + "epoch": 1.213824350848674, + "grad_norm": 1.6292155144468814, + "learning_rate": 2.0112151935291257e-06, + "loss": 0.011606407165527344, + "step": 140380 + }, + { + "epoch": 1.2138675843702174, + "grad_norm": 0.6767407921337387, + "learning_rate": 2.0110228467516697e-06, + "loss": 0.038639068603515625, + "step": 140385 + }, + { + "epoch": 1.2139108178917606, + "grad_norm": 42.712609651409736, + "learning_rate": 2.0108305045352892e-06, + "loss": 0.09156036376953125, + "step": 140390 + }, + { + "epoch": 1.2139540514133038, + "grad_norm": 0.05978132416874409, + "learning_rate": 2.0106381668808702e-06, + "loss": 0.0653076171875, + "step": 140395 + }, + { + "epoch": 1.213997284934847, + "grad_norm": 4.643424699177445, + "learning_rate": 2.010445833789301e-06, + "loss": 0.03949470520019531, + "step": 140400 + }, + { + "epoch": 1.2140405184563903, + "grad_norm": 6.318203580586382, + "learning_rate": 2.010253505261468e-06, + "loss": 0.11029891967773438, + "step": 140405 + }, + { + "epoch": 1.2140837519779337, + "grad_norm": 6.181507687176012, + "learning_rate": 2.0100611812982587e-06, + "loss": 0.07860870361328125, + "step": 140410 + }, + { + "epoch": 1.214126985499477, + "grad_norm": 5.779845658203179, + "learning_rate": 2.0098688619005583e-06, + "loss": 0.016881561279296874, + "step": 140415 + }, + { + "epoch": 1.2141702190210202, + "grad_norm": 2.090672327491244, + "learning_rate": 2.0096765470692565e-06, + "loss": 0.0474822998046875, + "step": 140420 + }, + { + "epoch": 1.2142134525425634, + "grad_norm": 1.5856699688669602, + "learning_rate": 2.0094842368052386e-06, + "loss": 0.009302520751953125, + "step": 140425 + }, + { + "epoch": 1.2142566860641066, + "grad_norm": 0.6300018423517066, + "learning_rate": 2.0092919311093922e-06, + "loss": 0.014987945556640625, + "step": 140430 + }, + { + "epoch": 1.2142999195856499, + "grad_norm": 1.3540020705151137, + "learning_rate": 2.009099629982604e-06, + "loss": 0.11170158386230469, + "step": 140435 + }, + { + "epoch": 1.214343153107193, + "grad_norm": 41.78240564632321, + "learning_rate": 2.008907333425761e-06, + "loss": 0.2362689971923828, + "step": 140440 + }, + { + "epoch": 1.2143863866287365, + "grad_norm": 1.4408721209125297, + "learning_rate": 2.0087150414397483e-06, + "loss": 0.09886283874511718, + "step": 140445 + }, + { + "epoch": 1.2144296201502798, + "grad_norm": 1.3898926386424633, + "learning_rate": 2.0085227540254555e-06, + "loss": 0.007867050170898438, + "step": 140450 + }, + { + "epoch": 1.214472853671823, + "grad_norm": 1.7824363373594891, + "learning_rate": 2.0083304711837676e-06, + "loss": 0.012253189086914062, + "step": 140455 + }, + { + "epoch": 1.2145160871933662, + "grad_norm": 1.3505278542039583, + "learning_rate": 2.0081381929155722e-06, + "loss": 0.022190093994140625, + "step": 140460 + }, + { + "epoch": 1.2145593207149095, + "grad_norm": 1.6123376735437676, + "learning_rate": 2.0079459192217563e-06, + "loss": 0.02534942626953125, + "step": 140465 + }, + { + "epoch": 1.2146025542364527, + "grad_norm": 61.081644132839166, + "learning_rate": 2.007753650103206e-06, + "loss": 0.17270050048828126, + "step": 140470 + }, + { + "epoch": 1.2146457877579961, + "grad_norm": 0.34501647394974655, + "learning_rate": 2.0075613855608066e-06, + "loss": 0.08810272216796874, + "step": 140475 + }, + { + "epoch": 1.2146890212795394, + "grad_norm": 9.07092764261592, + "learning_rate": 2.0073691255954474e-06, + "loss": 0.11444473266601562, + "step": 140480 + }, + { + "epoch": 1.2147322548010826, + "grad_norm": 14.366109535073724, + "learning_rate": 2.0071768702080145e-06, + "loss": 0.06019859313964844, + "step": 140485 + }, + { + "epoch": 1.2147754883226258, + "grad_norm": 0.6266883828379609, + "learning_rate": 2.0069846193993945e-06, + "loss": 0.035941696166992186, + "step": 140490 + }, + { + "epoch": 1.214818721844169, + "grad_norm": 25.314517389003193, + "learning_rate": 2.0067923731704736e-06, + "loss": 0.05788497924804688, + "step": 140495 + }, + { + "epoch": 1.2148619553657123, + "grad_norm": 3.038948417635599, + "learning_rate": 2.0066001315221375e-06, + "loss": 0.0774200439453125, + "step": 140500 + }, + { + "epoch": 1.2149051888872555, + "grad_norm": 16.77429997557612, + "learning_rate": 2.0064078944552747e-06, + "loss": 0.10893764495849609, + "step": 140505 + }, + { + "epoch": 1.214948422408799, + "grad_norm": 14.279389003690353, + "learning_rate": 2.00621566197077e-06, + "loss": 0.17122154235839843, + "step": 140510 + }, + { + "epoch": 1.2149916559303422, + "grad_norm": 0.5148453322600505, + "learning_rate": 2.0060234340695114e-06, + "loss": 0.014269256591796875, + "step": 140515 + }, + { + "epoch": 1.2150348894518854, + "grad_norm": 1.31819980080145, + "learning_rate": 2.0058312107523853e-06, + "loss": 0.25166854858398435, + "step": 140520 + }, + { + "epoch": 1.2150781229734287, + "grad_norm": 12.643225687942868, + "learning_rate": 2.0056389920202774e-06, + "loss": 0.0477813720703125, + "step": 140525 + }, + { + "epoch": 1.2151213564949719, + "grad_norm": 17.11381191663115, + "learning_rate": 2.0054467778740745e-06, + "loss": 0.08583984375, + "step": 140530 + }, + { + "epoch": 1.2151645900165151, + "grad_norm": 1.8120019278563049, + "learning_rate": 2.005254568314664e-06, + "loss": 0.11251792907714844, + "step": 140535 + }, + { + "epoch": 1.2152078235380586, + "grad_norm": 1.0247960992923035, + "learning_rate": 2.0050623633429294e-06, + "loss": 0.009865379333496094, + "step": 140540 + }, + { + "epoch": 1.2152510570596018, + "grad_norm": 1.7577028083386586, + "learning_rate": 2.004870162959761e-06, + "loss": 0.0727935791015625, + "step": 140545 + }, + { + "epoch": 1.215294290581145, + "grad_norm": 28.955403219510806, + "learning_rate": 2.0046779671660437e-06, + "loss": 0.17102203369140626, + "step": 140550 + }, + { + "epoch": 1.2153375241026882, + "grad_norm": 7.203972578918077, + "learning_rate": 2.0044857759626637e-06, + "loss": 0.267987060546875, + "step": 140555 + }, + { + "epoch": 1.2153807576242315, + "grad_norm": 1.1879595743513403, + "learning_rate": 2.0042935893505064e-06, + "loss": 0.13020763397216797, + "step": 140560 + }, + { + "epoch": 1.2154239911457747, + "grad_norm": 0.12651274682367367, + "learning_rate": 2.0041014073304594e-06, + "loss": 0.009039211273193359, + "step": 140565 + }, + { + "epoch": 1.215467224667318, + "grad_norm": 0.6629122198236199, + "learning_rate": 2.003909229903408e-06, + "loss": 0.020627593994140624, + "step": 140570 + }, + { + "epoch": 1.2155104581888614, + "grad_norm": 0.26489253871805035, + "learning_rate": 2.0037170570702405e-06, + "loss": 0.23653488159179686, + "step": 140575 + }, + { + "epoch": 1.2155536917104046, + "grad_norm": 33.262521222052385, + "learning_rate": 2.003524888831842e-06, + "loss": 0.040422821044921876, + "step": 140580 + }, + { + "epoch": 1.2155969252319478, + "grad_norm": 2.2268378877643134, + "learning_rate": 2.003332725189098e-06, + "loss": 0.018187332153320312, + "step": 140585 + }, + { + "epoch": 1.215640158753491, + "grad_norm": 1.5405494218553755, + "learning_rate": 2.0031405661428952e-06, + "loss": 0.10072650909423828, + "step": 140590 + }, + { + "epoch": 1.2156833922750343, + "grad_norm": 14.915125521769255, + "learning_rate": 2.002948411694121e-06, + "loss": 0.0628936767578125, + "step": 140595 + }, + { + "epoch": 1.2157266257965778, + "grad_norm": 10.513575528068936, + "learning_rate": 2.00275626184366e-06, + "loss": 0.11415786743164062, + "step": 140600 + }, + { + "epoch": 1.215769859318121, + "grad_norm": 2.1652164268615706, + "learning_rate": 2.002564116592398e-06, + "loss": 0.23974990844726562, + "step": 140605 + }, + { + "epoch": 1.2158130928396642, + "grad_norm": 29.170510990298887, + "learning_rate": 2.002371975941223e-06, + "loss": 0.1275686264038086, + "step": 140610 + }, + { + "epoch": 1.2158563263612074, + "grad_norm": 0.26359539453416597, + "learning_rate": 2.002179839891021e-06, + "loss": 0.2494415283203125, + "step": 140615 + }, + { + "epoch": 1.2158995598827507, + "grad_norm": 3.772735778512824, + "learning_rate": 2.0019877084426763e-06, + "loss": 0.0704376220703125, + "step": 140620 + }, + { + "epoch": 1.215942793404294, + "grad_norm": 20.708733730158112, + "learning_rate": 2.0017955815970773e-06, + "loss": 0.135858154296875, + "step": 140625 + }, + { + "epoch": 1.2159860269258371, + "grad_norm": 81.22541697474563, + "learning_rate": 2.0016034593551084e-06, + "loss": 0.1857015609741211, + "step": 140630 + }, + { + "epoch": 1.2160292604473806, + "grad_norm": 0.8601814484965307, + "learning_rate": 2.001411341717655e-06, + "loss": 0.027973747253417967, + "step": 140635 + }, + { + "epoch": 1.2160724939689238, + "grad_norm": 4.45436635013066, + "learning_rate": 2.0012192286856053e-06, + "loss": 0.08009681701660157, + "step": 140640 + }, + { + "epoch": 1.216115727490467, + "grad_norm": 10.846256091800015, + "learning_rate": 2.0010271202598444e-06, + "loss": 0.05378589630126953, + "step": 140645 + }, + { + "epoch": 1.2161589610120103, + "grad_norm": 30.79198952369164, + "learning_rate": 2.0008350164412575e-06, + "loss": 0.05342864990234375, + "step": 140650 + }, + { + "epoch": 1.2162021945335535, + "grad_norm": 9.189307524374609, + "learning_rate": 2.0006429172307323e-06, + "loss": 0.1805511474609375, + "step": 140655 + }, + { + "epoch": 1.2162454280550967, + "grad_norm": 6.461755484696611, + "learning_rate": 2.0004508226291526e-06, + "loss": 0.039585113525390625, + "step": 140660 + }, + { + "epoch": 1.2162886615766402, + "grad_norm": 11.57021706958065, + "learning_rate": 2.000258732637405e-06, + "loss": 0.07118301391601563, + "step": 140665 + }, + { + "epoch": 1.2163318950981834, + "grad_norm": 10.669210935246928, + "learning_rate": 2.0000666472563765e-06, + "loss": 0.03241806030273438, + "step": 140670 + }, + { + "epoch": 1.2163751286197266, + "grad_norm": 25.664996149440704, + "learning_rate": 1.999874566486953e-06, + "loss": 0.14585533142089843, + "step": 140675 + }, + { + "epoch": 1.2164183621412699, + "grad_norm": 0.5886992022724072, + "learning_rate": 1.9996824903300183e-06, + "loss": 0.025051116943359375, + "step": 140680 + }, + { + "epoch": 1.216461595662813, + "grad_norm": 1.6736278213160216, + "learning_rate": 1.9994904187864606e-06, + "loss": 0.020003509521484376, + "step": 140685 + }, + { + "epoch": 1.2165048291843563, + "grad_norm": 0.7154343427556892, + "learning_rate": 1.999298351857165e-06, + "loss": 0.04954376220703125, + "step": 140690 + }, + { + "epoch": 1.2165480627058995, + "grad_norm": 0.5061933171160626, + "learning_rate": 1.999106289543016e-06, + "loss": 0.06869297027587891, + "step": 140695 + }, + { + "epoch": 1.216591296227443, + "grad_norm": 0.7726596095773555, + "learning_rate": 1.9989142318449e-06, + "loss": 0.035506820678710936, + "step": 140700 + }, + { + "epoch": 1.2166345297489862, + "grad_norm": 2.2709828103965872, + "learning_rate": 1.9987221787637033e-06, + "loss": 0.10700149536132812, + "step": 140705 + }, + { + "epoch": 1.2166777632705295, + "grad_norm": 40.73598446175824, + "learning_rate": 1.998530130300312e-06, + "loss": 0.13417816162109375, + "step": 140710 + }, + { + "epoch": 1.2167209967920727, + "grad_norm": 6.146863624732085, + "learning_rate": 1.9983380864556113e-06, + "loss": 0.07944564819335938, + "step": 140715 + }, + { + "epoch": 1.216764230313616, + "grad_norm": 2.0955983408452075, + "learning_rate": 1.9981460472304872e-06, + "loss": 0.025618743896484376, + "step": 140720 + }, + { + "epoch": 1.2168074638351591, + "grad_norm": 0.5083189423072064, + "learning_rate": 1.997954012625824e-06, + "loss": 0.25545425415039064, + "step": 140725 + }, + { + "epoch": 1.2168506973567026, + "grad_norm": 0.6203307025335768, + "learning_rate": 1.9977619826425085e-06, + "loss": 0.06712074279785156, + "step": 140730 + }, + { + "epoch": 1.2168939308782458, + "grad_norm": 8.59942253227895, + "learning_rate": 1.997569957281426e-06, + "loss": 0.03761215209960937, + "step": 140735 + }, + { + "epoch": 1.216937164399789, + "grad_norm": 16.58247468220582, + "learning_rate": 1.9973779365434633e-06, + "loss": 0.06048927307128906, + "step": 140740 + }, + { + "epoch": 1.2169803979213323, + "grad_norm": 1.1968608699053407, + "learning_rate": 1.9971859204295044e-06, + "loss": 0.030479049682617186, + "step": 140745 + }, + { + "epoch": 1.2170236314428755, + "grad_norm": 0.27541815154361604, + "learning_rate": 1.9969939089404357e-06, + "loss": 0.04263916015625, + "step": 140750 + }, + { + "epoch": 1.2170668649644187, + "grad_norm": 0.16985141017265337, + "learning_rate": 1.9968019020771423e-06, + "loss": 0.050848388671875, + "step": 140755 + }, + { + "epoch": 1.217110098485962, + "grad_norm": 0.22522500874037185, + "learning_rate": 1.99660989984051e-06, + "loss": 0.08697357177734374, + "step": 140760 + }, + { + "epoch": 1.2171533320075054, + "grad_norm": 4.318837069446647, + "learning_rate": 1.9964179022314225e-06, + "loss": 0.02510986328125, + "step": 140765 + }, + { + "epoch": 1.2171965655290486, + "grad_norm": 0.8861892940568605, + "learning_rate": 1.9962259092507687e-06, + "loss": 0.0204254150390625, + "step": 140770 + }, + { + "epoch": 1.2172397990505919, + "grad_norm": 44.296172079290514, + "learning_rate": 1.9960339208994328e-06, + "loss": 0.10777740478515625, + "step": 140775 + }, + { + "epoch": 1.217283032572135, + "grad_norm": 3.980190525725364, + "learning_rate": 1.995841937178299e-06, + "loss": 0.03228912353515625, + "step": 140780 + }, + { + "epoch": 1.2173262660936783, + "grad_norm": 6.968870230104541, + "learning_rate": 1.995649958088253e-06, + "loss": 0.07015571594238282, + "step": 140785 + }, + { + "epoch": 1.2173694996152216, + "grad_norm": 0.6801394908984536, + "learning_rate": 1.9954579836301814e-06, + "loss": 0.07122879028320313, + "step": 140790 + }, + { + "epoch": 1.217412733136765, + "grad_norm": 2.0976310654912558, + "learning_rate": 1.995266013804968e-06, + "loss": 0.03221397399902344, + "step": 140795 + }, + { + "epoch": 1.2174559666583082, + "grad_norm": 0.8964285969848492, + "learning_rate": 1.9950740486134997e-06, + "loss": 0.10086536407470703, + "step": 140800 + }, + { + "epoch": 1.2174992001798515, + "grad_norm": 2.162987941576405, + "learning_rate": 1.994882088056661e-06, + "loss": 0.05950241088867188, + "step": 140805 + }, + { + "epoch": 1.2175424337013947, + "grad_norm": 14.447920950305704, + "learning_rate": 1.9946901321353376e-06, + "loss": 0.04850616455078125, + "step": 140810 + }, + { + "epoch": 1.217585667222938, + "grad_norm": 1.5218360596434706, + "learning_rate": 1.9944981808504138e-06, + "loss": 0.1653350830078125, + "step": 140815 + }, + { + "epoch": 1.2176289007444812, + "grad_norm": 0.9186204659951333, + "learning_rate": 1.9943062342027755e-06, + "loss": 0.155914306640625, + "step": 140820 + }, + { + "epoch": 1.2176721342660244, + "grad_norm": 2.3369510323292673, + "learning_rate": 1.9941142921933076e-06, + "loss": 0.03065147399902344, + "step": 140825 + }, + { + "epoch": 1.2177153677875678, + "grad_norm": 0.20548893412674232, + "learning_rate": 1.9939223548228964e-06, + "loss": 0.011295700073242187, + "step": 140830 + }, + { + "epoch": 1.217758601309111, + "grad_norm": 12.022649240169303, + "learning_rate": 1.9937304220924266e-06, + "loss": 0.05782928466796875, + "step": 140835 + }, + { + "epoch": 1.2178018348306543, + "grad_norm": 4.1680865938330065, + "learning_rate": 1.9935384940027835e-06, + "loss": 0.031083297729492188, + "step": 140840 + }, + { + "epoch": 1.2178450683521975, + "grad_norm": 1.2684035931066044, + "learning_rate": 1.993346570554851e-06, + "loss": 0.06354827880859375, + "step": 140845 + }, + { + "epoch": 1.2178883018737408, + "grad_norm": 4.25309071333854, + "learning_rate": 1.993154651749516e-06, + "loss": 0.05004119873046875, + "step": 140850 + }, + { + "epoch": 1.2179315353952842, + "grad_norm": 0.42302315683706837, + "learning_rate": 1.9929627375876626e-06, + "loss": 0.09168777465820313, + "step": 140855 + }, + { + "epoch": 1.2179747689168274, + "grad_norm": 3.1430414655917795, + "learning_rate": 1.992770828070175e-06, + "loss": 0.09715843200683594, + "step": 140860 + }, + { + "epoch": 1.2180180024383707, + "grad_norm": 0.5936226171902683, + "learning_rate": 1.9925789231979402e-06, + "loss": 0.0022029876708984375, + "step": 140865 + }, + { + "epoch": 1.218061235959914, + "grad_norm": 1.3988264842684999, + "learning_rate": 1.9923870229718423e-06, + "loss": 0.037530517578125, + "step": 140870 + }, + { + "epoch": 1.2181044694814571, + "grad_norm": 2.8730676106236133, + "learning_rate": 1.9921951273927666e-06, + "loss": 0.07248802185058593, + "step": 140875 + }, + { + "epoch": 1.2181477030030003, + "grad_norm": 0.18608987348993977, + "learning_rate": 1.992003236461598e-06, + "loss": 0.10708341598510743, + "step": 140880 + }, + { + "epoch": 1.2181909365245436, + "grad_norm": 0.44150795310781427, + "learning_rate": 1.991811350179221e-06, + "loss": 0.0483856201171875, + "step": 140885 + }, + { + "epoch": 1.218234170046087, + "grad_norm": 37.83128216776173, + "learning_rate": 1.9916194685465205e-06, + "loss": 0.25026473999023435, + "step": 140890 + }, + { + "epoch": 1.2182774035676303, + "grad_norm": 0.6609855682248017, + "learning_rate": 1.9914275915643826e-06, + "loss": 0.007085037231445312, + "step": 140895 + }, + { + "epoch": 1.2183206370891735, + "grad_norm": 0.8211611527252134, + "learning_rate": 1.9912357192336915e-06, + "loss": 0.051020050048828126, + "step": 140900 + }, + { + "epoch": 1.2183638706107167, + "grad_norm": 10.505247697382226, + "learning_rate": 1.991043851555332e-06, + "loss": 0.2402069091796875, + "step": 140905 + }, + { + "epoch": 1.21840710413226, + "grad_norm": 4.033384785230556, + "learning_rate": 1.9908519885301894e-06, + "loss": 0.047078704833984374, + "step": 140910 + }, + { + "epoch": 1.2184503376538032, + "grad_norm": 26.475057702377796, + "learning_rate": 1.9906601301591484e-06, + "loss": 0.11803903579711914, + "step": 140915 + }, + { + "epoch": 1.2184935711753466, + "grad_norm": 0.17408421321584316, + "learning_rate": 1.9904682764430926e-06, + "loss": 0.06175689697265625, + "step": 140920 + }, + { + "epoch": 1.2185368046968899, + "grad_norm": 8.794245246132142, + "learning_rate": 1.9902764273829093e-06, + "loss": 0.13883514404296876, + "step": 140925 + }, + { + "epoch": 1.218580038218433, + "grad_norm": 0.5634775445025958, + "learning_rate": 1.9900845829794805e-06, + "loss": 0.027320098876953126, + "step": 140930 + }, + { + "epoch": 1.2186232717399763, + "grad_norm": 0.9324537723351829, + "learning_rate": 1.9898927432336937e-06, + "loss": 0.09393482208251953, + "step": 140935 + }, + { + "epoch": 1.2186665052615195, + "grad_norm": 3.2151774182862067, + "learning_rate": 1.9897009081464318e-06, + "loss": 0.0838043212890625, + "step": 140940 + }, + { + "epoch": 1.2187097387830628, + "grad_norm": 4.304573904890287, + "learning_rate": 1.9895090777185804e-06, + "loss": 0.015129852294921874, + "step": 140945 + }, + { + "epoch": 1.218752972304606, + "grad_norm": 1.3305622471669631, + "learning_rate": 1.9893172519510233e-06, + "loss": 0.09127159118652343, + "step": 140950 + }, + { + "epoch": 1.2187962058261494, + "grad_norm": 0.0877454612337358, + "learning_rate": 1.9891254308446454e-06, + "loss": 0.009015655517578125, + "step": 140955 + }, + { + "epoch": 1.2188394393476927, + "grad_norm": 3.6016335262329164, + "learning_rate": 1.9889336144003314e-06, + "loss": 0.07487716674804687, + "step": 140960 + }, + { + "epoch": 1.218882672869236, + "grad_norm": 2.3575450617534077, + "learning_rate": 1.9887418026189676e-06, + "loss": 0.098321533203125, + "step": 140965 + }, + { + "epoch": 1.2189259063907791, + "grad_norm": 0.07945150488781115, + "learning_rate": 1.988549995501437e-06, + "loss": 0.32132530212402344, + "step": 140970 + }, + { + "epoch": 1.2189691399123224, + "grad_norm": 0.17039686405221557, + "learning_rate": 1.988358193048624e-06, + "loss": 0.054683685302734375, + "step": 140975 + }, + { + "epoch": 1.2190123734338656, + "grad_norm": 9.505497397550561, + "learning_rate": 1.988166395261414e-06, + "loss": 0.038896942138671876, + "step": 140980 + }, + { + "epoch": 1.219055606955409, + "grad_norm": 3.0154399584596283, + "learning_rate": 1.98797460214069e-06, + "loss": 0.027663040161132812, + "step": 140985 + }, + { + "epoch": 1.2190988404769523, + "grad_norm": 0.5332339561715662, + "learning_rate": 1.987782813687338e-06, + "loss": 0.17336082458496094, + "step": 140990 + }, + { + "epoch": 1.2191420739984955, + "grad_norm": 0.991386437955399, + "learning_rate": 1.987591029902243e-06, + "loss": 0.04337158203125, + "step": 140995 + }, + { + "epoch": 1.2191853075200387, + "grad_norm": 34.08485398013248, + "learning_rate": 1.987399250786289e-06, + "loss": 0.14418907165527345, + "step": 141000 + }, + { + "epoch": 1.219228541041582, + "grad_norm": 1.26033169050509, + "learning_rate": 1.98720747634036e-06, + "loss": 0.1311309814453125, + "step": 141005 + }, + { + "epoch": 1.2192717745631252, + "grad_norm": 8.44367455064434, + "learning_rate": 1.98701570656534e-06, + "loss": 0.074609375, + "step": 141010 + }, + { + "epoch": 1.2193150080846684, + "grad_norm": 0.3885185882317903, + "learning_rate": 1.986823941462114e-06, + "loss": 0.27303614616394045, + "step": 141015 + }, + { + "epoch": 1.2193582416062119, + "grad_norm": 0.48062105979331354, + "learning_rate": 1.9866321810315667e-06, + "loss": 0.16050262451171876, + "step": 141020 + }, + { + "epoch": 1.219401475127755, + "grad_norm": 16.403195527995646, + "learning_rate": 1.986440425274583e-06, + "loss": 0.0389373779296875, + "step": 141025 + }, + { + "epoch": 1.2194447086492983, + "grad_norm": 0.9861995282717301, + "learning_rate": 1.986248674192046e-06, + "loss": 0.02875823974609375, + "step": 141030 + }, + { + "epoch": 1.2194879421708416, + "grad_norm": 10.620288423525555, + "learning_rate": 1.9860569277848405e-06, + "loss": 0.08197174072265626, + "step": 141035 + }, + { + "epoch": 1.2195311756923848, + "grad_norm": 0.08487067940072823, + "learning_rate": 1.9858651860538506e-06, + "loss": 0.19883193969726562, + "step": 141040 + }, + { + "epoch": 1.2195744092139282, + "grad_norm": 0.5858440841031135, + "learning_rate": 1.985673448999961e-06, + "loss": 0.009450912475585938, + "step": 141045 + }, + { + "epoch": 1.2196176427354715, + "grad_norm": 9.81395073049504, + "learning_rate": 1.985481716624055e-06, + "loss": 0.0170562744140625, + "step": 141050 + }, + { + "epoch": 1.2196608762570147, + "grad_norm": 7.2544653998525215, + "learning_rate": 1.9852899889270187e-06, + "loss": 0.03974132537841797, + "step": 141055 + }, + { + "epoch": 1.219704109778558, + "grad_norm": 7.5543057531903655, + "learning_rate": 1.9850982659097356e-06, + "loss": 0.04426765441894531, + "step": 141060 + }, + { + "epoch": 1.2197473433001012, + "grad_norm": 1.5796924157452692, + "learning_rate": 1.9849065475730898e-06, + "loss": 0.010412216186523438, + "step": 141065 + }, + { + "epoch": 1.2197905768216444, + "grad_norm": 1.67674919062047, + "learning_rate": 1.984714833917964e-06, + "loss": 0.023188018798828126, + "step": 141070 + }, + { + "epoch": 1.2198338103431876, + "grad_norm": 3.3416454839491214, + "learning_rate": 1.9845231249452446e-06, + "loss": 0.025777816772460938, + "step": 141075 + }, + { + "epoch": 1.2198770438647308, + "grad_norm": 3.5253042442331557, + "learning_rate": 1.9843314206558134e-06, + "loss": 0.02974090576171875, + "step": 141080 + }, + { + "epoch": 1.2199202773862743, + "grad_norm": 1.2163097121228181, + "learning_rate": 1.9841397210505575e-06, + "loss": 0.01330413818359375, + "step": 141085 + }, + { + "epoch": 1.2199635109078175, + "grad_norm": 0.9233236327554285, + "learning_rate": 1.9839480261303593e-06, + "loss": 0.056557083129882814, + "step": 141090 + }, + { + "epoch": 1.2200067444293607, + "grad_norm": 1.4069418996372294, + "learning_rate": 1.9837563358961033e-06, + "loss": 0.11871109008789063, + "step": 141095 + }, + { + "epoch": 1.220049977950904, + "grad_norm": 34.14974629943967, + "learning_rate": 1.9835646503486725e-06, + "loss": 0.1964447021484375, + "step": 141100 + }, + { + "epoch": 1.2200932114724472, + "grad_norm": 1.4548117646355416, + "learning_rate": 1.983372969488952e-06, + "loss": 0.029533767700195314, + "step": 141105 + }, + { + "epoch": 1.2201364449939907, + "grad_norm": 1.2148897128292708, + "learning_rate": 1.983181293317826e-06, + "loss": 0.024575042724609374, + "step": 141110 + }, + { + "epoch": 1.2201796785155339, + "grad_norm": 0.3652405780064848, + "learning_rate": 1.9829896218361764e-06, + "loss": 0.061452484130859374, + "step": 141115 + }, + { + "epoch": 1.2202229120370771, + "grad_norm": 16.159377679082077, + "learning_rate": 1.9827979550448904e-06, + "loss": 0.1818115234375, + "step": 141120 + }, + { + "epoch": 1.2202661455586203, + "grad_norm": 0.7609465882473255, + "learning_rate": 1.9826062929448494e-06, + "loss": 0.0451568603515625, + "step": 141125 + }, + { + "epoch": 1.2203093790801636, + "grad_norm": 3.974706707275685, + "learning_rate": 1.982414635536939e-06, + "loss": 0.13760147094726563, + "step": 141130 + }, + { + "epoch": 1.2203526126017068, + "grad_norm": 8.84411177697163, + "learning_rate": 1.9822229828220425e-06, + "loss": 0.067071533203125, + "step": 141135 + }, + { + "epoch": 1.22039584612325, + "grad_norm": 9.072219096859802, + "learning_rate": 1.9820313348010435e-06, + "loss": 0.02883453369140625, + "step": 141140 + }, + { + "epoch": 1.2204390796447935, + "grad_norm": 0.39213265464493724, + "learning_rate": 1.981839691474825e-06, + "loss": 0.05863037109375, + "step": 141145 + }, + { + "epoch": 1.2204823131663367, + "grad_norm": 4.656818176020054, + "learning_rate": 1.9816480528442725e-06, + "loss": 0.08474884033203126, + "step": 141150 + }, + { + "epoch": 1.22052554668788, + "grad_norm": 0.8858106689567573, + "learning_rate": 1.981456418910269e-06, + "loss": 0.0339752197265625, + "step": 141155 + }, + { + "epoch": 1.2205687802094232, + "grad_norm": 5.38373767029048, + "learning_rate": 1.9812647896736996e-06, + "loss": 0.043585586547851565, + "step": 141160 + }, + { + "epoch": 1.2206120137309664, + "grad_norm": 1.9614505497812635, + "learning_rate": 1.9810731651354464e-06, + "loss": 0.08838882446289062, + "step": 141165 + }, + { + "epoch": 1.2206552472525096, + "grad_norm": 64.77456629546514, + "learning_rate": 1.980881545296394e-06, + "loss": 0.10036582946777343, + "step": 141170 + }, + { + "epoch": 1.220698480774053, + "grad_norm": 0.5469054150960144, + "learning_rate": 1.9806899301574245e-06, + "loss": 0.05063304901123047, + "step": 141175 + }, + { + "epoch": 1.2207417142955963, + "grad_norm": 0.2090521205057768, + "learning_rate": 1.9804983197194243e-06, + "loss": 0.00876026153564453, + "step": 141180 + }, + { + "epoch": 1.2207849478171395, + "grad_norm": 3.264427427436359, + "learning_rate": 1.9803067139832752e-06, + "loss": 0.01843719482421875, + "step": 141185 + }, + { + "epoch": 1.2208281813386828, + "grad_norm": 4.741449157270875, + "learning_rate": 1.980115112949862e-06, + "loss": 0.05986366271972656, + "step": 141190 + }, + { + "epoch": 1.220871414860226, + "grad_norm": 2.8477319429912877, + "learning_rate": 1.9799235166200676e-06, + "loss": 0.02176361083984375, + "step": 141195 + }, + { + "epoch": 1.2209146483817692, + "grad_norm": 0.5810458952944344, + "learning_rate": 1.979731924994776e-06, + "loss": 0.05415163040161133, + "step": 141200 + }, + { + "epoch": 1.2209578819033124, + "grad_norm": 13.062079302658079, + "learning_rate": 1.97954033807487e-06, + "loss": 0.07268218994140625, + "step": 141205 + }, + { + "epoch": 1.221001115424856, + "grad_norm": 0.4267174240388807, + "learning_rate": 1.9793487558612336e-06, + "loss": 0.0683349609375, + "step": 141210 + }, + { + "epoch": 1.2210443489463991, + "grad_norm": 0.044846218774763934, + "learning_rate": 1.9791571783547505e-06, + "loss": 0.0645965576171875, + "step": 141215 + }, + { + "epoch": 1.2210875824679424, + "grad_norm": 0.8817891872223484, + "learning_rate": 1.978965605556305e-06, + "loss": 0.1040679931640625, + "step": 141220 + }, + { + "epoch": 1.2211308159894856, + "grad_norm": 0.10611081015261811, + "learning_rate": 1.97877403746678e-06, + "loss": 0.019805145263671876, + "step": 141225 + }, + { + "epoch": 1.2211740495110288, + "grad_norm": 3.2551330025351173, + "learning_rate": 1.978582474087059e-06, + "loss": 0.05342559814453125, + "step": 141230 + }, + { + "epoch": 1.221217283032572, + "grad_norm": 0.3763554539315313, + "learning_rate": 1.9783909154180245e-06, + "loss": 0.02349090576171875, + "step": 141235 + }, + { + "epoch": 1.2212605165541155, + "grad_norm": 18.513978326948237, + "learning_rate": 1.978199361460561e-06, + "loss": 0.10657424926757812, + "step": 141240 + }, + { + "epoch": 1.2213037500756587, + "grad_norm": 25.11170358205364, + "learning_rate": 1.978007812215552e-06, + "loss": 0.08030014038085938, + "step": 141245 + }, + { + "epoch": 1.221346983597202, + "grad_norm": 17.461510126656105, + "learning_rate": 1.9778162676838806e-06, + "loss": 0.07957963943481446, + "step": 141250 + }, + { + "epoch": 1.2213902171187452, + "grad_norm": 3.0174162834165204, + "learning_rate": 1.9776247278664308e-06, + "loss": 0.11644439697265625, + "step": 141255 + }, + { + "epoch": 1.2214334506402884, + "grad_norm": 4.6928636128623085, + "learning_rate": 1.9774331927640856e-06, + "loss": 0.20832366943359376, + "step": 141260 + }, + { + "epoch": 1.2214766841618316, + "grad_norm": 1.329154045719109, + "learning_rate": 1.977241662377727e-06, + "loss": 0.07120075225830078, + "step": 141265 + }, + { + "epoch": 1.2215199176833749, + "grad_norm": 2.4054281551301115, + "learning_rate": 1.97705013670824e-06, + "loss": 0.031144142150878906, + "step": 141270 + }, + { + "epoch": 1.2215631512049183, + "grad_norm": 26.59436695668375, + "learning_rate": 1.9768586157565067e-06, + "loss": 0.16597480773925782, + "step": 141275 + }, + { + "epoch": 1.2216063847264615, + "grad_norm": 0.09369976764461306, + "learning_rate": 1.976667099523412e-06, + "loss": 0.2617940902709961, + "step": 141280 + }, + { + "epoch": 1.2216496182480048, + "grad_norm": 15.085202993149494, + "learning_rate": 1.9764755880098383e-06, + "loss": 0.05022735595703125, + "step": 141285 + }, + { + "epoch": 1.221692851769548, + "grad_norm": 5.006958151465451, + "learning_rate": 1.9762840812166687e-06, + "loss": 0.09387741088867188, + "step": 141290 + }, + { + "epoch": 1.2217360852910912, + "grad_norm": 2.7513808992974567, + "learning_rate": 1.9760925791447858e-06, + "loss": 0.041020965576171874, + "step": 141295 + }, + { + "epoch": 1.2217793188126347, + "grad_norm": 7.081677636905429, + "learning_rate": 1.9759010817950743e-06, + "loss": 0.0735321044921875, + "step": 141300 + }, + { + "epoch": 1.221822552334178, + "grad_norm": 1.2043135925636603, + "learning_rate": 1.975709589168415e-06, + "loss": 0.15402755737304688, + "step": 141305 + }, + { + "epoch": 1.2218657858557211, + "grad_norm": 11.212554905467863, + "learning_rate": 1.975518101265694e-06, + "loss": 0.023905181884765626, + "step": 141310 + }, + { + "epoch": 1.2219090193772644, + "grad_norm": 3.0029982160716537, + "learning_rate": 1.975326618087793e-06, + "loss": 0.07681732177734375, + "step": 141315 + }, + { + "epoch": 1.2219522528988076, + "grad_norm": 1.3992009095791915, + "learning_rate": 1.975135139635595e-06, + "loss": 0.057916259765625, + "step": 141320 + }, + { + "epoch": 1.2219954864203508, + "grad_norm": 11.049989356829949, + "learning_rate": 1.974943665909982e-06, + "loss": 0.0363525390625, + "step": 141325 + }, + { + "epoch": 1.222038719941894, + "grad_norm": 2.8652818490461307, + "learning_rate": 1.97475219691184e-06, + "loss": 0.01975555419921875, + "step": 141330 + }, + { + "epoch": 1.2220819534634373, + "grad_norm": 0.3465826101022387, + "learning_rate": 1.974560732642048e-06, + "loss": 0.04587745666503906, + "step": 141335 + }, + { + "epoch": 1.2221251869849807, + "grad_norm": 2.559741815745791, + "learning_rate": 1.974369273101493e-06, + "loss": 0.029764366149902344, + "step": 141340 + }, + { + "epoch": 1.222168420506524, + "grad_norm": 1.6828484197975762, + "learning_rate": 1.9741778182910565e-06, + "loss": 0.027362060546875, + "step": 141345 + }, + { + "epoch": 1.2222116540280672, + "grad_norm": 1.8803117151496367, + "learning_rate": 1.9739863682116203e-06, + "loss": 0.06016044616699219, + "step": 141350 + }, + { + "epoch": 1.2222548875496104, + "grad_norm": 0.19372525726439188, + "learning_rate": 1.973794922864069e-06, + "loss": 0.01986217498779297, + "step": 141355 + }, + { + "epoch": 1.2222981210711537, + "grad_norm": 2.1172201210182875, + "learning_rate": 1.973603482249285e-06, + "loss": 0.0448028564453125, + "step": 141360 + }, + { + "epoch": 1.222341354592697, + "grad_norm": 0.6659410224019999, + "learning_rate": 1.973412046368151e-06, + "loss": 0.08628787994384765, + "step": 141365 + }, + { + "epoch": 1.2223845881142403, + "grad_norm": 37.013459014408696, + "learning_rate": 1.9732206152215486e-06, + "loss": 0.11680011749267578, + "step": 141370 + }, + { + "epoch": 1.2224278216357836, + "grad_norm": 1.6344850604643326, + "learning_rate": 1.973029188810363e-06, + "loss": 0.33595962524414064, + "step": 141375 + }, + { + "epoch": 1.2224710551573268, + "grad_norm": 16.302117318775384, + "learning_rate": 1.972837767135476e-06, + "loss": 0.08929519653320313, + "step": 141380 + }, + { + "epoch": 1.22251428867887, + "grad_norm": 0.3875525125020718, + "learning_rate": 1.9726463501977705e-06, + "loss": 0.03979969024658203, + "step": 141385 + }, + { + "epoch": 1.2225575222004132, + "grad_norm": 0.7993692659737194, + "learning_rate": 1.9724549379981293e-06, + "loss": 0.08572921752929688, + "step": 141390 + }, + { + "epoch": 1.2226007557219565, + "grad_norm": 14.08549524871495, + "learning_rate": 1.9722635305374354e-06, + "loss": 0.0342437744140625, + "step": 141395 + }, + { + "epoch": 1.2226439892435, + "grad_norm": 1.8800561231921182, + "learning_rate": 1.97207212781657e-06, + "loss": 0.042234039306640624, + "step": 141400 + }, + { + "epoch": 1.2226872227650432, + "grad_norm": 0.2617587395367098, + "learning_rate": 1.9718807298364184e-06, + "loss": 0.039813232421875, + "step": 141405 + }, + { + "epoch": 1.2227304562865864, + "grad_norm": 0.6177416970568261, + "learning_rate": 1.9716893365978612e-06, + "loss": 0.027564620971679686, + "step": 141410 + }, + { + "epoch": 1.2227736898081296, + "grad_norm": 0.6707868718485699, + "learning_rate": 1.9714979481017827e-06, + "loss": 0.0667938232421875, + "step": 141415 + }, + { + "epoch": 1.2228169233296728, + "grad_norm": 3.389593970832987, + "learning_rate": 1.971306564349065e-06, + "loss": 0.07109527587890625, + "step": 141420 + }, + { + "epoch": 1.222860156851216, + "grad_norm": 1.5816188169572625, + "learning_rate": 1.9711151853405902e-06, + "loss": 0.02935943603515625, + "step": 141425 + }, + { + "epoch": 1.2229033903727595, + "grad_norm": 4.182108872647343, + "learning_rate": 1.9709238110772402e-06, + "loss": 0.03829269409179688, + "step": 141430 + }, + { + "epoch": 1.2229466238943028, + "grad_norm": 7.785221425638135, + "learning_rate": 1.9707324415599e-06, + "loss": 0.0508331298828125, + "step": 141435 + }, + { + "epoch": 1.222989857415846, + "grad_norm": 3.0408369026810416, + "learning_rate": 1.97054107678945e-06, + "loss": 0.04234771728515625, + "step": 141440 + }, + { + "epoch": 1.2230330909373892, + "grad_norm": 2.2320414808362736, + "learning_rate": 1.970349716766774e-06, + "loss": 0.0706085205078125, + "step": 141445 + }, + { + "epoch": 1.2230763244589324, + "grad_norm": 13.8196970589355, + "learning_rate": 1.970158361492754e-06, + "loss": 0.017993736267089843, + "step": 141450 + }, + { + "epoch": 1.2231195579804757, + "grad_norm": 20.01938645487057, + "learning_rate": 1.969967010968273e-06, + "loss": 0.4075477600097656, + "step": 141455 + }, + { + "epoch": 1.223162791502019, + "grad_norm": 9.02784259403716, + "learning_rate": 1.9697756651942127e-06, + "loss": 0.0502288818359375, + "step": 141460 + }, + { + "epoch": 1.2232060250235623, + "grad_norm": 0.11519225414615952, + "learning_rate": 1.9695843241714556e-06, + "loss": 0.04036445617675781, + "step": 141465 + }, + { + "epoch": 1.2232492585451056, + "grad_norm": 0.2836823338167552, + "learning_rate": 1.969392987900885e-06, + "loss": 0.019281005859375, + "step": 141470 + }, + { + "epoch": 1.2232924920666488, + "grad_norm": 20.628679102293287, + "learning_rate": 1.969201656383383e-06, + "loss": 0.2187103271484375, + "step": 141475 + }, + { + "epoch": 1.223335725588192, + "grad_norm": 39.14061962329286, + "learning_rate": 1.9690103296198327e-06, + "loss": 0.25757293701171874, + "step": 141480 + }, + { + "epoch": 1.2233789591097353, + "grad_norm": 1.1177127662625956, + "learning_rate": 1.968819007611115e-06, + "loss": 0.01397705078125, + "step": 141485 + }, + { + "epoch": 1.2234221926312785, + "grad_norm": 0.5040253747817167, + "learning_rate": 1.9686276903581124e-06, + "loss": 0.08429126739501953, + "step": 141490 + }, + { + "epoch": 1.223465426152822, + "grad_norm": 3.375515693493452, + "learning_rate": 1.9684363778617076e-06, + "loss": 0.2268850326538086, + "step": 141495 + }, + { + "epoch": 1.2235086596743652, + "grad_norm": 0.2528539066373272, + "learning_rate": 1.968245070122783e-06, + "loss": 0.09957256317138671, + "step": 141500 + }, + { + "epoch": 1.2235518931959084, + "grad_norm": 8.915850077500755, + "learning_rate": 1.9680537671422214e-06, + "loss": 0.05065155029296875, + "step": 141505 + }, + { + "epoch": 1.2235951267174516, + "grad_norm": 6.402657012977996, + "learning_rate": 1.9678624689209053e-06, + "loss": 0.07283859252929688, + "step": 141510 + }, + { + "epoch": 1.2236383602389949, + "grad_norm": 0.24624127070640214, + "learning_rate": 1.9676711754597157e-06, + "loss": 0.06255512237548828, + "step": 141515 + }, + { + "epoch": 1.223681593760538, + "grad_norm": 12.400238052725802, + "learning_rate": 1.967479886759535e-06, + "loss": 0.0648183822631836, + "step": 141520 + }, + { + "epoch": 1.2237248272820813, + "grad_norm": 0.019675552004388346, + "learning_rate": 1.9672886028212456e-06, + "loss": 0.01966266632080078, + "step": 141525 + }, + { + "epoch": 1.2237680608036248, + "grad_norm": 33.5517953261314, + "learning_rate": 1.9670973236457312e-06, + "loss": 0.311773681640625, + "step": 141530 + }, + { + "epoch": 1.223811294325168, + "grad_norm": 0.11251546779578529, + "learning_rate": 1.9669060492338726e-06, + "loss": 0.03322649002075195, + "step": 141535 + }, + { + "epoch": 1.2238545278467112, + "grad_norm": 2.629601841851703, + "learning_rate": 1.9667147795865513e-06, + "loss": 0.06572971343994141, + "step": 141540 + }, + { + "epoch": 1.2238977613682545, + "grad_norm": 0.14104804103423824, + "learning_rate": 1.9665235147046506e-06, + "loss": 0.025978851318359374, + "step": 141545 + }, + { + "epoch": 1.2239409948897977, + "grad_norm": 10.336733886191942, + "learning_rate": 1.9663322545890517e-06, + "loss": 0.05418872833251953, + "step": 141550 + }, + { + "epoch": 1.2239842284113411, + "grad_norm": 0.3061164701812687, + "learning_rate": 1.9661409992406377e-06, + "loss": 0.010541915893554688, + "step": 141555 + }, + { + "epoch": 1.2240274619328844, + "grad_norm": 0.9864297832590101, + "learning_rate": 1.9659497486602886e-06, + "loss": 0.007366943359375, + "step": 141560 + }, + { + "epoch": 1.2240706954544276, + "grad_norm": 2.5445642479488844, + "learning_rate": 1.965758502848889e-06, + "loss": 0.3193855285644531, + "step": 141565 + }, + { + "epoch": 1.2241139289759708, + "grad_norm": 25.411669565084157, + "learning_rate": 1.96556726180732e-06, + "loss": 0.052059364318847653, + "step": 141570 + }, + { + "epoch": 1.224157162497514, + "grad_norm": 0.7820466856653389, + "learning_rate": 1.965376025536463e-06, + "loss": 0.03351287841796875, + "step": 141575 + }, + { + "epoch": 1.2242003960190573, + "grad_norm": 0.16267199347212966, + "learning_rate": 1.9651847940372004e-06, + "loss": 0.015753555297851562, + "step": 141580 + }, + { + "epoch": 1.2242436295406005, + "grad_norm": 48.71496290872564, + "learning_rate": 1.9649935673104146e-06, + "loss": 0.5827346801757812, + "step": 141585 + }, + { + "epoch": 1.224286863062144, + "grad_norm": 0.09819346099455015, + "learning_rate": 1.9648023453569856e-06, + "loss": 0.026221466064453126, + "step": 141590 + }, + { + "epoch": 1.2243300965836872, + "grad_norm": 13.996556937564273, + "learning_rate": 1.9646111281777976e-06, + "loss": 0.08394374847412109, + "step": 141595 + }, + { + "epoch": 1.2243733301052304, + "grad_norm": 0.8068049001964728, + "learning_rate": 1.9644199157737325e-06, + "loss": 0.0862091064453125, + "step": 141600 + }, + { + "epoch": 1.2244165636267736, + "grad_norm": 11.380233342596194, + "learning_rate": 1.9642287081456695e-06, + "loss": 0.0330596923828125, + "step": 141605 + }, + { + "epoch": 1.2244597971483169, + "grad_norm": 2.06765030905103, + "learning_rate": 1.9640375052944936e-06, + "loss": 0.09171104431152344, + "step": 141610 + }, + { + "epoch": 1.22450303066986, + "grad_norm": 26.806414563840825, + "learning_rate": 1.963846307221085e-06, + "loss": 0.08850746154785157, + "step": 141615 + }, + { + "epoch": 1.2245462641914036, + "grad_norm": 2.3954824666354946, + "learning_rate": 1.963655113926324e-06, + "loss": 0.18984527587890626, + "step": 141620 + }, + { + "epoch": 1.2245894977129468, + "grad_norm": 0.1481897073924637, + "learning_rate": 1.963463925411096e-06, + "loss": 0.44213104248046875, + "step": 141625 + }, + { + "epoch": 1.22463273123449, + "grad_norm": 5.482788488050528, + "learning_rate": 1.96327274167628e-06, + "loss": 0.047878265380859375, + "step": 141630 + }, + { + "epoch": 1.2246759647560332, + "grad_norm": 32.3226949180494, + "learning_rate": 1.963081562722759e-06, + "loss": 0.09999160766601563, + "step": 141635 + }, + { + "epoch": 1.2247191982775765, + "grad_norm": 3.648594858447019, + "learning_rate": 1.962890388551414e-06, + "loss": 0.04212455749511719, + "step": 141640 + }, + { + "epoch": 1.2247624317991197, + "grad_norm": 6.088917230394389, + "learning_rate": 1.9626992191631268e-06, + "loss": 0.030259323120117188, + "step": 141645 + }, + { + "epoch": 1.224805665320663, + "grad_norm": 0.3673147707852706, + "learning_rate": 1.9625080545587792e-06, + "loss": 0.08230552673339844, + "step": 141650 + }, + { + "epoch": 1.2248488988422064, + "grad_norm": 0.06600922428803518, + "learning_rate": 1.962316894739252e-06, + "loss": 0.031213760375976562, + "step": 141655 + }, + { + "epoch": 1.2248921323637496, + "grad_norm": 58.2445652448892, + "learning_rate": 1.9621257397054286e-06, + "loss": 0.3099662780761719, + "step": 141660 + }, + { + "epoch": 1.2249353658852928, + "grad_norm": 1.4045972602480732, + "learning_rate": 1.9619345894581887e-06, + "loss": 0.0330413818359375, + "step": 141665 + }, + { + "epoch": 1.224978599406836, + "grad_norm": 1.594822163093147, + "learning_rate": 1.961743443998416e-06, + "loss": 0.09055557250976562, + "step": 141670 + }, + { + "epoch": 1.2250218329283793, + "grad_norm": 27.741157742960485, + "learning_rate": 1.96155230332699e-06, + "loss": 0.4181205749511719, + "step": 141675 + }, + { + "epoch": 1.2250650664499225, + "grad_norm": 2.2961594034655284, + "learning_rate": 1.9613611674447933e-06, + "loss": 0.05442047119140625, + "step": 141680 + }, + { + "epoch": 1.225108299971466, + "grad_norm": 1.199428954952146, + "learning_rate": 1.961170036352706e-06, + "loss": 0.026751708984375, + "step": 141685 + }, + { + "epoch": 1.2251515334930092, + "grad_norm": 3.368449495189946, + "learning_rate": 1.960978910051612e-06, + "loss": 0.07547988891601562, + "step": 141690 + }, + { + "epoch": 1.2251947670145524, + "grad_norm": 16.05734005806096, + "learning_rate": 1.960787788542391e-06, + "loss": 0.014154815673828125, + "step": 141695 + }, + { + "epoch": 1.2252380005360957, + "grad_norm": 7.629507861765745, + "learning_rate": 1.960596671825925e-06, + "loss": 0.06987380981445312, + "step": 141700 + }, + { + "epoch": 1.225281234057639, + "grad_norm": 0.6260145024797374, + "learning_rate": 1.9604055599030954e-06, + "loss": 0.023308563232421874, + "step": 141705 + }, + { + "epoch": 1.2253244675791821, + "grad_norm": 1.4369613058713568, + "learning_rate": 1.960214452774784e-06, + "loss": 0.08759498596191406, + "step": 141710 + }, + { + "epoch": 1.2253677011007253, + "grad_norm": 0.023326488334107158, + "learning_rate": 1.960023350441871e-06, + "loss": 0.0601318359375, + "step": 141715 + }, + { + "epoch": 1.2254109346222688, + "grad_norm": 3.3118929864287265, + "learning_rate": 1.959832252905238e-06, + "loss": 0.06385841369628906, + "step": 141720 + }, + { + "epoch": 1.225454168143812, + "grad_norm": 5.409808299739851, + "learning_rate": 1.9596411601657665e-06, + "loss": 0.06621685028076171, + "step": 141725 + }, + { + "epoch": 1.2254974016653553, + "grad_norm": 23.923152737870463, + "learning_rate": 1.959450072224339e-06, + "loss": 0.08630447387695313, + "step": 141730 + }, + { + "epoch": 1.2255406351868985, + "grad_norm": 36.47594453997601, + "learning_rate": 1.959258989081836e-06, + "loss": 0.18338623046875, + "step": 141735 + }, + { + "epoch": 1.2255838687084417, + "grad_norm": 0.2860692762278572, + "learning_rate": 1.959067910739138e-06, + "loss": 0.014100265502929688, + "step": 141740 + }, + { + "epoch": 1.2256271022299852, + "grad_norm": 0.13385621644336512, + "learning_rate": 1.958876837197127e-06, + "loss": 0.015207672119140625, + "step": 141745 + }, + { + "epoch": 1.2256703357515284, + "grad_norm": 2.2561877891219138, + "learning_rate": 1.958685768456683e-06, + "loss": 0.035799407958984376, + "step": 141750 + }, + { + "epoch": 1.2257135692730716, + "grad_norm": 2.7049072122864257, + "learning_rate": 1.9584947045186893e-06, + "loss": 0.05569725036621094, + "step": 141755 + }, + { + "epoch": 1.2257568027946149, + "grad_norm": 0.5740291522720294, + "learning_rate": 1.9583036453840265e-06, + "loss": 0.02374267578125, + "step": 141760 + }, + { + "epoch": 1.225800036316158, + "grad_norm": 0.7339321167257313, + "learning_rate": 1.958112591053575e-06, + "loss": 0.059326171875, + "step": 141765 + }, + { + "epoch": 1.2258432698377013, + "grad_norm": 4.0997649374167215, + "learning_rate": 1.9579215415282153e-06, + "loss": 0.12471771240234375, + "step": 141770 + }, + { + "epoch": 1.2258865033592445, + "grad_norm": 8.650264872868245, + "learning_rate": 1.9577304968088305e-06, + "loss": 0.030484390258789063, + "step": 141775 + }, + { + "epoch": 1.2259297368807878, + "grad_norm": 0.08251373350987902, + "learning_rate": 1.957539456896299e-06, + "loss": 0.17891082763671876, + "step": 141780 + }, + { + "epoch": 1.2259729704023312, + "grad_norm": 12.73380707488708, + "learning_rate": 1.9573484217915045e-06, + "loss": 0.1637298583984375, + "step": 141785 + }, + { + "epoch": 1.2260162039238744, + "grad_norm": 1.7420670799957019, + "learning_rate": 1.9571573914953273e-06, + "loss": 0.03592987060546875, + "step": 141790 + }, + { + "epoch": 1.2260594374454177, + "grad_norm": 52.993744183830124, + "learning_rate": 1.9569663660086476e-06, + "loss": 0.13873291015625, + "step": 141795 + }, + { + "epoch": 1.226102670966961, + "grad_norm": 0.3686422520158693, + "learning_rate": 1.9567753453323464e-06, + "loss": 0.023756790161132812, + "step": 141800 + }, + { + "epoch": 1.2261459044885041, + "grad_norm": 3.989052404713771, + "learning_rate": 1.956584329467306e-06, + "loss": 0.027332878112792967, + "step": 141805 + }, + { + "epoch": 1.2261891380100476, + "grad_norm": 2.6866544162534236, + "learning_rate": 1.9563933184144063e-06, + "loss": 0.03429412841796875, + "step": 141810 + }, + { + "epoch": 1.2262323715315908, + "grad_norm": 0.6143902488707137, + "learning_rate": 1.956202312174527e-06, + "loss": 0.014387893676757812, + "step": 141815 + }, + { + "epoch": 1.226275605053134, + "grad_norm": 91.761426216043, + "learning_rate": 1.956011310748552e-06, + "loss": 0.153839111328125, + "step": 141820 + }, + { + "epoch": 1.2263188385746773, + "grad_norm": 6.17193602657931, + "learning_rate": 1.9558203141373606e-06, + "loss": 0.031018447875976563, + "step": 141825 + }, + { + "epoch": 1.2263620720962205, + "grad_norm": 1.1519385761323893, + "learning_rate": 1.9556293223418322e-06, + "loss": 0.07052898406982422, + "step": 141830 + }, + { + "epoch": 1.2264053056177637, + "grad_norm": 1.7168799009103857, + "learning_rate": 1.9554383353628505e-06, + "loss": 0.063751220703125, + "step": 141835 + }, + { + "epoch": 1.226448539139307, + "grad_norm": 2.8775298795044533, + "learning_rate": 1.955247353201295e-06, + "loss": 0.1118408203125, + "step": 141840 + }, + { + "epoch": 1.2264917726608504, + "grad_norm": 1.4805296531404135, + "learning_rate": 1.9550563758580448e-06, + "loss": 0.16792144775390624, + "step": 141845 + }, + { + "epoch": 1.2265350061823936, + "grad_norm": 1.752089927663871, + "learning_rate": 1.9548654033339835e-06, + "loss": 0.01363067626953125, + "step": 141850 + }, + { + "epoch": 1.2265782397039369, + "grad_norm": 0.15593629356856856, + "learning_rate": 1.954674435629991e-06, + "loss": 0.011732101440429688, + "step": 141855 + }, + { + "epoch": 1.22662147322548, + "grad_norm": 0.6912584554463735, + "learning_rate": 1.9544834727469465e-06, + "loss": 0.25066299438476564, + "step": 141860 + }, + { + "epoch": 1.2266647067470233, + "grad_norm": 1.0228353128403256, + "learning_rate": 1.9542925146857327e-06, + "loss": 0.01352996826171875, + "step": 141865 + }, + { + "epoch": 1.2267079402685666, + "grad_norm": 2.6428011734367454, + "learning_rate": 1.9541015614472297e-06, + "loss": 0.079144287109375, + "step": 141870 + }, + { + "epoch": 1.22675117379011, + "grad_norm": 1.424841116836834, + "learning_rate": 1.953910613032316e-06, + "loss": 0.014725494384765624, + "step": 141875 + }, + { + "epoch": 1.2267944073116532, + "grad_norm": 2.9380961243595176, + "learning_rate": 1.953719669441876e-06, + "loss": 0.018619537353515625, + "step": 141880 + }, + { + "epoch": 1.2268376408331965, + "grad_norm": 7.176952785256763, + "learning_rate": 1.9535287306767876e-06, + "loss": 0.03397674560546875, + "step": 141885 + }, + { + "epoch": 1.2268808743547397, + "grad_norm": 4.6520219409126, + "learning_rate": 1.953337796737932e-06, + "loss": 0.0126129150390625, + "step": 141890 + }, + { + "epoch": 1.226924107876283, + "grad_norm": 0.8659932167272933, + "learning_rate": 1.953146867626191e-06, + "loss": 0.14862747192382814, + "step": 141895 + }, + { + "epoch": 1.2269673413978262, + "grad_norm": 31.742443716226493, + "learning_rate": 1.952955943342444e-06, + "loss": 0.46925601959228513, + "step": 141900 + }, + { + "epoch": 1.2270105749193694, + "grad_norm": 10.822393774190111, + "learning_rate": 1.952765023887572e-06, + "loss": 0.029027557373046874, + "step": 141905 + }, + { + "epoch": 1.2270538084409128, + "grad_norm": 3.8990926375680908, + "learning_rate": 1.952574109262453e-06, + "loss": 0.03000030517578125, + "step": 141910 + }, + { + "epoch": 1.227097041962456, + "grad_norm": 2.5554514039294625, + "learning_rate": 1.9523831994679716e-06, + "loss": 0.007462310791015625, + "step": 141915 + }, + { + "epoch": 1.2271402754839993, + "grad_norm": 7.67404392084459, + "learning_rate": 1.9521922945050056e-06, + "loss": 0.015459632873535157, + "step": 141920 + }, + { + "epoch": 1.2271835090055425, + "grad_norm": 3.2801310702786823, + "learning_rate": 1.9520013943744363e-06, + "loss": 0.02836456298828125, + "step": 141925 + }, + { + "epoch": 1.2272267425270857, + "grad_norm": 1.4859748226312588, + "learning_rate": 1.9518104990771445e-06, + "loss": 0.11550064086914062, + "step": 141930 + }, + { + "epoch": 1.227269976048629, + "grad_norm": 4.670482494800213, + "learning_rate": 1.95161960861401e-06, + "loss": 0.0199737548828125, + "step": 141935 + }, + { + "epoch": 1.2273132095701724, + "grad_norm": 75.42351776144325, + "learning_rate": 1.951428722985912e-06, + "loss": 0.19140777587890626, + "step": 141940 + }, + { + "epoch": 1.2273564430917157, + "grad_norm": 0.427109401108819, + "learning_rate": 1.951237842193733e-06, + "loss": 0.05164680480957031, + "step": 141945 + }, + { + "epoch": 1.2273996766132589, + "grad_norm": 3.027877326198787, + "learning_rate": 1.951046966238352e-06, + "loss": 0.23973388671875, + "step": 141950 + }, + { + "epoch": 1.2274429101348021, + "grad_norm": 19.222447874821686, + "learning_rate": 1.9508560951206504e-06, + "loss": 0.3660064697265625, + "step": 141955 + }, + { + "epoch": 1.2274861436563453, + "grad_norm": 0.5214809608208292, + "learning_rate": 1.9506652288415076e-06, + "loss": 0.22655487060546875, + "step": 141960 + }, + { + "epoch": 1.2275293771778886, + "grad_norm": 1.2244977053905635, + "learning_rate": 1.950474367401804e-06, + "loss": 0.4125926971435547, + "step": 141965 + }, + { + "epoch": 1.2275726106994318, + "grad_norm": 6.557180441700041, + "learning_rate": 1.950283510802419e-06, + "loss": 0.03220367431640625, + "step": 141970 + }, + { + "epoch": 1.2276158442209752, + "grad_norm": 5.306045754435493, + "learning_rate": 1.9500926590442337e-06, + "loss": 0.1262460708618164, + "step": 141975 + }, + { + "epoch": 1.2276590777425185, + "grad_norm": 1.6225569085477511, + "learning_rate": 1.9499018121281294e-06, + "loss": 0.0165374755859375, + "step": 141980 + }, + { + "epoch": 1.2277023112640617, + "grad_norm": 1.8933764847771237, + "learning_rate": 1.949710970054985e-06, + "loss": 0.13110694885253907, + "step": 141985 + }, + { + "epoch": 1.227745544785605, + "grad_norm": 0.3799809552834536, + "learning_rate": 1.9495201328256813e-06, + "loss": 0.010330963134765624, + "step": 141990 + }, + { + "epoch": 1.2277887783071482, + "grad_norm": 1.8405730890143235, + "learning_rate": 1.9493293004410967e-06, + "loss": 0.04759902954101562, + "step": 141995 + }, + { + "epoch": 1.2278320118286916, + "grad_norm": 30.49163752667784, + "learning_rate": 1.9491384729021133e-06, + "loss": 0.09658050537109375, + "step": 142000 + }, + { + "epoch": 1.2278752453502348, + "grad_norm": 0.6524412471661107, + "learning_rate": 1.9489476502096093e-06, + "loss": 0.006306838989257812, + "step": 142005 + }, + { + "epoch": 1.227918478871778, + "grad_norm": 1.133888912667103, + "learning_rate": 1.948756832364467e-06, + "loss": 0.007554244995117187, + "step": 142010 + }, + { + "epoch": 1.2279617123933213, + "grad_norm": 18.86137533369474, + "learning_rate": 1.9485660193675652e-06, + "loss": 0.033736801147460936, + "step": 142015 + }, + { + "epoch": 1.2280049459148645, + "grad_norm": 0.8160126691864864, + "learning_rate": 1.948375211219784e-06, + "loss": 0.00887603759765625, + "step": 142020 + }, + { + "epoch": 1.2280481794364078, + "grad_norm": 0.46641991970596036, + "learning_rate": 1.9481844079220027e-06, + "loss": 0.29064102172851564, + "step": 142025 + }, + { + "epoch": 1.228091412957951, + "grad_norm": 3.6513583090855284, + "learning_rate": 1.9479936094751027e-06, + "loss": 0.012630462646484375, + "step": 142030 + }, + { + "epoch": 1.2281346464794942, + "grad_norm": 9.635913711416752, + "learning_rate": 1.947802815879962e-06, + "loss": 0.009549331665039063, + "step": 142035 + }, + { + "epoch": 1.2281778800010377, + "grad_norm": 7.954615420564412, + "learning_rate": 1.947612027137463e-06, + "loss": 0.1813385009765625, + "step": 142040 + }, + { + "epoch": 1.228221113522581, + "grad_norm": 2.8238756299891, + "learning_rate": 1.947421243248484e-06, + "loss": 0.0306243896484375, + "step": 142045 + }, + { + "epoch": 1.2282643470441241, + "grad_norm": 34.75335169320606, + "learning_rate": 1.9472304642139065e-06, + "loss": 0.1481729507446289, + "step": 142050 + }, + { + "epoch": 1.2283075805656674, + "grad_norm": 0.17765009503241527, + "learning_rate": 1.947039690034607e-06, + "loss": 0.05313796997070312, + "step": 142055 + }, + { + "epoch": 1.2283508140872106, + "grad_norm": 0.5515326793654541, + "learning_rate": 1.9468489207114684e-06, + "loss": 0.12201385498046875, + "step": 142060 + }, + { + "epoch": 1.228394047608754, + "grad_norm": 0.3358512482675164, + "learning_rate": 1.9466581562453704e-06, + "loss": 0.0129486083984375, + "step": 142065 + }, + { + "epoch": 1.2284372811302973, + "grad_norm": 4.158300688848214, + "learning_rate": 1.9464673966371892e-06, + "loss": 0.09827423095703125, + "step": 142070 + }, + { + "epoch": 1.2284805146518405, + "grad_norm": 13.947026446245244, + "learning_rate": 1.9462766418878097e-06, + "loss": 0.06237010955810547, + "step": 142075 + }, + { + "epoch": 1.2285237481733837, + "grad_norm": 0.2795680311764937, + "learning_rate": 1.946085891998109e-06, + "loss": 0.088214111328125, + "step": 142080 + }, + { + "epoch": 1.228566981694927, + "grad_norm": 12.737103784233936, + "learning_rate": 1.9458951469689657e-06, + "loss": 0.17442779541015624, + "step": 142085 + }, + { + "epoch": 1.2286102152164702, + "grad_norm": 0.8161265035721368, + "learning_rate": 1.945704406801262e-06, + "loss": 0.008161163330078125, + "step": 142090 + }, + { + "epoch": 1.2286534487380134, + "grad_norm": 0.21286614641142598, + "learning_rate": 1.9455136714958764e-06, + "loss": 0.2035400390625, + "step": 142095 + }, + { + "epoch": 1.2286966822595569, + "grad_norm": 0.9798112264087768, + "learning_rate": 1.945322941053688e-06, + "loss": 0.04263381958007813, + "step": 142100 + }, + { + "epoch": 1.2287399157811, + "grad_norm": 36.94046431429217, + "learning_rate": 1.9451322154755767e-06, + "loss": 0.04554824829101563, + "step": 142105 + }, + { + "epoch": 1.2287831493026433, + "grad_norm": 1.3275390547505446, + "learning_rate": 1.944941494762424e-06, + "loss": 0.06568183898925781, + "step": 142110 + }, + { + "epoch": 1.2288263828241865, + "grad_norm": 17.621037250127028, + "learning_rate": 1.944750778915106e-06, + "loss": 0.09929428100585938, + "step": 142115 + }, + { + "epoch": 1.2288696163457298, + "grad_norm": 0.3605714728824701, + "learning_rate": 1.9445600679345053e-06, + "loss": 0.08280181884765625, + "step": 142120 + }, + { + "epoch": 1.228912849867273, + "grad_norm": 5.421763314622781, + "learning_rate": 1.9443693618215005e-06, + "loss": 0.22733535766601562, + "step": 142125 + }, + { + "epoch": 1.2289560833888165, + "grad_norm": 0.23832892628720387, + "learning_rate": 1.9441786605769697e-06, + "loss": 0.02812042236328125, + "step": 142130 + }, + { + "epoch": 1.2289993169103597, + "grad_norm": 1.1295632652458854, + "learning_rate": 1.9439879642017945e-06, + "loss": 0.08198089599609375, + "step": 142135 + }, + { + "epoch": 1.229042550431903, + "grad_norm": 6.141111474249231, + "learning_rate": 1.9437972726968538e-06, + "loss": 0.09229965209960937, + "step": 142140 + }, + { + "epoch": 1.2290857839534461, + "grad_norm": 9.354959894904605, + "learning_rate": 1.943606586063026e-06, + "loss": 0.21178665161132812, + "step": 142145 + }, + { + "epoch": 1.2291290174749894, + "grad_norm": 57.5926473402886, + "learning_rate": 1.9434159043011918e-06, + "loss": 0.22279319763183594, + "step": 142150 + }, + { + "epoch": 1.2291722509965326, + "grad_norm": 7.50253827081952, + "learning_rate": 1.9432252274122306e-06, + "loss": 0.09940547943115234, + "step": 142155 + }, + { + "epoch": 1.2292154845180758, + "grad_norm": 7.272877078050375, + "learning_rate": 1.9430345553970204e-06, + "loss": 0.1498180389404297, + "step": 142160 + }, + { + "epoch": 1.2292587180396193, + "grad_norm": 18.540946025586162, + "learning_rate": 1.942843888256441e-06, + "loss": 0.19371376037597657, + "step": 142165 + }, + { + "epoch": 1.2293019515611625, + "grad_norm": 2.0780524666956888, + "learning_rate": 1.942653225991372e-06, + "loss": 0.021961212158203125, + "step": 142170 + }, + { + "epoch": 1.2293451850827057, + "grad_norm": 1.5866647900186424, + "learning_rate": 1.9424625686026942e-06, + "loss": 0.03804597854614258, + "step": 142175 + }, + { + "epoch": 1.229388418604249, + "grad_norm": 7.668395680374919, + "learning_rate": 1.9422719160912854e-06, + "loss": 0.023297119140625, + "step": 142180 + }, + { + "epoch": 1.2294316521257922, + "grad_norm": 0.40198776887517945, + "learning_rate": 1.9420812684580253e-06, + "loss": 0.06810302734375, + "step": 142185 + }, + { + "epoch": 1.2294748856473354, + "grad_norm": 1.9734716371124872, + "learning_rate": 1.9418906257037922e-06, + "loss": 0.22179756164550782, + "step": 142190 + }, + { + "epoch": 1.2295181191688789, + "grad_norm": 1.150387843679567, + "learning_rate": 1.9416999878294655e-06, + "loss": 0.091363525390625, + "step": 142195 + }, + { + "epoch": 1.229561352690422, + "grad_norm": 1.4281222133868159, + "learning_rate": 1.941509354835925e-06, + "loss": 0.069085693359375, + "step": 142200 + }, + { + "epoch": 1.2296045862119653, + "grad_norm": 0.3729833439321357, + "learning_rate": 1.941318726724051e-06, + "loss": 0.024740982055664062, + "step": 142205 + }, + { + "epoch": 1.2296478197335086, + "grad_norm": 13.754853725668832, + "learning_rate": 1.941128103494721e-06, + "loss": 0.06112804412841797, + "step": 142210 + }, + { + "epoch": 1.2296910532550518, + "grad_norm": 15.682625366628903, + "learning_rate": 1.940937485148815e-06, + "loss": 0.10788192749023437, + "step": 142215 + }, + { + "epoch": 1.229734286776595, + "grad_norm": 5.313988068813317, + "learning_rate": 1.940746871687211e-06, + "loss": 0.0261993408203125, + "step": 142220 + }, + { + "epoch": 1.2297775202981382, + "grad_norm": 1.8622949692085244, + "learning_rate": 1.940556263110789e-06, + "loss": 0.0360443115234375, + "step": 142225 + }, + { + "epoch": 1.2298207538196817, + "grad_norm": 21.762027112980558, + "learning_rate": 1.9403656594204275e-06, + "loss": 0.19442577362060548, + "step": 142230 + }, + { + "epoch": 1.229863987341225, + "grad_norm": 0.6681400305044509, + "learning_rate": 1.940175060617007e-06, + "loss": 0.015998077392578126, + "step": 142235 + }, + { + "epoch": 1.2299072208627682, + "grad_norm": 10.9043772613308, + "learning_rate": 1.939984466701405e-06, + "loss": 0.2494537353515625, + "step": 142240 + }, + { + "epoch": 1.2299504543843114, + "grad_norm": 12.873438394140969, + "learning_rate": 1.9397938776745014e-06, + "loss": 0.039302825927734375, + "step": 142245 + }, + { + "epoch": 1.2299936879058546, + "grad_norm": 3.9595796386826647, + "learning_rate": 1.939603293537174e-06, + "loss": 0.06496353149414062, + "step": 142250 + }, + { + "epoch": 1.230036921427398, + "grad_norm": 3.3744558120071977, + "learning_rate": 1.939412714290303e-06, + "loss": 0.05602531433105469, + "step": 142255 + }, + { + "epoch": 1.2300801549489413, + "grad_norm": 1.1040836276027435, + "learning_rate": 1.939222139934766e-06, + "loss": 0.0289825439453125, + "step": 142260 + }, + { + "epoch": 1.2301233884704845, + "grad_norm": 4.6618575787848995, + "learning_rate": 1.9390315704714432e-06, + "loss": 0.038354110717773435, + "step": 142265 + }, + { + "epoch": 1.2301666219920278, + "grad_norm": 5.925455179006508, + "learning_rate": 1.938841005901214e-06, + "loss": 0.05278911590576172, + "step": 142270 + }, + { + "epoch": 1.230209855513571, + "grad_norm": 4.512165608510845, + "learning_rate": 1.9386504462249556e-06, + "loss": 0.04849395751953125, + "step": 142275 + }, + { + "epoch": 1.2302530890351142, + "grad_norm": 1.8041166598210756, + "learning_rate": 1.938459891443547e-06, + "loss": 0.008130645751953125, + "step": 142280 + }, + { + "epoch": 1.2302963225566574, + "grad_norm": 2.8952708478889346, + "learning_rate": 1.938269341557869e-06, + "loss": 0.22964067459106446, + "step": 142285 + }, + { + "epoch": 1.2303395560782007, + "grad_norm": 0.9654344065770558, + "learning_rate": 1.9380787965687965e-06, + "loss": 0.03388195037841797, + "step": 142290 + }, + { + "epoch": 1.2303827895997441, + "grad_norm": 12.763305454818724, + "learning_rate": 1.9378882564772127e-06, + "loss": 0.06087827682495117, + "step": 142295 + }, + { + "epoch": 1.2304260231212873, + "grad_norm": 3.050367702331255, + "learning_rate": 1.9376977212839947e-06, + "loss": 0.024265289306640625, + "step": 142300 + }, + { + "epoch": 1.2304692566428306, + "grad_norm": 2.6028288883349284, + "learning_rate": 1.9375071909900207e-06, + "loss": 0.09874267578125, + "step": 142305 + }, + { + "epoch": 1.2305124901643738, + "grad_norm": 2.630669262360216, + "learning_rate": 1.9373166655961687e-06, + "loss": 0.016601181030273436, + "step": 142310 + }, + { + "epoch": 1.230555723685917, + "grad_norm": 7.591537339263706, + "learning_rate": 1.937126145103319e-06, + "loss": 0.06159515380859375, + "step": 142315 + }, + { + "epoch": 1.2305989572074605, + "grad_norm": 1.162603016423457, + "learning_rate": 1.9369356295123482e-06, + "loss": 0.12739028930664062, + "step": 142320 + }, + { + "epoch": 1.2306421907290037, + "grad_norm": 2.223414803696329, + "learning_rate": 1.9367451188241385e-06, + "loss": 0.14281005859375, + "step": 142325 + }, + { + "epoch": 1.230685424250547, + "grad_norm": 1.0646471012349856, + "learning_rate": 1.9365546130395656e-06, + "loss": 0.10317726135253906, + "step": 142330 + }, + { + "epoch": 1.2307286577720902, + "grad_norm": 1.5583630160700739, + "learning_rate": 1.936364112159509e-06, + "loss": 0.205145263671875, + "step": 142335 + }, + { + "epoch": 1.2307718912936334, + "grad_norm": 12.473676858596557, + "learning_rate": 1.9361736161848465e-06, + "loss": 0.05632705688476562, + "step": 142340 + }, + { + "epoch": 1.2308151248151766, + "grad_norm": 41.87785633636031, + "learning_rate": 1.935983125116458e-06, + "loss": 0.11290264129638672, + "step": 142345 + }, + { + "epoch": 1.2308583583367199, + "grad_norm": 8.265962187585462, + "learning_rate": 1.9357926389552214e-06, + "loss": 0.03184700012207031, + "step": 142350 + }, + { + "epoch": 1.2309015918582633, + "grad_norm": 0.15736383942452736, + "learning_rate": 1.935602157702014e-06, + "loss": 0.2282886505126953, + "step": 142355 + }, + { + "epoch": 1.2309448253798065, + "grad_norm": 0.34822452334158105, + "learning_rate": 1.935411681357716e-06, + "loss": 0.0185028076171875, + "step": 142360 + }, + { + "epoch": 1.2309880589013498, + "grad_norm": 3.2865412056490024, + "learning_rate": 1.9352212099232056e-06, + "loss": 0.08550605773925782, + "step": 142365 + }, + { + "epoch": 1.231031292422893, + "grad_norm": 32.15254517307176, + "learning_rate": 1.9350307433993603e-06, + "loss": 0.10815200805664063, + "step": 142370 + }, + { + "epoch": 1.2310745259444362, + "grad_norm": 4.4702513903518275, + "learning_rate": 1.93484028178706e-06, + "loss": 0.052861785888671874, + "step": 142375 + }, + { + "epoch": 1.2311177594659795, + "grad_norm": 1.5929665048367447, + "learning_rate": 1.9346498250871816e-06, + "loss": 0.05435333251953125, + "step": 142380 + }, + { + "epoch": 1.231160992987523, + "grad_norm": 0.46718675218658984, + "learning_rate": 1.9344593733006037e-06, + "loss": 0.06658592224121093, + "step": 142385 + }, + { + "epoch": 1.2312042265090661, + "grad_norm": 4.61635277686688, + "learning_rate": 1.9342689264282052e-06, + "loss": 0.13100357055664064, + "step": 142390 + }, + { + "epoch": 1.2312474600306094, + "grad_norm": 0.6081852106681834, + "learning_rate": 1.9340784844708645e-06, + "loss": 0.020755577087402343, + "step": 142395 + }, + { + "epoch": 1.2312906935521526, + "grad_norm": 0.08004929057748851, + "learning_rate": 1.9338880474294594e-06, + "loss": 0.015282058715820312, + "step": 142400 + }, + { + "epoch": 1.2313339270736958, + "grad_norm": 0.8438630853298844, + "learning_rate": 1.9336976153048698e-06, + "loss": 0.080645751953125, + "step": 142405 + }, + { + "epoch": 1.231377160595239, + "grad_norm": 0.9166508534069491, + "learning_rate": 1.9335071880979712e-06, + "loss": 0.05642547607421875, + "step": 142410 + }, + { + "epoch": 1.2314203941167823, + "grad_norm": 0.4966765642158706, + "learning_rate": 1.9333167658096436e-06, + "loss": 0.04098968505859375, + "step": 142415 + }, + { + "epoch": 1.2314636276383257, + "grad_norm": 5.072692010636066, + "learning_rate": 1.933126348440764e-06, + "loss": 0.03463687896728516, + "step": 142420 + }, + { + "epoch": 1.231506861159869, + "grad_norm": 0.25720589139218153, + "learning_rate": 1.9329359359922116e-06, + "loss": 0.11161651611328124, + "step": 142425 + }, + { + "epoch": 1.2315500946814122, + "grad_norm": 14.62313752231993, + "learning_rate": 1.9327455284648654e-06, + "loss": 0.079705810546875, + "step": 142430 + }, + { + "epoch": 1.2315933282029554, + "grad_norm": 2.606131858785517, + "learning_rate": 1.932555125859602e-06, + "loss": 0.02387542724609375, + "step": 142435 + }, + { + "epoch": 1.2316365617244986, + "grad_norm": 0.263374689676342, + "learning_rate": 1.9323647281773004e-06, + "loss": 0.0412445068359375, + "step": 142440 + }, + { + "epoch": 1.2316797952460419, + "grad_norm": 20.840911793021732, + "learning_rate": 1.9321743354188375e-06, + "loss": 0.10381317138671875, + "step": 142445 + }, + { + "epoch": 1.2317230287675853, + "grad_norm": 5.056250665312897, + "learning_rate": 1.9319839475850924e-06, + "loss": 0.28743820190429686, + "step": 142450 + }, + { + "epoch": 1.2317662622891286, + "grad_norm": 47.296852765412325, + "learning_rate": 1.9317935646769426e-06, + "loss": 0.4834281921386719, + "step": 142455 + }, + { + "epoch": 1.2318094958106718, + "grad_norm": 0.43184900620229744, + "learning_rate": 1.9316031866952673e-06, + "loss": 0.0196014404296875, + "step": 142460 + }, + { + "epoch": 1.231852729332215, + "grad_norm": 4.383774286514058, + "learning_rate": 1.931412813640944e-06, + "loss": 0.030133819580078124, + "step": 142465 + }, + { + "epoch": 1.2318959628537582, + "grad_norm": 0.496076707182999, + "learning_rate": 1.93122244551485e-06, + "loss": 0.1264495849609375, + "step": 142470 + }, + { + "epoch": 1.2319391963753015, + "grad_norm": 2.537254829631447, + "learning_rate": 1.9310320823178634e-06, + "loss": 0.008286380767822265, + "step": 142475 + }, + { + "epoch": 1.2319824298968447, + "grad_norm": 0.44476893238109716, + "learning_rate": 1.930841724050862e-06, + "loss": 0.042150115966796874, + "step": 142480 + }, + { + "epoch": 1.2320256634183882, + "grad_norm": 3.3199717581112815, + "learning_rate": 1.9306513707147247e-06, + "loss": 0.0845489501953125, + "step": 142485 + }, + { + "epoch": 1.2320688969399314, + "grad_norm": 19.56957744313675, + "learning_rate": 1.9304610223103286e-06, + "loss": 0.1307830810546875, + "step": 142490 + }, + { + "epoch": 1.2321121304614746, + "grad_norm": 0.9803552194924932, + "learning_rate": 1.9302706788385526e-06, + "loss": 0.057706260681152345, + "step": 142495 + }, + { + "epoch": 1.2321553639830178, + "grad_norm": 7.36396877714143, + "learning_rate": 1.930080340300273e-06, + "loss": 0.03757476806640625, + "step": 142500 + }, + { + "epoch": 1.232198597504561, + "grad_norm": 2.5863899214657615, + "learning_rate": 1.929890006696368e-06, + "loss": 0.1580474853515625, + "step": 142505 + }, + { + "epoch": 1.2322418310261045, + "grad_norm": 23.180596665290118, + "learning_rate": 1.929699678027716e-06, + "loss": 0.2073760986328125, + "step": 142510 + }, + { + "epoch": 1.2322850645476477, + "grad_norm": 1.6193939350026936, + "learning_rate": 1.9295093542951937e-06, + "loss": 0.058214664459228516, + "step": 142515 + }, + { + "epoch": 1.232328298069191, + "grad_norm": 40.64472825829692, + "learning_rate": 1.929319035499681e-06, + "loss": 0.26802825927734375, + "step": 142520 + }, + { + "epoch": 1.2323715315907342, + "grad_norm": 0.20784841255321873, + "learning_rate": 1.929128721642054e-06, + "loss": 0.009431838989257812, + "step": 142525 + }, + { + "epoch": 1.2324147651122774, + "grad_norm": 11.907869271998345, + "learning_rate": 1.9289384127231907e-06, + "loss": 0.15542526245117189, + "step": 142530 + }, + { + "epoch": 1.2324579986338207, + "grad_norm": 5.07433676255754, + "learning_rate": 1.9287481087439684e-06, + "loss": 0.03411102294921875, + "step": 142535 + }, + { + "epoch": 1.232501232155364, + "grad_norm": 7.791576095108344, + "learning_rate": 1.9285578097052657e-06, + "loss": 0.07027740478515625, + "step": 142540 + }, + { + "epoch": 1.2325444656769073, + "grad_norm": 0.36694210008640016, + "learning_rate": 1.9283675156079584e-06, + "loss": 0.022472190856933593, + "step": 142545 + }, + { + "epoch": 1.2325876991984506, + "grad_norm": 5.71409702704505, + "learning_rate": 1.9281772264529265e-06, + "loss": 0.08476943969726562, + "step": 142550 + }, + { + "epoch": 1.2326309327199938, + "grad_norm": 3.1073517231231587, + "learning_rate": 1.927986942241047e-06, + "loss": 0.040692138671875, + "step": 142555 + }, + { + "epoch": 1.232674166241537, + "grad_norm": 2.476553020025535, + "learning_rate": 1.9277966629731968e-06, + "loss": 0.0668004035949707, + "step": 142560 + }, + { + "epoch": 1.2327173997630803, + "grad_norm": 1.9371055272628364, + "learning_rate": 1.9276063886502526e-06, + "loss": 0.01571044921875, + "step": 142565 + }, + { + "epoch": 1.2327606332846235, + "grad_norm": 0.019165462650829204, + "learning_rate": 1.927416119273094e-06, + "loss": 0.1113351821899414, + "step": 142570 + }, + { + "epoch": 1.232803866806167, + "grad_norm": 0.5075999026010586, + "learning_rate": 1.9272258548425965e-06, + "loss": 0.1089630126953125, + "step": 142575 + }, + { + "epoch": 1.2328471003277102, + "grad_norm": 1.130963609100029, + "learning_rate": 1.9270355953596396e-06, + "loss": 0.07379684448242188, + "step": 142580 + }, + { + "epoch": 1.2328903338492534, + "grad_norm": 51.07245836085987, + "learning_rate": 1.9268453408250995e-06, + "loss": 0.29778213500976564, + "step": 142585 + }, + { + "epoch": 1.2329335673707966, + "grad_norm": 1.1533834596749666, + "learning_rate": 1.926655091239854e-06, + "loss": 0.07809562683105468, + "step": 142590 + }, + { + "epoch": 1.2329768008923399, + "grad_norm": 3.365656594838892, + "learning_rate": 1.9264648466047797e-06, + "loss": 0.053707122802734375, + "step": 142595 + }, + { + "epoch": 1.233020034413883, + "grad_norm": 5.574519671443565, + "learning_rate": 1.926274606920755e-06, + "loss": 0.17262802124023438, + "step": 142600 + }, + { + "epoch": 1.2330632679354263, + "grad_norm": 3.0066602445230206, + "learning_rate": 1.9260843721886572e-06, + "loss": 0.0407470703125, + "step": 142605 + }, + { + "epoch": 1.2331065014569698, + "grad_norm": 1.6699358029970108, + "learning_rate": 1.9258941424093628e-06, + "loss": 0.02908477783203125, + "step": 142610 + }, + { + "epoch": 1.233149734978513, + "grad_norm": 1.2821840546578225, + "learning_rate": 1.92570391758375e-06, + "loss": 0.14065685272216796, + "step": 142615 + }, + { + "epoch": 1.2331929685000562, + "grad_norm": 18.205437072090486, + "learning_rate": 1.9255136977126957e-06, + "loss": 0.4187164306640625, + "step": 142620 + }, + { + "epoch": 1.2332362020215994, + "grad_norm": 19.837664579385795, + "learning_rate": 1.9253234827970778e-06, + "loss": 0.1685455322265625, + "step": 142625 + }, + { + "epoch": 1.2332794355431427, + "grad_norm": 1.7832267838568863, + "learning_rate": 1.9251332728377728e-06, + "loss": 0.031041717529296874, + "step": 142630 + }, + { + "epoch": 1.233322669064686, + "grad_norm": 23.780726869531364, + "learning_rate": 1.924943067835658e-06, + "loss": 0.2851871490478516, + "step": 142635 + }, + { + "epoch": 1.2333659025862294, + "grad_norm": 0.7877955263991568, + "learning_rate": 1.9247528677916103e-06, + "loss": 0.08541355133056641, + "step": 142640 + }, + { + "epoch": 1.2334091361077726, + "grad_norm": 5.302132968906419, + "learning_rate": 1.9245626727065084e-06, + "loss": 0.12291717529296875, + "step": 142645 + }, + { + "epoch": 1.2334523696293158, + "grad_norm": 0.29259904406964937, + "learning_rate": 1.9243724825812273e-06, + "loss": 0.025078678131103517, + "step": 142650 + }, + { + "epoch": 1.233495603150859, + "grad_norm": 0.026232867928271574, + "learning_rate": 1.924182297416646e-06, + "loss": 0.20483856201171874, + "step": 142655 + }, + { + "epoch": 1.2335388366724023, + "grad_norm": 18.048429374276452, + "learning_rate": 1.9239921172136414e-06, + "loss": 0.09673919677734374, + "step": 142660 + }, + { + "epoch": 1.2335820701939455, + "grad_norm": 0.06360285459087366, + "learning_rate": 1.9238019419730895e-06, + "loss": 0.22188682556152345, + "step": 142665 + }, + { + "epoch": 1.2336253037154887, + "grad_norm": 0.5241983325662093, + "learning_rate": 1.923611771695867e-06, + "loss": 0.04133949279785156, + "step": 142670 + }, + { + "epoch": 1.2336685372370322, + "grad_norm": 1.1729682638109313, + "learning_rate": 1.9234216063828533e-06, + "loss": 0.05676956176757812, + "step": 142675 + }, + { + "epoch": 1.2337117707585754, + "grad_norm": 1.5900587331805913, + "learning_rate": 1.9232314460349236e-06, + "loss": 0.0890869140625, + "step": 142680 + }, + { + "epoch": 1.2337550042801186, + "grad_norm": 3.5742243016013147, + "learning_rate": 1.923041290652955e-06, + "loss": 0.13617401123046874, + "step": 142685 + }, + { + "epoch": 1.2337982378016619, + "grad_norm": 18.285170589145476, + "learning_rate": 1.9228511402378253e-06, + "loss": 0.06651153564453124, + "step": 142690 + }, + { + "epoch": 1.233841471323205, + "grad_norm": 2.3403490911354194, + "learning_rate": 1.9226609947904113e-06, + "loss": 0.05496063232421875, + "step": 142695 + }, + { + "epoch": 1.2338847048447485, + "grad_norm": 3.0451075330808814, + "learning_rate": 1.922470854311589e-06, + "loss": 0.02832489013671875, + "step": 142700 + }, + { + "epoch": 1.2339279383662918, + "grad_norm": 0.20473561522048517, + "learning_rate": 1.922280718802235e-06, + "loss": 0.05734100341796875, + "step": 142705 + }, + { + "epoch": 1.233971171887835, + "grad_norm": 1.9959819851348062, + "learning_rate": 1.922090588263228e-06, + "loss": 0.05711631774902344, + "step": 142710 + }, + { + "epoch": 1.2340144054093782, + "grad_norm": 14.157520625239533, + "learning_rate": 1.921900462695444e-06, + "loss": 0.133111572265625, + "step": 142715 + }, + { + "epoch": 1.2340576389309215, + "grad_norm": 8.567510328120857, + "learning_rate": 1.9217103420997604e-06, + "loss": 0.041162872314453126, + "step": 142720 + }, + { + "epoch": 1.2341008724524647, + "grad_norm": 20.960524159528784, + "learning_rate": 1.921520226477053e-06, + "loss": 0.13998641967773437, + "step": 142725 + }, + { + "epoch": 1.234144105974008, + "grad_norm": 4.040847169242854, + "learning_rate": 1.921330115828198e-06, + "loss": 0.10480785369873047, + "step": 142730 + }, + { + "epoch": 1.2341873394955512, + "grad_norm": 0.17603703014989042, + "learning_rate": 1.9211400101540742e-06, + "loss": 0.035495758056640625, + "step": 142735 + }, + { + "epoch": 1.2342305730170946, + "grad_norm": 14.982834103179009, + "learning_rate": 1.9209499094555565e-06, + "loss": 0.028510665893554686, + "step": 142740 + }, + { + "epoch": 1.2342738065386378, + "grad_norm": 0.3730273245086078, + "learning_rate": 1.920759813733524e-06, + "loss": 0.15658912658691407, + "step": 142745 + }, + { + "epoch": 1.234317040060181, + "grad_norm": 0.8186330172888645, + "learning_rate": 1.9205697229888508e-06, + "loss": 0.046105575561523435, + "step": 142750 + }, + { + "epoch": 1.2343602735817243, + "grad_norm": 11.94332533373595, + "learning_rate": 1.9203796372224156e-06, + "loss": 0.05816574096679687, + "step": 142755 + }, + { + "epoch": 1.2344035071032675, + "grad_norm": 46.79783172041802, + "learning_rate": 1.920189556435093e-06, + "loss": 0.15037307739257813, + "step": 142760 + }, + { + "epoch": 1.234446740624811, + "grad_norm": 25.138064434012914, + "learning_rate": 1.919999480627761e-06, + "loss": 0.07322845458984376, + "step": 142765 + }, + { + "epoch": 1.2344899741463542, + "grad_norm": 1.810161923587747, + "learning_rate": 1.9198094098012952e-06, + "loss": 0.10954303741455078, + "step": 142770 + }, + { + "epoch": 1.2345332076678974, + "grad_norm": 1.451298928578177, + "learning_rate": 1.9196193439565744e-06, + "loss": 0.03373394012451172, + "step": 142775 + }, + { + "epoch": 1.2345764411894407, + "grad_norm": 0.5754064711914979, + "learning_rate": 1.919429283094473e-06, + "loss": 0.0306488037109375, + "step": 142780 + }, + { + "epoch": 1.2346196747109839, + "grad_norm": 0.13897463765480847, + "learning_rate": 1.9192392272158696e-06, + "loss": 0.07941474914550781, + "step": 142785 + }, + { + "epoch": 1.2346629082325271, + "grad_norm": 0.8603144259289398, + "learning_rate": 1.9190491763216373e-06, + "loss": 0.08370132446289062, + "step": 142790 + }, + { + "epoch": 1.2347061417540703, + "grad_norm": 0.220093277214049, + "learning_rate": 1.918859130412656e-06, + "loss": 0.07904739379882812, + "step": 142795 + }, + { + "epoch": 1.2347493752756138, + "grad_norm": 3.140770987589429, + "learning_rate": 1.9186690894897997e-06, + "loss": 0.34026641845703126, + "step": 142800 + }, + { + "epoch": 1.234792608797157, + "grad_norm": 4.267425274236561, + "learning_rate": 1.918479053553947e-06, + "loss": 0.044411849975585935, + "step": 142805 + }, + { + "epoch": 1.2348358423187002, + "grad_norm": 0.7303871900000303, + "learning_rate": 1.9182890226059734e-06, + "loss": 0.041857147216796876, + "step": 142810 + }, + { + "epoch": 1.2348790758402435, + "grad_norm": 2.4056909405868705, + "learning_rate": 1.9180989966467555e-06, + "loss": 0.07380142211914062, + "step": 142815 + }, + { + "epoch": 1.2349223093617867, + "grad_norm": 3.445840386664883, + "learning_rate": 1.917908975677168e-06, + "loss": 0.25376358032226565, + "step": 142820 + }, + { + "epoch": 1.23496554288333, + "grad_norm": 1.4720773694260654, + "learning_rate": 1.9177189596980904e-06, + "loss": 0.08349609375, + "step": 142825 + }, + { + "epoch": 1.2350087764048734, + "grad_norm": 56.914713833402196, + "learning_rate": 1.9175289487103954e-06, + "loss": 0.5753616333007813, + "step": 142830 + }, + { + "epoch": 1.2350520099264166, + "grad_norm": 0.7153233890858457, + "learning_rate": 1.917338942714963e-06, + "loss": 0.09868545532226562, + "step": 142835 + }, + { + "epoch": 1.2350952434479598, + "grad_norm": 1.2629396801905832, + "learning_rate": 1.9171489417126675e-06, + "loss": 0.028816986083984374, + "step": 142840 + }, + { + "epoch": 1.235138476969503, + "grad_norm": 61.23556146638765, + "learning_rate": 1.916958945704384e-06, + "loss": 0.10850677490234376, + "step": 142845 + }, + { + "epoch": 1.2351817104910463, + "grad_norm": 8.107949120111721, + "learning_rate": 1.916768954690992e-06, + "loss": 0.19218215942382813, + "step": 142850 + }, + { + "epoch": 1.2352249440125895, + "grad_norm": 5.767020559715554, + "learning_rate": 1.916578968673365e-06, + "loss": 0.03443851470947266, + "step": 142855 + }, + { + "epoch": 1.2352681775341328, + "grad_norm": 1.834838642484595, + "learning_rate": 1.9163889876523805e-06, + "loss": 0.0230255126953125, + "step": 142860 + }, + { + "epoch": 1.2353114110556762, + "grad_norm": 4.771434280752644, + "learning_rate": 1.9161990116289125e-06, + "loss": 0.08104248046875, + "step": 142865 + }, + { + "epoch": 1.2353546445772194, + "grad_norm": 9.937365510977303, + "learning_rate": 1.9160090406038406e-06, + "loss": 0.04472427368164063, + "step": 142870 + }, + { + "epoch": 1.2353978780987627, + "grad_norm": 53.464231845904685, + "learning_rate": 1.9158190745780385e-06, + "loss": 0.25654144287109376, + "step": 142875 + }, + { + "epoch": 1.235441111620306, + "grad_norm": 17.653281461380857, + "learning_rate": 1.9156291135523836e-06, + "loss": 0.10853118896484375, + "step": 142880 + }, + { + "epoch": 1.2354843451418491, + "grad_norm": 2.424011850228174, + "learning_rate": 1.9154391575277515e-06, + "loss": 0.012957382202148437, + "step": 142885 + }, + { + "epoch": 1.2355275786633924, + "grad_norm": 2.3081182796480384, + "learning_rate": 1.9152492065050183e-06, + "loss": 0.3414447784423828, + "step": 142890 + }, + { + "epoch": 1.2355708121849358, + "grad_norm": 15.950053970432416, + "learning_rate": 1.9150592604850586e-06, + "loss": 0.17956466674804689, + "step": 142895 + }, + { + "epoch": 1.235614045706479, + "grad_norm": 0.5175195873554176, + "learning_rate": 1.914869319468751e-06, + "loss": 0.053469276428222655, + "step": 142900 + }, + { + "epoch": 1.2356572792280223, + "grad_norm": 1.3037543707964785, + "learning_rate": 1.914679383456969e-06, + "loss": 0.1099334716796875, + "step": 142905 + }, + { + "epoch": 1.2357005127495655, + "grad_norm": 0.1760676526541262, + "learning_rate": 1.914489452450591e-06, + "loss": 0.018971633911132813, + "step": 142910 + }, + { + "epoch": 1.2357437462711087, + "grad_norm": 8.765708542446912, + "learning_rate": 1.9142995264504913e-06, + "loss": 0.06974277496337891, + "step": 142915 + }, + { + "epoch": 1.235786979792652, + "grad_norm": 3.591374330964381, + "learning_rate": 1.914109605457547e-06, + "loss": 0.1843416213989258, + "step": 142920 + }, + { + "epoch": 1.2358302133141952, + "grad_norm": 18.15093289032299, + "learning_rate": 1.9139196894726314e-06, + "loss": 0.08673858642578125, + "step": 142925 + }, + { + "epoch": 1.2358734468357386, + "grad_norm": 7.124964822861518, + "learning_rate": 1.9137297784966237e-06, + "loss": 0.05061264038085937, + "step": 142930 + }, + { + "epoch": 1.2359166803572819, + "grad_norm": 12.784237167980537, + "learning_rate": 1.9135398725303974e-06, + "loss": 0.0582977294921875, + "step": 142935 + }, + { + "epoch": 1.235959913878825, + "grad_norm": 34.48795387633885, + "learning_rate": 1.91334997157483e-06, + "loss": 0.09928245544433593, + "step": 142940 + }, + { + "epoch": 1.2360031474003683, + "grad_norm": 1.2130567440345692, + "learning_rate": 1.9131600756307967e-06, + "loss": 0.048760986328125, + "step": 142945 + }, + { + "epoch": 1.2360463809219115, + "grad_norm": 1.1589810225773485, + "learning_rate": 1.9129701846991727e-06, + "loss": 0.08131484985351563, + "step": 142950 + }, + { + "epoch": 1.236089614443455, + "grad_norm": 10.874033293707969, + "learning_rate": 1.912780298780834e-06, + "loss": 0.05353469848632812, + "step": 142955 + }, + { + "epoch": 1.2361328479649982, + "grad_norm": 15.816093004664866, + "learning_rate": 1.9125904178766558e-06, + "loss": 0.10434646606445312, + "step": 142960 + }, + { + "epoch": 1.2361760814865415, + "grad_norm": 7.3558767620560195, + "learning_rate": 1.912400541987515e-06, + "loss": 0.011876678466796875, + "step": 142965 + }, + { + "epoch": 1.2362193150080847, + "grad_norm": 3.500259858119039, + "learning_rate": 1.9122106711142876e-06, + "loss": 0.113006591796875, + "step": 142970 + }, + { + "epoch": 1.236262548529628, + "grad_norm": 1.9276941258845703, + "learning_rate": 1.912020805257848e-06, + "loss": 0.015765380859375, + "step": 142975 + }, + { + "epoch": 1.2363057820511711, + "grad_norm": 0.41130086748985334, + "learning_rate": 1.9118309444190726e-06, + "loss": 0.07094879150390625, + "step": 142980 + }, + { + "epoch": 1.2363490155727144, + "grad_norm": 0.18090963328194565, + "learning_rate": 1.9116410885988362e-06, + "loss": 0.04552459716796875, + "step": 142985 + }, + { + "epoch": 1.2363922490942576, + "grad_norm": 0.16030079673676914, + "learning_rate": 1.9114512377980146e-06, + "loss": 0.025131988525390624, + "step": 142990 + }, + { + "epoch": 1.236435482615801, + "grad_norm": 1.820197658280974, + "learning_rate": 1.9112613920174836e-06, + "loss": 0.02677764892578125, + "step": 142995 + }, + { + "epoch": 1.2364787161373443, + "grad_norm": 22.92266360311679, + "learning_rate": 1.9110715512581198e-06, + "loss": 0.078619384765625, + "step": 143000 + }, + { + "epoch": 1.2365219496588875, + "grad_norm": 1.9281709709067432, + "learning_rate": 1.910881715520798e-06, + "loss": 0.06346321105957031, + "step": 143005 + }, + { + "epoch": 1.2365651831804307, + "grad_norm": 6.8125874413788905, + "learning_rate": 1.910691884806393e-06, + "loss": 0.0947998046875, + "step": 143010 + }, + { + "epoch": 1.236608416701974, + "grad_norm": 1.1439195994553153, + "learning_rate": 1.9105020591157806e-06, + "loss": 0.028301239013671875, + "step": 143015 + }, + { + "epoch": 1.2366516502235174, + "grad_norm": 29.94925784221545, + "learning_rate": 1.910312238449837e-06, + "loss": 0.07046222686767578, + "step": 143020 + }, + { + "epoch": 1.2366948837450606, + "grad_norm": 1.0980361612679446, + "learning_rate": 1.9101224228094352e-06, + "loss": 0.03512725830078125, + "step": 143025 + }, + { + "epoch": 1.2367381172666039, + "grad_norm": 33.19217326616614, + "learning_rate": 1.9099326121954544e-06, + "loss": 0.12358894348144531, + "step": 143030 + }, + { + "epoch": 1.236781350788147, + "grad_norm": 0.7940066876737757, + "learning_rate": 1.909742806608768e-06, + "loss": 0.014510726928710938, + "step": 143035 + }, + { + "epoch": 1.2368245843096903, + "grad_norm": 4.513145902840142, + "learning_rate": 1.9095530060502505e-06, + "loss": 0.1081207275390625, + "step": 143040 + }, + { + "epoch": 1.2368678178312336, + "grad_norm": 0.6481562702636621, + "learning_rate": 1.909363210520779e-06, + "loss": 0.018684005737304686, + "step": 143045 + }, + { + "epoch": 1.2369110513527768, + "grad_norm": 34.53278746499122, + "learning_rate": 1.9091734200212283e-06, + "loss": 0.11235275268554687, + "step": 143050 + }, + { + "epoch": 1.2369542848743202, + "grad_norm": 8.557666824142528, + "learning_rate": 1.9089836345524717e-06, + "loss": 0.03141326904296875, + "step": 143055 + }, + { + "epoch": 1.2369975183958635, + "grad_norm": 0.8230975883746433, + "learning_rate": 1.9087938541153877e-06, + "loss": 0.08739395141601562, + "step": 143060 + }, + { + "epoch": 1.2370407519174067, + "grad_norm": 0.777741002871124, + "learning_rate": 1.9086040787108495e-06, + "loss": 0.04695339202880859, + "step": 143065 + }, + { + "epoch": 1.23708398543895, + "grad_norm": 1.9526735314004664, + "learning_rate": 1.9084143083397327e-06, + "loss": 0.2071929931640625, + "step": 143070 + }, + { + "epoch": 1.2371272189604932, + "grad_norm": 53.334182169913994, + "learning_rate": 1.9082245430029134e-06, + "loss": 0.15526199340820312, + "step": 143075 + }, + { + "epoch": 1.2371704524820364, + "grad_norm": 7.396188476215052, + "learning_rate": 1.9080347827012657e-06, + "loss": 0.05125846862792969, + "step": 143080 + }, + { + "epoch": 1.2372136860035798, + "grad_norm": 0.22178016056331915, + "learning_rate": 1.9078450274356643e-06, + "loss": 0.047955322265625, + "step": 143085 + }, + { + "epoch": 1.237256919525123, + "grad_norm": 7.2270631044722276, + "learning_rate": 1.907655277206986e-06, + "loss": 0.04174346923828125, + "step": 143090 + }, + { + "epoch": 1.2373001530466663, + "grad_norm": 45.12200440489969, + "learning_rate": 1.907465532016105e-06, + "loss": 0.21199073791503906, + "step": 143095 + }, + { + "epoch": 1.2373433865682095, + "grad_norm": 0.3096393353183172, + "learning_rate": 1.9072757918638958e-06, + "loss": 0.00604400634765625, + "step": 143100 + }, + { + "epoch": 1.2373866200897528, + "grad_norm": 6.529680726084898, + "learning_rate": 1.907086056751235e-06, + "loss": 0.04432258605957031, + "step": 143105 + }, + { + "epoch": 1.237429853611296, + "grad_norm": 5.363918674728279, + "learning_rate": 1.9068963266789966e-06, + "loss": 0.0948577880859375, + "step": 143110 + }, + { + "epoch": 1.2374730871328392, + "grad_norm": 2.7479880050001357, + "learning_rate": 1.9067066016480554e-06, + "loss": 0.018445587158203124, + "step": 143115 + }, + { + "epoch": 1.2375163206543827, + "grad_norm": 0.5687551783408805, + "learning_rate": 1.9065168816592858e-06, + "loss": 0.034508132934570314, + "step": 143120 + }, + { + "epoch": 1.237559554175926, + "grad_norm": 1.370453257212978, + "learning_rate": 1.9063271667135653e-06, + "loss": 0.052941513061523435, + "step": 143125 + }, + { + "epoch": 1.2376027876974691, + "grad_norm": 9.522421205012431, + "learning_rate": 1.9061374568117663e-06, + "loss": 0.032332229614257815, + "step": 143130 + }, + { + "epoch": 1.2376460212190123, + "grad_norm": 8.643493238026124, + "learning_rate": 1.905947751954765e-06, + "loss": 0.15357599258422852, + "step": 143135 + }, + { + "epoch": 1.2376892547405556, + "grad_norm": 3.3156797347635973, + "learning_rate": 1.9057580521434363e-06, + "loss": 0.018090534210205077, + "step": 143140 + }, + { + "epoch": 1.2377324882620988, + "grad_norm": 0.14893556045319106, + "learning_rate": 1.905568357378655e-06, + "loss": 0.018974685668945314, + "step": 143145 + }, + { + "epoch": 1.2377757217836423, + "grad_norm": 45.701928322487525, + "learning_rate": 1.9053786676612945e-06, + "loss": 0.15746536254882812, + "step": 143150 + }, + { + "epoch": 1.2378189553051855, + "grad_norm": 0.39518729568427996, + "learning_rate": 1.905188982992232e-06, + "loss": 0.06882781982421875, + "step": 143155 + }, + { + "epoch": 1.2378621888267287, + "grad_norm": 0.22658614709931366, + "learning_rate": 1.90499930337234e-06, + "loss": 0.03095588684082031, + "step": 143160 + }, + { + "epoch": 1.237905422348272, + "grad_norm": 5.250667379578956, + "learning_rate": 1.9048096288024957e-06, + "loss": 0.05928993225097656, + "step": 143165 + }, + { + "epoch": 1.2379486558698152, + "grad_norm": 47.47054223239794, + "learning_rate": 1.9046199592835727e-06, + "loss": 0.12648468017578124, + "step": 143170 + }, + { + "epoch": 1.2379918893913584, + "grad_norm": 1.5303109700366286, + "learning_rate": 1.9044302948164452e-06, + "loss": 0.017400550842285156, + "step": 143175 + }, + { + "epoch": 1.2380351229129016, + "grad_norm": 0.9227727360566425, + "learning_rate": 1.9042406354019875e-06, + "loss": 0.010143280029296875, + "step": 143180 + }, + { + "epoch": 1.238078356434445, + "grad_norm": 0.24295510583710359, + "learning_rate": 1.9040509810410763e-06, + "loss": 0.02966156005859375, + "step": 143185 + }, + { + "epoch": 1.2381215899559883, + "grad_norm": 0.09039403607345094, + "learning_rate": 1.9038613317345846e-06, + "loss": 0.004591560363769532, + "step": 143190 + }, + { + "epoch": 1.2381648234775315, + "grad_norm": 1.8985277345162894, + "learning_rate": 1.9036716874833877e-06, + "loss": 0.05774688720703125, + "step": 143195 + }, + { + "epoch": 1.2382080569990748, + "grad_norm": 4.709403663669914, + "learning_rate": 1.9034820482883605e-06, + "loss": 0.035076904296875, + "step": 143200 + }, + { + "epoch": 1.238251290520618, + "grad_norm": 11.169138284121061, + "learning_rate": 1.9032924141503772e-06, + "loss": 0.16695098876953124, + "step": 143205 + }, + { + "epoch": 1.2382945240421614, + "grad_norm": 0.5568603952000074, + "learning_rate": 1.903102785070312e-06, + "loss": 0.04531784057617187, + "step": 143210 + }, + { + "epoch": 1.2383377575637047, + "grad_norm": 1.3944414101754925, + "learning_rate": 1.902913161049039e-06, + "loss": 0.3005378723144531, + "step": 143215 + }, + { + "epoch": 1.238380991085248, + "grad_norm": 64.93554145426103, + "learning_rate": 1.9027235420874344e-06, + "loss": 0.11128005981445313, + "step": 143220 + }, + { + "epoch": 1.2384242246067911, + "grad_norm": 17.34038160460681, + "learning_rate": 1.902533928186372e-06, + "loss": 0.0372650146484375, + "step": 143225 + }, + { + "epoch": 1.2384674581283344, + "grad_norm": 2.014826240512035, + "learning_rate": 1.9023443193467265e-06, + "loss": 0.0366973876953125, + "step": 143230 + }, + { + "epoch": 1.2385106916498776, + "grad_norm": 15.927271805926976, + "learning_rate": 1.902154715569372e-06, + "loss": 0.0634840965270996, + "step": 143235 + }, + { + "epoch": 1.2385539251714208, + "grad_norm": 56.8306828557471, + "learning_rate": 1.9019651168551823e-06, + "loss": 0.16240921020507812, + "step": 143240 + }, + { + "epoch": 1.2385971586929643, + "grad_norm": 0.5742021299063178, + "learning_rate": 1.9017755232050315e-06, + "loss": 0.05001983642578125, + "step": 143245 + }, + { + "epoch": 1.2386403922145075, + "grad_norm": 0.9437039296066638, + "learning_rate": 1.9015859346197966e-06, + "loss": 0.05907306671142578, + "step": 143250 + }, + { + "epoch": 1.2386836257360507, + "grad_norm": 0.12636683270578872, + "learning_rate": 1.90139635110035e-06, + "loss": 0.021602630615234375, + "step": 143255 + }, + { + "epoch": 1.238726859257594, + "grad_norm": 7.650661521856913, + "learning_rate": 1.9012067726475665e-06, + "loss": 0.047467803955078124, + "step": 143260 + }, + { + "epoch": 1.2387700927791372, + "grad_norm": 10.628183480211362, + "learning_rate": 1.9010171992623194e-06, + "loss": 0.16553802490234376, + "step": 143265 + }, + { + "epoch": 1.2388133263006804, + "grad_norm": 0.6879247897797021, + "learning_rate": 1.9008276309454852e-06, + "loss": 0.007814407348632812, + "step": 143270 + }, + { + "epoch": 1.2388565598222239, + "grad_norm": 0.6354011374173878, + "learning_rate": 1.9006380676979352e-06, + "loss": 0.13331680297851561, + "step": 143275 + }, + { + "epoch": 1.238899793343767, + "grad_norm": 4.105921390599568, + "learning_rate": 1.9004485095205464e-06, + "loss": 0.10109481811523438, + "step": 143280 + }, + { + "epoch": 1.2389430268653103, + "grad_norm": 1.0721934027233155, + "learning_rate": 1.900258956414192e-06, + "loss": 0.07307968139648438, + "step": 143285 + }, + { + "epoch": 1.2389862603868536, + "grad_norm": 0.32380606038962756, + "learning_rate": 1.900069408379746e-06, + "loss": 0.13631725311279297, + "step": 143290 + }, + { + "epoch": 1.2390294939083968, + "grad_norm": 12.959986457059811, + "learning_rate": 1.8998798654180823e-06, + "loss": 0.03935165405273437, + "step": 143295 + }, + { + "epoch": 1.23907272742994, + "grad_norm": 0.04497292994285954, + "learning_rate": 1.8996903275300757e-06, + "loss": 0.04040946960449219, + "step": 143300 + }, + { + "epoch": 1.2391159609514832, + "grad_norm": 0.6920260225295026, + "learning_rate": 1.8995007947166006e-06, + "loss": 0.27779693603515626, + "step": 143305 + }, + { + "epoch": 1.2391591944730267, + "grad_norm": 1.7087830240226645, + "learning_rate": 1.8993112669785288e-06, + "loss": 0.0286865234375, + "step": 143310 + }, + { + "epoch": 1.23920242799457, + "grad_norm": 3.374402707555425, + "learning_rate": 1.8991217443167379e-06, + "loss": 0.0456634521484375, + "step": 143315 + }, + { + "epoch": 1.2392456615161132, + "grad_norm": 0.4677065526922594, + "learning_rate": 1.8989322267321e-06, + "loss": 0.11620407104492188, + "step": 143320 + }, + { + "epoch": 1.2392888950376564, + "grad_norm": 2.4909492657224446, + "learning_rate": 1.8987427142254887e-06, + "loss": 0.03823089599609375, + "step": 143325 + }, + { + "epoch": 1.2393321285591996, + "grad_norm": 8.410893962092056, + "learning_rate": 1.8985532067977795e-06, + "loss": 0.0823089599609375, + "step": 143330 + }, + { + "epoch": 1.2393753620807428, + "grad_norm": 0.6979308361690426, + "learning_rate": 1.8983637044498454e-06, + "loss": 0.04820938110351562, + "step": 143335 + }, + { + "epoch": 1.2394185956022863, + "grad_norm": 8.617708388145212, + "learning_rate": 1.8981742071825596e-06, + "loss": 0.028202056884765625, + "step": 143340 + }, + { + "epoch": 1.2394618291238295, + "grad_norm": 5.798715022020914, + "learning_rate": 1.897984714996798e-06, + "loss": 0.0929718017578125, + "step": 143345 + }, + { + "epoch": 1.2395050626453727, + "grad_norm": 8.690576675390934, + "learning_rate": 1.8977952278934336e-06, + "loss": 0.11996421813964844, + "step": 143350 + }, + { + "epoch": 1.239548296166916, + "grad_norm": 36.04004220178086, + "learning_rate": 1.8976057458733393e-06, + "loss": 0.11317253112792969, + "step": 143355 + }, + { + "epoch": 1.2395915296884592, + "grad_norm": 7.851018251403636, + "learning_rate": 1.8974162689373908e-06, + "loss": 0.09623603820800782, + "step": 143360 + }, + { + "epoch": 1.2396347632100024, + "grad_norm": 5.643270359877789, + "learning_rate": 1.8972267970864614e-06, + "loss": 0.030129432678222656, + "step": 143365 + }, + { + "epoch": 1.2396779967315457, + "grad_norm": 1.1419685053198403, + "learning_rate": 1.897037330321424e-06, + "loss": 0.05033721923828125, + "step": 143370 + }, + { + "epoch": 1.2397212302530891, + "grad_norm": 0.6816082881054926, + "learning_rate": 1.896847868643152e-06, + "loss": 0.07419281005859375, + "step": 143375 + }, + { + "epoch": 1.2397644637746323, + "grad_norm": 0.21236093643743711, + "learning_rate": 1.8966584120525212e-06, + "loss": 0.014006423950195312, + "step": 143380 + }, + { + "epoch": 1.2398076972961756, + "grad_norm": 43.13041597211839, + "learning_rate": 1.8964689605504037e-06, + "loss": 0.147357177734375, + "step": 143385 + }, + { + "epoch": 1.2398509308177188, + "grad_norm": 0.42768237677281684, + "learning_rate": 1.8962795141376746e-06, + "loss": 0.1158294677734375, + "step": 143390 + }, + { + "epoch": 1.239894164339262, + "grad_norm": 8.218653204350767, + "learning_rate": 1.8960900728152069e-06, + "loss": 0.14360618591308594, + "step": 143395 + }, + { + "epoch": 1.2399373978608053, + "grad_norm": 42.144238954804216, + "learning_rate": 1.8959006365838741e-06, + "loss": 0.05202789306640625, + "step": 143400 + }, + { + "epoch": 1.2399806313823487, + "grad_norm": 52.90951183852001, + "learning_rate": 1.895711205444549e-06, + "loss": 0.27698097229003904, + "step": 143405 + }, + { + "epoch": 1.240023864903892, + "grad_norm": 8.520937126189787, + "learning_rate": 1.8955217793981074e-06, + "loss": 0.03588943481445313, + "step": 143410 + }, + { + "epoch": 1.2400670984254352, + "grad_norm": 1.0672845851746013, + "learning_rate": 1.895332358445421e-06, + "loss": 0.1862579345703125, + "step": 143415 + }, + { + "epoch": 1.2401103319469784, + "grad_norm": 1.7862576789361386, + "learning_rate": 1.8951429425873648e-06, + "loss": 0.0205352783203125, + "step": 143420 + }, + { + "epoch": 1.2401535654685216, + "grad_norm": 2.8244969009967873, + "learning_rate": 1.8949535318248118e-06, + "loss": 0.040465927124023436, + "step": 143425 + }, + { + "epoch": 1.2401967989900649, + "grad_norm": 8.550240109895665, + "learning_rate": 1.8947641261586354e-06, + "loss": 0.0358673095703125, + "step": 143430 + }, + { + "epoch": 1.240240032511608, + "grad_norm": 37.417220647474274, + "learning_rate": 1.8945747255897079e-06, + "loss": 0.26737518310546876, + "step": 143435 + }, + { + "epoch": 1.2402832660331515, + "grad_norm": 8.49878883729995, + "learning_rate": 1.8943853301189047e-06, + "loss": 0.036810302734375, + "step": 143440 + }, + { + "epoch": 1.2403264995546948, + "grad_norm": 25.53576625710215, + "learning_rate": 1.8941959397470994e-06, + "loss": 0.078619384765625, + "step": 143445 + }, + { + "epoch": 1.240369733076238, + "grad_norm": 3.6112843204018557, + "learning_rate": 1.8940065544751646e-06, + "loss": 0.07130279541015624, + "step": 143450 + }, + { + "epoch": 1.2404129665977812, + "grad_norm": 5.36274114007043, + "learning_rate": 1.8938171743039743e-06, + "loss": 0.06190338134765625, + "step": 143455 + }, + { + "epoch": 1.2404562001193244, + "grad_norm": 1.1703294763506464, + "learning_rate": 1.893627799234401e-06, + "loss": 0.22936859130859374, + "step": 143460 + }, + { + "epoch": 1.240499433640868, + "grad_norm": 5.995301868389271, + "learning_rate": 1.8934384292673178e-06, + "loss": 0.02570934295654297, + "step": 143465 + }, + { + "epoch": 1.2405426671624111, + "grad_norm": 25.020519307212016, + "learning_rate": 1.8932490644035983e-06, + "loss": 0.06538772583007812, + "step": 143470 + }, + { + "epoch": 1.2405859006839544, + "grad_norm": 2.772625372240683, + "learning_rate": 1.8930597046441178e-06, + "loss": 0.084576416015625, + "step": 143475 + }, + { + "epoch": 1.2406291342054976, + "grad_norm": 64.8672295040512, + "learning_rate": 1.8928703499897477e-06, + "loss": 0.5895271301269531, + "step": 143480 + }, + { + "epoch": 1.2406723677270408, + "grad_norm": 0.17929370085258786, + "learning_rate": 1.892681000441362e-06, + "loss": 0.09712791442871094, + "step": 143485 + }, + { + "epoch": 1.240715601248584, + "grad_norm": 33.68243992771336, + "learning_rate": 1.8924916559998332e-06, + "loss": 0.1020721435546875, + "step": 143490 + }, + { + "epoch": 1.2407588347701273, + "grad_norm": 2.042310447096182, + "learning_rate": 1.8923023166660351e-06, + "loss": 0.058945083618164064, + "step": 143495 + }, + { + "epoch": 1.2408020682916707, + "grad_norm": 2.2651711949760513, + "learning_rate": 1.8921129824408401e-06, + "loss": 0.017352294921875, + "step": 143500 + }, + { + "epoch": 1.240845301813214, + "grad_norm": 0.17277093278264985, + "learning_rate": 1.8919236533251234e-06, + "loss": 0.01066274642944336, + "step": 143505 + }, + { + "epoch": 1.2408885353347572, + "grad_norm": 2.1036224063985713, + "learning_rate": 1.8917343293197568e-06, + "loss": 0.024105596542358398, + "step": 143510 + }, + { + "epoch": 1.2409317688563004, + "grad_norm": 0.6087853327425193, + "learning_rate": 1.8915450104256135e-06, + "loss": 0.12548980712890626, + "step": 143515 + }, + { + "epoch": 1.2409750023778436, + "grad_norm": 13.281428387516517, + "learning_rate": 1.8913556966435661e-06, + "loss": 0.03268146514892578, + "step": 143520 + }, + { + "epoch": 1.2410182358993869, + "grad_norm": 4.827155462458667, + "learning_rate": 1.8911663879744887e-06, + "loss": 0.06502685546875, + "step": 143525 + }, + { + "epoch": 1.2410614694209303, + "grad_norm": 0.5982717816080295, + "learning_rate": 1.8909770844192526e-06, + "loss": 0.027030181884765626, + "step": 143530 + }, + { + "epoch": 1.2411047029424735, + "grad_norm": 7.928662691019856, + "learning_rate": 1.8907877859787339e-06, + "loss": 0.034102249145507815, + "step": 143535 + }, + { + "epoch": 1.2411479364640168, + "grad_norm": 8.234167040622818, + "learning_rate": 1.8905984926538035e-06, + "loss": 0.12369403839111329, + "step": 143540 + }, + { + "epoch": 1.24119116998556, + "grad_norm": 20.036558971216184, + "learning_rate": 1.8904092044453348e-06, + "loss": 0.0517852783203125, + "step": 143545 + }, + { + "epoch": 1.2412344035071032, + "grad_norm": 1.1114367184490896, + "learning_rate": 1.8902199213542007e-06, + "loss": 0.14737777709960936, + "step": 143550 + }, + { + "epoch": 1.2412776370286465, + "grad_norm": 3.5775910949094607, + "learning_rate": 1.890030643381274e-06, + "loss": 0.0212432861328125, + "step": 143555 + }, + { + "epoch": 1.2413208705501897, + "grad_norm": 1.9614961637051092, + "learning_rate": 1.8898413705274288e-06, + "loss": 0.1644012451171875, + "step": 143560 + }, + { + "epoch": 1.2413641040717331, + "grad_norm": 7.427323099248902, + "learning_rate": 1.8896521027935354e-06, + "loss": 0.02719879150390625, + "step": 143565 + }, + { + "epoch": 1.2414073375932764, + "grad_norm": 48.13425579011451, + "learning_rate": 1.8894628401804696e-06, + "loss": 0.4635166168212891, + "step": 143570 + }, + { + "epoch": 1.2414505711148196, + "grad_norm": 5.013516560170218, + "learning_rate": 1.8892735826891036e-06, + "loss": 0.22915430068969728, + "step": 143575 + }, + { + "epoch": 1.2414938046363628, + "grad_norm": 5.751382770031492, + "learning_rate": 1.8890843303203086e-06, + "loss": 0.057088279724121095, + "step": 143580 + }, + { + "epoch": 1.241537038157906, + "grad_norm": 24.44359893671614, + "learning_rate": 1.8888950830749591e-06, + "loss": 0.018301010131835938, + "step": 143585 + }, + { + "epoch": 1.2415802716794493, + "grad_norm": 5.02440807484314, + "learning_rate": 1.8887058409539275e-06, + "loss": 0.023302459716796876, + "step": 143590 + }, + { + "epoch": 1.2416235052009927, + "grad_norm": 0.12184546678889256, + "learning_rate": 1.888516603958085e-06, + "loss": 0.08732643127441406, + "step": 143595 + }, + { + "epoch": 1.241666738722536, + "grad_norm": 37.79647975047895, + "learning_rate": 1.888327372088307e-06, + "loss": 0.3332550048828125, + "step": 143600 + }, + { + "epoch": 1.2417099722440792, + "grad_norm": 23.640759392809578, + "learning_rate": 1.8881381453454649e-06, + "loss": 0.028176116943359374, + "step": 143605 + }, + { + "epoch": 1.2417532057656224, + "grad_norm": 0.7621019532012718, + "learning_rate": 1.8879489237304309e-06, + "loss": 0.10695838928222656, + "step": 143610 + }, + { + "epoch": 1.2417964392871657, + "grad_norm": 39.41356124263443, + "learning_rate": 1.8877597072440785e-06, + "loss": 0.14443702697753907, + "step": 143615 + }, + { + "epoch": 1.2418396728087089, + "grad_norm": 10.924461110312325, + "learning_rate": 1.8875704958872803e-06, + "loss": 0.049747467041015625, + "step": 143620 + }, + { + "epoch": 1.2418829063302521, + "grad_norm": 39.638256873438365, + "learning_rate": 1.8873812896609073e-06, + "loss": 0.10734214782714843, + "step": 143625 + }, + { + "epoch": 1.2419261398517956, + "grad_norm": 13.751466679379668, + "learning_rate": 1.887192088565835e-06, + "loss": 0.160308837890625, + "step": 143630 + }, + { + "epoch": 1.2419693733733388, + "grad_norm": 2.240833159657777, + "learning_rate": 1.8870028926029342e-06, + "loss": 0.014458847045898438, + "step": 143635 + }, + { + "epoch": 1.242012606894882, + "grad_norm": 0.41829108809466214, + "learning_rate": 1.8868137017730771e-06, + "loss": 0.007550811767578125, + "step": 143640 + }, + { + "epoch": 1.2420558404164252, + "grad_norm": 1.0735522423247292, + "learning_rate": 1.8866245160771379e-06, + "loss": 0.03605976104736328, + "step": 143645 + }, + { + "epoch": 1.2420990739379685, + "grad_norm": 2.6291553401886354, + "learning_rate": 1.8864353355159873e-06, + "loss": 0.06992950439453124, + "step": 143650 + }, + { + "epoch": 1.242142307459512, + "grad_norm": 6.387424432049275, + "learning_rate": 1.886246160090499e-06, + "loss": 0.06845130920410156, + "step": 143655 + }, + { + "epoch": 1.2421855409810552, + "grad_norm": 1.4047579230142218, + "learning_rate": 1.8860569898015437e-06, + "loss": 0.054827880859375, + "step": 143660 + }, + { + "epoch": 1.2422287745025984, + "grad_norm": 12.680956369569603, + "learning_rate": 1.885867824649996e-06, + "loss": 0.07448101043701172, + "step": 143665 + }, + { + "epoch": 1.2422720080241416, + "grad_norm": 13.022078687932089, + "learning_rate": 1.8856786646367275e-06, + "loss": 0.03747215270996094, + "step": 143670 + }, + { + "epoch": 1.2423152415456848, + "grad_norm": 2.631881401872414, + "learning_rate": 1.8854895097626108e-06, + "loss": 0.07952232360839843, + "step": 143675 + }, + { + "epoch": 1.242358475067228, + "grad_norm": 0.904686665229055, + "learning_rate": 1.8853003600285183e-06, + "loss": 0.037939453125, + "step": 143680 + }, + { + "epoch": 1.2424017085887713, + "grad_norm": 0.03878953658229397, + "learning_rate": 1.8851112154353217e-06, + "loss": 0.0202972412109375, + "step": 143685 + }, + { + "epoch": 1.2424449421103145, + "grad_norm": 4.748393332696233, + "learning_rate": 1.8849220759838926e-06, + "loss": 0.23018722534179686, + "step": 143690 + }, + { + "epoch": 1.242488175631858, + "grad_norm": 45.05743696427436, + "learning_rate": 1.8847329416751046e-06, + "loss": 0.419781494140625, + "step": 143695 + }, + { + "epoch": 1.2425314091534012, + "grad_norm": 1.881260891086071, + "learning_rate": 1.8845438125098308e-06, + "loss": 0.07849884033203125, + "step": 143700 + }, + { + "epoch": 1.2425746426749444, + "grad_norm": 44.23563327867372, + "learning_rate": 1.8843546884889424e-06, + "loss": 0.09010677337646485, + "step": 143705 + }, + { + "epoch": 1.2426178761964877, + "grad_norm": 0.08371473415516419, + "learning_rate": 1.8841655696133114e-06, + "loss": 0.01313018798828125, + "step": 143710 + }, + { + "epoch": 1.242661109718031, + "grad_norm": 36.45799807506287, + "learning_rate": 1.8839764558838093e-06, + "loss": 0.1647632598876953, + "step": 143715 + }, + { + "epoch": 1.2427043432395743, + "grad_norm": 2.2921393937082617, + "learning_rate": 1.8837873473013104e-06, + "loss": 0.03376693725585937, + "step": 143720 + }, + { + "epoch": 1.2427475767611176, + "grad_norm": 2.7629200417573445, + "learning_rate": 1.8835982438666836e-06, + "loss": 0.03014984130859375, + "step": 143725 + }, + { + "epoch": 1.2427908102826608, + "grad_norm": 0.5146262681231736, + "learning_rate": 1.883409145580805e-06, + "loss": 0.018761444091796874, + "step": 143730 + }, + { + "epoch": 1.242834043804204, + "grad_norm": 0.17032246179079413, + "learning_rate": 1.8832200524445446e-06, + "loss": 0.05326995849609375, + "step": 143735 + }, + { + "epoch": 1.2428772773257473, + "grad_norm": 1.3655083377922943, + "learning_rate": 1.8830309644587741e-06, + "loss": 0.0262176513671875, + "step": 143740 + }, + { + "epoch": 1.2429205108472905, + "grad_norm": 0.2959778132494384, + "learning_rate": 1.8828418816243664e-06, + "loss": 0.058415985107421874, + "step": 143745 + }, + { + "epoch": 1.2429637443688337, + "grad_norm": 0.9513460965719539, + "learning_rate": 1.8826528039421933e-06, + "loss": 0.026078033447265624, + "step": 143750 + }, + { + "epoch": 1.2430069778903772, + "grad_norm": 3.443497869288242, + "learning_rate": 1.8824637314131255e-06, + "loss": 0.16895523071289062, + "step": 143755 + }, + { + "epoch": 1.2430502114119204, + "grad_norm": 4.872624057329323, + "learning_rate": 1.8822746640380375e-06, + "loss": 0.06632614135742188, + "step": 143760 + }, + { + "epoch": 1.2430934449334636, + "grad_norm": 0.36222463916400605, + "learning_rate": 1.8820856018178e-06, + "loss": 0.0941497802734375, + "step": 143765 + }, + { + "epoch": 1.2431366784550069, + "grad_norm": 30.04435694613381, + "learning_rate": 1.8818965447532851e-06, + "loss": 0.09407272338867187, + "step": 143770 + }, + { + "epoch": 1.24317991197655, + "grad_norm": 18.384870184018176, + "learning_rate": 1.8817074928453634e-06, + "loss": 0.07657318115234375, + "step": 143775 + }, + { + "epoch": 1.2432231454980933, + "grad_norm": 11.860387208953739, + "learning_rate": 1.881518446094909e-06, + "loss": 0.19822654724121094, + "step": 143780 + }, + { + "epoch": 1.2432663790196368, + "grad_norm": 2.284665524878711, + "learning_rate": 1.8813294045027914e-06, + "loss": 0.05929489135742187, + "step": 143785 + }, + { + "epoch": 1.24330961254118, + "grad_norm": 1.8038422933168083, + "learning_rate": 1.8811403680698849e-06, + "loss": 0.04516677856445313, + "step": 143790 + }, + { + "epoch": 1.2433528460627232, + "grad_norm": 0.523056636248364, + "learning_rate": 1.8809513367970605e-06, + "loss": 0.19168529510498047, + "step": 143795 + }, + { + "epoch": 1.2433960795842665, + "grad_norm": 5.172480832152145, + "learning_rate": 1.880762310685189e-06, + "loss": 0.05449061393737793, + "step": 143800 + }, + { + "epoch": 1.2434393131058097, + "grad_norm": 24.23826926599471, + "learning_rate": 1.8805732897351429e-06, + "loss": 0.08302001953125, + "step": 143805 + }, + { + "epoch": 1.243482546627353, + "grad_norm": 0.037040124343646966, + "learning_rate": 1.880384273947794e-06, + "loss": 0.061688995361328124, + "step": 143810 + }, + { + "epoch": 1.2435257801488961, + "grad_norm": 0.2725004314716907, + "learning_rate": 1.8801952633240141e-06, + "loss": 0.06634407043457032, + "step": 143815 + }, + { + "epoch": 1.2435690136704396, + "grad_norm": 13.72316052871594, + "learning_rate": 1.8800062578646732e-06, + "loss": 0.16647796630859374, + "step": 143820 + }, + { + "epoch": 1.2436122471919828, + "grad_norm": 2.1679667024466696, + "learning_rate": 1.8798172575706461e-06, + "loss": 0.1501697540283203, + "step": 143825 + }, + { + "epoch": 1.243655480713526, + "grad_norm": 1.8156975207631554, + "learning_rate": 1.8796282624428026e-06, + "loss": 0.477532958984375, + "step": 143830 + }, + { + "epoch": 1.2436987142350693, + "grad_norm": 0.3789364809254862, + "learning_rate": 1.879439272482014e-06, + "loss": 0.024506378173828124, + "step": 143835 + }, + { + "epoch": 1.2437419477566125, + "grad_norm": 18.634981921996598, + "learning_rate": 1.8792502876891531e-06, + "loss": 0.23850555419921876, + "step": 143840 + }, + { + "epoch": 1.2437851812781557, + "grad_norm": 0.11508498323024575, + "learning_rate": 1.8790613080650909e-06, + "loss": 0.24790077209472655, + "step": 143845 + }, + { + "epoch": 1.2438284147996992, + "grad_norm": 1.6179917044853076, + "learning_rate": 1.8788723336106974e-06, + "loss": 0.01568603515625, + "step": 143850 + }, + { + "epoch": 1.2438716483212424, + "grad_norm": 15.49459833293878, + "learning_rate": 1.8786833643268473e-06, + "loss": 0.11102294921875, + "step": 143855 + }, + { + "epoch": 1.2439148818427856, + "grad_norm": 0.3289994906940508, + "learning_rate": 1.8784944002144102e-06, + "loss": 0.027715301513671874, + "step": 143860 + }, + { + "epoch": 1.2439581153643289, + "grad_norm": 49.22051007994051, + "learning_rate": 1.8783054412742568e-06, + "loss": 0.3217903137207031, + "step": 143865 + }, + { + "epoch": 1.244001348885872, + "grad_norm": 2.4419195399679836, + "learning_rate": 1.8781164875072608e-06, + "loss": 0.08742084503173828, + "step": 143870 + }, + { + "epoch": 1.2440445824074153, + "grad_norm": 9.43393847999811, + "learning_rate": 1.8779275389142924e-06, + "loss": 0.05340576171875, + "step": 143875 + }, + { + "epoch": 1.2440878159289586, + "grad_norm": 1.0336246043375898, + "learning_rate": 1.8777385954962213e-06, + "loss": 0.03250694274902344, + "step": 143880 + }, + { + "epoch": 1.244131049450502, + "grad_norm": 11.854296114942708, + "learning_rate": 1.8775496572539225e-06, + "loss": 0.046830368041992185, + "step": 143885 + }, + { + "epoch": 1.2441742829720452, + "grad_norm": 0.7068959539548281, + "learning_rate": 1.8773607241882646e-06, + "loss": 0.12617721557617187, + "step": 143890 + }, + { + "epoch": 1.2442175164935885, + "grad_norm": 1.6131166252447184, + "learning_rate": 1.8771717963001203e-06, + "loss": 0.03668270111083984, + "step": 143895 + }, + { + "epoch": 1.2442607500151317, + "grad_norm": 0.23578118668246026, + "learning_rate": 1.8769828735903608e-06, + "loss": 0.009657669067382812, + "step": 143900 + }, + { + "epoch": 1.244303983536675, + "grad_norm": 3.8196791701438646, + "learning_rate": 1.876793956059857e-06, + "loss": 0.029259109497070314, + "step": 143905 + }, + { + "epoch": 1.2443472170582184, + "grad_norm": 1.1451819604334335, + "learning_rate": 1.8766050437094792e-06, + "loss": 0.011932182312011718, + "step": 143910 + }, + { + "epoch": 1.2443904505797616, + "grad_norm": 0.9301359345749889, + "learning_rate": 1.8764161365401e-06, + "loss": 0.008312225341796875, + "step": 143915 + }, + { + "epoch": 1.2444336841013048, + "grad_norm": 1.5154773171250677, + "learning_rate": 1.8762272345525904e-06, + "loss": 0.07428665161132812, + "step": 143920 + }, + { + "epoch": 1.244476917622848, + "grad_norm": 3.5148304633599925, + "learning_rate": 1.8760383377478216e-06, + "loss": 0.24384117126464844, + "step": 143925 + }, + { + "epoch": 1.2445201511443913, + "grad_norm": 7.265822858515902, + "learning_rate": 1.875849446126665e-06, + "loss": 0.04693737030029297, + "step": 143930 + }, + { + "epoch": 1.2445633846659345, + "grad_norm": 3.371459768014417, + "learning_rate": 1.8756605596899916e-06, + "loss": 0.05079193115234375, + "step": 143935 + }, + { + "epoch": 1.2446066181874778, + "grad_norm": 37.07913318443366, + "learning_rate": 1.8754716784386718e-06, + "loss": 0.11187248229980469, + "step": 143940 + }, + { + "epoch": 1.244649851709021, + "grad_norm": 1.4731268830405457, + "learning_rate": 1.875282802373577e-06, + "loss": 0.06718330383300782, + "step": 143945 + }, + { + "epoch": 1.2446930852305644, + "grad_norm": 3.5332489990910827, + "learning_rate": 1.8750939314955784e-06, + "loss": 0.06933708190917968, + "step": 143950 + }, + { + "epoch": 1.2447363187521077, + "grad_norm": 44.38955133495396, + "learning_rate": 1.8749050658055481e-06, + "loss": 0.22741012573242186, + "step": 143955 + }, + { + "epoch": 1.244779552273651, + "grad_norm": 2.7072193449743187, + "learning_rate": 1.874716205304356e-06, + "loss": 0.0731201171875, + "step": 143960 + }, + { + "epoch": 1.2448227857951941, + "grad_norm": 1.1228517860027605, + "learning_rate": 1.8745273499928732e-06, + "loss": 0.16268558502197267, + "step": 143965 + }, + { + "epoch": 1.2448660193167373, + "grad_norm": 5.775689185342435, + "learning_rate": 1.8743384998719707e-06, + "loss": 0.081072998046875, + "step": 143970 + }, + { + "epoch": 1.2449092528382808, + "grad_norm": 2.7206825279673943, + "learning_rate": 1.8741496549425193e-06, + "loss": 0.096636962890625, + "step": 143975 + }, + { + "epoch": 1.244952486359824, + "grad_norm": 1.7996239850805429, + "learning_rate": 1.8739608152053905e-06, + "loss": 0.028188705444335938, + "step": 143980 + }, + { + "epoch": 1.2449957198813673, + "grad_norm": 1.6744546994816236, + "learning_rate": 1.8737719806614548e-06, + "loss": 0.1083292007446289, + "step": 143985 + }, + { + "epoch": 1.2450389534029105, + "grad_norm": 33.495942698800526, + "learning_rate": 1.8735831513115837e-06, + "loss": 0.09314117431640626, + "step": 143990 + }, + { + "epoch": 1.2450821869244537, + "grad_norm": 10.943367983267368, + "learning_rate": 1.8733943271566477e-06, + "loss": 0.08991012573242188, + "step": 143995 + }, + { + "epoch": 1.245125420445997, + "grad_norm": 0.169994506808726, + "learning_rate": 1.8732055081975166e-06, + "loss": 0.16624069213867188, + "step": 144000 + }, + { + "epoch": 1.2451686539675402, + "grad_norm": 28.279318509028638, + "learning_rate": 1.873016694435063e-06, + "loss": 0.19463653564453126, + "step": 144005 + }, + { + "epoch": 1.2452118874890836, + "grad_norm": 18.397170375213012, + "learning_rate": 1.8728278858701554e-06, + "loss": 0.18786392211914063, + "step": 144010 + }, + { + "epoch": 1.2452551210106269, + "grad_norm": 6.6051132491899285, + "learning_rate": 1.8726390825036674e-06, + "loss": 0.013541412353515626, + "step": 144015 + }, + { + "epoch": 1.24529835453217, + "grad_norm": 0.35734360331394477, + "learning_rate": 1.8724502843364683e-06, + "loss": 0.04355144500732422, + "step": 144020 + }, + { + "epoch": 1.2453415880537133, + "grad_norm": 41.090522432870394, + "learning_rate": 1.8722614913694288e-06, + "loss": 0.1020315170288086, + "step": 144025 + }, + { + "epoch": 1.2453848215752565, + "grad_norm": 2.105768628848896, + "learning_rate": 1.8720727036034189e-06, + "loss": 0.048274803161621097, + "step": 144030 + }, + { + "epoch": 1.2454280550967998, + "grad_norm": 6.311362135710157, + "learning_rate": 1.8718839210393108e-06, + "loss": 0.0622894287109375, + "step": 144035 + }, + { + "epoch": 1.2454712886183432, + "grad_norm": 11.225493090795531, + "learning_rate": 1.8716951436779733e-06, + "loss": 0.2508522033691406, + "step": 144040 + }, + { + "epoch": 1.2455145221398864, + "grad_norm": 0.4960187536494859, + "learning_rate": 1.8715063715202791e-06, + "loss": 0.06763381958007812, + "step": 144045 + }, + { + "epoch": 1.2455577556614297, + "grad_norm": 2.7038827460033388, + "learning_rate": 1.871317604567098e-06, + "loss": 0.09311141967773437, + "step": 144050 + }, + { + "epoch": 1.245600989182973, + "grad_norm": 4.297042568261167, + "learning_rate": 1.8711288428193e-06, + "loss": 0.054691314697265625, + "step": 144055 + }, + { + "epoch": 1.2456442227045161, + "grad_norm": 26.902590483138056, + "learning_rate": 1.8709400862777558e-06, + "loss": 0.10619430541992188, + "step": 144060 + }, + { + "epoch": 1.2456874562260594, + "grad_norm": 0.4796947318528786, + "learning_rate": 1.8707513349433364e-06, + "loss": 0.04661216735839844, + "step": 144065 + }, + { + "epoch": 1.2457306897476026, + "grad_norm": 10.08677741393925, + "learning_rate": 1.8705625888169122e-06, + "loss": 0.05618581771850586, + "step": 144070 + }, + { + "epoch": 1.245773923269146, + "grad_norm": 26.185478381088572, + "learning_rate": 1.870373847899352e-06, + "loss": 0.1158172607421875, + "step": 144075 + }, + { + "epoch": 1.2458171567906893, + "grad_norm": 16.80024733220045, + "learning_rate": 1.87018511219153e-06, + "loss": 0.028565406799316406, + "step": 144080 + }, + { + "epoch": 1.2458603903122325, + "grad_norm": 3.0473600179694933, + "learning_rate": 1.869996381694313e-06, + "loss": 0.4326957702636719, + "step": 144085 + }, + { + "epoch": 1.2459036238337757, + "grad_norm": 3.8066526974480728, + "learning_rate": 1.8698076564085739e-06, + "loss": 0.05582504272460938, + "step": 144090 + }, + { + "epoch": 1.245946857355319, + "grad_norm": 44.513270360477534, + "learning_rate": 1.8696189363351815e-06, + "loss": 0.081488037109375, + "step": 144095 + }, + { + "epoch": 1.2459900908768622, + "grad_norm": 40.30478709578383, + "learning_rate": 1.8694302214750073e-06, + "loss": 0.09736099243164062, + "step": 144100 + }, + { + "epoch": 1.2460333243984056, + "grad_norm": 4.141780904221181, + "learning_rate": 1.869241511828919e-06, + "loss": 0.05194168090820313, + "step": 144105 + }, + { + "epoch": 1.2460765579199489, + "grad_norm": 9.908246573867912, + "learning_rate": 1.8690528073977906e-06, + "loss": 0.03110198974609375, + "step": 144110 + }, + { + "epoch": 1.246119791441492, + "grad_norm": 0.7582074261465299, + "learning_rate": 1.8688641081824903e-06, + "loss": 0.0440155029296875, + "step": 144115 + }, + { + "epoch": 1.2461630249630353, + "grad_norm": 0.8598824558478956, + "learning_rate": 1.8686754141838891e-06, + "loss": 0.047365760803222655, + "step": 144120 + }, + { + "epoch": 1.2462062584845786, + "grad_norm": 2.4654383061303404, + "learning_rate": 1.8684867254028573e-06, + "loss": 0.029864501953125, + "step": 144125 + }, + { + "epoch": 1.2462494920061218, + "grad_norm": 0.20156750524298161, + "learning_rate": 1.8682980418402645e-06, + "loss": 0.025095367431640626, + "step": 144130 + }, + { + "epoch": 1.246292725527665, + "grad_norm": 0.06400922333529556, + "learning_rate": 1.86810936349698e-06, + "loss": 0.17047157287597656, + "step": 144135 + }, + { + "epoch": 1.2463359590492085, + "grad_norm": 2.5454740613154887, + "learning_rate": 1.867920690373876e-06, + "loss": 0.022336864471435548, + "step": 144140 + }, + { + "epoch": 1.2463791925707517, + "grad_norm": 10.047089568488227, + "learning_rate": 1.8677320224718213e-06, + "loss": 0.2017181396484375, + "step": 144145 + }, + { + "epoch": 1.246422426092295, + "grad_norm": 2.8728328309729534, + "learning_rate": 1.8675433597916874e-06, + "loss": 0.1191314697265625, + "step": 144150 + }, + { + "epoch": 1.2464656596138382, + "grad_norm": 6.3895368705147515, + "learning_rate": 1.867354702334343e-06, + "loss": 0.01596832275390625, + "step": 144155 + }, + { + "epoch": 1.2465088931353814, + "grad_norm": 1.1063562645887872, + "learning_rate": 1.867166050100659e-06, + "loss": 0.09647808074951172, + "step": 144160 + }, + { + "epoch": 1.2465521266569248, + "grad_norm": 0.1856221475987971, + "learning_rate": 1.8669774030915043e-06, + "loss": 0.03148345947265625, + "step": 144165 + }, + { + "epoch": 1.246595360178468, + "grad_norm": 12.072398002665905, + "learning_rate": 1.8667887613077494e-06, + "loss": 0.029401016235351563, + "step": 144170 + }, + { + "epoch": 1.2466385937000113, + "grad_norm": 0.8213270534786775, + "learning_rate": 1.866600124750265e-06, + "loss": 0.10009841918945313, + "step": 144175 + }, + { + "epoch": 1.2466818272215545, + "grad_norm": 0.8976214501356068, + "learning_rate": 1.8664114934199214e-06, + "loss": 0.0061130523681640625, + "step": 144180 + }, + { + "epoch": 1.2467250607430977, + "grad_norm": 0.49151236505575574, + "learning_rate": 1.8662228673175874e-06, + "loss": 0.04033737182617188, + "step": 144185 + }, + { + "epoch": 1.246768294264641, + "grad_norm": 0.3558771822933691, + "learning_rate": 1.8660342464441336e-06, + "loss": 0.004428863525390625, + "step": 144190 + }, + { + "epoch": 1.2468115277861842, + "grad_norm": 29.169573566671193, + "learning_rate": 1.8658456308004293e-06, + "loss": 0.12291374206542968, + "step": 144195 + }, + { + "epoch": 1.2468547613077277, + "grad_norm": 23.646793222256626, + "learning_rate": 1.8656570203873443e-06, + "loss": 0.057360076904296876, + "step": 144200 + }, + { + "epoch": 1.2468979948292709, + "grad_norm": 2.031596155828425, + "learning_rate": 1.865468415205749e-06, + "loss": 0.30225143432617185, + "step": 144205 + }, + { + "epoch": 1.2469412283508141, + "grad_norm": 0.9187968065877381, + "learning_rate": 1.8652798152565139e-06, + "loss": 0.017114639282226562, + "step": 144210 + }, + { + "epoch": 1.2469844618723573, + "grad_norm": 8.27838272681348, + "learning_rate": 1.8650912205405077e-06, + "loss": 0.058658599853515625, + "step": 144215 + }, + { + "epoch": 1.2470276953939006, + "grad_norm": 7.841593447216027, + "learning_rate": 1.8649026310586007e-06, + "loss": 0.0695648193359375, + "step": 144220 + }, + { + "epoch": 1.2470709289154438, + "grad_norm": 18.3324122640714, + "learning_rate": 1.8647140468116617e-06, + "loss": 0.08863334655761719, + "step": 144225 + }, + { + "epoch": 1.2471141624369873, + "grad_norm": 4.234010413482124, + "learning_rate": 1.8645254678005616e-06, + "loss": 0.0881439208984375, + "step": 144230 + }, + { + "epoch": 1.2471573959585305, + "grad_norm": 1.2550960580889388, + "learning_rate": 1.8643368940261694e-06, + "loss": 0.2966911315917969, + "step": 144235 + }, + { + "epoch": 1.2472006294800737, + "grad_norm": 12.807847532758055, + "learning_rate": 1.8641483254893559e-06, + "loss": 0.11475601196289062, + "step": 144240 + }, + { + "epoch": 1.247243863001617, + "grad_norm": 0.7973657446511742, + "learning_rate": 1.8639597621909898e-06, + "loss": 0.16352195739746095, + "step": 144245 + }, + { + "epoch": 1.2472870965231602, + "grad_norm": 13.035881285256796, + "learning_rate": 1.8637712041319408e-06, + "loss": 0.12618408203125, + "step": 144250 + }, + { + "epoch": 1.2473303300447034, + "grad_norm": 0.07570206657974812, + "learning_rate": 1.8635826513130783e-06, + "loss": 0.04315395355224609, + "step": 144255 + }, + { + "epoch": 1.2473735635662466, + "grad_norm": 24.804658425633892, + "learning_rate": 1.8633941037352726e-06, + "loss": 0.087115478515625, + "step": 144260 + }, + { + "epoch": 1.24741679708779, + "grad_norm": 13.565013967701901, + "learning_rate": 1.8632055613993917e-06, + "loss": 0.03441963195800781, + "step": 144265 + }, + { + "epoch": 1.2474600306093333, + "grad_norm": 1.4278626620305537, + "learning_rate": 1.8630170243063074e-06, + "loss": 0.11042709350585937, + "step": 144270 + }, + { + "epoch": 1.2475032641308765, + "grad_norm": 5.897854357856947, + "learning_rate": 1.8628284924568883e-06, + "loss": 0.04324951171875, + "step": 144275 + }, + { + "epoch": 1.2475464976524198, + "grad_norm": 0.850836510502972, + "learning_rate": 1.8626399658520035e-06, + "loss": 0.04083099365234375, + "step": 144280 + }, + { + "epoch": 1.247589731173963, + "grad_norm": 2.4489003651608776, + "learning_rate": 1.8624514444925221e-06, + "loss": 0.031259918212890626, + "step": 144285 + }, + { + "epoch": 1.2476329646955062, + "grad_norm": 1.2053568142421207, + "learning_rate": 1.8622629283793148e-06, + "loss": 0.033477783203125, + "step": 144290 + }, + { + "epoch": 1.2476761982170497, + "grad_norm": 1.7157067463996647, + "learning_rate": 1.8620744175132492e-06, + "loss": 0.07240524291992187, + "step": 144295 + }, + { + "epoch": 1.247719431738593, + "grad_norm": 0.7315289925705435, + "learning_rate": 1.861885911895197e-06, + "loss": 0.03455410003662109, + "step": 144300 + }, + { + "epoch": 1.2477626652601361, + "grad_norm": 5.299317132203822, + "learning_rate": 1.8616974115260264e-06, + "loss": 0.04497451782226562, + "step": 144305 + }, + { + "epoch": 1.2478058987816794, + "grad_norm": 1.5081673040259604, + "learning_rate": 1.8615089164066063e-06, + "loss": 0.027400970458984375, + "step": 144310 + }, + { + "epoch": 1.2478491323032226, + "grad_norm": 1.0299399281718575, + "learning_rate": 1.8613204265378067e-06, + "loss": 0.035494232177734376, + "step": 144315 + }, + { + "epoch": 1.2478923658247658, + "grad_norm": 0.6625682951793682, + "learning_rate": 1.8611319419204972e-06, + "loss": 0.07549057006835938, + "step": 144320 + }, + { + "epoch": 1.247935599346309, + "grad_norm": 0.9770973641070813, + "learning_rate": 1.8609434625555462e-06, + "loss": 0.25223846435546876, + "step": 144325 + }, + { + "epoch": 1.2479788328678525, + "grad_norm": 1.6120120838812784, + "learning_rate": 1.860754988443822e-06, + "loss": 0.015488433837890624, + "step": 144330 + }, + { + "epoch": 1.2480220663893957, + "grad_norm": 2.2413493903902966, + "learning_rate": 1.8605665195861966e-06, + "loss": 0.13843116760253907, + "step": 144335 + }, + { + "epoch": 1.248065299910939, + "grad_norm": 28.842042172019173, + "learning_rate": 1.860378055983537e-06, + "loss": 0.12743091583251953, + "step": 144340 + }, + { + "epoch": 1.2481085334324822, + "grad_norm": 51.480308787186644, + "learning_rate": 1.8601895976367139e-06, + "loss": 0.066046142578125, + "step": 144345 + }, + { + "epoch": 1.2481517669540254, + "grad_norm": 0.42348509425088926, + "learning_rate": 1.8600011445465955e-06, + "loss": 0.048523330688476564, + "step": 144350 + }, + { + "epoch": 1.2481950004755689, + "grad_norm": 0.9318824637659576, + "learning_rate": 1.8598126967140512e-06, + "loss": 0.03381805419921875, + "step": 144355 + }, + { + "epoch": 1.248238233997112, + "grad_norm": 1.0319362875114242, + "learning_rate": 1.8596242541399485e-06, + "loss": 0.04691314697265625, + "step": 144360 + }, + { + "epoch": 1.2482814675186553, + "grad_norm": 0.23873289306319972, + "learning_rate": 1.8594358168251594e-06, + "loss": 0.01849517822265625, + "step": 144365 + }, + { + "epoch": 1.2483247010401985, + "grad_norm": 3.5237733322093505, + "learning_rate": 1.859247384770551e-06, + "loss": 0.06623306274414062, + "step": 144370 + }, + { + "epoch": 1.2483679345617418, + "grad_norm": 2.0900105694520787, + "learning_rate": 1.8590589579769934e-06, + "loss": 0.038958740234375, + "step": 144375 + }, + { + "epoch": 1.248411168083285, + "grad_norm": 33.54407449150413, + "learning_rate": 1.858870536445355e-06, + "loss": 0.32868194580078125, + "step": 144380 + }, + { + "epoch": 1.2484544016048282, + "grad_norm": 4.88835236561313, + "learning_rate": 1.8586821201765048e-06, + "loss": 0.052962875366210936, + "step": 144385 + }, + { + "epoch": 1.2484976351263715, + "grad_norm": 0.1802561123894682, + "learning_rate": 1.8584937091713105e-06, + "loss": 0.22855682373046876, + "step": 144390 + }, + { + "epoch": 1.248540868647915, + "grad_norm": 7.569006932092989, + "learning_rate": 1.858305303430644e-06, + "loss": 0.0816802978515625, + "step": 144395 + }, + { + "epoch": 1.2485841021694581, + "grad_norm": 4.1615802554404615, + "learning_rate": 1.8581169029553716e-06, + "loss": 0.0896066665649414, + "step": 144400 + }, + { + "epoch": 1.2486273356910014, + "grad_norm": 0.14371128068051553, + "learning_rate": 1.8579285077463637e-06, + "loss": 0.0316436767578125, + "step": 144405 + }, + { + "epoch": 1.2486705692125446, + "grad_norm": 16.490481692972203, + "learning_rate": 1.857740117804489e-06, + "loss": 0.07532157897949218, + "step": 144410 + }, + { + "epoch": 1.2487138027340878, + "grad_norm": 0.806867626775556, + "learning_rate": 1.8575517331306156e-06, + "loss": 0.01005706787109375, + "step": 144415 + }, + { + "epoch": 1.2487570362556313, + "grad_norm": 4.586638773049821, + "learning_rate": 1.8573633537256122e-06, + "loss": 0.1294321060180664, + "step": 144420 + }, + { + "epoch": 1.2488002697771745, + "grad_norm": 2.2398119328579926, + "learning_rate": 1.8571749795903478e-06, + "loss": 0.13186492919921874, + "step": 144425 + }, + { + "epoch": 1.2488435032987177, + "grad_norm": 4.540463617333346, + "learning_rate": 1.8569866107256918e-06, + "loss": 0.06487054824829101, + "step": 144430 + }, + { + "epoch": 1.248886736820261, + "grad_norm": 26.363822093836536, + "learning_rate": 1.8567982471325128e-06, + "loss": 0.075909423828125, + "step": 144435 + }, + { + "epoch": 1.2489299703418042, + "grad_norm": 0.9631082973547425, + "learning_rate": 1.8566098888116795e-06, + "loss": 0.10284881591796875, + "step": 144440 + }, + { + "epoch": 1.2489732038633474, + "grad_norm": 0.36315893415506023, + "learning_rate": 1.8564215357640601e-06, + "loss": 0.07418098449707031, + "step": 144445 + }, + { + "epoch": 1.2490164373848907, + "grad_norm": 0.4425052910234189, + "learning_rate": 1.8562331879905232e-06, + "loss": 0.015264892578125, + "step": 144450 + }, + { + "epoch": 1.249059670906434, + "grad_norm": 27.240822971389605, + "learning_rate": 1.8560448454919381e-06, + "loss": 0.04448890686035156, + "step": 144455 + }, + { + "epoch": 1.2491029044279773, + "grad_norm": 1.4198388528468084, + "learning_rate": 1.8558565082691723e-06, + "loss": 0.034723663330078126, + "step": 144460 + }, + { + "epoch": 1.2491461379495206, + "grad_norm": 0.6981265021585314, + "learning_rate": 1.8556681763230965e-06, + "loss": 0.08846626281738282, + "step": 144465 + }, + { + "epoch": 1.2491893714710638, + "grad_norm": 114.96248093984038, + "learning_rate": 1.8554798496545772e-06, + "loss": 0.12362747192382813, + "step": 144470 + }, + { + "epoch": 1.249232604992607, + "grad_norm": 0.9139932227568247, + "learning_rate": 1.8552915282644844e-06, + "loss": 0.020003509521484376, + "step": 144475 + }, + { + "epoch": 1.2492758385141502, + "grad_norm": 20.136258691530774, + "learning_rate": 1.8551032121536853e-06, + "loss": 0.09676132202148438, + "step": 144480 + }, + { + "epoch": 1.2493190720356937, + "grad_norm": 0.6149549355964333, + "learning_rate": 1.8549149013230477e-06, + "loss": 0.07790336608886719, + "step": 144485 + }, + { + "epoch": 1.249362305557237, + "grad_norm": 2.1376155255361864, + "learning_rate": 1.8547265957734432e-06, + "loss": 0.04598731994628906, + "step": 144490 + }, + { + "epoch": 1.2494055390787802, + "grad_norm": 35.80900732387384, + "learning_rate": 1.8545382955057385e-06, + "loss": 0.062459564208984374, + "step": 144495 + }, + { + "epoch": 1.2494487726003234, + "grad_norm": 0.2577321493165733, + "learning_rate": 1.8543500005208017e-06, + "loss": 0.15926971435546874, + "step": 144500 + }, + { + "epoch": 1.2494920061218666, + "grad_norm": 10.274179750482512, + "learning_rate": 1.8541617108195014e-06, + "loss": 0.08188743591308593, + "step": 144505 + }, + { + "epoch": 1.2495352396434098, + "grad_norm": 0.03305898067116628, + "learning_rate": 1.8539734264027056e-06, + "loss": 0.021856117248535156, + "step": 144510 + }, + { + "epoch": 1.249578473164953, + "grad_norm": 1.4411043967315098, + "learning_rate": 1.853785147271284e-06, + "loss": 0.006368446350097656, + "step": 144515 + }, + { + "epoch": 1.2496217066864965, + "grad_norm": 14.67489427871742, + "learning_rate": 1.8535968734261026e-06, + "loss": 0.08130950927734375, + "step": 144520 + }, + { + "epoch": 1.2496649402080398, + "grad_norm": 10.764248629142514, + "learning_rate": 1.853408604868032e-06, + "loss": 0.043284034729003905, + "step": 144525 + }, + { + "epoch": 1.249708173729583, + "grad_norm": 0.08382057343313794, + "learning_rate": 1.85322034159794e-06, + "loss": 0.054981231689453125, + "step": 144530 + }, + { + "epoch": 1.2497514072511262, + "grad_norm": 2.430260282739557, + "learning_rate": 1.8530320836166935e-06, + "loss": 0.015727996826171875, + "step": 144535 + }, + { + "epoch": 1.2497946407726694, + "grad_norm": 1.8263494218075498, + "learning_rate": 1.8528438309251625e-06, + "loss": 0.027157974243164063, + "step": 144540 + }, + { + "epoch": 1.2498378742942127, + "grad_norm": 0.5634605573636431, + "learning_rate": 1.8526555835242145e-06, + "loss": 0.07698078155517578, + "step": 144545 + }, + { + "epoch": 1.2498811078157561, + "grad_norm": 23.139880838195154, + "learning_rate": 1.852467341414716e-06, + "loss": 0.10675506591796875, + "step": 144550 + }, + { + "epoch": 1.2499243413372993, + "grad_norm": 0.6385220636485158, + "learning_rate": 1.8522791045975382e-06, + "loss": 0.0957906723022461, + "step": 144555 + }, + { + "epoch": 1.2499675748588426, + "grad_norm": 0.06630120822431722, + "learning_rate": 1.8520908730735477e-06, + "loss": 0.054715728759765624, + "step": 144560 + }, + { + "epoch": 1.2500108083803858, + "grad_norm": 7.3473193006747275, + "learning_rate": 1.8519026468436123e-06, + "loss": 0.045513916015625, + "step": 144565 + }, + { + "epoch": 1.250054041901929, + "grad_norm": 0.1986087653593587, + "learning_rate": 1.8517144259086007e-06, + "loss": 0.0558990478515625, + "step": 144570 + }, + { + "epoch": 1.2500972754234723, + "grad_norm": 2.4502490588648542, + "learning_rate": 1.8515262102693808e-06, + "loss": 0.14252166748046874, + "step": 144575 + }, + { + "epoch": 1.2501405089450155, + "grad_norm": 0.26169813195406705, + "learning_rate": 1.8513379999268193e-06, + "loss": 0.07008819580078125, + "step": 144580 + }, + { + "epoch": 1.250183742466559, + "grad_norm": 66.90979287962335, + "learning_rate": 1.8511497948817867e-06, + "loss": 0.1047607421875, + "step": 144585 + }, + { + "epoch": 1.2502269759881022, + "grad_norm": 2.4256454409857593, + "learning_rate": 1.8509615951351497e-06, + "loss": 0.02422027587890625, + "step": 144590 + }, + { + "epoch": 1.2502702095096454, + "grad_norm": 1.8199491089319226, + "learning_rate": 1.8507734006877755e-06, + "loss": 0.07796859741210938, + "step": 144595 + }, + { + "epoch": 1.2503134430311886, + "grad_norm": 10.722258838439997, + "learning_rate": 1.8505852115405338e-06, + "loss": 0.06685638427734375, + "step": 144600 + }, + { + "epoch": 1.2503566765527319, + "grad_norm": 33.65273161488361, + "learning_rate": 1.8503970276942911e-06, + "loss": 0.31460227966308596, + "step": 144605 + }, + { + "epoch": 1.2503999100742753, + "grad_norm": 1.7376142425171897, + "learning_rate": 1.8502088491499163e-06, + "loss": 0.01306610107421875, + "step": 144610 + }, + { + "epoch": 1.2504431435958185, + "grad_norm": 7.12714499138025, + "learning_rate": 1.850020675908275e-06, + "loss": 0.1157745361328125, + "step": 144615 + }, + { + "epoch": 1.2504863771173618, + "grad_norm": 4.450230246642004, + "learning_rate": 1.8498325079702385e-06, + "loss": 0.08044548034667968, + "step": 144620 + }, + { + "epoch": 1.250529610638905, + "grad_norm": 12.416674053850624, + "learning_rate": 1.8496443453366715e-06, + "loss": 0.10916252136230468, + "step": 144625 + }, + { + "epoch": 1.2505728441604482, + "grad_norm": 5.761936417729242, + "learning_rate": 1.8494561880084442e-06, + "loss": 0.2720973968505859, + "step": 144630 + }, + { + "epoch": 1.2506160776819915, + "grad_norm": 1.035750065959412, + "learning_rate": 1.8492680359864237e-06, + "loss": 0.20843029022216797, + "step": 144635 + }, + { + "epoch": 1.2506593112035347, + "grad_norm": 0.6833553217499875, + "learning_rate": 1.8490798892714764e-06, + "loss": 0.023195457458496094, + "step": 144640 + }, + { + "epoch": 1.250702544725078, + "grad_norm": 0.5915752600617009, + "learning_rate": 1.84889174786447e-06, + "loss": 0.1284393310546875, + "step": 144645 + }, + { + "epoch": 1.2507457782466214, + "grad_norm": 0.8715174104442939, + "learning_rate": 1.848703611766275e-06, + "loss": 0.03132781982421875, + "step": 144650 + }, + { + "epoch": 1.2507890117681646, + "grad_norm": 0.11453923364572523, + "learning_rate": 1.848515480977756e-06, + "loss": 0.012430191040039062, + "step": 144655 + }, + { + "epoch": 1.2508322452897078, + "grad_norm": 12.533684509531579, + "learning_rate": 1.8483273554997824e-06, + "loss": 0.15328006744384765, + "step": 144660 + }, + { + "epoch": 1.250875478811251, + "grad_norm": 1.27917641415255, + "learning_rate": 1.8481392353332218e-06, + "loss": 0.08342742919921875, + "step": 144665 + }, + { + "epoch": 1.2509187123327943, + "grad_norm": 5.296924579018619, + "learning_rate": 1.847951120478941e-06, + "loss": 0.42342147827148435, + "step": 144670 + }, + { + "epoch": 1.2509619458543377, + "grad_norm": 2.243601466675744, + "learning_rate": 1.8477630109378071e-06, + "loss": 0.1531665802001953, + "step": 144675 + }, + { + "epoch": 1.251005179375881, + "grad_norm": 1.3610461580732174, + "learning_rate": 1.8475749067106883e-06, + "loss": 0.08317794799804687, + "step": 144680 + }, + { + "epoch": 1.2510484128974242, + "grad_norm": 0.6243052372727773, + "learning_rate": 1.8473868077984526e-06, + "loss": 0.09234695434570313, + "step": 144685 + }, + { + "epoch": 1.2510916464189674, + "grad_norm": 3.3569781334194286, + "learning_rate": 1.847198714201967e-06, + "loss": 0.014739418029785156, + "step": 144690 + }, + { + "epoch": 1.2511348799405106, + "grad_norm": 0.4032569699641144, + "learning_rate": 1.8470106259221e-06, + "loss": 0.0396575927734375, + "step": 144695 + }, + { + "epoch": 1.2511781134620539, + "grad_norm": 10.981377304286944, + "learning_rate": 1.8468225429597174e-06, + "loss": 0.026558303833007814, + "step": 144700 + }, + { + "epoch": 1.251221346983597, + "grad_norm": 0.13837570766029392, + "learning_rate": 1.8466344653156873e-06, + "loss": 0.06451168060302734, + "step": 144705 + }, + { + "epoch": 1.2512645805051403, + "grad_norm": 6.325486228845072, + "learning_rate": 1.846446392990876e-06, + "loss": 0.10819511413574219, + "step": 144710 + }, + { + "epoch": 1.2513078140266838, + "grad_norm": 54.8921842465737, + "learning_rate": 1.8462583259861534e-06, + "loss": 0.21709098815917968, + "step": 144715 + }, + { + "epoch": 1.251351047548227, + "grad_norm": 7.062142806173703, + "learning_rate": 1.8460702643023856e-06, + "loss": 0.0764739990234375, + "step": 144720 + }, + { + "epoch": 1.2513942810697702, + "grad_norm": 2.3759482138259065, + "learning_rate": 1.8458822079404396e-06, + "loss": 0.033098602294921876, + "step": 144725 + }, + { + "epoch": 1.2514375145913135, + "grad_norm": 9.645595770485855, + "learning_rate": 1.8456941569011829e-06, + "loss": 0.058854293823242185, + "step": 144730 + }, + { + "epoch": 1.251480748112857, + "grad_norm": 0.1756513483217183, + "learning_rate": 1.8455061111854822e-06, + "loss": 0.059710693359375, + "step": 144735 + }, + { + "epoch": 1.2515239816344002, + "grad_norm": 3.1321013034940544, + "learning_rate": 1.8453180707942044e-06, + "loss": 0.029777145385742186, + "step": 144740 + }, + { + "epoch": 1.2515672151559434, + "grad_norm": 0.024636748728085356, + "learning_rate": 1.8451300357282193e-06, + "loss": 0.04353752136230469, + "step": 144745 + }, + { + "epoch": 1.2516104486774866, + "grad_norm": 17.23184977281755, + "learning_rate": 1.8449420059883923e-06, + "loss": 0.06480846405029297, + "step": 144750 + }, + { + "epoch": 1.2516536821990298, + "grad_norm": 5.035079340555492, + "learning_rate": 1.8447539815755906e-06, + "loss": 0.04695205688476563, + "step": 144755 + }, + { + "epoch": 1.251696915720573, + "grad_norm": 0.22076287426952565, + "learning_rate": 1.844565962490681e-06, + "loss": 0.0190155029296875, + "step": 144760 + }, + { + "epoch": 1.2517401492421163, + "grad_norm": 5.78774907989541, + "learning_rate": 1.8443779487345314e-06, + "loss": 0.046550750732421875, + "step": 144765 + }, + { + "epoch": 1.2517833827636595, + "grad_norm": 0.09090381254693883, + "learning_rate": 1.8441899403080087e-06, + "loss": 0.02401275634765625, + "step": 144770 + }, + { + "epoch": 1.251826616285203, + "grad_norm": 3.6136925228038255, + "learning_rate": 1.8440019372119789e-06, + "loss": 0.06851348876953126, + "step": 144775 + }, + { + "epoch": 1.2518698498067462, + "grad_norm": 0.6842572938556206, + "learning_rate": 1.8438139394473112e-06, + "loss": 0.057161712646484376, + "step": 144780 + }, + { + "epoch": 1.2519130833282894, + "grad_norm": 1.765108317763091, + "learning_rate": 1.8436259470148712e-06, + "loss": 0.0246917724609375, + "step": 144785 + }, + { + "epoch": 1.2519563168498327, + "grad_norm": 2.377409514896901, + "learning_rate": 1.8434379599155257e-06, + "loss": 0.021483993530273436, + "step": 144790 + }, + { + "epoch": 1.251999550371376, + "grad_norm": 11.499154063459905, + "learning_rate": 1.8432499781501425e-06, + "loss": 0.071099853515625, + "step": 144795 + }, + { + "epoch": 1.2520427838929193, + "grad_norm": 0.3027914627539151, + "learning_rate": 1.8430620017195887e-06, + "loss": 0.05674037933349609, + "step": 144800 + }, + { + "epoch": 1.2520860174144626, + "grad_norm": 4.420132602471957, + "learning_rate": 1.842874030624729e-06, + "loss": 0.017681884765625, + "step": 144805 + }, + { + "epoch": 1.2521292509360058, + "grad_norm": 0.05580177513247806, + "learning_rate": 1.8426860648664336e-06, + "loss": 0.029229736328125, + "step": 144810 + }, + { + "epoch": 1.252172484457549, + "grad_norm": 17.110864260534054, + "learning_rate": 1.8424981044455675e-06, + "loss": 0.060874557495117186, + "step": 144815 + }, + { + "epoch": 1.2522157179790923, + "grad_norm": 3.1109413782463684, + "learning_rate": 1.8423101493629974e-06, + "loss": 0.020877838134765625, + "step": 144820 + }, + { + "epoch": 1.2522589515006355, + "grad_norm": 33.18175276513681, + "learning_rate": 1.8421221996195913e-06, + "loss": 0.14734649658203125, + "step": 144825 + }, + { + "epoch": 1.2523021850221787, + "grad_norm": 0.6117050603594052, + "learning_rate": 1.8419342552162153e-06, + "loss": 0.011572265625, + "step": 144830 + }, + { + "epoch": 1.252345418543722, + "grad_norm": 0.5432280703666628, + "learning_rate": 1.8417463161537347e-06, + "loss": 0.11010971069335937, + "step": 144835 + }, + { + "epoch": 1.2523886520652654, + "grad_norm": 0.9249043824167156, + "learning_rate": 1.841558382433019e-06, + "loss": 0.014890289306640625, + "step": 144840 + }, + { + "epoch": 1.2524318855868086, + "grad_norm": 14.227890030426547, + "learning_rate": 1.841370454054934e-06, + "loss": 0.0704071044921875, + "step": 144845 + }, + { + "epoch": 1.2524751191083519, + "grad_norm": 1.337429750127262, + "learning_rate": 1.8411825310203452e-06, + "loss": 0.012320709228515626, + "step": 144850 + }, + { + "epoch": 1.252518352629895, + "grad_norm": 7.06268156742184, + "learning_rate": 1.840994613330121e-06, + "loss": 0.05848541259765625, + "step": 144855 + }, + { + "epoch": 1.2525615861514383, + "grad_norm": 26.62604257480157, + "learning_rate": 1.8408067009851272e-06, + "loss": 0.15652923583984374, + "step": 144860 + }, + { + "epoch": 1.2526048196729818, + "grad_norm": 7.892502242697045, + "learning_rate": 1.8406187939862304e-06, + "loss": 0.3135875701904297, + "step": 144865 + }, + { + "epoch": 1.252648053194525, + "grad_norm": 13.401080680714587, + "learning_rate": 1.8404308923342958e-06, + "loss": 0.05611763000488281, + "step": 144870 + }, + { + "epoch": 1.2526912867160682, + "grad_norm": 0.653715058896821, + "learning_rate": 1.8402429960301928e-06, + "loss": 0.050969696044921874, + "step": 144875 + }, + { + "epoch": 1.2527345202376114, + "grad_norm": 4.774180035728409, + "learning_rate": 1.8400551050747865e-06, + "loss": 0.05746612548828125, + "step": 144880 + }, + { + "epoch": 1.2527777537591547, + "grad_norm": 16.14018137084361, + "learning_rate": 1.8398672194689436e-06, + "loss": 0.28812923431396487, + "step": 144885 + }, + { + "epoch": 1.252820987280698, + "grad_norm": 3.4483120863104766, + "learning_rate": 1.839679339213531e-06, + "loss": 0.015146636962890625, + "step": 144890 + }, + { + "epoch": 1.2528642208022411, + "grad_norm": 1.031142350407182, + "learning_rate": 1.8394914643094142e-06, + "loss": 0.0757720947265625, + "step": 144895 + }, + { + "epoch": 1.2529074543237844, + "grad_norm": 0.6912533607517974, + "learning_rate": 1.8393035947574597e-06, + "loss": 0.039571380615234374, + "step": 144900 + }, + { + "epoch": 1.2529506878453278, + "grad_norm": 1.9634421007916545, + "learning_rate": 1.839115730558535e-06, + "loss": 0.0340728759765625, + "step": 144905 + }, + { + "epoch": 1.252993921366871, + "grad_norm": 7.115724238187684, + "learning_rate": 1.838927871713506e-06, + "loss": 0.0336639404296875, + "step": 144910 + }, + { + "epoch": 1.2530371548884143, + "grad_norm": 2.1593996143888035, + "learning_rate": 1.8387400182232393e-06, + "loss": 0.033367919921875, + "step": 144915 + }, + { + "epoch": 1.2530803884099575, + "grad_norm": 2.2982257015367646, + "learning_rate": 1.838552170088601e-06, + "loss": 0.04677276611328125, + "step": 144920 + }, + { + "epoch": 1.2531236219315007, + "grad_norm": 7.168824201884611, + "learning_rate": 1.8383643273104574e-06, + "loss": 0.0390380859375, + "step": 144925 + }, + { + "epoch": 1.2531668554530442, + "grad_norm": 14.291234761515948, + "learning_rate": 1.8381764898896738e-06, + "loss": 0.11727981567382813, + "step": 144930 + }, + { + "epoch": 1.2532100889745874, + "grad_norm": 17.52654450453024, + "learning_rate": 1.8379886578271182e-06, + "loss": 0.29922027587890626, + "step": 144935 + }, + { + "epoch": 1.2532533224961306, + "grad_norm": 3.742633403432674, + "learning_rate": 1.8378008311236568e-06, + "loss": 0.038202667236328126, + "step": 144940 + }, + { + "epoch": 1.2532965560176739, + "grad_norm": 17.054134674032653, + "learning_rate": 1.8376130097801553e-06, + "loss": 0.16960983276367186, + "step": 144945 + }, + { + "epoch": 1.253339789539217, + "grad_norm": 0.4061608898679595, + "learning_rate": 1.8374251937974798e-06, + "loss": 0.091107177734375, + "step": 144950 + }, + { + "epoch": 1.2533830230607603, + "grad_norm": 0.14741373636091917, + "learning_rate": 1.8372373831764962e-06, + "loss": 0.11493682861328125, + "step": 144955 + }, + { + "epoch": 1.2534262565823036, + "grad_norm": 0.15480532009049572, + "learning_rate": 1.8370495779180716e-06, + "loss": 0.024454498291015626, + "step": 144960 + }, + { + "epoch": 1.2534694901038468, + "grad_norm": 29.243799870040824, + "learning_rate": 1.83686177802307e-06, + "loss": 0.1297637939453125, + "step": 144965 + }, + { + "epoch": 1.2535127236253902, + "grad_norm": 13.333403237437137, + "learning_rate": 1.8366739834923607e-06, + "loss": 0.07460174560546876, + "step": 144970 + }, + { + "epoch": 1.2535559571469335, + "grad_norm": 25.405121355689666, + "learning_rate": 1.8364861943268084e-06, + "loss": 0.17673225402832032, + "step": 144975 + }, + { + "epoch": 1.2535991906684767, + "grad_norm": 2.5568510659326873, + "learning_rate": 1.8362984105272783e-06, + "loss": 0.015276336669921875, + "step": 144980 + }, + { + "epoch": 1.25364242419002, + "grad_norm": 4.720173969075239, + "learning_rate": 1.836110632094637e-06, + "loss": 0.025723934173583984, + "step": 144985 + }, + { + "epoch": 1.2536856577115634, + "grad_norm": 0.5690550583366424, + "learning_rate": 1.835922859029751e-06, + "loss": 0.021776199340820312, + "step": 144990 + }, + { + "epoch": 1.2537288912331066, + "grad_norm": 24.66711086488925, + "learning_rate": 1.8357350913334847e-06, + "loss": 0.11648025512695312, + "step": 144995 + }, + { + "epoch": 1.2537721247546498, + "grad_norm": 0.495220021242697, + "learning_rate": 1.835547329006707e-06, + "loss": 0.09894180297851562, + "step": 145000 + }, + { + "epoch": 1.253815358276193, + "grad_norm": 0.9477553584189375, + "learning_rate": 1.8353595720502815e-06, + "loss": 0.04881439208984375, + "step": 145005 + }, + { + "epoch": 1.2538585917977363, + "grad_norm": 0.2586721537223803, + "learning_rate": 1.835171820465075e-06, + "loss": 0.03430328369140625, + "step": 145010 + }, + { + "epoch": 1.2539018253192795, + "grad_norm": 11.66273673578511, + "learning_rate": 1.8349840742519525e-06, + "loss": 0.06890478134155273, + "step": 145015 + }, + { + "epoch": 1.2539450588408227, + "grad_norm": 14.29598845243818, + "learning_rate": 1.8347963334117808e-06, + "loss": 0.07486686706542969, + "step": 145020 + }, + { + "epoch": 1.253988292362366, + "grad_norm": 0.4867685923818132, + "learning_rate": 1.8346085979454261e-06, + "loss": 0.017458343505859376, + "step": 145025 + }, + { + "epoch": 1.2540315258839094, + "grad_norm": 1.6011046075156612, + "learning_rate": 1.8344208678537518e-06, + "loss": 0.0062885284423828125, + "step": 145030 + }, + { + "epoch": 1.2540747594054527, + "grad_norm": 1.5992989379201306, + "learning_rate": 1.834233143137627e-06, + "loss": 0.04792232513427734, + "step": 145035 + }, + { + "epoch": 1.2541179929269959, + "grad_norm": 12.588706816997572, + "learning_rate": 1.8340454237979157e-06, + "loss": 0.08206100463867187, + "step": 145040 + }, + { + "epoch": 1.2541612264485391, + "grad_norm": 0.6108537167694098, + "learning_rate": 1.8338577098354836e-06, + "loss": 0.03790969848632812, + "step": 145045 + }, + { + "epoch": 1.2542044599700823, + "grad_norm": 2.933985728014206, + "learning_rate": 1.833670001251197e-06, + "loss": 0.23816986083984376, + "step": 145050 + }, + { + "epoch": 1.2542476934916258, + "grad_norm": 0.8031103404616894, + "learning_rate": 1.8334822980459213e-06, + "loss": 0.055667877197265625, + "step": 145055 + }, + { + "epoch": 1.254290927013169, + "grad_norm": 2.1157182011638773, + "learning_rate": 1.833294600220521e-06, + "loss": 0.04668922424316406, + "step": 145060 + }, + { + "epoch": 1.2543341605347123, + "grad_norm": 13.576580881954953, + "learning_rate": 1.8331069077758642e-06, + "loss": 0.12845115661621093, + "step": 145065 + }, + { + "epoch": 1.2543773940562555, + "grad_norm": 1.677140731340758, + "learning_rate": 1.8329192207128154e-06, + "loss": 0.0225311279296875, + "step": 145070 + }, + { + "epoch": 1.2544206275777987, + "grad_norm": 1.1028568245789503, + "learning_rate": 1.8327315390322391e-06, + "loss": 0.052910995483398435, + "step": 145075 + }, + { + "epoch": 1.254463861099342, + "grad_norm": 0.19161443574525586, + "learning_rate": 1.8325438627350026e-06, + "loss": 0.09205322265625, + "step": 145080 + }, + { + "epoch": 1.2545070946208852, + "grad_norm": 11.449177879567808, + "learning_rate": 1.8323561918219705e-06, + "loss": 0.0938018798828125, + "step": 145085 + }, + { + "epoch": 1.2545503281424284, + "grad_norm": 1.6595969259641772, + "learning_rate": 1.8321685262940072e-06, + "loss": 0.06505966186523438, + "step": 145090 + }, + { + "epoch": 1.2545935616639718, + "grad_norm": 0.7546320508404597, + "learning_rate": 1.8319808661519808e-06, + "loss": 0.014367294311523438, + "step": 145095 + }, + { + "epoch": 1.254636795185515, + "grad_norm": 2.024004715802073, + "learning_rate": 1.8317932113967555e-06, + "loss": 0.006931686401367187, + "step": 145100 + }, + { + "epoch": 1.2546800287070583, + "grad_norm": 4.057659452908047, + "learning_rate": 1.831605562029196e-06, + "loss": 0.012203598022460937, + "step": 145105 + }, + { + "epoch": 1.2547232622286015, + "grad_norm": 2.0057656124602325, + "learning_rate": 1.831417918050169e-06, + "loss": 0.16327133178710937, + "step": 145110 + }, + { + "epoch": 1.2547664957501448, + "grad_norm": 3.916391510466245, + "learning_rate": 1.8312302794605398e-06, + "loss": 0.04213981628417969, + "step": 145115 + }, + { + "epoch": 1.2548097292716882, + "grad_norm": 1.250372961200809, + "learning_rate": 1.8310426462611727e-06, + "loss": 0.03748779296875, + "step": 145120 + }, + { + "epoch": 1.2548529627932314, + "grad_norm": 1.2440147742454029, + "learning_rate": 1.8308550184529324e-06, + "loss": 0.01849365234375, + "step": 145125 + }, + { + "epoch": 1.2548961963147747, + "grad_norm": 14.993593018812062, + "learning_rate": 1.8306673960366867e-06, + "loss": 0.079144287109375, + "step": 145130 + }, + { + "epoch": 1.254939429836318, + "grad_norm": 9.819448454301138, + "learning_rate": 1.8304797790132992e-06, + "loss": 0.026779937744140624, + "step": 145135 + }, + { + "epoch": 1.2549826633578611, + "grad_norm": 8.51872281081205, + "learning_rate": 1.8302921673836362e-06, + "loss": 0.08217391967773438, + "step": 145140 + }, + { + "epoch": 1.2550258968794044, + "grad_norm": 13.442300261673912, + "learning_rate": 1.8301045611485626e-06, + "loss": 0.0942138671875, + "step": 145145 + }, + { + "epoch": 1.2550691304009476, + "grad_norm": 0.9916230875248715, + "learning_rate": 1.829916960308943e-06, + "loss": 0.05126190185546875, + "step": 145150 + }, + { + "epoch": 1.2551123639224908, + "grad_norm": 11.893432800574423, + "learning_rate": 1.8297293648656419e-06, + "loss": 0.040036773681640624, + "step": 145155 + }, + { + "epoch": 1.2551555974440343, + "grad_norm": 26.492573435785054, + "learning_rate": 1.829541774819526e-06, + "loss": 0.0390472412109375, + "step": 145160 + }, + { + "epoch": 1.2551988309655775, + "grad_norm": 15.730126850560998, + "learning_rate": 1.829354190171461e-06, + "loss": 0.059095001220703124, + "step": 145165 + }, + { + "epoch": 1.2552420644871207, + "grad_norm": 5.3782589187412855, + "learning_rate": 1.8291666109223105e-06, + "loss": 0.043272781372070315, + "step": 145170 + }, + { + "epoch": 1.255285298008664, + "grad_norm": 8.871436024605563, + "learning_rate": 1.8289790370729406e-06, + "loss": 0.08463363647460938, + "step": 145175 + }, + { + "epoch": 1.2553285315302072, + "grad_norm": 0.059271822536162026, + "learning_rate": 1.8287914686242154e-06, + "loss": 0.04796943664550781, + "step": 145180 + }, + { + "epoch": 1.2553717650517506, + "grad_norm": 20.515269017228846, + "learning_rate": 1.828603905577e-06, + "loss": 0.04215660095214844, + "step": 145185 + }, + { + "epoch": 1.2554149985732939, + "grad_norm": 6.949390211138538, + "learning_rate": 1.8284163479321598e-06, + "loss": 0.041558837890625, + "step": 145190 + }, + { + "epoch": 1.255458232094837, + "grad_norm": 34.01013476123989, + "learning_rate": 1.8282287956905613e-06, + "loss": 0.08961944580078125, + "step": 145195 + }, + { + "epoch": 1.2555014656163803, + "grad_norm": 0.4672061625450012, + "learning_rate": 1.828041248853067e-06, + "loss": 0.02070770263671875, + "step": 145200 + }, + { + "epoch": 1.2555446991379235, + "grad_norm": 0.7314756766101308, + "learning_rate": 1.8278537074205438e-06, + "loss": 0.068975830078125, + "step": 145205 + }, + { + "epoch": 1.2555879326594668, + "grad_norm": 10.458324485165692, + "learning_rate": 1.8276661713938545e-06, + "loss": 0.0635009765625, + "step": 145210 + }, + { + "epoch": 1.25563116618101, + "grad_norm": 0.18330409076923662, + "learning_rate": 1.8274786407738661e-06, + "loss": 0.15539512634277344, + "step": 145215 + }, + { + "epoch": 1.2556743997025532, + "grad_norm": 0.14130847724918288, + "learning_rate": 1.8272911155614417e-06, + "loss": 0.028303909301757812, + "step": 145220 + }, + { + "epoch": 1.2557176332240967, + "grad_norm": 0.1304678671459934, + "learning_rate": 1.8271035957574478e-06, + "loss": 0.1183807373046875, + "step": 145225 + }, + { + "epoch": 1.25576086674564, + "grad_norm": 0.2592587328117445, + "learning_rate": 1.8269160813627489e-06, + "loss": 0.016732406616210938, + "step": 145230 + }, + { + "epoch": 1.2558041002671831, + "grad_norm": 17.363891466282116, + "learning_rate": 1.8267285723782088e-06, + "loss": 0.061048698425292966, + "step": 145235 + }, + { + "epoch": 1.2558473337887264, + "grad_norm": 18.68769633156723, + "learning_rate": 1.8265410688046924e-06, + "loss": 0.15571441650390624, + "step": 145240 + }, + { + "epoch": 1.2558905673102698, + "grad_norm": 0.23539908424465078, + "learning_rate": 1.826353570643066e-06, + "loss": 0.11519870758056641, + "step": 145245 + }, + { + "epoch": 1.255933800831813, + "grad_norm": 6.671060202396183, + "learning_rate": 1.8261660778941918e-06, + "loss": 0.024275588989257812, + "step": 145250 + }, + { + "epoch": 1.2559770343533563, + "grad_norm": 0.8374891542123015, + "learning_rate": 1.8259785905589367e-06, + "loss": 0.090032958984375, + "step": 145255 + }, + { + "epoch": 1.2560202678748995, + "grad_norm": 26.361067435573815, + "learning_rate": 1.825791108638165e-06, + "loss": 0.11554527282714844, + "step": 145260 + }, + { + "epoch": 1.2560635013964427, + "grad_norm": 1.1127575507081762, + "learning_rate": 1.8256036321327406e-06, + "loss": 0.2046539306640625, + "step": 145265 + }, + { + "epoch": 1.256106734917986, + "grad_norm": 6.936208057909277, + "learning_rate": 1.8254161610435282e-06, + "loss": 0.1311431884765625, + "step": 145270 + }, + { + "epoch": 1.2561499684395292, + "grad_norm": 8.23025314956253, + "learning_rate": 1.8252286953713932e-06, + "loss": 0.02882804870605469, + "step": 145275 + }, + { + "epoch": 1.2561932019610724, + "grad_norm": 5.7692663483813975, + "learning_rate": 1.8250412351171987e-06, + "loss": 0.03204803466796875, + "step": 145280 + }, + { + "epoch": 1.2562364354826159, + "grad_norm": 4.7709363042771065, + "learning_rate": 1.8248537802818112e-06, + "loss": 0.10730628967285157, + "step": 145285 + }, + { + "epoch": 1.256279669004159, + "grad_norm": 8.913853828317391, + "learning_rate": 1.824666330866094e-06, + "loss": 0.13713493347167968, + "step": 145290 + }, + { + "epoch": 1.2563229025257023, + "grad_norm": 4.248399274178745, + "learning_rate": 1.8244788868709122e-06, + "loss": 0.04652557373046875, + "step": 145295 + }, + { + "epoch": 1.2563661360472456, + "grad_norm": 2.0952941184523652, + "learning_rate": 1.8242914482971291e-06, + "loss": 0.106927490234375, + "step": 145300 + }, + { + "epoch": 1.2564093695687888, + "grad_norm": 18.846345170443414, + "learning_rate": 1.8241040151456106e-06, + "loss": 0.06471710205078125, + "step": 145305 + }, + { + "epoch": 1.2564526030903322, + "grad_norm": 3.6548144428135476, + "learning_rate": 1.8239165874172208e-06, + "loss": 0.01476593017578125, + "step": 145310 + }, + { + "epoch": 1.2564958366118755, + "grad_norm": 10.471893008809863, + "learning_rate": 1.8237291651128219e-06, + "loss": 0.2602424621582031, + "step": 145315 + }, + { + "epoch": 1.2565390701334187, + "grad_norm": 19.790396347346572, + "learning_rate": 1.8235417482332818e-06, + "loss": 0.08047027587890625, + "step": 145320 + }, + { + "epoch": 1.256582303654962, + "grad_norm": 7.877189086685989, + "learning_rate": 1.823354336779463e-06, + "loss": 0.011563491821289063, + "step": 145325 + }, + { + "epoch": 1.2566255371765052, + "grad_norm": 3.654709795314228, + "learning_rate": 1.8231669307522297e-06, + "loss": 0.04932327270507812, + "step": 145330 + }, + { + "epoch": 1.2566687706980484, + "grad_norm": 45.13165511917178, + "learning_rate": 1.8229795301524467e-06, + "loss": 0.10594024658203124, + "step": 145335 + }, + { + "epoch": 1.2567120042195916, + "grad_norm": 2.071870102710356, + "learning_rate": 1.8227921349809785e-06, + "loss": 0.05469512939453125, + "step": 145340 + }, + { + "epoch": 1.2567552377411348, + "grad_norm": 0.54082661733129, + "learning_rate": 1.8226047452386878e-06, + "loss": 0.011983108520507813, + "step": 145345 + }, + { + "epoch": 1.2567984712626783, + "grad_norm": 2.2055361599786143, + "learning_rate": 1.822417360926441e-06, + "loss": 0.13167953491210938, + "step": 145350 + }, + { + "epoch": 1.2568417047842215, + "grad_norm": 3.023515160759982, + "learning_rate": 1.8222299820451005e-06, + "loss": 0.048050308227539064, + "step": 145355 + }, + { + "epoch": 1.2568849383057648, + "grad_norm": 0.43896883351268523, + "learning_rate": 1.822042608595532e-06, + "loss": 0.052001953125, + "step": 145360 + }, + { + "epoch": 1.256928171827308, + "grad_norm": 2.064045499519427, + "learning_rate": 1.821855240578599e-06, + "loss": 0.01320037841796875, + "step": 145365 + }, + { + "epoch": 1.2569714053488512, + "grad_norm": 6.059387638763989, + "learning_rate": 1.8216678779951656e-06, + "loss": 0.014241981506347656, + "step": 145370 + }, + { + "epoch": 1.2570146388703947, + "grad_norm": 0.46125183503281825, + "learning_rate": 1.8214805208460954e-06, + "loss": 0.014134597778320313, + "step": 145375 + }, + { + "epoch": 1.257057872391938, + "grad_norm": 0.7795869306458103, + "learning_rate": 1.8212931691322523e-06, + "loss": 0.06966609954833984, + "step": 145380 + }, + { + "epoch": 1.2571011059134811, + "grad_norm": 1.2530234005085532, + "learning_rate": 1.8211058228545012e-06, + "loss": 0.022672653198242188, + "step": 145385 + }, + { + "epoch": 1.2571443394350243, + "grad_norm": 0.5121375145884186, + "learning_rate": 1.8209184820137064e-06, + "loss": 0.010306739807128906, + "step": 145390 + }, + { + "epoch": 1.2571875729565676, + "grad_norm": 0.261356651678529, + "learning_rate": 1.8207311466107316e-06, + "loss": 0.03697891235351562, + "step": 145395 + }, + { + "epoch": 1.2572308064781108, + "grad_norm": 10.731591774384405, + "learning_rate": 1.8205438166464406e-06, + "loss": 0.06182518005371094, + "step": 145400 + }, + { + "epoch": 1.257274039999654, + "grad_norm": 1.269583504265063, + "learning_rate": 1.8203564921216963e-06, + "loss": 0.015375518798828125, + "step": 145405 + }, + { + "epoch": 1.2573172735211973, + "grad_norm": 0.22378173200921406, + "learning_rate": 1.820169173037364e-06, + "loss": 0.0377044677734375, + "step": 145410 + }, + { + "epoch": 1.2573605070427407, + "grad_norm": 6.799366027018792, + "learning_rate": 1.8199818593943073e-06, + "loss": 0.08825263977050782, + "step": 145415 + }, + { + "epoch": 1.257403740564284, + "grad_norm": 7.2108740428680465, + "learning_rate": 1.8197945511933903e-06, + "loss": 0.016202926635742188, + "step": 145420 + }, + { + "epoch": 1.2574469740858272, + "grad_norm": 9.000152331575665, + "learning_rate": 1.8196072484354763e-06, + "loss": 0.058800697326660156, + "step": 145425 + }, + { + "epoch": 1.2574902076073704, + "grad_norm": 15.769642810026419, + "learning_rate": 1.8194199511214297e-06, + "loss": 0.05933456420898438, + "step": 145430 + }, + { + "epoch": 1.2575334411289136, + "grad_norm": 1.482412289710381, + "learning_rate": 1.8192326592521135e-06, + "loss": 0.017682647705078124, + "step": 145435 + }, + { + "epoch": 1.257576674650457, + "grad_norm": 1.909671830442739, + "learning_rate": 1.819045372828392e-06, + "loss": 0.0795928955078125, + "step": 145440 + }, + { + "epoch": 1.2576199081720003, + "grad_norm": 1.6053984414711826, + "learning_rate": 1.8188580918511283e-06, + "loss": 0.06912918090820312, + "step": 145445 + }, + { + "epoch": 1.2576631416935435, + "grad_norm": 2.3630928980402843, + "learning_rate": 1.818670816321188e-06, + "loss": 0.010525894165039063, + "step": 145450 + }, + { + "epoch": 1.2577063752150868, + "grad_norm": 25.62108823099697, + "learning_rate": 1.818483546239433e-06, + "loss": 0.1382293701171875, + "step": 145455 + }, + { + "epoch": 1.25774960873663, + "grad_norm": 0.36261656779152696, + "learning_rate": 1.8182962816067278e-06, + "loss": 0.05620994567871094, + "step": 145460 + }, + { + "epoch": 1.2577928422581732, + "grad_norm": 0.46611251854204866, + "learning_rate": 1.818109022423935e-06, + "loss": 0.09724960327148438, + "step": 145465 + }, + { + "epoch": 1.2578360757797165, + "grad_norm": 1.860493460993573, + "learning_rate": 1.8179217686919198e-06, + "loss": 0.015023612976074218, + "step": 145470 + }, + { + "epoch": 1.25787930930126, + "grad_norm": 3.291483523104394, + "learning_rate": 1.8177345204115437e-06, + "loss": 0.11918182373046875, + "step": 145475 + }, + { + "epoch": 1.2579225428228031, + "grad_norm": 11.655072069797813, + "learning_rate": 1.8175472775836722e-06, + "loss": 0.1745269775390625, + "step": 145480 + }, + { + "epoch": 1.2579657763443464, + "grad_norm": 9.161367042618629, + "learning_rate": 1.8173600402091687e-06, + "loss": 0.03782634735107422, + "step": 145485 + }, + { + "epoch": 1.2580090098658896, + "grad_norm": 0.6886400050165371, + "learning_rate": 1.817172808288896e-06, + "loss": 0.0459136962890625, + "step": 145490 + }, + { + "epoch": 1.2580522433874328, + "grad_norm": 2.4104038257891, + "learning_rate": 1.8169855818237171e-06, + "loss": 0.04433135986328125, + "step": 145495 + }, + { + "epoch": 1.2580954769089763, + "grad_norm": 0.9336275679331709, + "learning_rate": 1.816798360814497e-06, + "loss": 0.05842437744140625, + "step": 145500 + }, + { + "epoch": 1.2581387104305195, + "grad_norm": 39.55879849028086, + "learning_rate": 1.816611145262097e-06, + "loss": 0.3375873565673828, + "step": 145505 + }, + { + "epoch": 1.2581819439520627, + "grad_norm": 0.8708651485114821, + "learning_rate": 1.8164239351673832e-06, + "loss": 0.043438720703125, + "step": 145510 + }, + { + "epoch": 1.258225177473606, + "grad_norm": 2.4496071806811166, + "learning_rate": 1.816236730531217e-06, + "loss": 0.0901031494140625, + "step": 145515 + }, + { + "epoch": 1.2582684109951492, + "grad_norm": 9.920142331445929, + "learning_rate": 1.8160495313544632e-06, + "loss": 0.2791740417480469, + "step": 145520 + }, + { + "epoch": 1.2583116445166924, + "grad_norm": 2.4843697144537833, + "learning_rate": 1.8158623376379834e-06, + "loss": 0.1591944694519043, + "step": 145525 + }, + { + "epoch": 1.2583548780382356, + "grad_norm": 0.9549308137822491, + "learning_rate": 1.8156751493826427e-06, + "loss": 0.1404338836669922, + "step": 145530 + }, + { + "epoch": 1.2583981115597789, + "grad_norm": 4.230781145777422, + "learning_rate": 1.815487966589302e-06, + "loss": 0.1192474365234375, + "step": 145535 + }, + { + "epoch": 1.2584413450813223, + "grad_norm": 19.27569655466076, + "learning_rate": 1.8153007892588277e-06, + "loss": 0.2811130523681641, + "step": 145540 + }, + { + "epoch": 1.2584845786028656, + "grad_norm": 31.40943750514157, + "learning_rate": 1.815113617392081e-06, + "loss": 0.2455230712890625, + "step": 145545 + }, + { + "epoch": 1.2585278121244088, + "grad_norm": 106.58926530648831, + "learning_rate": 1.8149264509899257e-06, + "loss": 0.3501853942871094, + "step": 145550 + }, + { + "epoch": 1.258571045645952, + "grad_norm": 5.410580000574859, + "learning_rate": 1.8147392900532246e-06, + "loss": 0.04122772216796875, + "step": 145555 + }, + { + "epoch": 1.2586142791674952, + "grad_norm": 10.275556959009615, + "learning_rate": 1.8145521345828414e-06, + "loss": 0.0452423095703125, + "step": 145560 + }, + { + "epoch": 1.2586575126890387, + "grad_norm": 27.90184907326545, + "learning_rate": 1.8143649845796394e-06, + "loss": 0.05352783203125, + "step": 145565 + }, + { + "epoch": 1.258700746210582, + "grad_norm": 62.004682895619794, + "learning_rate": 1.8141778400444798e-06, + "loss": 0.34806365966796876, + "step": 145570 + }, + { + "epoch": 1.2587439797321252, + "grad_norm": 5.024188077738675, + "learning_rate": 1.8139907009782289e-06, + "loss": 0.0502288818359375, + "step": 145575 + }, + { + "epoch": 1.2587872132536684, + "grad_norm": 0.6538110401514049, + "learning_rate": 1.8138035673817468e-06, + "loss": 0.0214263916015625, + "step": 145580 + }, + { + "epoch": 1.2588304467752116, + "grad_norm": 3.4435481706321647, + "learning_rate": 1.8136164392558987e-06, + "loss": 0.04081926345825195, + "step": 145585 + }, + { + "epoch": 1.2588736802967548, + "grad_norm": 0.7088315826238453, + "learning_rate": 1.8134293166015472e-06, + "loss": 0.02412872314453125, + "step": 145590 + }, + { + "epoch": 1.258916913818298, + "grad_norm": 7.137136127849041, + "learning_rate": 1.8132421994195542e-06, + "loss": 0.0885498046875, + "step": 145595 + }, + { + "epoch": 1.2589601473398413, + "grad_norm": 41.26309987219476, + "learning_rate": 1.8130550877107825e-06, + "loss": 0.2864643096923828, + "step": 145600 + }, + { + "epoch": 1.2590033808613847, + "grad_norm": 19.60486969208834, + "learning_rate": 1.812867981476097e-06, + "loss": 0.03691611289978027, + "step": 145605 + }, + { + "epoch": 1.259046614382928, + "grad_norm": 0.17628718310306565, + "learning_rate": 1.8126808807163585e-06, + "loss": 0.03232765197753906, + "step": 145610 + }, + { + "epoch": 1.2590898479044712, + "grad_norm": 0.29917240177346965, + "learning_rate": 1.8124937854324321e-06, + "loss": 0.0531219482421875, + "step": 145615 + }, + { + "epoch": 1.2591330814260144, + "grad_norm": 27.79110465060658, + "learning_rate": 1.8123066956251792e-06, + "loss": 0.11842384338378906, + "step": 145620 + }, + { + "epoch": 1.2591763149475577, + "grad_norm": 1.597541508555416, + "learning_rate": 1.8121196112954625e-06, + "loss": 0.51962890625, + "step": 145625 + }, + { + "epoch": 1.2592195484691011, + "grad_norm": 0.6769563758037243, + "learning_rate": 1.8119325324441445e-06, + "loss": 0.021660041809082032, + "step": 145630 + }, + { + "epoch": 1.2592627819906443, + "grad_norm": 1.5837788129485895, + "learning_rate": 1.8117454590720897e-06, + "loss": 0.10930747985839843, + "step": 145635 + }, + { + "epoch": 1.2593060155121876, + "grad_norm": 1.3394315152257121, + "learning_rate": 1.8115583911801593e-06, + "loss": 0.28037185668945314, + "step": 145640 + }, + { + "epoch": 1.2593492490337308, + "grad_norm": 8.387331468184918, + "learning_rate": 1.8113713287692168e-06, + "loss": 0.4710670471191406, + "step": 145645 + }, + { + "epoch": 1.259392482555274, + "grad_norm": 12.489878574338634, + "learning_rate": 1.8111842718401253e-06, + "loss": 0.05632867813110352, + "step": 145650 + }, + { + "epoch": 1.2594357160768173, + "grad_norm": 1.8055007458122054, + "learning_rate": 1.8109972203937466e-06, + "loss": 0.4630472183227539, + "step": 145655 + }, + { + "epoch": 1.2594789495983605, + "grad_norm": 1.2335800214895407, + "learning_rate": 1.8108101744309428e-06, + "loss": 0.083880615234375, + "step": 145660 + }, + { + "epoch": 1.2595221831199037, + "grad_norm": 1.457675523044422, + "learning_rate": 1.8106231339525777e-06, + "loss": 0.12197456359863282, + "step": 145665 + }, + { + "epoch": 1.2595654166414472, + "grad_norm": 17.206379589428256, + "learning_rate": 1.8104360989595133e-06, + "loss": 0.23303794860839844, + "step": 145670 + }, + { + "epoch": 1.2596086501629904, + "grad_norm": 4.663262787895056, + "learning_rate": 1.8102490694526136e-06, + "loss": 0.04260940551757812, + "step": 145675 + }, + { + "epoch": 1.2596518836845336, + "grad_norm": 12.913380235094817, + "learning_rate": 1.8100620454327401e-06, + "loss": 0.04527435302734375, + "step": 145680 + }, + { + "epoch": 1.2596951172060769, + "grad_norm": 4.9579739449044595, + "learning_rate": 1.8098750269007546e-06, + "loss": 0.14100265502929688, + "step": 145685 + }, + { + "epoch": 1.2597383507276203, + "grad_norm": 2.5149743906520086, + "learning_rate": 1.8096880138575203e-06, + "loss": 0.019173431396484374, + "step": 145690 + }, + { + "epoch": 1.2597815842491635, + "grad_norm": 0.17250568841789937, + "learning_rate": 1.8095010063038994e-06, + "loss": 0.0074368476867675785, + "step": 145695 + }, + { + "epoch": 1.2598248177707068, + "grad_norm": 0.6099592715822156, + "learning_rate": 1.809314004240755e-06, + "loss": 0.05862998962402344, + "step": 145700 + }, + { + "epoch": 1.25986805129225, + "grad_norm": 0.8391444436742042, + "learning_rate": 1.80912700766895e-06, + "loss": 0.031280517578125, + "step": 145705 + }, + { + "epoch": 1.2599112848137932, + "grad_norm": 1.8687611002045237, + "learning_rate": 1.8089400165893455e-06, + "loss": 0.06127738952636719, + "step": 145710 + }, + { + "epoch": 1.2599545183353364, + "grad_norm": 38.80373675774415, + "learning_rate": 1.8087530310028045e-06, + "loss": 0.14828929901123047, + "step": 145715 + }, + { + "epoch": 1.2599977518568797, + "grad_norm": 36.459483250649, + "learning_rate": 1.8085660509101886e-06, + "loss": 0.11732139587402343, + "step": 145720 + }, + { + "epoch": 1.260040985378423, + "grad_norm": 1.549348966484545, + "learning_rate": 1.8083790763123616e-06, + "loss": 0.025234222412109375, + "step": 145725 + }, + { + "epoch": 1.2600842188999664, + "grad_norm": 0.9609119628963823, + "learning_rate": 1.8081921072101838e-06, + "loss": 0.15070648193359376, + "step": 145730 + }, + { + "epoch": 1.2601274524215096, + "grad_norm": 0.26418293482261385, + "learning_rate": 1.8080051436045194e-06, + "loss": 0.2167116165161133, + "step": 145735 + }, + { + "epoch": 1.2601706859430528, + "grad_norm": 10.21308794132731, + "learning_rate": 1.8078181854962303e-06, + "loss": 0.10132064819335937, + "step": 145740 + }, + { + "epoch": 1.260213919464596, + "grad_norm": 52.47433898692255, + "learning_rate": 1.8076312328861784e-06, + "loss": 0.26403656005859377, + "step": 145745 + }, + { + "epoch": 1.2602571529861393, + "grad_norm": 5.344134214336695, + "learning_rate": 1.8074442857752254e-06, + "loss": 0.031476593017578124, + "step": 145750 + }, + { + "epoch": 1.2603003865076827, + "grad_norm": 22.59753894848743, + "learning_rate": 1.8072573441642345e-06, + "loss": 0.17372894287109375, + "step": 145755 + }, + { + "epoch": 1.260343620029226, + "grad_norm": 12.614180619397732, + "learning_rate": 1.8070704080540662e-06, + "loss": 0.048297119140625, + "step": 145760 + }, + { + "epoch": 1.2603868535507692, + "grad_norm": 0.9124395257742153, + "learning_rate": 1.806883477445585e-06, + "loss": 0.05919361114501953, + "step": 145765 + }, + { + "epoch": 1.2604300870723124, + "grad_norm": 0.5138389289307402, + "learning_rate": 1.8066965523396517e-06, + "loss": 0.136260986328125, + "step": 145770 + }, + { + "epoch": 1.2604733205938556, + "grad_norm": 1.0011008837786746, + "learning_rate": 1.8065096327371283e-06, + "loss": 0.05784111022949219, + "step": 145775 + }, + { + "epoch": 1.2605165541153989, + "grad_norm": 1.974525834444403, + "learning_rate": 1.8063227186388765e-06, + "loss": 0.01622772216796875, + "step": 145780 + }, + { + "epoch": 1.260559787636942, + "grad_norm": 6.943368075770545, + "learning_rate": 1.8061358100457597e-06, + "loss": 0.04173431396484375, + "step": 145785 + }, + { + "epoch": 1.2606030211584853, + "grad_norm": 14.21237422475183, + "learning_rate": 1.8059489069586373e-06, + "loss": 0.21549835205078124, + "step": 145790 + }, + { + "epoch": 1.2606462546800288, + "grad_norm": 1.505602602410856, + "learning_rate": 1.8057620093783748e-06, + "loss": 0.07339935302734375, + "step": 145795 + }, + { + "epoch": 1.260689488201572, + "grad_norm": 0.6501812298237761, + "learning_rate": 1.805575117305832e-06, + "loss": 0.010578155517578125, + "step": 145800 + }, + { + "epoch": 1.2607327217231152, + "grad_norm": 0.5224898128903085, + "learning_rate": 1.805388230741871e-06, + "loss": 0.016701316833496092, + "step": 145805 + }, + { + "epoch": 1.2607759552446585, + "grad_norm": 12.110859239049857, + "learning_rate": 1.8052013496873543e-06, + "loss": 0.12041549682617188, + "step": 145810 + }, + { + "epoch": 1.2608191887662017, + "grad_norm": 1.3337172532836465, + "learning_rate": 1.8050144741431431e-06, + "loss": 0.014783859252929688, + "step": 145815 + }, + { + "epoch": 1.2608624222877451, + "grad_norm": 3.6184330709736106, + "learning_rate": 1.8048276041101001e-06, + "loss": 0.04261970520019531, + "step": 145820 + }, + { + "epoch": 1.2609056558092884, + "grad_norm": 0.9446658442813812, + "learning_rate": 1.804640739589085e-06, + "loss": 0.026257705688476563, + "step": 145825 + }, + { + "epoch": 1.2609488893308316, + "grad_norm": 1.7520192089106101, + "learning_rate": 1.8044538805809628e-06, + "loss": 0.06743431091308594, + "step": 145830 + }, + { + "epoch": 1.2609921228523748, + "grad_norm": 4.330266024478093, + "learning_rate": 1.8042670270865928e-06, + "loss": 0.1865234375, + "step": 145835 + }, + { + "epoch": 1.261035356373918, + "grad_norm": 8.133301664363092, + "learning_rate": 1.8040801791068382e-06, + "loss": 0.07032318115234375, + "step": 145840 + }, + { + "epoch": 1.2610785898954613, + "grad_norm": 8.36740565425088, + "learning_rate": 1.80389333664256e-06, + "loss": 0.03477935791015625, + "step": 145845 + }, + { + "epoch": 1.2611218234170045, + "grad_norm": 7.347317822678795, + "learning_rate": 1.8037064996946203e-06, + "loss": 0.074884033203125, + "step": 145850 + }, + { + "epoch": 1.2611650569385477, + "grad_norm": 0.45689824147471625, + "learning_rate": 1.803519668263879e-06, + "loss": 0.0228240966796875, + "step": 145855 + }, + { + "epoch": 1.2612082904600912, + "grad_norm": 3.840453229135214, + "learning_rate": 1.8033328423512005e-06, + "loss": 0.01055755615234375, + "step": 145860 + }, + { + "epoch": 1.2612515239816344, + "grad_norm": 5.145442724942477, + "learning_rate": 1.8031460219574449e-06, + "loss": 0.050855255126953124, + "step": 145865 + }, + { + "epoch": 1.2612947575031777, + "grad_norm": 17.110523950075763, + "learning_rate": 1.8029592070834745e-06, + "loss": 0.08447227478027344, + "step": 145870 + }, + { + "epoch": 1.2613379910247209, + "grad_norm": 0.6817714157315248, + "learning_rate": 1.802772397730151e-06, + "loss": 0.03636322021484375, + "step": 145875 + }, + { + "epoch": 1.2613812245462641, + "grad_norm": 4.121375988067588, + "learning_rate": 1.8025855938983344e-06, + "loss": 0.20573272705078124, + "step": 145880 + }, + { + "epoch": 1.2614244580678076, + "grad_norm": 0.156201528602932, + "learning_rate": 1.8023987955888868e-06, + "loss": 0.5120582580566406, + "step": 145885 + }, + { + "epoch": 1.2614676915893508, + "grad_norm": 6.101782672001191, + "learning_rate": 1.802212002802671e-06, + "loss": 0.025017929077148438, + "step": 145890 + }, + { + "epoch": 1.261510925110894, + "grad_norm": 0.07977174768636058, + "learning_rate": 1.802025215540547e-06, + "loss": 0.1112715721130371, + "step": 145895 + }, + { + "epoch": 1.2615541586324373, + "grad_norm": 14.407007334984991, + "learning_rate": 1.801838433803378e-06, + "loss": 0.098565673828125, + "step": 145900 + }, + { + "epoch": 1.2615973921539805, + "grad_norm": 4.491471411250257, + "learning_rate": 1.8016516575920234e-06, + "loss": 0.034683609008789064, + "step": 145905 + }, + { + "epoch": 1.2616406256755237, + "grad_norm": 0.4341069338456496, + "learning_rate": 1.801464886907346e-06, + "loss": 0.02199554443359375, + "step": 145910 + }, + { + "epoch": 1.261683859197067, + "grad_norm": 0.11822910414258091, + "learning_rate": 1.801278121750206e-06, + "loss": 0.04019355773925781, + "step": 145915 + }, + { + "epoch": 1.2617270927186102, + "grad_norm": 6.225955158970666, + "learning_rate": 1.8010913621214654e-06, + "loss": 0.038542556762695315, + "step": 145920 + }, + { + "epoch": 1.2617703262401536, + "grad_norm": 2.5724223461475266, + "learning_rate": 1.8009046080219856e-06, + "loss": 0.04111404418945312, + "step": 145925 + }, + { + "epoch": 1.2618135597616968, + "grad_norm": 21.943807909337934, + "learning_rate": 1.800717859452628e-06, + "loss": 0.21029624938964844, + "step": 145930 + }, + { + "epoch": 1.26185679328324, + "grad_norm": 1.0311873554328914, + "learning_rate": 1.800531116414254e-06, + "loss": 0.03755950927734375, + "step": 145935 + }, + { + "epoch": 1.2619000268047833, + "grad_norm": 1.0383122612001747, + "learning_rate": 1.8003443789077243e-06, + "loss": 0.01300811767578125, + "step": 145940 + }, + { + "epoch": 1.2619432603263268, + "grad_norm": 4.185036880331274, + "learning_rate": 1.8001576469338998e-06, + "loss": 0.06035614013671875, + "step": 145945 + }, + { + "epoch": 1.26198649384787, + "grad_norm": 4.811170991359989, + "learning_rate": 1.799970920493642e-06, + "loss": 0.20634613037109376, + "step": 145950 + }, + { + "epoch": 1.2620297273694132, + "grad_norm": 2.3272922033448706, + "learning_rate": 1.7997841995878128e-06, + "loss": 0.07249908447265625, + "step": 145955 + }, + { + "epoch": 1.2620729608909564, + "grad_norm": 0.47282301614819067, + "learning_rate": 1.799597484217273e-06, + "loss": 0.14956340789794922, + "step": 145960 + }, + { + "epoch": 1.2621161944124997, + "grad_norm": 84.55217245965383, + "learning_rate": 1.7994107743828837e-06, + "loss": 0.3349308013916016, + "step": 145965 + }, + { + "epoch": 1.262159427934043, + "grad_norm": 0.15195374545037688, + "learning_rate": 1.7992240700855058e-06, + "loss": 0.0327056884765625, + "step": 145970 + }, + { + "epoch": 1.2622026614555861, + "grad_norm": 32.245677927758756, + "learning_rate": 1.7990373713259995e-06, + "loss": 0.15746097564697265, + "step": 145975 + }, + { + "epoch": 1.2622458949771294, + "grad_norm": 0.612796183734122, + "learning_rate": 1.7988506781052278e-06, + "loss": 0.048593902587890626, + "step": 145980 + }, + { + "epoch": 1.2622891284986728, + "grad_norm": 0.6220223052061838, + "learning_rate": 1.7986639904240493e-06, + "loss": 0.1097625732421875, + "step": 145985 + }, + { + "epoch": 1.262332362020216, + "grad_norm": 2.4534217389894066, + "learning_rate": 1.7984773082833275e-06, + "loss": 0.01792449951171875, + "step": 145990 + }, + { + "epoch": 1.2623755955417593, + "grad_norm": 19.069349833181743, + "learning_rate": 1.798290631683922e-06, + "loss": 0.017138671875, + "step": 145995 + }, + { + "epoch": 1.2624188290633025, + "grad_norm": 1.6075167552227172, + "learning_rate": 1.7981039606266938e-06, + "loss": 0.09612579345703125, + "step": 146000 + }, + { + "epoch": 1.2624620625848457, + "grad_norm": 16.45964001237505, + "learning_rate": 1.7979172951125041e-06, + "loss": 0.27805633544921876, + "step": 146005 + }, + { + "epoch": 1.2625052961063892, + "grad_norm": 1.5995773654981589, + "learning_rate": 1.7977306351422137e-06, + "loss": 0.007249069213867187, + "step": 146010 + }, + { + "epoch": 1.2625485296279324, + "grad_norm": 4.034814704172675, + "learning_rate": 1.7975439807166818e-06, + "loss": 0.06593017578125, + "step": 146015 + }, + { + "epoch": 1.2625917631494756, + "grad_norm": 1.8674941812677084, + "learning_rate": 1.7973573318367727e-06, + "loss": 0.1456634521484375, + "step": 146020 + }, + { + "epoch": 1.2626349966710189, + "grad_norm": 9.095344914991452, + "learning_rate": 1.7971706885033448e-06, + "loss": 0.24819297790527345, + "step": 146025 + }, + { + "epoch": 1.262678230192562, + "grad_norm": 8.0614750264467, + "learning_rate": 1.7969840507172589e-06, + "loss": 0.030319595336914064, + "step": 146030 + }, + { + "epoch": 1.2627214637141053, + "grad_norm": 0.7219044746032208, + "learning_rate": 1.796797418479377e-06, + "loss": 0.0157073974609375, + "step": 146035 + }, + { + "epoch": 1.2627646972356485, + "grad_norm": 3.8089504761143904, + "learning_rate": 1.7966107917905593e-06, + "loss": 0.07282638549804688, + "step": 146040 + }, + { + "epoch": 1.2628079307571918, + "grad_norm": 0.15665713697044478, + "learning_rate": 1.7964241706516645e-06, + "loss": 0.14965591430664063, + "step": 146045 + }, + { + "epoch": 1.2628511642787352, + "grad_norm": 0.33682286686748747, + "learning_rate": 1.7962375550635567e-06, + "loss": 0.4864528656005859, + "step": 146050 + }, + { + "epoch": 1.2628943978002785, + "grad_norm": 18.50355150496625, + "learning_rate": 1.796050945027095e-06, + "loss": 0.07710151672363282, + "step": 146055 + }, + { + "epoch": 1.2629376313218217, + "grad_norm": 12.423464232492273, + "learning_rate": 1.7958643405431392e-06, + "loss": 0.07177352905273438, + "step": 146060 + }, + { + "epoch": 1.262980864843365, + "grad_norm": 33.588540246946934, + "learning_rate": 1.795677741612551e-06, + "loss": 0.12972602844238282, + "step": 146065 + }, + { + "epoch": 1.2630240983649081, + "grad_norm": 1.913628404539753, + "learning_rate": 1.7954911482361908e-06, + "loss": 0.03463668823242187, + "step": 146070 + }, + { + "epoch": 1.2630673318864516, + "grad_norm": 0.24377541596654279, + "learning_rate": 1.795304560414919e-06, + "loss": 0.02579498291015625, + "step": 146075 + }, + { + "epoch": 1.2631105654079948, + "grad_norm": 0.743414783598153, + "learning_rate": 1.7951179781495953e-06, + "loss": 0.06579303741455078, + "step": 146080 + }, + { + "epoch": 1.263153798929538, + "grad_norm": 0.06094708312253796, + "learning_rate": 1.7949314014410823e-06, + "loss": 0.059851455688476565, + "step": 146085 + }, + { + "epoch": 1.2631970324510813, + "grad_norm": 7.327992352522486, + "learning_rate": 1.7947448302902379e-06, + "loss": 0.17470989227294922, + "step": 146090 + }, + { + "epoch": 1.2632402659726245, + "grad_norm": 0.2651798197754622, + "learning_rate": 1.794558264697925e-06, + "loss": 0.081805419921875, + "step": 146095 + }, + { + "epoch": 1.2632834994941677, + "grad_norm": 4.653902756557741, + "learning_rate": 1.7943717046650026e-06, + "loss": 0.027884769439697265, + "step": 146100 + }, + { + "epoch": 1.263326733015711, + "grad_norm": 0.46378500821480845, + "learning_rate": 1.7941851501923314e-06, + "loss": 0.2813243865966797, + "step": 146105 + }, + { + "epoch": 1.2633699665372542, + "grad_norm": 0.6264347192081072, + "learning_rate": 1.7939986012807705e-06, + "loss": 0.031828689575195315, + "step": 146110 + }, + { + "epoch": 1.2634132000587976, + "grad_norm": 11.960466118028952, + "learning_rate": 1.7938120579311828e-06, + "loss": 0.06041412353515625, + "step": 146115 + }, + { + "epoch": 1.2634564335803409, + "grad_norm": 5.286019683906544, + "learning_rate": 1.7936255201444266e-06, + "loss": 0.04381637573242188, + "step": 146120 + }, + { + "epoch": 1.263499667101884, + "grad_norm": 0.9991279613910354, + "learning_rate": 1.7934389879213638e-06, + "loss": 0.12088623046875, + "step": 146125 + }, + { + "epoch": 1.2635429006234273, + "grad_norm": 3.58146488337873, + "learning_rate": 1.7932524612628537e-06, + "loss": 0.12144813537597657, + "step": 146130 + }, + { + "epoch": 1.2635861341449706, + "grad_norm": 5.002704927654412, + "learning_rate": 1.7930659401697561e-06, + "loss": 0.036885833740234374, + "step": 146135 + }, + { + "epoch": 1.263629367666514, + "grad_norm": 13.817299192560874, + "learning_rate": 1.792879424642931e-06, + "loss": 0.2072845458984375, + "step": 146140 + }, + { + "epoch": 1.2636726011880572, + "grad_norm": 5.784409404336276, + "learning_rate": 1.7926929146832404e-06, + "loss": 0.018183135986328126, + "step": 146145 + }, + { + "epoch": 1.2637158347096005, + "grad_norm": 0.6986458967455195, + "learning_rate": 1.7925064102915428e-06, + "loss": 0.06376914978027344, + "step": 146150 + }, + { + "epoch": 1.2637590682311437, + "grad_norm": 1.6103681165059711, + "learning_rate": 1.7923199114686996e-06, + "loss": 0.04058990478515625, + "step": 146155 + }, + { + "epoch": 1.263802301752687, + "grad_norm": 0.6131835185980131, + "learning_rate": 1.79213341821557e-06, + "loss": 0.06849365234375, + "step": 146160 + }, + { + "epoch": 1.2638455352742302, + "grad_norm": 7.146625510521279, + "learning_rate": 1.7919469305330147e-06, + "loss": 0.020811080932617188, + "step": 146165 + }, + { + "epoch": 1.2638887687957734, + "grad_norm": 0.8999472265870717, + "learning_rate": 1.7917604484218925e-06, + "loss": 0.027771377563476564, + "step": 146170 + }, + { + "epoch": 1.2639320023173166, + "grad_norm": 16.61902721184197, + "learning_rate": 1.7915739718830647e-06, + "loss": 0.06540908813476562, + "step": 146175 + }, + { + "epoch": 1.26397523583886, + "grad_norm": 0.6752440538479771, + "learning_rate": 1.7913875009173908e-06, + "loss": 0.034735107421875, + "step": 146180 + }, + { + "epoch": 1.2640184693604033, + "grad_norm": 0.03312607003777122, + "learning_rate": 1.7912010355257315e-06, + "loss": 0.06766910552978515, + "step": 146185 + }, + { + "epoch": 1.2640617028819465, + "grad_norm": 5.063401439332522, + "learning_rate": 1.7910145757089464e-06, + "loss": 0.06332931518554688, + "step": 146190 + }, + { + "epoch": 1.2641049364034898, + "grad_norm": 46.65851390422818, + "learning_rate": 1.790828121467895e-06, + "loss": 0.23374176025390625, + "step": 146195 + }, + { + "epoch": 1.2641481699250332, + "grad_norm": 19.44333493354612, + "learning_rate": 1.7906416728034366e-06, + "loss": 0.05829010009765625, + "step": 146200 + }, + { + "epoch": 1.2641914034465764, + "grad_norm": 7.268407680147459, + "learning_rate": 1.7904552297164319e-06, + "loss": 0.02209892272949219, + "step": 146205 + }, + { + "epoch": 1.2642346369681197, + "grad_norm": 27.487096792568508, + "learning_rate": 1.7902687922077417e-06, + "loss": 0.25045928955078123, + "step": 146210 + }, + { + "epoch": 1.264277870489663, + "grad_norm": 7.971230459627957, + "learning_rate": 1.7900823602782255e-06, + "loss": 0.12595672607421876, + "step": 146215 + }, + { + "epoch": 1.2643211040112061, + "grad_norm": 9.914486261211476, + "learning_rate": 1.7898959339287417e-06, + "loss": 0.1854839324951172, + "step": 146220 + }, + { + "epoch": 1.2643643375327493, + "grad_norm": 9.823046798314712, + "learning_rate": 1.7897095131601511e-06, + "loss": 0.055035400390625, + "step": 146225 + }, + { + "epoch": 1.2644075710542926, + "grad_norm": 2.8570016764124317, + "learning_rate": 1.7895230979733135e-06, + "loss": 0.03384208679199219, + "step": 146230 + }, + { + "epoch": 1.2644508045758358, + "grad_norm": 26.118813206541468, + "learning_rate": 1.7893366883690868e-06, + "loss": 0.09701309204101563, + "step": 146235 + }, + { + "epoch": 1.2644940380973793, + "grad_norm": 0.43246684295620663, + "learning_rate": 1.789150284348334e-06, + "loss": 0.012517547607421875, + "step": 146240 + }, + { + "epoch": 1.2645372716189225, + "grad_norm": 0.023465413269999444, + "learning_rate": 1.7889638859119132e-06, + "loss": 0.1192169189453125, + "step": 146245 + }, + { + "epoch": 1.2645805051404657, + "grad_norm": 2.3309625934491764, + "learning_rate": 1.7887774930606837e-06, + "loss": 0.05354232788085937, + "step": 146250 + }, + { + "epoch": 1.264623738662009, + "grad_norm": 5.1436418442903005, + "learning_rate": 1.7885911057955047e-06, + "loss": 0.20629806518554689, + "step": 146255 + }, + { + "epoch": 1.2646669721835522, + "grad_norm": 1.1498879301266982, + "learning_rate": 1.7884047241172373e-06, + "loss": 0.30084075927734377, + "step": 146260 + }, + { + "epoch": 1.2647102057050956, + "grad_norm": 6.238238558577527, + "learning_rate": 1.7882183480267401e-06, + "loss": 0.090032958984375, + "step": 146265 + }, + { + "epoch": 1.2647534392266389, + "grad_norm": 25.843827857687966, + "learning_rate": 1.7880319775248717e-06, + "loss": 0.21913604736328124, + "step": 146270 + }, + { + "epoch": 1.264796672748182, + "grad_norm": 5.988856668968141, + "learning_rate": 1.7878456126124941e-06, + "loss": 0.03941192626953125, + "step": 146275 + }, + { + "epoch": 1.2648399062697253, + "grad_norm": 2.131087283202724, + "learning_rate": 1.7876592532904651e-06, + "loss": 0.022563934326171875, + "step": 146280 + }, + { + "epoch": 1.2648831397912685, + "grad_norm": 0.46392617753259163, + "learning_rate": 1.7874728995596439e-06, + "loss": 0.01911468505859375, + "step": 146285 + }, + { + "epoch": 1.2649263733128118, + "grad_norm": 3.768356029315571, + "learning_rate": 1.7872865514208914e-06, + "loss": 0.053415107727050784, + "step": 146290 + }, + { + "epoch": 1.264969606834355, + "grad_norm": 0.20491081709482825, + "learning_rate": 1.7871002088750663e-06, + "loss": 0.01124401092529297, + "step": 146295 + }, + { + "epoch": 1.2650128403558982, + "grad_norm": 2.1487921679157744, + "learning_rate": 1.7869138719230264e-06, + "loss": 0.13532867431640624, + "step": 146300 + }, + { + "epoch": 1.2650560738774417, + "grad_norm": 1.4914104219074231, + "learning_rate": 1.7867275405656337e-06, + "loss": 0.18635902404785157, + "step": 146305 + }, + { + "epoch": 1.265099307398985, + "grad_norm": 3.1719270452872235, + "learning_rate": 1.7865412148037464e-06, + "loss": 0.05981025695800781, + "step": 146310 + }, + { + "epoch": 1.2651425409205281, + "grad_norm": 1.6473438188008243, + "learning_rate": 1.7863548946382233e-06, + "loss": 0.0835113525390625, + "step": 146315 + }, + { + "epoch": 1.2651857744420714, + "grad_norm": 4.046075931077897, + "learning_rate": 1.7861685800699248e-06, + "loss": 0.02857666015625, + "step": 146320 + }, + { + "epoch": 1.2652290079636146, + "grad_norm": 0.5942547506575845, + "learning_rate": 1.7859822710997094e-06, + "loss": 0.066644287109375, + "step": 146325 + }, + { + "epoch": 1.265272241485158, + "grad_norm": 0.31016647712533013, + "learning_rate": 1.7857959677284365e-06, + "loss": 0.162176513671875, + "step": 146330 + }, + { + "epoch": 1.2653154750067013, + "grad_norm": 12.492793430358738, + "learning_rate": 1.785609669956964e-06, + "loss": 0.04552268981933594, + "step": 146335 + }, + { + "epoch": 1.2653587085282445, + "grad_norm": 12.09986764222969, + "learning_rate": 1.7854233777861538e-06, + "loss": 0.08725776672363281, + "step": 146340 + }, + { + "epoch": 1.2654019420497877, + "grad_norm": 4.143844771653891, + "learning_rate": 1.7852370912168631e-06, + "loss": 0.006984329223632813, + "step": 146345 + }, + { + "epoch": 1.265445175571331, + "grad_norm": 0.12675486234607988, + "learning_rate": 1.7850508102499522e-06, + "loss": 0.10915699005126953, + "step": 146350 + }, + { + "epoch": 1.2654884090928742, + "grad_norm": 0.22451778010611828, + "learning_rate": 1.7848645348862795e-06, + "loss": 0.02346954345703125, + "step": 146355 + }, + { + "epoch": 1.2655316426144174, + "grad_norm": 1.5185848673713314, + "learning_rate": 1.7846782651267041e-06, + "loss": 0.051756668090820315, + "step": 146360 + }, + { + "epoch": 1.2655748761359606, + "grad_norm": 1.4633511278768527, + "learning_rate": 1.784492000972084e-06, + "loss": 0.048729324340820314, + "step": 146365 + }, + { + "epoch": 1.265618109657504, + "grad_norm": 6.928860192680148, + "learning_rate": 1.7843057424232809e-06, + "loss": 0.055717086791992186, + "step": 146370 + }, + { + "epoch": 1.2656613431790473, + "grad_norm": 2.8047896962122723, + "learning_rate": 1.7841194894811514e-06, + "loss": 0.016873550415039063, + "step": 146375 + }, + { + "epoch": 1.2657045767005906, + "grad_norm": 30.533618081238334, + "learning_rate": 1.7839332421465563e-06, + "loss": 0.011133193969726562, + "step": 146380 + }, + { + "epoch": 1.2657478102221338, + "grad_norm": 1.3395595503272757, + "learning_rate": 1.783747000420353e-06, + "loss": 0.08311271667480469, + "step": 146385 + }, + { + "epoch": 1.2657910437436772, + "grad_norm": 2.4683277748325345, + "learning_rate": 1.7835607643034014e-06, + "loss": 0.021990203857421876, + "step": 146390 + }, + { + "epoch": 1.2658342772652205, + "grad_norm": 0.9126724840226407, + "learning_rate": 1.7833745337965589e-06, + "loss": 0.055959320068359374, + "step": 146395 + }, + { + "epoch": 1.2658775107867637, + "grad_norm": 13.060370095546931, + "learning_rate": 1.7831883089006868e-06, + "loss": 0.22242240905761718, + "step": 146400 + }, + { + "epoch": 1.265920744308307, + "grad_norm": 5.991836480795402, + "learning_rate": 1.7830020896166419e-06, + "loss": 0.028042411804199217, + "step": 146405 + }, + { + "epoch": 1.2659639778298502, + "grad_norm": 33.16774293614769, + "learning_rate": 1.7828158759452845e-06, + "loss": 0.083306884765625, + "step": 146410 + }, + { + "epoch": 1.2660072113513934, + "grad_norm": 6.026742745822826, + "learning_rate": 1.7826296678874729e-06, + "loss": 0.02886962890625, + "step": 146415 + }, + { + "epoch": 1.2660504448729366, + "grad_norm": 4.84965471352583, + "learning_rate": 1.7824434654440656e-06, + "loss": 0.025492095947265626, + "step": 146420 + }, + { + "epoch": 1.2660936783944798, + "grad_norm": 7.658280609465227, + "learning_rate": 1.7822572686159211e-06, + "loss": 0.0947113037109375, + "step": 146425 + }, + { + "epoch": 1.2661369119160233, + "grad_norm": 4.720148282889979, + "learning_rate": 1.7820710774038976e-06, + "loss": 0.1873809814453125, + "step": 146430 + }, + { + "epoch": 1.2661801454375665, + "grad_norm": 1.0090556111162452, + "learning_rate": 1.7818848918088559e-06, + "loss": 0.015285491943359375, + "step": 146435 + }, + { + "epoch": 1.2662233789591097, + "grad_norm": 15.209931195398433, + "learning_rate": 1.7816987118316533e-06, + "loss": 0.06375999450683593, + "step": 146440 + }, + { + "epoch": 1.266266612480653, + "grad_norm": 1.7701963139952985, + "learning_rate": 1.7815125374731486e-06, + "loss": 0.064947509765625, + "step": 146445 + }, + { + "epoch": 1.2663098460021962, + "grad_norm": 0.880861762298148, + "learning_rate": 1.7813263687342003e-06, + "loss": 0.006481170654296875, + "step": 146450 + }, + { + "epoch": 1.2663530795237397, + "grad_norm": 22.05009007524834, + "learning_rate": 1.7811402056156672e-06, + "loss": 0.08459510803222656, + "step": 146455 + }, + { + "epoch": 1.2663963130452829, + "grad_norm": 102.99241933776457, + "learning_rate": 1.7809540481184065e-06, + "loss": 0.15743331909179686, + "step": 146460 + }, + { + "epoch": 1.2664395465668261, + "grad_norm": 4.859279102484517, + "learning_rate": 1.7807678962432797e-06, + "loss": 0.04324607849121094, + "step": 146465 + }, + { + "epoch": 1.2664827800883693, + "grad_norm": 0.9125773056515746, + "learning_rate": 1.7805817499911437e-06, + "loss": 0.03831634521484375, + "step": 146470 + }, + { + "epoch": 1.2665260136099126, + "grad_norm": 4.7603679612832295, + "learning_rate": 1.7803956093628562e-06, + "loss": 0.03459930419921875, + "step": 146475 + }, + { + "epoch": 1.2665692471314558, + "grad_norm": 5.9014348116456645, + "learning_rate": 1.7802094743592762e-06, + "loss": 0.08191547393798829, + "step": 146480 + }, + { + "epoch": 1.266612480652999, + "grad_norm": 39.27523594685748, + "learning_rate": 1.780023344981263e-06, + "loss": 0.12544708251953124, + "step": 146485 + }, + { + "epoch": 1.2666557141745423, + "grad_norm": 5.78828327697317, + "learning_rate": 1.7798372212296729e-06, + "loss": 0.03988723754882813, + "step": 146490 + }, + { + "epoch": 1.2666989476960857, + "grad_norm": 13.416532093247016, + "learning_rate": 1.7796511031053674e-06, + "loss": 0.0431365966796875, + "step": 146495 + }, + { + "epoch": 1.266742181217629, + "grad_norm": 0.20200875577843766, + "learning_rate": 1.7794649906092025e-06, + "loss": 0.18921051025390626, + "step": 146500 + }, + { + "epoch": 1.2667854147391722, + "grad_norm": 80.5478264091069, + "learning_rate": 1.7792788837420377e-06, + "loss": 0.2536712646484375, + "step": 146505 + }, + { + "epoch": 1.2668286482607154, + "grad_norm": 2.266180334059112, + "learning_rate": 1.77909278250473e-06, + "loss": 0.013648414611816406, + "step": 146510 + }, + { + "epoch": 1.2668718817822586, + "grad_norm": 0.06754506687911198, + "learning_rate": 1.7789066868981392e-06, + "loss": 0.15136260986328126, + "step": 146515 + }, + { + "epoch": 1.266915115303802, + "grad_norm": 1.6395692929875965, + "learning_rate": 1.778720596923123e-06, + "loss": 0.0275634765625, + "step": 146520 + }, + { + "epoch": 1.2669583488253453, + "grad_norm": 15.691348492578902, + "learning_rate": 1.7785345125805382e-06, + "loss": 0.02584381103515625, + "step": 146525 + }, + { + "epoch": 1.2670015823468885, + "grad_norm": 25.664944534753843, + "learning_rate": 1.778348433871245e-06, + "loss": 0.07025318145751953, + "step": 146530 + }, + { + "epoch": 1.2670448158684318, + "grad_norm": 5.510367534065819, + "learning_rate": 1.7781623607961017e-06, + "loss": 0.018989944458007814, + "step": 146535 + }, + { + "epoch": 1.267088049389975, + "grad_norm": 0.21963923130386648, + "learning_rate": 1.7779762933559642e-06, + "loss": 0.017458534240722655, + "step": 146540 + }, + { + "epoch": 1.2671312829115182, + "grad_norm": 0.5358446919478406, + "learning_rate": 1.777790231551693e-06, + "loss": 0.08803939819335938, + "step": 146545 + }, + { + "epoch": 1.2671745164330614, + "grad_norm": 1.0900057065304878, + "learning_rate": 1.7776041753841459e-06, + "loss": 0.059732246398925784, + "step": 146550 + }, + { + "epoch": 1.2672177499546047, + "grad_norm": 0.13080530271457028, + "learning_rate": 1.7774181248541783e-06, + "loss": 0.02763519287109375, + "step": 146555 + }, + { + "epoch": 1.2672609834761481, + "grad_norm": 0.8237669409796048, + "learning_rate": 1.7772320799626518e-06, + "loss": 0.019928741455078124, + "step": 146560 + }, + { + "epoch": 1.2673042169976914, + "grad_norm": 3.7548196027048837, + "learning_rate": 1.777046040710423e-06, + "loss": 0.061553955078125, + "step": 146565 + }, + { + "epoch": 1.2673474505192346, + "grad_norm": 0.9251178988627641, + "learning_rate": 1.7768600070983487e-06, + "loss": 0.02408294677734375, + "step": 146570 + }, + { + "epoch": 1.2673906840407778, + "grad_norm": 3.33393854228147, + "learning_rate": 1.776673979127289e-06, + "loss": 0.0471588134765625, + "step": 146575 + }, + { + "epoch": 1.267433917562321, + "grad_norm": 44.237450440910635, + "learning_rate": 1.776487956798101e-06, + "loss": 0.07031440734863281, + "step": 146580 + }, + { + "epoch": 1.2674771510838645, + "grad_norm": 7.790366162352499, + "learning_rate": 1.776301940111641e-06, + "loss": 0.0234588623046875, + "step": 146585 + }, + { + "epoch": 1.2675203846054077, + "grad_norm": 0.23382922284418037, + "learning_rate": 1.7761159290687692e-06, + "loss": 0.0455657958984375, + "step": 146590 + }, + { + "epoch": 1.267563618126951, + "grad_norm": 6.479130043867644, + "learning_rate": 1.7759299236703432e-06, + "loss": 0.05876007080078125, + "step": 146595 + }, + { + "epoch": 1.2676068516484942, + "grad_norm": 11.933083762741122, + "learning_rate": 1.7757439239172191e-06, + "loss": 0.04574928283691406, + "step": 146600 + }, + { + "epoch": 1.2676500851700374, + "grad_norm": 5.514763931871551, + "learning_rate": 1.7755579298102568e-06, + "loss": 0.15270004272460938, + "step": 146605 + }, + { + "epoch": 1.2676933186915806, + "grad_norm": 0.9654428797954759, + "learning_rate": 1.775371941350313e-06, + "loss": 0.016897964477539062, + "step": 146610 + }, + { + "epoch": 1.2677365522131239, + "grad_norm": 0.6000862324410955, + "learning_rate": 1.7751859585382457e-06, + "loss": 0.03598785400390625, + "step": 146615 + }, + { + "epoch": 1.267779785734667, + "grad_norm": 0.599287849344039, + "learning_rate": 1.774999981374911e-06, + "loss": 0.14223594665527345, + "step": 146620 + }, + { + "epoch": 1.2678230192562105, + "grad_norm": 9.734263639673605, + "learning_rate": 1.774814009861169e-06, + "loss": 0.040488815307617186, + "step": 146625 + }, + { + "epoch": 1.2678662527777538, + "grad_norm": 13.611165570051055, + "learning_rate": 1.7746280439978771e-06, + "loss": 0.029549407958984374, + "step": 146630 + }, + { + "epoch": 1.267909486299297, + "grad_norm": 5.471875946725065, + "learning_rate": 1.7744420837858926e-06, + "loss": 0.059879112243652347, + "step": 146635 + }, + { + "epoch": 1.2679527198208402, + "grad_norm": 0.8270063241534169, + "learning_rate": 1.7742561292260726e-06, + "loss": 0.10394420623779296, + "step": 146640 + }, + { + "epoch": 1.2679959533423837, + "grad_norm": 1.4586840475466658, + "learning_rate": 1.7740701803192752e-06, + "loss": 0.016463470458984376, + "step": 146645 + }, + { + "epoch": 1.268039186863927, + "grad_norm": 18.916556561743754, + "learning_rate": 1.773884237066356e-06, + "loss": 0.06734390258789062, + "step": 146650 + }, + { + "epoch": 1.2680824203854701, + "grad_norm": 0.34554751902054004, + "learning_rate": 1.7736982994681758e-06, + "loss": 0.06328506469726562, + "step": 146655 + }, + { + "epoch": 1.2681256539070134, + "grad_norm": 10.154260639979475, + "learning_rate": 1.7735123675255908e-06, + "loss": 0.05790557861328125, + "step": 146660 + }, + { + "epoch": 1.2681688874285566, + "grad_norm": 0.6601457247651252, + "learning_rate": 1.7733264412394585e-06, + "loss": 0.012054443359375, + "step": 146665 + }, + { + "epoch": 1.2682121209500998, + "grad_norm": 0.7975512042271128, + "learning_rate": 1.7731405206106369e-06, + "loss": 0.018905830383300782, + "step": 146670 + }, + { + "epoch": 1.268255354471643, + "grad_norm": 18.243221822092664, + "learning_rate": 1.7729546056399814e-06, + "loss": 0.13332672119140626, + "step": 146675 + }, + { + "epoch": 1.2682985879931863, + "grad_norm": 0.41327375954997103, + "learning_rate": 1.7727686963283518e-06, + "loss": 0.1465301513671875, + "step": 146680 + }, + { + "epoch": 1.2683418215147297, + "grad_norm": 8.35038739071777, + "learning_rate": 1.7725827926766032e-06, + "loss": 0.1237335205078125, + "step": 146685 + }, + { + "epoch": 1.268385055036273, + "grad_norm": 1.7569586588097688, + "learning_rate": 1.7723968946855952e-06, + "loss": 0.016793060302734374, + "step": 146690 + }, + { + "epoch": 1.2684282885578162, + "grad_norm": 62.56603437125767, + "learning_rate": 1.7722110023561848e-06, + "loss": 0.38873748779296874, + "step": 146695 + }, + { + "epoch": 1.2684715220793594, + "grad_norm": 10.334883642910794, + "learning_rate": 1.7720251156892285e-06, + "loss": 0.018058013916015626, + "step": 146700 + }, + { + "epoch": 1.2685147556009027, + "grad_norm": 13.040677411441651, + "learning_rate": 1.7718392346855831e-06, + "loss": 0.033317184448242186, + "step": 146705 + }, + { + "epoch": 1.268557989122446, + "grad_norm": 1.9462422940263377, + "learning_rate": 1.7716533593461074e-06, + "loss": 0.17498626708984374, + "step": 146710 + }, + { + "epoch": 1.2686012226439893, + "grad_norm": 0.03811332276385615, + "learning_rate": 1.7714674896716567e-06, + "loss": 0.006806755065917968, + "step": 146715 + }, + { + "epoch": 1.2686444561655326, + "grad_norm": 6.102170247377011, + "learning_rate": 1.7712816256630909e-06, + "loss": 0.0609771728515625, + "step": 146720 + }, + { + "epoch": 1.2686876896870758, + "grad_norm": 82.18775404381473, + "learning_rate": 1.771095767321265e-06, + "loss": 0.34397258758544924, + "step": 146725 + }, + { + "epoch": 1.268730923208619, + "grad_norm": 18.469406187184557, + "learning_rate": 1.7709099146470373e-06, + "loss": 0.262188720703125, + "step": 146730 + }, + { + "epoch": 1.2687741567301623, + "grad_norm": 1.1158906863051483, + "learning_rate": 1.7707240676412634e-06, + "loss": 0.0274383544921875, + "step": 146735 + }, + { + "epoch": 1.2688173902517055, + "grad_norm": 1.1363228143789041, + "learning_rate": 1.7705382263048025e-06, + "loss": 0.03648567199707031, + "step": 146740 + }, + { + "epoch": 1.2688606237732487, + "grad_norm": 11.765143688653641, + "learning_rate": 1.7703523906385096e-06, + "loss": 0.03943071365356445, + "step": 146745 + }, + { + "epoch": 1.2689038572947922, + "grad_norm": 5.439243436903285, + "learning_rate": 1.7701665606432436e-06, + "loss": 0.2434467315673828, + "step": 146750 + }, + { + "epoch": 1.2689470908163354, + "grad_norm": 0.09952644209167798, + "learning_rate": 1.7699807363198612e-06, + "loss": 0.026914215087890624, + "step": 146755 + }, + { + "epoch": 1.2689903243378786, + "grad_norm": 6.438921090948533, + "learning_rate": 1.7697949176692188e-06, + "loss": 0.055490875244140626, + "step": 146760 + }, + { + "epoch": 1.2690335578594218, + "grad_norm": 35.77454703642094, + "learning_rate": 1.7696091046921726e-06, + "loss": 0.049295806884765626, + "step": 146765 + }, + { + "epoch": 1.269076791380965, + "grad_norm": 10.4778737950124, + "learning_rate": 1.7694232973895812e-06, + "loss": 0.04398136138916016, + "step": 146770 + }, + { + "epoch": 1.2691200249025085, + "grad_norm": 2.6581484947267207, + "learning_rate": 1.7692374957623012e-06, + "loss": 0.03326873779296875, + "step": 146775 + }, + { + "epoch": 1.2691632584240518, + "grad_norm": 0.4516998336678568, + "learning_rate": 1.7690516998111875e-06, + "loss": 0.084716796875, + "step": 146780 + }, + { + "epoch": 1.269206491945595, + "grad_norm": 0.7230457788448463, + "learning_rate": 1.7688659095370998e-06, + "loss": 0.02989368438720703, + "step": 146785 + }, + { + "epoch": 1.2692497254671382, + "grad_norm": 1.6305837791080868, + "learning_rate": 1.768680124940894e-06, + "loss": 0.0318267822265625, + "step": 146790 + }, + { + "epoch": 1.2692929589886814, + "grad_norm": 0.19719828351978683, + "learning_rate": 1.768494346023426e-06, + "loss": 0.009812545776367188, + "step": 146795 + }, + { + "epoch": 1.2693361925102247, + "grad_norm": 4.502771406851766, + "learning_rate": 1.7683085727855539e-06, + "loss": 0.05888786315917969, + "step": 146800 + }, + { + "epoch": 1.269379426031768, + "grad_norm": 3.927671975834571, + "learning_rate": 1.7681228052281338e-06, + "loss": 0.15326156616210937, + "step": 146805 + }, + { + "epoch": 1.2694226595533111, + "grad_norm": 1.2351633111088045, + "learning_rate": 1.767937043352021e-06, + "loss": 0.012562370300292969, + "step": 146810 + }, + { + "epoch": 1.2694658930748546, + "grad_norm": 0.4062182318862294, + "learning_rate": 1.7677512871580752e-06, + "loss": 0.20044708251953125, + "step": 146815 + }, + { + "epoch": 1.2695091265963978, + "grad_norm": 0.4778644002222492, + "learning_rate": 1.7675655366471518e-06, + "loss": 0.008794403076171875, + "step": 146820 + }, + { + "epoch": 1.269552360117941, + "grad_norm": 4.909746511418852, + "learning_rate": 1.767379791820106e-06, + "loss": 0.07470779418945313, + "step": 146825 + }, + { + "epoch": 1.2695955936394843, + "grad_norm": 1.8806252890428459, + "learning_rate": 1.7671940526777968e-06, + "loss": 0.033367919921875, + "step": 146830 + }, + { + "epoch": 1.2696388271610275, + "grad_norm": 49.710562386831924, + "learning_rate": 1.7670083192210794e-06, + "loss": 0.4088153839111328, + "step": 146835 + }, + { + "epoch": 1.269682060682571, + "grad_norm": 0.3482753920487356, + "learning_rate": 1.7668225914508096e-06, + "loss": 0.011597442626953124, + "step": 146840 + }, + { + "epoch": 1.2697252942041142, + "grad_norm": 0.21762160909348363, + "learning_rate": 1.7666368693678465e-06, + "loss": 0.10012779235839844, + "step": 146845 + }, + { + "epoch": 1.2697685277256574, + "grad_norm": 0.3346632742841278, + "learning_rate": 1.7664511529730441e-06, + "loss": 0.127099609375, + "step": 146850 + }, + { + "epoch": 1.2698117612472006, + "grad_norm": 12.89082149656481, + "learning_rate": 1.7662654422672608e-06, + "loss": 0.0258087158203125, + "step": 146855 + }, + { + "epoch": 1.2698549947687439, + "grad_norm": 9.71596806880014, + "learning_rate": 1.7660797372513524e-06, + "loss": 0.03893623352050781, + "step": 146860 + }, + { + "epoch": 1.269898228290287, + "grad_norm": 0.17274350930636376, + "learning_rate": 1.765894037926175e-06, + "loss": 0.00937957763671875, + "step": 146865 + }, + { + "epoch": 1.2699414618118303, + "grad_norm": 1.6824935587703187, + "learning_rate": 1.7657083442925856e-06, + "loss": 0.1677734375, + "step": 146870 + }, + { + "epoch": 1.2699846953333735, + "grad_norm": 0.6890370837863657, + "learning_rate": 1.7655226563514383e-06, + "loss": 0.06474838256835938, + "step": 146875 + }, + { + "epoch": 1.270027928854917, + "grad_norm": 49.19099023448728, + "learning_rate": 1.765336974103593e-06, + "loss": 0.07311782836914063, + "step": 146880 + }, + { + "epoch": 1.2700711623764602, + "grad_norm": 0.5142956399927106, + "learning_rate": 1.7651512975499047e-06, + "loss": 0.04686794281005859, + "step": 146885 + }, + { + "epoch": 1.2701143958980035, + "grad_norm": 0.41085887542438, + "learning_rate": 1.7649656266912291e-06, + "loss": 0.13770017623901368, + "step": 146890 + }, + { + "epoch": 1.2701576294195467, + "grad_norm": 10.452906407905436, + "learning_rate": 1.7647799615284235e-06, + "loss": 0.056447601318359374, + "step": 146895 + }, + { + "epoch": 1.2702008629410901, + "grad_norm": 11.925488640199777, + "learning_rate": 1.7645943020623426e-06, + "loss": 0.04156494140625, + "step": 146900 + }, + { + "epoch": 1.2702440964626334, + "grad_norm": 0.08001019042256584, + "learning_rate": 1.7644086482938434e-06, + "loss": 0.029207992553710937, + "step": 146905 + }, + { + "epoch": 1.2702873299841766, + "grad_norm": 7.14798799670468, + "learning_rate": 1.7642230002237827e-06, + "loss": 0.05311126708984375, + "step": 146910 + }, + { + "epoch": 1.2703305635057198, + "grad_norm": 1.0902969497902513, + "learning_rate": 1.7640373578530167e-06, + "loss": 0.01048431396484375, + "step": 146915 + }, + { + "epoch": 1.270373797027263, + "grad_norm": 10.908437458534513, + "learning_rate": 1.7638517211824016e-06, + "loss": 0.07399120330810546, + "step": 146920 + }, + { + "epoch": 1.2704170305488063, + "grad_norm": 0.29313456994366743, + "learning_rate": 1.7636660902127929e-06, + "loss": 0.012622833251953125, + "step": 146925 + }, + { + "epoch": 1.2704602640703495, + "grad_norm": 3.8340634492126857, + "learning_rate": 1.7634804649450463e-06, + "loss": 0.044436073303222655, + "step": 146930 + }, + { + "epoch": 1.2705034975918927, + "grad_norm": 11.499960024236366, + "learning_rate": 1.7632948453800185e-06, + "loss": 0.07654266357421875, + "step": 146935 + }, + { + "epoch": 1.2705467311134362, + "grad_norm": 0.3498293793190215, + "learning_rate": 1.7631092315185659e-06, + "loss": 0.18647270202636718, + "step": 146940 + }, + { + "epoch": 1.2705899646349794, + "grad_norm": 5.1253469638464795, + "learning_rate": 1.7629236233615447e-06, + "loss": 0.03794174194335938, + "step": 146945 + }, + { + "epoch": 1.2706331981565226, + "grad_norm": 0.2191643098093055, + "learning_rate": 1.7627380209098103e-06, + "loss": 0.01937103271484375, + "step": 146950 + }, + { + "epoch": 1.2706764316780659, + "grad_norm": 9.235608121228427, + "learning_rate": 1.7625524241642188e-06, + "loss": 0.0366729736328125, + "step": 146955 + }, + { + "epoch": 1.270719665199609, + "grad_norm": 3.1108529636133957, + "learning_rate": 1.7623668331256255e-06, + "loss": 0.02093963623046875, + "step": 146960 + }, + { + "epoch": 1.2707628987211526, + "grad_norm": 0.14382874691009503, + "learning_rate": 1.7621812477948878e-06, + "loss": 0.12401390075683594, + "step": 146965 + }, + { + "epoch": 1.2708061322426958, + "grad_norm": 3.234409732340736, + "learning_rate": 1.7619956681728595e-06, + "loss": 0.025922775268554688, + "step": 146970 + }, + { + "epoch": 1.270849365764239, + "grad_norm": 86.72449795972904, + "learning_rate": 1.761810094260399e-06, + "loss": 0.19646759033203126, + "step": 146975 + }, + { + "epoch": 1.2708925992857822, + "grad_norm": 1.777674315986357, + "learning_rate": 1.7616245260583608e-06, + "loss": 0.01683349609375, + "step": 146980 + }, + { + "epoch": 1.2709358328073255, + "grad_norm": 1.8237939517368877, + "learning_rate": 1.7614389635676012e-06, + "loss": 0.054994964599609376, + "step": 146985 + }, + { + "epoch": 1.2709790663288687, + "grad_norm": 30.985055186354824, + "learning_rate": 1.761253406788975e-06, + "loss": 0.029143905639648436, + "step": 146990 + }, + { + "epoch": 1.271022299850412, + "grad_norm": 2.6929019816211075, + "learning_rate": 1.7610678557233387e-06, + "loss": 0.0886566162109375, + "step": 146995 + }, + { + "epoch": 1.2710655333719552, + "grad_norm": 2.5675786421941242, + "learning_rate": 1.7608823103715476e-06, + "loss": 0.10864028930664063, + "step": 147000 + }, + { + "epoch": 1.2711087668934986, + "grad_norm": 8.760052825653291, + "learning_rate": 1.7606967707344583e-06, + "loss": 0.13162155151367189, + "step": 147005 + }, + { + "epoch": 1.2711520004150418, + "grad_norm": 3.0644823517406268, + "learning_rate": 1.7605112368129265e-06, + "loss": 0.25827484130859374, + "step": 147010 + }, + { + "epoch": 1.271195233936585, + "grad_norm": 2.2623509983624865, + "learning_rate": 1.7603257086078073e-06, + "loss": 0.0435028076171875, + "step": 147015 + }, + { + "epoch": 1.2712384674581283, + "grad_norm": 8.056333390951364, + "learning_rate": 1.7601401861199555e-06, + "loss": 0.06737480163574219, + "step": 147020 + }, + { + "epoch": 1.2712817009796715, + "grad_norm": 2.3800616333201, + "learning_rate": 1.7599546693502283e-06, + "loss": 0.01674957275390625, + "step": 147025 + }, + { + "epoch": 1.271324934501215, + "grad_norm": 0.5699643911425156, + "learning_rate": 1.7597691582994806e-06, + "loss": 0.02180633544921875, + "step": 147030 + }, + { + "epoch": 1.2713681680227582, + "grad_norm": 2.7133858793036896, + "learning_rate": 1.759583652968567e-06, + "loss": 0.35080795288085936, + "step": 147035 + }, + { + "epoch": 1.2714114015443014, + "grad_norm": 52.74584666347691, + "learning_rate": 1.759398153358345e-06, + "loss": 0.1336273193359375, + "step": 147040 + }, + { + "epoch": 1.2714546350658447, + "grad_norm": 6.319928960089721, + "learning_rate": 1.7592126594696687e-06, + "loss": 0.041941070556640626, + "step": 147045 + }, + { + "epoch": 1.271497868587388, + "grad_norm": 1.4781956309205544, + "learning_rate": 1.759027171303394e-06, + "loss": 0.012333297729492187, + "step": 147050 + }, + { + "epoch": 1.2715411021089311, + "grad_norm": 66.86183480353046, + "learning_rate": 1.7588416888603767e-06, + "loss": 0.156878662109375, + "step": 147055 + }, + { + "epoch": 1.2715843356304743, + "grad_norm": 6.8969611691211465, + "learning_rate": 1.7586562121414718e-06, + "loss": 0.036153411865234374, + "step": 147060 + }, + { + "epoch": 1.2716275691520176, + "grad_norm": 0.5632486227039072, + "learning_rate": 1.7584707411475333e-06, + "loss": 0.04991836547851562, + "step": 147065 + }, + { + "epoch": 1.271670802673561, + "grad_norm": 13.280299344606933, + "learning_rate": 1.7582852758794197e-06, + "loss": 0.11170501708984375, + "step": 147070 + }, + { + "epoch": 1.2717140361951043, + "grad_norm": 1.3209186980521685, + "learning_rate": 1.758099816337984e-06, + "loss": 0.014337158203125, + "step": 147075 + }, + { + "epoch": 1.2717572697166475, + "grad_norm": 9.303750231750657, + "learning_rate": 1.7579143625240827e-06, + "loss": 0.16617202758789062, + "step": 147080 + }, + { + "epoch": 1.2718005032381907, + "grad_norm": 0.12623819804647166, + "learning_rate": 1.7577289144385703e-06, + "loss": 0.052508544921875, + "step": 147085 + }, + { + "epoch": 1.271843736759734, + "grad_norm": 0.08750541092752094, + "learning_rate": 1.7575434720823029e-06, + "loss": 0.11390876770019531, + "step": 147090 + }, + { + "epoch": 1.2718869702812774, + "grad_norm": 37.22031385454893, + "learning_rate": 1.7573580354561338e-06, + "loss": 0.12656230926513673, + "step": 147095 + }, + { + "epoch": 1.2719302038028206, + "grad_norm": 2.178212654918961, + "learning_rate": 1.7571726045609211e-06, + "loss": 0.0547698974609375, + "step": 147100 + }, + { + "epoch": 1.2719734373243639, + "grad_norm": 0.4423753295020894, + "learning_rate": 1.7569871793975177e-06, + "loss": 0.07271194458007812, + "step": 147105 + }, + { + "epoch": 1.272016670845907, + "grad_norm": 1.0298523998426177, + "learning_rate": 1.75680175996678e-06, + "loss": 0.0355621337890625, + "step": 147110 + }, + { + "epoch": 1.2720599043674503, + "grad_norm": 40.43873215125101, + "learning_rate": 1.756616346269563e-06, + "loss": 0.1712982177734375, + "step": 147115 + }, + { + "epoch": 1.2721031378889935, + "grad_norm": 0.12790340941999978, + "learning_rate": 1.7564309383067217e-06, + "loss": 0.10086631774902344, + "step": 147120 + }, + { + "epoch": 1.2721463714105368, + "grad_norm": 0.3431649675593611, + "learning_rate": 1.75624553607911e-06, + "loss": 0.049896240234375, + "step": 147125 + }, + { + "epoch": 1.2721896049320802, + "grad_norm": 1.2246786269444385, + "learning_rate": 1.7560601395875844e-06, + "loss": 0.33088531494140627, + "step": 147130 + }, + { + "epoch": 1.2722328384536234, + "grad_norm": 0.6587736780952661, + "learning_rate": 1.7558747488329992e-06, + "loss": 0.1573699951171875, + "step": 147135 + }, + { + "epoch": 1.2722760719751667, + "grad_norm": 41.4042727129067, + "learning_rate": 1.7556893638162105e-06, + "loss": 0.25655555725097656, + "step": 147140 + }, + { + "epoch": 1.27231930549671, + "grad_norm": 1.1268647001876435, + "learning_rate": 1.7555039845380724e-06, + "loss": 0.046112442016601564, + "step": 147145 + }, + { + "epoch": 1.2723625390182531, + "grad_norm": 1.4299612543850873, + "learning_rate": 1.7553186109994401e-06, + "loss": 0.042529296875, + "step": 147150 + }, + { + "epoch": 1.2724057725397966, + "grad_norm": 0.1458262031499102, + "learning_rate": 1.7551332432011677e-06, + "loss": 0.034281158447265626, + "step": 147155 + }, + { + "epoch": 1.2724490060613398, + "grad_norm": 5.018398369607176, + "learning_rate": 1.7549478811441109e-06, + "loss": 0.10683746337890625, + "step": 147160 + }, + { + "epoch": 1.272492239582883, + "grad_norm": 1.0224363892289012, + "learning_rate": 1.754762524829124e-06, + "loss": 0.01152801513671875, + "step": 147165 + }, + { + "epoch": 1.2725354731044263, + "grad_norm": 1.4260640982557315, + "learning_rate": 1.7545771742570636e-06, + "loss": 0.1925872802734375, + "step": 147170 + }, + { + "epoch": 1.2725787066259695, + "grad_norm": 1.17047421619773, + "learning_rate": 1.7543918294287825e-06, + "loss": 0.02530364990234375, + "step": 147175 + }, + { + "epoch": 1.2726219401475127, + "grad_norm": 8.883611710733634, + "learning_rate": 1.7542064903451366e-06, + "loss": 0.0314483642578125, + "step": 147180 + }, + { + "epoch": 1.272665173669056, + "grad_norm": 4.62474646135938, + "learning_rate": 1.7540211570069798e-06, + "loss": 0.0755197525024414, + "step": 147185 + }, + { + "epoch": 1.2727084071905992, + "grad_norm": 46.20326532480721, + "learning_rate": 1.7538358294151672e-06, + "loss": 0.13568077087402344, + "step": 147190 + }, + { + "epoch": 1.2727516407121426, + "grad_norm": 2.1634831453792054, + "learning_rate": 1.7536505075705533e-06, + "loss": 0.0291473388671875, + "step": 147195 + }, + { + "epoch": 1.2727948742336859, + "grad_norm": 1.5955325471343265, + "learning_rate": 1.7534651914739946e-06, + "loss": 0.03307952880859375, + "step": 147200 + }, + { + "epoch": 1.272838107755229, + "grad_norm": 2.9138525877589263, + "learning_rate": 1.7532798811263438e-06, + "loss": 0.0202056884765625, + "step": 147205 + }, + { + "epoch": 1.2728813412767723, + "grad_norm": 0.3017910020102906, + "learning_rate": 1.7530945765284556e-06, + "loss": 0.179656982421875, + "step": 147210 + }, + { + "epoch": 1.2729245747983156, + "grad_norm": 8.788875146822333, + "learning_rate": 1.7529092776811848e-06, + "loss": 0.04554443359375, + "step": 147215 + }, + { + "epoch": 1.272967808319859, + "grad_norm": 2.9371212837903498, + "learning_rate": 1.752723984585387e-06, + "loss": 0.032441329956054685, + "step": 147220 + }, + { + "epoch": 1.2730110418414022, + "grad_norm": 30.19765372702906, + "learning_rate": 1.7525386972419143e-06, + "loss": 0.12169647216796875, + "step": 147225 + }, + { + "epoch": 1.2730542753629455, + "grad_norm": 1.2426127675378387, + "learning_rate": 1.7523534156516248e-06, + "loss": 0.0439178466796875, + "step": 147230 + }, + { + "epoch": 1.2730975088844887, + "grad_norm": 0.28278234398874, + "learning_rate": 1.7521681398153703e-06, + "loss": 0.032566070556640625, + "step": 147235 + }, + { + "epoch": 1.273140742406032, + "grad_norm": 0.5922868025521237, + "learning_rate": 1.7519828697340065e-06, + "loss": 0.0212127685546875, + "step": 147240 + }, + { + "epoch": 1.2731839759275752, + "grad_norm": 5.901182838524975, + "learning_rate": 1.7517976054083872e-06, + "loss": 0.05986175537109375, + "step": 147245 + }, + { + "epoch": 1.2732272094491184, + "grad_norm": 1.8607597502497442, + "learning_rate": 1.7516123468393673e-06, + "loss": 0.03759346008300781, + "step": 147250 + }, + { + "epoch": 1.2732704429706616, + "grad_norm": 1.8984295143283278, + "learning_rate": 1.7514270940278004e-06, + "loss": 0.16548919677734375, + "step": 147255 + }, + { + "epoch": 1.273313676492205, + "grad_norm": 5.942026656348355, + "learning_rate": 1.7512418469745407e-06, + "loss": 0.156097412109375, + "step": 147260 + }, + { + "epoch": 1.2733569100137483, + "grad_norm": 0.06621033496659573, + "learning_rate": 1.7510566056804443e-06, + "loss": 0.0429168701171875, + "step": 147265 + }, + { + "epoch": 1.2734001435352915, + "grad_norm": 3.338994244264297, + "learning_rate": 1.7508713701463645e-06, + "loss": 0.06754417419433593, + "step": 147270 + }, + { + "epoch": 1.2734433770568347, + "grad_norm": 4.490357954211536, + "learning_rate": 1.7506861403731559e-06, + "loss": 0.015114212036132812, + "step": 147275 + }, + { + "epoch": 1.273486610578378, + "grad_norm": 2.9537207078059278, + "learning_rate": 1.7505009163616722e-06, + "loss": 0.04807338714599609, + "step": 147280 + }, + { + "epoch": 1.2735298440999214, + "grad_norm": 0.7714356320239064, + "learning_rate": 1.7503156981127682e-06, + "loss": 0.1000091552734375, + "step": 147285 + }, + { + "epoch": 1.2735730776214647, + "grad_norm": 0.7420476794450952, + "learning_rate": 1.750130485627296e-06, + "loss": 0.037862396240234374, + "step": 147290 + }, + { + "epoch": 1.2736163111430079, + "grad_norm": 21.705250567016446, + "learning_rate": 1.749945278906113e-06, + "loss": 0.05748147964477539, + "step": 147295 + }, + { + "epoch": 1.2736595446645511, + "grad_norm": 0.48619257995655024, + "learning_rate": 1.749760077950072e-06, + "loss": 0.08872432708740234, + "step": 147300 + }, + { + "epoch": 1.2737027781860943, + "grad_norm": 1.0814477529508182, + "learning_rate": 1.7495748827600273e-06, + "loss": 0.14722614288330077, + "step": 147305 + }, + { + "epoch": 1.2737460117076376, + "grad_norm": 0.43684389793667133, + "learning_rate": 1.749389693336831e-06, + "loss": 0.26506500244140624, + "step": 147310 + }, + { + "epoch": 1.2737892452291808, + "grad_norm": 15.733012596470788, + "learning_rate": 1.7492045096813407e-06, + "loss": 0.05047149658203125, + "step": 147315 + }, + { + "epoch": 1.273832478750724, + "grad_norm": 3.621983451171886, + "learning_rate": 1.7490193317944072e-06, + "loss": 0.026491928100585937, + "step": 147320 + }, + { + "epoch": 1.2738757122722675, + "grad_norm": 1.809673379994302, + "learning_rate": 1.7488341596768871e-06, + "loss": 0.025208282470703124, + "step": 147325 + }, + { + "epoch": 1.2739189457938107, + "grad_norm": 1.5943661950154104, + "learning_rate": 1.7486489933296333e-06, + "loss": 0.06102294921875, + "step": 147330 + }, + { + "epoch": 1.273962179315354, + "grad_norm": 0.5498910890098443, + "learning_rate": 1.7484638327535e-06, + "loss": 0.03853874206542969, + "step": 147335 + }, + { + "epoch": 1.2740054128368972, + "grad_norm": 1.5417943226540376, + "learning_rate": 1.7482786779493409e-06, + "loss": 0.030955123901367187, + "step": 147340 + }, + { + "epoch": 1.2740486463584406, + "grad_norm": 1.041221875367025, + "learning_rate": 1.7480935289180097e-06, + "loss": 0.030389404296875, + "step": 147345 + }, + { + "epoch": 1.2740918798799838, + "grad_norm": 3.1335042242852484, + "learning_rate": 1.7479083856603594e-06, + "loss": 0.22273788452148438, + "step": 147350 + }, + { + "epoch": 1.274135113401527, + "grad_norm": 0.9289853483398514, + "learning_rate": 1.747723248177246e-06, + "loss": 0.10132923126220703, + "step": 147355 + }, + { + "epoch": 1.2741783469230703, + "grad_norm": 0.25725433444190154, + "learning_rate": 1.7475381164695228e-06, + "loss": 0.01949596405029297, + "step": 147360 + }, + { + "epoch": 1.2742215804446135, + "grad_norm": 2.784709796932725, + "learning_rate": 1.7473529905380429e-06, + "loss": 0.07273406982421875, + "step": 147365 + }, + { + "epoch": 1.2742648139661568, + "grad_norm": 1.0358427642480323, + "learning_rate": 1.7471678703836592e-06, + "loss": 0.016481399536132812, + "step": 147370 + }, + { + "epoch": 1.2743080474877, + "grad_norm": 4.735880596984376, + "learning_rate": 1.7469827560072278e-06, + "loss": 0.03046722412109375, + "step": 147375 + }, + { + "epoch": 1.2743512810092432, + "grad_norm": 5.409846659756351, + "learning_rate": 1.746797647409601e-06, + "loss": 0.0485076904296875, + "step": 147380 + }, + { + "epoch": 1.2743945145307867, + "grad_norm": 1.4291860461484065, + "learning_rate": 1.7466125445916317e-06, + "loss": 0.01501312255859375, + "step": 147385 + }, + { + "epoch": 1.27443774805233, + "grad_norm": 4.017400315940309, + "learning_rate": 1.7464274475541758e-06, + "loss": 0.0245513916015625, + "step": 147390 + }, + { + "epoch": 1.2744809815738731, + "grad_norm": 1.4922853622200372, + "learning_rate": 1.746242356298085e-06, + "loss": 0.03807144165039063, + "step": 147395 + }, + { + "epoch": 1.2745242150954164, + "grad_norm": 44.78815775082465, + "learning_rate": 1.7460572708242146e-06, + "loss": 0.24233970642089844, + "step": 147400 + }, + { + "epoch": 1.2745674486169596, + "grad_norm": 34.79079636068095, + "learning_rate": 1.7458721911334171e-06, + "loss": 0.244244384765625, + "step": 147405 + }, + { + "epoch": 1.274610682138503, + "grad_norm": 25.40565187606553, + "learning_rate": 1.7456871172265462e-06, + "loss": 0.16433334350585938, + "step": 147410 + }, + { + "epoch": 1.2746539156600463, + "grad_norm": 21.02895864668925, + "learning_rate": 1.7455020491044542e-06, + "loss": 0.2944000244140625, + "step": 147415 + }, + { + "epoch": 1.2746971491815895, + "grad_norm": 5.24572655600625, + "learning_rate": 1.7453169867679972e-06, + "loss": 0.033930206298828126, + "step": 147420 + }, + { + "epoch": 1.2747403827031327, + "grad_norm": 10.868299810840298, + "learning_rate": 1.7451319302180274e-06, + "loss": 0.15138587951660157, + "step": 147425 + }, + { + "epoch": 1.274783616224676, + "grad_norm": 1.4138961685174343, + "learning_rate": 1.7449468794553966e-06, + "loss": 0.06172637939453125, + "step": 147430 + }, + { + "epoch": 1.2748268497462192, + "grad_norm": 10.385168916239516, + "learning_rate": 1.7447618344809617e-06, + "loss": 0.089300537109375, + "step": 147435 + }, + { + "epoch": 1.2748700832677624, + "grad_norm": 2.2371411915120683, + "learning_rate": 1.7445767952955738e-06, + "loss": 0.020928955078125, + "step": 147440 + }, + { + "epoch": 1.2749133167893056, + "grad_norm": 1.7787451227019915, + "learning_rate": 1.7443917619000863e-06, + "loss": 0.03491363525390625, + "step": 147445 + }, + { + "epoch": 1.274956550310849, + "grad_norm": 3.2933645418044595, + "learning_rate": 1.7442067342953534e-06, + "loss": 0.01978015899658203, + "step": 147450 + }, + { + "epoch": 1.2749997838323923, + "grad_norm": 7.281655497765242, + "learning_rate": 1.7440217124822287e-06, + "loss": 0.07226448059082032, + "step": 147455 + }, + { + "epoch": 1.2750430173539355, + "grad_norm": 0.8401171177642601, + "learning_rate": 1.7438366964615646e-06, + "loss": 0.0490447998046875, + "step": 147460 + }, + { + "epoch": 1.2750862508754788, + "grad_norm": 4.166209801107811, + "learning_rate": 1.7436516862342144e-06, + "loss": 0.018527984619140625, + "step": 147465 + }, + { + "epoch": 1.275129484397022, + "grad_norm": 0.24300105888906756, + "learning_rate": 1.7434666818010321e-06, + "loss": 0.017159271240234374, + "step": 147470 + }, + { + "epoch": 1.2751727179185655, + "grad_norm": 0.8517597322359146, + "learning_rate": 1.74328168316287e-06, + "loss": 0.058310317993164065, + "step": 147475 + }, + { + "epoch": 1.2752159514401087, + "grad_norm": 0.7204321875563432, + "learning_rate": 1.7430966903205814e-06, + "loss": 0.00841522216796875, + "step": 147480 + }, + { + "epoch": 1.275259184961652, + "grad_norm": 1.2076514959346565, + "learning_rate": 1.742911703275019e-06, + "loss": 0.043450927734375, + "step": 147485 + }, + { + "epoch": 1.2753024184831951, + "grad_norm": 8.595461681398843, + "learning_rate": 1.7427267220270381e-06, + "loss": 0.08821182250976563, + "step": 147490 + }, + { + "epoch": 1.2753456520047384, + "grad_norm": 2.3234622217293284, + "learning_rate": 1.7425417465774906e-06, + "loss": 0.09036846160888672, + "step": 147495 + }, + { + "epoch": 1.2753888855262816, + "grad_norm": 5.400075526692265, + "learning_rate": 1.7423567769272291e-06, + "loss": 0.03788681030273437, + "step": 147500 + }, + { + "epoch": 1.2754321190478248, + "grad_norm": 3.467142212335881, + "learning_rate": 1.7421718130771077e-06, + "loss": 0.04729537963867188, + "step": 147505 + }, + { + "epoch": 1.275475352569368, + "grad_norm": 24.513448855764214, + "learning_rate": 1.7419868550279767e-06, + "loss": 0.09659347534179688, + "step": 147510 + }, + { + "epoch": 1.2755185860909115, + "grad_norm": 10.082799164766108, + "learning_rate": 1.741801902780693e-06, + "loss": 0.0900634765625, + "step": 147515 + }, + { + "epoch": 1.2755618196124547, + "grad_norm": 0.17194193494065615, + "learning_rate": 1.7416169563361076e-06, + "loss": 0.10961074829101562, + "step": 147520 + }, + { + "epoch": 1.275605053133998, + "grad_norm": 6.030331521173895, + "learning_rate": 1.7414320156950735e-06, + "loss": 0.0472991943359375, + "step": 147525 + }, + { + "epoch": 1.2756482866555412, + "grad_norm": 11.56816518184554, + "learning_rate": 1.741247080858444e-06, + "loss": 0.32183074951171875, + "step": 147530 + }, + { + "epoch": 1.2756915201770844, + "grad_norm": 23.943395434362248, + "learning_rate": 1.74106215182707e-06, + "loss": 0.038619041442871094, + "step": 147535 + }, + { + "epoch": 1.2757347536986279, + "grad_norm": 4.250974259853588, + "learning_rate": 1.7408772286018077e-06, + "loss": 0.06263179779052734, + "step": 147540 + }, + { + "epoch": 1.275777987220171, + "grad_norm": 0.7536825739906332, + "learning_rate": 1.7406923111835074e-06, + "loss": 0.02358856201171875, + "step": 147545 + }, + { + "epoch": 1.2758212207417143, + "grad_norm": 1.2258501158128041, + "learning_rate": 1.7405073995730235e-06, + "loss": 0.054486846923828124, + "step": 147550 + }, + { + "epoch": 1.2758644542632576, + "grad_norm": 6.837515892572705, + "learning_rate": 1.7403224937712085e-06, + "loss": 0.03953857421875, + "step": 147555 + }, + { + "epoch": 1.2759076877848008, + "grad_norm": 0.9123728405049559, + "learning_rate": 1.7401375937789147e-06, + "loss": 0.0178070068359375, + "step": 147560 + }, + { + "epoch": 1.275950921306344, + "grad_norm": 4.132459892042112, + "learning_rate": 1.7399526995969953e-06, + "loss": 0.05679779052734375, + "step": 147565 + }, + { + "epoch": 1.2759941548278873, + "grad_norm": 0.17204907234567493, + "learning_rate": 1.7397678112263025e-06, + "loss": 0.06262969970703125, + "step": 147570 + }, + { + "epoch": 1.2760373883494305, + "grad_norm": 0.42028781838834534, + "learning_rate": 1.7395829286676879e-06, + "loss": 0.01669921875, + "step": 147575 + }, + { + "epoch": 1.276080621870974, + "grad_norm": 2.411477489499507, + "learning_rate": 1.7393980519220066e-06, + "loss": 0.24878959655761718, + "step": 147580 + }, + { + "epoch": 1.2761238553925172, + "grad_norm": 2.8172980065225994, + "learning_rate": 1.7392131809901103e-06, + "loss": 0.0209686279296875, + "step": 147585 + }, + { + "epoch": 1.2761670889140604, + "grad_norm": 1.4219651519164997, + "learning_rate": 1.7390283158728515e-06, + "loss": 0.006448078155517578, + "step": 147590 + }, + { + "epoch": 1.2762103224356036, + "grad_norm": 43.642218089801624, + "learning_rate": 1.7388434565710813e-06, + "loss": 0.20093135833740233, + "step": 147595 + }, + { + "epoch": 1.276253555957147, + "grad_norm": 27.97553702172424, + "learning_rate": 1.7386586030856552e-06, + "loss": 0.08603286743164062, + "step": 147600 + }, + { + "epoch": 1.2762967894786903, + "grad_norm": 9.9119516644393, + "learning_rate": 1.7384737554174226e-06, + "loss": 0.22733116149902344, + "step": 147605 + }, + { + "epoch": 1.2763400230002335, + "grad_norm": 3.1209429987023336, + "learning_rate": 1.738288913567239e-06, + "loss": 0.06884498596191406, + "step": 147610 + }, + { + "epoch": 1.2763832565217768, + "grad_norm": 30.486916730767025, + "learning_rate": 1.7381040775359554e-06, + "loss": 0.1955799102783203, + "step": 147615 + }, + { + "epoch": 1.27642649004332, + "grad_norm": 0.28876956415294697, + "learning_rate": 1.7379192473244242e-06, + "loss": 0.01645355224609375, + "step": 147620 + }, + { + "epoch": 1.2764697235648632, + "grad_norm": 0.49537314101985386, + "learning_rate": 1.7377344229334975e-06, + "loss": 0.035167694091796875, + "step": 147625 + }, + { + "epoch": 1.2765129570864064, + "grad_norm": 4.448912057242801, + "learning_rate": 1.737549604364029e-06, + "loss": 0.2906646728515625, + "step": 147630 + }, + { + "epoch": 1.2765561906079497, + "grad_norm": 2.3464022618545246, + "learning_rate": 1.7373647916168695e-06, + "loss": 0.033161163330078125, + "step": 147635 + }, + { + "epoch": 1.2765994241294931, + "grad_norm": 0.20809489200262377, + "learning_rate": 1.737179984692871e-06, + "loss": 0.015103912353515625, + "step": 147640 + }, + { + "epoch": 1.2766426576510363, + "grad_norm": 0.16683079957660665, + "learning_rate": 1.7369951835928882e-06, + "loss": 0.013175582885742188, + "step": 147645 + }, + { + "epoch": 1.2766858911725796, + "grad_norm": 34.068231527410106, + "learning_rate": 1.7368103883177716e-06, + "loss": 0.07843017578125, + "step": 147650 + }, + { + "epoch": 1.2767291246941228, + "grad_norm": 0.32836938037565877, + "learning_rate": 1.736625598868373e-06, + "loss": 0.04041900634765625, + "step": 147655 + }, + { + "epoch": 1.276772358215666, + "grad_norm": 0.9227868492776349, + "learning_rate": 1.7364408152455467e-06, + "loss": 0.05728302001953125, + "step": 147660 + }, + { + "epoch": 1.2768155917372095, + "grad_norm": 15.19626079167544, + "learning_rate": 1.7362560374501435e-06, + "loss": 0.07416610717773438, + "step": 147665 + }, + { + "epoch": 1.2768588252587527, + "grad_norm": 0.08122335112976549, + "learning_rate": 1.7360712654830144e-06, + "loss": 0.1821868896484375, + "step": 147670 + }, + { + "epoch": 1.276902058780296, + "grad_norm": 8.818814441002473, + "learning_rate": 1.7358864993450148e-06, + "loss": 0.09079055786132813, + "step": 147675 + }, + { + "epoch": 1.2769452923018392, + "grad_norm": 0.557533977731093, + "learning_rate": 1.7357017390369944e-06, + "loss": 0.042176437377929685, + "step": 147680 + }, + { + "epoch": 1.2769885258233824, + "grad_norm": 54.54764858252828, + "learning_rate": 1.735516984559806e-06, + "loss": 0.07818088531494141, + "step": 147685 + }, + { + "epoch": 1.2770317593449256, + "grad_norm": 11.602172848959642, + "learning_rate": 1.7353322359143016e-06, + "loss": 0.03804779052734375, + "step": 147690 + }, + { + "epoch": 1.2770749928664689, + "grad_norm": 1.6675622841776967, + "learning_rate": 1.7351474931013332e-06, + "loss": 0.026381683349609376, + "step": 147695 + }, + { + "epoch": 1.277118226388012, + "grad_norm": 2.155988289783583, + "learning_rate": 1.7349627561217518e-06, + "loss": 0.026912117004394533, + "step": 147700 + }, + { + "epoch": 1.2771614599095555, + "grad_norm": 14.940386786068617, + "learning_rate": 1.7347780249764113e-06, + "loss": 0.08371353149414062, + "step": 147705 + }, + { + "epoch": 1.2772046934310988, + "grad_norm": 1.156776890649015, + "learning_rate": 1.7345932996661618e-06, + "loss": 0.13481063842773439, + "step": 147710 + }, + { + "epoch": 1.277247926952642, + "grad_norm": 0.3752452778220673, + "learning_rate": 1.7344085801918573e-06, + "loss": 0.08085365295410156, + "step": 147715 + }, + { + "epoch": 1.2772911604741852, + "grad_norm": 0.95142163989171, + "learning_rate": 1.7342238665543484e-06, + "loss": 0.46436080932617185, + "step": 147720 + }, + { + "epoch": 1.2773343939957285, + "grad_norm": 11.00807265341361, + "learning_rate": 1.7340391587544872e-06, + "loss": 0.1048095703125, + "step": 147725 + }, + { + "epoch": 1.277377627517272, + "grad_norm": 2.422524608772564, + "learning_rate": 1.7338544567931259e-06, + "loss": 0.061709022521972655, + "step": 147730 + }, + { + "epoch": 1.2774208610388151, + "grad_norm": 1.3224036448431555, + "learning_rate": 1.733669760671114e-06, + "loss": 0.02398681640625, + "step": 147735 + }, + { + "epoch": 1.2774640945603584, + "grad_norm": 16.824673610370755, + "learning_rate": 1.7334850703893072e-06, + "loss": 0.15431594848632812, + "step": 147740 + }, + { + "epoch": 1.2775073280819016, + "grad_norm": 0.9061478252171842, + "learning_rate": 1.7333003859485551e-06, + "loss": 0.01493988037109375, + "step": 147745 + }, + { + "epoch": 1.2775505616034448, + "grad_norm": 0.27040261883358924, + "learning_rate": 1.73311570734971e-06, + "loss": 0.02916259765625, + "step": 147750 + }, + { + "epoch": 1.277593795124988, + "grad_norm": 0.09151052989080422, + "learning_rate": 1.7329310345936226e-06, + "loss": 0.036882781982421876, + "step": 147755 + }, + { + "epoch": 1.2776370286465313, + "grad_norm": 1.7390839627757801, + "learning_rate": 1.7327463676811446e-06, + "loss": 0.05523109436035156, + "step": 147760 + }, + { + "epoch": 1.2776802621680745, + "grad_norm": 0.36459950495664367, + "learning_rate": 1.7325617066131296e-06, + "loss": 0.143780517578125, + "step": 147765 + }, + { + "epoch": 1.277723495689618, + "grad_norm": 23.193459372339127, + "learning_rate": 1.7323770513904267e-06, + "loss": 0.06172943115234375, + "step": 147770 + }, + { + "epoch": 1.2777667292111612, + "grad_norm": 33.140021690414216, + "learning_rate": 1.7321924020138902e-06, + "loss": 0.2201385498046875, + "step": 147775 + }, + { + "epoch": 1.2778099627327044, + "grad_norm": 0.6772326082398591, + "learning_rate": 1.7320077584843702e-06, + "loss": 0.13920097351074218, + "step": 147780 + }, + { + "epoch": 1.2778531962542476, + "grad_norm": 6.732116413736642, + "learning_rate": 1.7318231208027182e-06, + "loss": 0.034820556640625, + "step": 147785 + }, + { + "epoch": 1.2778964297757909, + "grad_norm": 0.04441229439225368, + "learning_rate": 1.7316384889697862e-06, + "loss": 0.041305923461914064, + "step": 147790 + }, + { + "epoch": 1.2779396632973343, + "grad_norm": 4.630744842242504, + "learning_rate": 1.7314538629864239e-06, + "loss": 0.043950653076171874, + "step": 147795 + }, + { + "epoch": 1.2779828968188776, + "grad_norm": 3.9606628739858087, + "learning_rate": 1.7312692428534855e-06, + "loss": 0.03337554931640625, + "step": 147800 + }, + { + "epoch": 1.2780261303404208, + "grad_norm": 7.122218657493884, + "learning_rate": 1.7310846285718215e-06, + "loss": 0.11379165649414062, + "step": 147805 + }, + { + "epoch": 1.278069363861964, + "grad_norm": 3.077340830614984, + "learning_rate": 1.7309000201422832e-06, + "loss": 0.09119949340820313, + "step": 147810 + }, + { + "epoch": 1.2781125973835072, + "grad_norm": 2.2243473993701466, + "learning_rate": 1.7307154175657214e-06, + "loss": 0.01605224609375, + "step": 147815 + }, + { + "epoch": 1.2781558309050505, + "grad_norm": 0.6953877815266389, + "learning_rate": 1.730530820842987e-06, + "loss": 0.03826065063476562, + "step": 147820 + }, + { + "epoch": 1.2781990644265937, + "grad_norm": 0.07359269616405657, + "learning_rate": 1.7303462299749335e-06, + "loss": 0.025485992431640625, + "step": 147825 + }, + { + "epoch": 1.278242297948137, + "grad_norm": 6.7170987953013075, + "learning_rate": 1.73016164496241e-06, + "loss": 0.21812896728515624, + "step": 147830 + }, + { + "epoch": 1.2782855314696804, + "grad_norm": 0.6931291727858018, + "learning_rate": 1.72997706580627e-06, + "loss": 0.056623077392578124, + "step": 147835 + }, + { + "epoch": 1.2783287649912236, + "grad_norm": 0.7355339102743716, + "learning_rate": 1.7297924925073632e-06, + "loss": 0.06914825439453125, + "step": 147840 + }, + { + "epoch": 1.2783719985127668, + "grad_norm": 0.7683732535962553, + "learning_rate": 1.729607925066541e-06, + "loss": 0.018515777587890626, + "step": 147845 + }, + { + "epoch": 1.27841523203431, + "grad_norm": 12.611728658732055, + "learning_rate": 1.729423363484655e-06, + "loss": 0.052899932861328124, + "step": 147850 + }, + { + "epoch": 1.2784584655558535, + "grad_norm": 1.7227975182585744, + "learning_rate": 1.7292388077625562e-06, + "loss": 0.0627197265625, + "step": 147855 + }, + { + "epoch": 1.2785016990773967, + "grad_norm": 44.130979772241034, + "learning_rate": 1.729054257901095e-06, + "loss": 0.0852935791015625, + "step": 147860 + }, + { + "epoch": 1.27854493259894, + "grad_norm": 0.3808712249678803, + "learning_rate": 1.728869713901124e-06, + "loss": 0.06874237060546876, + "step": 147865 + }, + { + "epoch": 1.2785881661204832, + "grad_norm": 5.667509489088291, + "learning_rate": 1.7286851757634934e-06, + "loss": 0.11079540252685546, + "step": 147870 + }, + { + "epoch": 1.2786313996420264, + "grad_norm": 30.032770185983257, + "learning_rate": 1.728500643489055e-06, + "loss": 0.07596206665039062, + "step": 147875 + }, + { + "epoch": 1.2786746331635697, + "grad_norm": 2.6732504025354284, + "learning_rate": 1.7283161170786575e-06, + "loss": 0.0708740234375, + "step": 147880 + }, + { + "epoch": 1.278717866685113, + "grad_norm": 1.9912599210298692, + "learning_rate": 1.728131596533155e-06, + "loss": 0.031221771240234376, + "step": 147885 + }, + { + "epoch": 1.2787611002066561, + "grad_norm": 2.4987594144689127, + "learning_rate": 1.7279470818533963e-06, + "loss": 0.14156227111816405, + "step": 147890 + }, + { + "epoch": 1.2788043337281996, + "grad_norm": 2.3098663040379965, + "learning_rate": 1.7277625730402341e-06, + "loss": 0.014562225341796875, + "step": 147895 + }, + { + "epoch": 1.2788475672497428, + "grad_norm": 0.1615703069338217, + "learning_rate": 1.7275780700945188e-06, + "loss": 0.043889617919921874, + "step": 147900 + }, + { + "epoch": 1.278890800771286, + "grad_norm": 13.866679636466566, + "learning_rate": 1.727393573017101e-06, + "loss": 0.028326797485351562, + "step": 147905 + }, + { + "epoch": 1.2789340342928293, + "grad_norm": 0.4182700097184476, + "learning_rate": 1.7272090818088313e-06, + "loss": 0.02396240234375, + "step": 147910 + }, + { + "epoch": 1.2789772678143725, + "grad_norm": 0.49100546117723803, + "learning_rate": 1.7270245964705613e-06, + "loss": 0.036557579040527345, + "step": 147915 + }, + { + "epoch": 1.279020501335916, + "grad_norm": 1.3219649959430597, + "learning_rate": 1.726840117003141e-06, + "loss": 0.08523788452148437, + "step": 147920 + }, + { + "epoch": 1.2790637348574592, + "grad_norm": 7.549104401772849, + "learning_rate": 1.7266556434074208e-06, + "loss": 0.1423370361328125, + "step": 147925 + }, + { + "epoch": 1.2791069683790024, + "grad_norm": 4.692268276945877, + "learning_rate": 1.7264711756842528e-06, + "loss": 0.057586669921875, + "step": 147930 + }, + { + "epoch": 1.2791502019005456, + "grad_norm": 1.6457713037869295, + "learning_rate": 1.7262867138344866e-06, + "loss": 0.024729347229003905, + "step": 147935 + }, + { + "epoch": 1.2791934354220889, + "grad_norm": 1.0070541088342624, + "learning_rate": 1.7261022578589743e-06, + "loss": 0.16134109497070312, + "step": 147940 + }, + { + "epoch": 1.279236668943632, + "grad_norm": 1.2213003252643688, + "learning_rate": 1.7259178077585663e-06, + "loss": 0.0540283203125, + "step": 147945 + }, + { + "epoch": 1.2792799024651753, + "grad_norm": 0.29045324472771855, + "learning_rate": 1.7257333635341124e-06, + "loss": 0.209552001953125, + "step": 147950 + }, + { + "epoch": 1.2793231359867185, + "grad_norm": 0.9825442894262656, + "learning_rate": 1.7255489251864624e-06, + "loss": 0.07875213623046876, + "step": 147955 + }, + { + "epoch": 1.279366369508262, + "grad_norm": 0.19021620672228273, + "learning_rate": 1.7253644927164694e-06, + "loss": 0.1052642822265625, + "step": 147960 + }, + { + "epoch": 1.2794096030298052, + "grad_norm": 0.054491721847183386, + "learning_rate": 1.725180066124983e-06, + "loss": 0.21148643493652344, + "step": 147965 + }, + { + "epoch": 1.2794528365513484, + "grad_norm": 1.8308410196356881, + "learning_rate": 1.724995645412853e-06, + "loss": 0.011740875244140626, + "step": 147970 + }, + { + "epoch": 1.2794960700728917, + "grad_norm": 1.8168257666285328, + "learning_rate": 1.724811230580931e-06, + "loss": 0.10568351745605468, + "step": 147975 + }, + { + "epoch": 1.279539303594435, + "grad_norm": 6.018785669981183, + "learning_rate": 1.7246268216300665e-06, + "loss": 0.08626556396484375, + "step": 147980 + }, + { + "epoch": 1.2795825371159784, + "grad_norm": 2.331710638527123, + "learning_rate": 1.7244424185611097e-06, + "loss": 0.01761016845703125, + "step": 147985 + }, + { + "epoch": 1.2796257706375216, + "grad_norm": 3.629187123007659, + "learning_rate": 1.7242580213749123e-06, + "loss": 0.06299209594726562, + "step": 147990 + }, + { + "epoch": 1.2796690041590648, + "grad_norm": 10.684388792009303, + "learning_rate": 1.7240736300723233e-06, + "loss": 0.4866218566894531, + "step": 147995 + }, + { + "epoch": 1.279712237680608, + "grad_norm": 1.513307983193962, + "learning_rate": 1.7238892446541954e-06, + "loss": 0.04851531982421875, + "step": 148000 + }, + { + "epoch": 1.2797554712021513, + "grad_norm": 0.23841293680508965, + "learning_rate": 1.7237048651213771e-06, + "loss": 0.04012107849121094, + "step": 148005 + }, + { + "epoch": 1.2797987047236945, + "grad_norm": 1.426640054916802, + "learning_rate": 1.7235204914747194e-06, + "loss": 0.11191596984863281, + "step": 148010 + }, + { + "epoch": 1.2798419382452377, + "grad_norm": 2.188243708953122, + "learning_rate": 1.7233361237150726e-06, + "loss": 0.010261344909667968, + "step": 148015 + }, + { + "epoch": 1.279885171766781, + "grad_norm": 4.749579040584393, + "learning_rate": 1.7231517618432851e-06, + "loss": 0.13408966064453126, + "step": 148020 + }, + { + "epoch": 1.2799284052883244, + "grad_norm": 7.272595498398686, + "learning_rate": 1.7229674058602103e-06, + "loss": 0.03697967529296875, + "step": 148025 + }, + { + "epoch": 1.2799716388098676, + "grad_norm": 2.0990699516120976, + "learning_rate": 1.722783055766697e-06, + "loss": 0.010650253295898438, + "step": 148030 + }, + { + "epoch": 1.2800148723314109, + "grad_norm": 10.208645746067917, + "learning_rate": 1.7225987115635955e-06, + "loss": 0.16515331268310546, + "step": 148035 + }, + { + "epoch": 1.280058105852954, + "grad_norm": 2.0243288612723154, + "learning_rate": 1.7224143732517557e-06, + "loss": 0.025939178466796876, + "step": 148040 + }, + { + "epoch": 1.2801013393744975, + "grad_norm": 1.2199839423693593, + "learning_rate": 1.7222300408320268e-06, + "loss": 0.0417755126953125, + "step": 148045 + }, + { + "epoch": 1.2801445728960408, + "grad_norm": 23.15440485419056, + "learning_rate": 1.7220457143052616e-06, + "loss": 0.05052947998046875, + "step": 148050 + }, + { + "epoch": 1.280187806417584, + "grad_norm": 50.610057575442056, + "learning_rate": 1.7218613936723068e-06, + "loss": 0.15069046020507812, + "step": 148055 + }, + { + "epoch": 1.2802310399391272, + "grad_norm": 9.173369553108046, + "learning_rate": 1.7216770789340156e-06, + "loss": 0.03535003662109375, + "step": 148060 + }, + { + "epoch": 1.2802742734606705, + "grad_norm": 1.64265410265683, + "learning_rate": 1.7214927700912373e-06, + "loss": 0.035255813598632814, + "step": 148065 + }, + { + "epoch": 1.2803175069822137, + "grad_norm": 0.37436595956966706, + "learning_rate": 1.7213084671448209e-06, + "loss": 0.087835693359375, + "step": 148070 + }, + { + "epoch": 1.280360740503757, + "grad_norm": 1.3762952653142215, + "learning_rate": 1.7211241700956172e-06, + "loss": 0.07216014862060546, + "step": 148075 + }, + { + "epoch": 1.2804039740253002, + "grad_norm": 0.8101018841005895, + "learning_rate": 1.7209398789444758e-06, + "loss": 0.033164596557617186, + "step": 148080 + }, + { + "epoch": 1.2804472075468436, + "grad_norm": 30.944836822963993, + "learning_rate": 1.7207555936922452e-06, + "loss": 0.4976593017578125, + "step": 148085 + }, + { + "epoch": 1.2804904410683868, + "grad_norm": 7.341476931771787, + "learning_rate": 1.720571314339778e-06, + "loss": 0.028907012939453126, + "step": 148090 + }, + { + "epoch": 1.28053367458993, + "grad_norm": 0.3630527132402047, + "learning_rate": 1.720387040887923e-06, + "loss": 0.3985755920410156, + "step": 148095 + }, + { + "epoch": 1.2805769081114733, + "grad_norm": 0.8047561431597057, + "learning_rate": 1.7202027733375294e-06, + "loss": 0.17510757446289063, + "step": 148100 + }, + { + "epoch": 1.2806201416330165, + "grad_norm": 48.15200187339019, + "learning_rate": 1.720018511689447e-06, + "loss": 0.37434921264648435, + "step": 148105 + }, + { + "epoch": 1.28066337515456, + "grad_norm": 0.7966604700434959, + "learning_rate": 1.7198342559445267e-06, + "loss": 0.096044921875, + "step": 148110 + }, + { + "epoch": 1.2807066086761032, + "grad_norm": 1.3101340842293219, + "learning_rate": 1.719650006103617e-06, + "loss": 0.01232147216796875, + "step": 148115 + }, + { + "epoch": 1.2807498421976464, + "grad_norm": 0.7629071905240137, + "learning_rate": 1.7194657621675693e-06, + "loss": 0.01981201171875, + "step": 148120 + }, + { + "epoch": 1.2807930757191897, + "grad_norm": 4.090707321157793, + "learning_rate": 1.7192815241372328e-06, + "loss": 0.014056396484375, + "step": 148125 + }, + { + "epoch": 1.2808363092407329, + "grad_norm": 2.0362065856786775, + "learning_rate": 1.7190972920134563e-06, + "loss": 0.1854156494140625, + "step": 148130 + }, + { + "epoch": 1.2808795427622761, + "grad_norm": 72.71386375919948, + "learning_rate": 1.7189130657970898e-06, + "loss": 0.177227783203125, + "step": 148135 + }, + { + "epoch": 1.2809227762838193, + "grad_norm": 1.8847284938351134, + "learning_rate": 1.7187288454889833e-06, + "loss": 0.0471954345703125, + "step": 148140 + }, + { + "epoch": 1.2809660098053626, + "grad_norm": 1.4124338795447549, + "learning_rate": 1.7185446310899851e-06, + "loss": 0.04890594482421875, + "step": 148145 + }, + { + "epoch": 1.281009243326906, + "grad_norm": 12.290338874867501, + "learning_rate": 1.7183604226009465e-06, + "loss": 0.09947509765625, + "step": 148150 + }, + { + "epoch": 1.2810524768484493, + "grad_norm": 0.7149035084774946, + "learning_rate": 1.7181762200227166e-06, + "loss": 0.20793609619140624, + "step": 148155 + }, + { + "epoch": 1.2810957103699925, + "grad_norm": 2.877366270722861, + "learning_rate": 1.717992023356144e-06, + "loss": 0.19728164672851561, + "step": 148160 + }, + { + "epoch": 1.2811389438915357, + "grad_norm": 2.620761139175806, + "learning_rate": 1.7178078326020796e-06, + "loss": 0.1717905044555664, + "step": 148165 + }, + { + "epoch": 1.281182177413079, + "grad_norm": 6.466441225026648, + "learning_rate": 1.717623647761372e-06, + "loss": 0.02959728240966797, + "step": 148170 + }, + { + "epoch": 1.2812254109346224, + "grad_norm": 27.560948328344203, + "learning_rate": 1.7174394688348711e-06, + "loss": 0.04285507202148438, + "step": 148175 + }, + { + "epoch": 1.2812686444561656, + "grad_norm": 2.665483530018098, + "learning_rate": 1.7172552958234249e-06, + "loss": 0.028482818603515626, + "step": 148180 + }, + { + "epoch": 1.2813118779777088, + "grad_norm": 57.33307250516694, + "learning_rate": 1.717071128727885e-06, + "loss": 0.20663604736328126, + "step": 148185 + }, + { + "epoch": 1.281355111499252, + "grad_norm": 0.4655772963448537, + "learning_rate": 1.7168869675491e-06, + "loss": 0.008281898498535157, + "step": 148190 + }, + { + "epoch": 1.2813983450207953, + "grad_norm": 0.9762754902138657, + "learning_rate": 1.7167028122879186e-06, + "loss": 0.029550933837890626, + "step": 148195 + }, + { + "epoch": 1.2814415785423385, + "grad_norm": 25.120404417787235, + "learning_rate": 1.7165186629451909e-06, + "loss": 0.0929962158203125, + "step": 148200 + }, + { + "epoch": 1.2814848120638818, + "grad_norm": 11.318131877231686, + "learning_rate": 1.7163345195217653e-06, + "loss": 0.12601318359375, + "step": 148205 + }, + { + "epoch": 1.281528045585425, + "grad_norm": 0.5022779862909547, + "learning_rate": 1.71615038201849e-06, + "loss": 0.09104156494140625, + "step": 148210 + }, + { + "epoch": 1.2815712791069684, + "grad_norm": 0.6612046570309233, + "learning_rate": 1.7159662504362172e-06, + "loss": 0.14735336303710939, + "step": 148215 + }, + { + "epoch": 1.2816145126285117, + "grad_norm": 1.3940651572581624, + "learning_rate": 1.7157821247757934e-06, + "loss": 0.04836883544921875, + "step": 148220 + }, + { + "epoch": 1.281657746150055, + "grad_norm": 48.82819670474331, + "learning_rate": 1.7155980050380705e-06, + "loss": 0.15528478622436523, + "step": 148225 + }, + { + "epoch": 1.2817009796715981, + "grad_norm": 19.852218651114917, + "learning_rate": 1.7154138912238954e-06, + "loss": 0.12448806762695312, + "step": 148230 + }, + { + "epoch": 1.2817442131931414, + "grad_norm": 0.6663580379201094, + "learning_rate": 1.7152297833341185e-06, + "loss": 0.23520336151123047, + "step": 148235 + }, + { + "epoch": 1.2817874467146848, + "grad_norm": 0.18427642046836393, + "learning_rate": 1.715045681369587e-06, + "loss": 0.09947357177734376, + "step": 148240 + }, + { + "epoch": 1.281830680236228, + "grad_norm": 7.985831895780795, + "learning_rate": 1.7148615853311524e-06, + "loss": 0.06663360595703124, + "step": 148245 + }, + { + "epoch": 1.2818739137577713, + "grad_norm": 12.820814740163042, + "learning_rate": 1.7146774952196624e-06, + "loss": 0.06394805908203124, + "step": 148250 + }, + { + "epoch": 1.2819171472793145, + "grad_norm": 1.2884407345834887, + "learning_rate": 1.7144934110359667e-06, + "loss": 0.031698989868164065, + "step": 148255 + }, + { + "epoch": 1.2819603808008577, + "grad_norm": 0.18510594918116546, + "learning_rate": 1.7143093327809136e-06, + "loss": 0.020476913452148436, + "step": 148260 + }, + { + "epoch": 1.282003614322401, + "grad_norm": 4.944963185085121, + "learning_rate": 1.714125260455352e-06, + "loss": 0.03122711181640625, + "step": 148265 + }, + { + "epoch": 1.2820468478439442, + "grad_norm": 9.011080126830313, + "learning_rate": 1.7139411940601302e-06, + "loss": 0.051751708984375, + "step": 148270 + }, + { + "epoch": 1.2820900813654874, + "grad_norm": 3.572883600094393, + "learning_rate": 1.713757133596099e-06, + "loss": 0.01618499755859375, + "step": 148275 + }, + { + "epoch": 1.2821333148870309, + "grad_norm": 1.3220097777281128, + "learning_rate": 1.7135730790641054e-06, + "loss": 0.02560844421386719, + "step": 148280 + }, + { + "epoch": 1.282176548408574, + "grad_norm": 1.4752547482575649, + "learning_rate": 1.7133890304649995e-06, + "loss": 0.02685127258300781, + "step": 148285 + }, + { + "epoch": 1.2822197819301173, + "grad_norm": 0.8861772565473555, + "learning_rate": 1.7132049877996306e-06, + "loss": 0.0349365234375, + "step": 148290 + }, + { + "epoch": 1.2822630154516605, + "grad_norm": 26.00146323765929, + "learning_rate": 1.713020951068846e-06, + "loss": 0.22557830810546875, + "step": 148295 + }, + { + "epoch": 1.282306248973204, + "grad_norm": 7.072125082481713, + "learning_rate": 1.7128369202734951e-06, + "loss": 0.01141204833984375, + "step": 148300 + }, + { + "epoch": 1.2823494824947472, + "grad_norm": 27.144408967942425, + "learning_rate": 1.7126528954144257e-06, + "loss": 0.17729110717773439, + "step": 148305 + }, + { + "epoch": 1.2823927160162905, + "grad_norm": 40.09564582840965, + "learning_rate": 1.7124688764924883e-06, + "loss": 0.05080413818359375, + "step": 148310 + }, + { + "epoch": 1.2824359495378337, + "grad_norm": 7.841926944370391, + "learning_rate": 1.7122848635085308e-06, + "loss": 0.022068214416503907, + "step": 148315 + }, + { + "epoch": 1.282479183059377, + "grad_norm": 4.485635910123957, + "learning_rate": 1.7121008564634016e-06, + "loss": 0.0397064208984375, + "step": 148320 + }, + { + "epoch": 1.2825224165809201, + "grad_norm": 20.081329195478794, + "learning_rate": 1.7119168553579497e-06, + "loss": 0.06788291931152343, + "step": 148325 + }, + { + "epoch": 1.2825656501024634, + "grad_norm": 0.9805073420701629, + "learning_rate": 1.711732860193022e-06, + "loss": 0.07615203857421875, + "step": 148330 + }, + { + "epoch": 1.2826088836240066, + "grad_norm": 6.109058328252734, + "learning_rate": 1.71154887096947e-06, + "loss": 0.09412193298339844, + "step": 148335 + }, + { + "epoch": 1.28265211714555, + "grad_norm": 1.746480896382398, + "learning_rate": 1.7113648876881393e-06, + "loss": 0.14564743041992187, + "step": 148340 + }, + { + "epoch": 1.2826953506670933, + "grad_norm": 0.9190627986371863, + "learning_rate": 1.7111809103498812e-06, + "loss": 0.0672271728515625, + "step": 148345 + }, + { + "epoch": 1.2827385841886365, + "grad_norm": 0.517045453506322, + "learning_rate": 1.7109969389555429e-06, + "loss": 0.0621826171875, + "step": 148350 + }, + { + "epoch": 1.2827818177101797, + "grad_norm": 0.22496484566216413, + "learning_rate": 1.7108129735059727e-06, + "loss": 0.123260498046875, + "step": 148355 + }, + { + "epoch": 1.282825051231723, + "grad_norm": 0.8297116157726684, + "learning_rate": 1.710629014002019e-06, + "loss": 0.17778091430664061, + "step": 148360 + }, + { + "epoch": 1.2828682847532664, + "grad_norm": 3.7677295798041395, + "learning_rate": 1.7104450604445305e-06, + "loss": 0.24073028564453125, + "step": 148365 + }, + { + "epoch": 1.2829115182748096, + "grad_norm": 3.8444736900044965, + "learning_rate": 1.7102611128343547e-06, + "loss": 0.11122894287109375, + "step": 148370 + }, + { + "epoch": 1.2829547517963529, + "grad_norm": 6.746445785434472, + "learning_rate": 1.7100771711723414e-06, + "loss": 0.04168243408203125, + "step": 148375 + }, + { + "epoch": 1.282997985317896, + "grad_norm": 2.6573139705442763, + "learning_rate": 1.709893235459338e-06, + "loss": 0.06463623046875, + "step": 148380 + }, + { + "epoch": 1.2830412188394393, + "grad_norm": 3.6879160997163125, + "learning_rate": 1.709709305696192e-06, + "loss": 0.023116302490234376, + "step": 148385 + }, + { + "epoch": 1.2830844523609826, + "grad_norm": 15.023075121296658, + "learning_rate": 1.7095253818837541e-06, + "loss": 0.11783885955810547, + "step": 148390 + }, + { + "epoch": 1.2831276858825258, + "grad_norm": 0.2161658176383311, + "learning_rate": 1.7093414640228709e-06, + "loss": 0.07639141082763672, + "step": 148395 + }, + { + "epoch": 1.283170919404069, + "grad_norm": 21.31744115148167, + "learning_rate": 1.70915755211439e-06, + "loss": 0.06664600372314453, + "step": 148400 + }, + { + "epoch": 1.2832141529256125, + "grad_norm": 2.919532616858341, + "learning_rate": 1.7089736461591606e-06, + "loss": 0.03166885375976562, + "step": 148405 + }, + { + "epoch": 1.2832573864471557, + "grad_norm": 4.650927065713521, + "learning_rate": 1.7087897461580315e-06, + "loss": 0.36612930297851565, + "step": 148410 + }, + { + "epoch": 1.283300619968699, + "grad_norm": 5.18748693284951, + "learning_rate": 1.7086058521118502e-06, + "loss": 0.06602630615234376, + "step": 148415 + }, + { + "epoch": 1.2833438534902422, + "grad_norm": 0.177981683825863, + "learning_rate": 1.708421964021464e-06, + "loss": 0.035555267333984376, + "step": 148420 + }, + { + "epoch": 1.2833870870117854, + "grad_norm": 11.398769963905046, + "learning_rate": 1.7082380818877216e-06, + "loss": 0.047317123413085936, + "step": 148425 + }, + { + "epoch": 1.2834303205333288, + "grad_norm": 49.579772066040704, + "learning_rate": 1.7080542057114715e-06, + "loss": 0.15746421813964845, + "step": 148430 + }, + { + "epoch": 1.283473554054872, + "grad_norm": 11.922181700264668, + "learning_rate": 1.7078703354935598e-06, + "loss": 0.05934410095214844, + "step": 148435 + }, + { + "epoch": 1.2835167875764153, + "grad_norm": 0.40671867155156105, + "learning_rate": 1.707686471234837e-06, + "loss": 0.017750930786132813, + "step": 148440 + }, + { + "epoch": 1.2835600210979585, + "grad_norm": 0.6923407215103377, + "learning_rate": 1.7075026129361492e-06, + "loss": 0.15013198852539061, + "step": 148445 + }, + { + "epoch": 1.2836032546195018, + "grad_norm": 17.88892123734779, + "learning_rate": 1.707318760598346e-06, + "loss": 0.0649169921875, + "step": 148450 + }, + { + "epoch": 1.283646488141045, + "grad_norm": 0.13835683549026107, + "learning_rate": 1.7071349142222745e-06, + "loss": 0.0870941162109375, + "step": 148455 + }, + { + "epoch": 1.2836897216625882, + "grad_norm": 0.760776454623868, + "learning_rate": 1.7069510738087828e-06, + "loss": 0.06387100219726563, + "step": 148460 + }, + { + "epoch": 1.2837329551841314, + "grad_norm": 2.752024392510488, + "learning_rate": 1.7067672393587167e-06, + "loss": 0.04010009765625, + "step": 148465 + }, + { + "epoch": 1.283776188705675, + "grad_norm": 0.42222650180774884, + "learning_rate": 1.7065834108729277e-06, + "loss": 0.0210723876953125, + "step": 148470 + }, + { + "epoch": 1.2838194222272181, + "grad_norm": 7.38231251678474, + "learning_rate": 1.7063995883522614e-06, + "loss": 0.06088333129882813, + "step": 148475 + }, + { + "epoch": 1.2838626557487613, + "grad_norm": 4.98699581464746, + "learning_rate": 1.706215771797566e-06, + "loss": 0.09545440673828125, + "step": 148480 + }, + { + "epoch": 1.2839058892703046, + "grad_norm": 1.3613920576495817, + "learning_rate": 1.7060319612096888e-06, + "loss": 0.06393585205078126, + "step": 148485 + }, + { + "epoch": 1.2839491227918478, + "grad_norm": 6.575519742131866, + "learning_rate": 1.7058481565894782e-06, + "loss": 0.053738975524902345, + "step": 148490 + }, + { + "epoch": 1.2839923563133913, + "grad_norm": 3.1594784325703085, + "learning_rate": 1.70566435793778e-06, + "loss": 0.0694915771484375, + "step": 148495 + }, + { + "epoch": 1.2840355898349345, + "grad_norm": 42.92229945606733, + "learning_rate": 1.705480565255445e-06, + "loss": 0.2203521728515625, + "step": 148500 + }, + { + "epoch": 1.2840788233564777, + "grad_norm": 3.618806776092431, + "learning_rate": 1.705296778543318e-06, + "loss": 0.045730972290039064, + "step": 148505 + }, + { + "epoch": 1.284122056878021, + "grad_norm": 1.1493721355782511, + "learning_rate": 1.7051129978022486e-06, + "loss": 0.1684112548828125, + "step": 148510 + }, + { + "epoch": 1.2841652903995642, + "grad_norm": 1.2377110199071049, + "learning_rate": 1.7049292230330843e-06, + "loss": 0.045407867431640624, + "step": 148515 + }, + { + "epoch": 1.2842085239211074, + "grad_norm": 2.6820608061425015, + "learning_rate": 1.7047454542366715e-06, + "loss": 0.011588287353515626, + "step": 148520 + }, + { + "epoch": 1.2842517574426506, + "grad_norm": 1.1276969709389533, + "learning_rate": 1.7045616914138586e-06, + "loss": 0.14581146240234374, + "step": 148525 + }, + { + "epoch": 1.2842949909641939, + "grad_norm": 0.15743017246854576, + "learning_rate": 1.7043779345654912e-06, + "loss": 0.14048938751220702, + "step": 148530 + }, + { + "epoch": 1.2843382244857373, + "grad_norm": 8.496347349806937, + "learning_rate": 1.7041941836924197e-06, + "loss": 0.06856231689453125, + "step": 148535 + }, + { + "epoch": 1.2843814580072805, + "grad_norm": 0.6321864049799717, + "learning_rate": 1.7040104387954898e-06, + "loss": 0.06378517150878907, + "step": 148540 + }, + { + "epoch": 1.2844246915288238, + "grad_norm": 21.597921096731373, + "learning_rate": 1.7038266998755494e-06, + "loss": 0.07779541015625, + "step": 148545 + }, + { + "epoch": 1.284467925050367, + "grad_norm": 2.171806043879562, + "learning_rate": 1.703642966933446e-06, + "loss": 0.0185821533203125, + "step": 148550 + }, + { + "epoch": 1.2845111585719104, + "grad_norm": 0.15150596541396213, + "learning_rate": 1.703459239970025e-06, + "loss": 0.007854461669921875, + "step": 148555 + }, + { + "epoch": 1.2845543920934537, + "grad_norm": 1.7763576788192657, + "learning_rate": 1.7032755189861359e-06, + "loss": 0.031976318359375, + "step": 148560 + }, + { + "epoch": 1.284597625614997, + "grad_norm": 1.3070613739572134, + "learning_rate": 1.7030918039826265e-06, + "loss": 0.03934459686279297, + "step": 148565 + }, + { + "epoch": 1.2846408591365401, + "grad_norm": 12.822086032526945, + "learning_rate": 1.702908094960343e-06, + "loss": 0.10889816284179688, + "step": 148570 + }, + { + "epoch": 1.2846840926580834, + "grad_norm": 15.024288780742621, + "learning_rate": 1.702724391920133e-06, + "loss": 0.11886749267578126, + "step": 148575 + }, + { + "epoch": 1.2847273261796266, + "grad_norm": 43.16936712553501, + "learning_rate": 1.7025406948628433e-06, + "loss": 0.32008895874023435, + "step": 148580 + }, + { + "epoch": 1.2847705597011698, + "grad_norm": 2.187561817372397, + "learning_rate": 1.7023570037893212e-06, + "loss": 0.016852188110351562, + "step": 148585 + }, + { + "epoch": 1.284813793222713, + "grad_norm": 3.473102556434877, + "learning_rate": 1.7021733187004145e-06, + "loss": 0.197857666015625, + "step": 148590 + }, + { + "epoch": 1.2848570267442565, + "grad_norm": 1.4355767809062623, + "learning_rate": 1.701989639596968e-06, + "loss": 0.3487640380859375, + "step": 148595 + }, + { + "epoch": 1.2849002602657997, + "grad_norm": 0.9369219413224885, + "learning_rate": 1.701805966479832e-06, + "loss": 0.04546051025390625, + "step": 148600 + }, + { + "epoch": 1.284943493787343, + "grad_norm": 1.5414528620500614, + "learning_rate": 1.7016222993498518e-06, + "loss": 0.063262939453125, + "step": 148605 + }, + { + "epoch": 1.2849867273088862, + "grad_norm": 0.32700477255365573, + "learning_rate": 1.701438638207874e-06, + "loss": 0.13437576293945314, + "step": 148610 + }, + { + "epoch": 1.2850299608304294, + "grad_norm": 0.09633597037299788, + "learning_rate": 1.7012549830547478e-06, + "loss": 0.012936687469482422, + "step": 148615 + }, + { + "epoch": 1.2850731943519729, + "grad_norm": 0.14383305661736404, + "learning_rate": 1.7010713338913184e-06, + "loss": 0.029950714111328124, + "step": 148620 + }, + { + "epoch": 1.285116427873516, + "grad_norm": 1.3294497457019323, + "learning_rate": 1.7008876907184325e-06, + "loss": 0.0752685546875, + "step": 148625 + }, + { + "epoch": 1.2851596613950593, + "grad_norm": 8.806770572654163, + "learning_rate": 1.7007040535369387e-06, + "loss": 0.03574676513671875, + "step": 148630 + }, + { + "epoch": 1.2852028949166026, + "grad_norm": 0.4063562801648468, + "learning_rate": 1.700520422347683e-06, + "loss": 0.03842926025390625, + "step": 148635 + }, + { + "epoch": 1.2852461284381458, + "grad_norm": 0.7910125998231377, + "learning_rate": 1.7003367971515124e-06, + "loss": 0.03632621765136719, + "step": 148640 + }, + { + "epoch": 1.285289361959689, + "grad_norm": 7.244544198511873, + "learning_rate": 1.7001531779492733e-06, + "loss": 0.052434539794921874, + "step": 148645 + }, + { + "epoch": 1.2853325954812322, + "grad_norm": 0.038928250344395524, + "learning_rate": 1.6999695647418136e-06, + "loss": 0.0023279190063476562, + "step": 148650 + }, + { + "epoch": 1.2853758290027755, + "grad_norm": 8.77574977690387, + "learning_rate": 1.6997859575299775e-06, + "loss": 0.080126953125, + "step": 148655 + }, + { + "epoch": 1.285419062524319, + "grad_norm": 0.1768914887032816, + "learning_rate": 1.6996023563146154e-06, + "loss": 0.08459625244140626, + "step": 148660 + }, + { + "epoch": 1.2854622960458622, + "grad_norm": 0.23006330811920456, + "learning_rate": 1.699418761096572e-06, + "loss": 0.12232208251953125, + "step": 148665 + }, + { + "epoch": 1.2855055295674054, + "grad_norm": 2.4316986404728005, + "learning_rate": 1.6992351718766933e-06, + "loss": 0.01812896728515625, + "step": 148670 + }, + { + "epoch": 1.2855487630889486, + "grad_norm": 0.34342279494579003, + "learning_rate": 1.699051588655828e-06, + "loss": 0.13677978515625, + "step": 148675 + }, + { + "epoch": 1.2855919966104918, + "grad_norm": 6.93242051836567, + "learning_rate": 1.698868011434822e-06, + "loss": 0.03961315155029297, + "step": 148680 + }, + { + "epoch": 1.2856352301320353, + "grad_norm": 0.8257843307742095, + "learning_rate": 1.6986844402145217e-06, + "loss": 0.07495288848876953, + "step": 148685 + }, + { + "epoch": 1.2856784636535785, + "grad_norm": 22.924248431862196, + "learning_rate": 1.6985008749957727e-06, + "loss": 0.1233642578125, + "step": 148690 + }, + { + "epoch": 1.2857216971751217, + "grad_norm": 9.319587630775876, + "learning_rate": 1.6983173157794238e-06, + "loss": 0.16894264221191407, + "step": 148695 + }, + { + "epoch": 1.285764930696665, + "grad_norm": 0.9000897280373078, + "learning_rate": 1.6981337625663203e-06, + "loss": 0.034859085083007814, + "step": 148700 + }, + { + "epoch": 1.2858081642182082, + "grad_norm": 0.83093045483424, + "learning_rate": 1.6979502153573092e-06, + "loss": 0.3031044006347656, + "step": 148705 + }, + { + "epoch": 1.2858513977397514, + "grad_norm": 3.3132442858572033, + "learning_rate": 1.6977666741532364e-06, + "loss": 0.06202850341796875, + "step": 148710 + }, + { + "epoch": 1.2858946312612947, + "grad_norm": 6.025149119430935, + "learning_rate": 1.6975831389549488e-06, + "loss": 0.060535621643066403, + "step": 148715 + }, + { + "epoch": 1.285937864782838, + "grad_norm": 2.437416413268045, + "learning_rate": 1.6973996097632913e-06, + "loss": 0.46062240600585935, + "step": 148720 + }, + { + "epoch": 1.2859810983043813, + "grad_norm": 1.331413354699273, + "learning_rate": 1.6972160865791128e-06, + "loss": 0.010544109344482421, + "step": 148725 + }, + { + "epoch": 1.2860243318259246, + "grad_norm": 0.6242089279039476, + "learning_rate": 1.6970325694032575e-06, + "loss": 0.038003158569335935, + "step": 148730 + }, + { + "epoch": 1.2860675653474678, + "grad_norm": 6.471487616009842, + "learning_rate": 1.696849058236574e-06, + "loss": 0.03370399475097656, + "step": 148735 + }, + { + "epoch": 1.286110798869011, + "grad_norm": 5.558826548809309, + "learning_rate": 1.6966655530799077e-06, + "loss": 0.04246864318847656, + "step": 148740 + }, + { + "epoch": 1.2861540323905543, + "grad_norm": 1.1015125484046249, + "learning_rate": 1.6964820539341043e-06, + "loss": 0.09539451599121093, + "step": 148745 + }, + { + "epoch": 1.2861972659120977, + "grad_norm": 0.13257473694955896, + "learning_rate": 1.69629856080001e-06, + "loss": 0.015747833251953124, + "step": 148750 + }, + { + "epoch": 1.286240499433641, + "grad_norm": 1.377154320669182, + "learning_rate": 1.6961150736784722e-06, + "loss": 0.0945831298828125, + "step": 148755 + }, + { + "epoch": 1.2862837329551842, + "grad_norm": 0.17874581001161802, + "learning_rate": 1.6959315925703367e-06, + "loss": 0.08807258605957032, + "step": 148760 + }, + { + "epoch": 1.2863269664767274, + "grad_norm": 5.5926904605446115, + "learning_rate": 1.6957481174764491e-06, + "loss": 0.13857498168945312, + "step": 148765 + }, + { + "epoch": 1.2863701999982706, + "grad_norm": 0.11704007376437865, + "learning_rate": 1.6955646483976563e-06, + "loss": 0.14827632904052734, + "step": 148770 + }, + { + "epoch": 1.2864134335198139, + "grad_norm": 19.657960144445052, + "learning_rate": 1.6953811853348026e-06, + "loss": 0.1505290985107422, + "step": 148775 + }, + { + "epoch": 1.286456667041357, + "grad_norm": 0.04714069244963588, + "learning_rate": 1.6951977282887373e-06, + "loss": 0.0417266845703125, + "step": 148780 + }, + { + "epoch": 1.2864999005629005, + "grad_norm": 0.37029523679162946, + "learning_rate": 1.6950142772603033e-06, + "loss": 0.09144134521484375, + "step": 148785 + }, + { + "epoch": 1.2865431340844438, + "grad_norm": 2.178994855810359, + "learning_rate": 1.6948308322503492e-06, + "loss": 0.08624706268310547, + "step": 148790 + }, + { + "epoch": 1.286586367605987, + "grad_norm": 1.3569773718950267, + "learning_rate": 1.69464739325972e-06, + "loss": 0.044802093505859376, + "step": 148795 + }, + { + "epoch": 1.2866296011275302, + "grad_norm": 21.031562815176866, + "learning_rate": 1.6944639602892615e-06, + "loss": 0.07596511840820312, + "step": 148800 + }, + { + "epoch": 1.2866728346490734, + "grad_norm": 1.0783821786006733, + "learning_rate": 1.6942805333398201e-06, + "loss": 0.021516990661621094, + "step": 148805 + }, + { + "epoch": 1.286716068170617, + "grad_norm": 4.282236474502169, + "learning_rate": 1.6940971124122417e-06, + "loss": 0.12801380157470704, + "step": 148810 + }, + { + "epoch": 1.2867593016921601, + "grad_norm": 0.03717739676333863, + "learning_rate": 1.6939136975073706e-06, + "loss": 0.03056678771972656, + "step": 148815 + }, + { + "epoch": 1.2868025352137034, + "grad_norm": 0.35261414778382943, + "learning_rate": 1.6937302886260553e-06, + "loss": 0.0101898193359375, + "step": 148820 + }, + { + "epoch": 1.2868457687352466, + "grad_norm": 0.14558681386301836, + "learning_rate": 1.6935468857691405e-06, + "loss": 0.3416751861572266, + "step": 148825 + }, + { + "epoch": 1.2868890022567898, + "grad_norm": 4.361701313429652, + "learning_rate": 1.693363488937472e-06, + "loss": 0.01457061767578125, + "step": 148830 + }, + { + "epoch": 1.286932235778333, + "grad_norm": 1.7532573471644006, + "learning_rate": 1.6931800981318946e-06, + "loss": 0.02012939453125, + "step": 148835 + }, + { + "epoch": 1.2869754692998763, + "grad_norm": 0.6112448575320327, + "learning_rate": 1.6929967133532563e-06, + "loss": 0.13876876831054688, + "step": 148840 + }, + { + "epoch": 1.2870187028214195, + "grad_norm": 6.313733693777752, + "learning_rate": 1.6928133346024003e-06, + "loss": 0.042083740234375, + "step": 148845 + }, + { + "epoch": 1.287061936342963, + "grad_norm": 0.05116534218976703, + "learning_rate": 1.692629961880175e-06, + "loss": 0.1266172409057617, + "step": 148850 + }, + { + "epoch": 1.2871051698645062, + "grad_norm": 0.7222042663853057, + "learning_rate": 1.6924465951874247e-06, + "loss": 0.11449127197265625, + "step": 148855 + }, + { + "epoch": 1.2871484033860494, + "grad_norm": 0.2473788252457964, + "learning_rate": 1.6922632345249954e-06, + "loss": 0.02599945068359375, + "step": 148860 + }, + { + "epoch": 1.2871916369075926, + "grad_norm": 35.61968901107368, + "learning_rate": 1.6920798798937324e-06, + "loss": 0.1902599334716797, + "step": 148865 + }, + { + "epoch": 1.2872348704291359, + "grad_norm": 5.4241573340691795, + "learning_rate": 1.6918965312944815e-06, + "loss": 0.020737457275390624, + "step": 148870 + }, + { + "epoch": 1.2872781039506793, + "grad_norm": 0.5748417184930266, + "learning_rate": 1.691713188728088e-06, + "loss": 0.029807281494140626, + "step": 148875 + }, + { + "epoch": 1.2873213374722225, + "grad_norm": 0.35186304399321683, + "learning_rate": 1.6915298521953966e-06, + "loss": 0.09735794067382812, + "step": 148880 + }, + { + "epoch": 1.2873645709937658, + "grad_norm": 7.9225133083214665, + "learning_rate": 1.6913465216972548e-06, + "loss": 0.1281341552734375, + "step": 148885 + }, + { + "epoch": 1.287407804515309, + "grad_norm": 0.5260694932932635, + "learning_rate": 1.6911631972345074e-06, + "loss": 0.03857269287109375, + "step": 148890 + }, + { + "epoch": 1.2874510380368522, + "grad_norm": 4.055568261486119, + "learning_rate": 1.6909798788079988e-06, + "loss": 0.17982635498046876, + "step": 148895 + }, + { + "epoch": 1.2874942715583955, + "grad_norm": 26.378790811490166, + "learning_rate": 1.6907965664185762e-06, + "loss": 0.39556083679199217, + "step": 148900 + }, + { + "epoch": 1.2875375050799387, + "grad_norm": 0.41425974548512184, + "learning_rate": 1.6906132600670842e-06, + "loss": 0.10522079467773438, + "step": 148905 + }, + { + "epoch": 1.287580738601482, + "grad_norm": 12.365521179673058, + "learning_rate": 1.690429959754367e-06, + "loss": 0.023525238037109375, + "step": 148910 + }, + { + "epoch": 1.2876239721230254, + "grad_norm": 1.9396278698863603, + "learning_rate": 1.6902466654812722e-06, + "loss": 0.061106109619140626, + "step": 148915 + }, + { + "epoch": 1.2876672056445686, + "grad_norm": 6.858399770903085, + "learning_rate": 1.6900633772486441e-06, + "loss": 0.1201944351196289, + "step": 148920 + }, + { + "epoch": 1.2877104391661118, + "grad_norm": 5.451833880794699, + "learning_rate": 1.689880095057328e-06, + "loss": 0.055539703369140624, + "step": 148925 + }, + { + "epoch": 1.287753672687655, + "grad_norm": 2.066493005292545, + "learning_rate": 1.6896968189081692e-06, + "loss": 0.060491943359375, + "step": 148930 + }, + { + "epoch": 1.2877969062091983, + "grad_norm": 3.160016155262825, + "learning_rate": 1.6895135488020128e-06, + "loss": 0.03230209350585937, + "step": 148935 + }, + { + "epoch": 1.2878401397307417, + "grad_norm": 1.150014896760599, + "learning_rate": 1.6893302847397042e-06, + "loss": 0.23818473815917968, + "step": 148940 + }, + { + "epoch": 1.287883373252285, + "grad_norm": 8.127562370935532, + "learning_rate": 1.689147026722087e-06, + "loss": 0.08072738647460938, + "step": 148945 + }, + { + "epoch": 1.2879266067738282, + "grad_norm": 1.4371063024327664, + "learning_rate": 1.6889637747500095e-06, + "loss": 0.06957330703735351, + "step": 148950 + }, + { + "epoch": 1.2879698402953714, + "grad_norm": 4.5182400493766846, + "learning_rate": 1.6887805288243142e-06, + "loss": 0.06964263916015626, + "step": 148955 + }, + { + "epoch": 1.2880130738169147, + "grad_norm": 0.17933023101995282, + "learning_rate": 1.6885972889458477e-06, + "loss": 0.1098907470703125, + "step": 148960 + }, + { + "epoch": 1.2880563073384579, + "grad_norm": 12.161014523876142, + "learning_rate": 1.688414055115455e-06, + "loss": 0.23359909057617187, + "step": 148965 + }, + { + "epoch": 1.2880995408600011, + "grad_norm": 3.0075695650757583, + "learning_rate": 1.6882308273339808e-06, + "loss": 0.018201828002929688, + "step": 148970 + }, + { + "epoch": 1.2881427743815443, + "grad_norm": 7.287233492593945, + "learning_rate": 1.688047605602269e-06, + "loss": 0.07267608642578124, + "step": 148975 + }, + { + "epoch": 1.2881860079030878, + "grad_norm": 0.48747551598507993, + "learning_rate": 1.687864389921167e-06, + "loss": 0.09580802917480469, + "step": 148980 + }, + { + "epoch": 1.288229241424631, + "grad_norm": 18.81486488510495, + "learning_rate": 1.6876811802915183e-06, + "loss": 0.06798133850097657, + "step": 148985 + }, + { + "epoch": 1.2882724749461743, + "grad_norm": 26.712831702966334, + "learning_rate": 1.6874979767141678e-06, + "loss": 0.14722061157226562, + "step": 148990 + }, + { + "epoch": 1.2883157084677175, + "grad_norm": 15.40205725281563, + "learning_rate": 1.6873147791899612e-06, + "loss": 0.1898487091064453, + "step": 148995 + }, + { + "epoch": 1.288358941989261, + "grad_norm": 5.137351134098133, + "learning_rate": 1.6871315877197413e-06, + "loss": 0.0577117919921875, + "step": 149000 + }, + { + "epoch": 1.2884021755108042, + "grad_norm": 38.513443013264244, + "learning_rate": 1.686948402304356e-06, + "loss": 0.2912921905517578, + "step": 149005 + }, + { + "epoch": 1.2884454090323474, + "grad_norm": 3.0989464065863355, + "learning_rate": 1.6867652229446469e-06, + "loss": 0.08649749755859375, + "step": 149010 + }, + { + "epoch": 1.2884886425538906, + "grad_norm": 0.32151203145870716, + "learning_rate": 1.6865820496414622e-06, + "loss": 0.036043548583984376, + "step": 149015 + }, + { + "epoch": 1.2885318760754338, + "grad_norm": 1.746302866802686, + "learning_rate": 1.686398882395645e-06, + "loss": 0.095599365234375, + "step": 149020 + }, + { + "epoch": 1.288575109596977, + "grad_norm": 0.2310670357295794, + "learning_rate": 1.68621572120804e-06, + "loss": 0.08261528015136718, + "step": 149025 + }, + { + "epoch": 1.2886183431185203, + "grad_norm": 20.02679609927304, + "learning_rate": 1.686032566079492e-06, + "loss": 0.125579833984375, + "step": 149030 + }, + { + "epoch": 1.2886615766400635, + "grad_norm": 1.3229144903792687, + "learning_rate": 1.6858494170108456e-06, + "loss": 0.04597339630126953, + "step": 149035 + }, + { + "epoch": 1.288704810161607, + "grad_norm": 0.07511282635793481, + "learning_rate": 1.6856662740029446e-06, + "loss": 0.06351070404052735, + "step": 149040 + }, + { + "epoch": 1.2887480436831502, + "grad_norm": 11.816118645823698, + "learning_rate": 1.6854831370566355e-06, + "loss": 0.1225748062133789, + "step": 149045 + }, + { + "epoch": 1.2887912772046934, + "grad_norm": 4.1680815462620115, + "learning_rate": 1.6853000061727623e-06, + "loss": 0.028141021728515625, + "step": 149050 + }, + { + "epoch": 1.2888345107262367, + "grad_norm": 26.94159792942794, + "learning_rate": 1.685116881352169e-06, + "loss": 0.11258697509765625, + "step": 149055 + }, + { + "epoch": 1.28887774424778, + "grad_norm": 0.7123369579195277, + "learning_rate": 1.6849337625956995e-06, + "loss": 0.015264892578125, + "step": 149060 + }, + { + "epoch": 1.2889209777693234, + "grad_norm": 0.6319704748867109, + "learning_rate": 1.6847506499042004e-06, + "loss": 0.0448699951171875, + "step": 149065 + }, + { + "epoch": 1.2889642112908666, + "grad_norm": 1.692443947848028, + "learning_rate": 1.684567543278514e-06, + "loss": 0.07686843872070312, + "step": 149070 + }, + { + "epoch": 1.2890074448124098, + "grad_norm": 0.5296788211407855, + "learning_rate": 1.6843844427194872e-06, + "loss": 0.20347061157226562, + "step": 149075 + }, + { + "epoch": 1.289050678333953, + "grad_norm": 7.558415104278939, + "learning_rate": 1.684201348227963e-06, + "loss": 0.015038681030273438, + "step": 149080 + }, + { + "epoch": 1.2890939118554963, + "grad_norm": 1.1174318156416347, + "learning_rate": 1.6840182598047856e-06, + "loss": 0.008841514587402344, + "step": 149085 + }, + { + "epoch": 1.2891371453770395, + "grad_norm": 0.855047726270715, + "learning_rate": 1.6838351774508001e-06, + "loss": 0.0066890716552734375, + "step": 149090 + }, + { + "epoch": 1.2891803788985827, + "grad_norm": 0.038594687372341875, + "learning_rate": 1.6836521011668499e-06, + "loss": 0.05783653259277344, + "step": 149095 + }, + { + "epoch": 1.289223612420126, + "grad_norm": 19.954822868334894, + "learning_rate": 1.683469030953779e-06, + "loss": 0.07707977294921875, + "step": 149100 + }, + { + "epoch": 1.2892668459416694, + "grad_norm": 0.41217042903135975, + "learning_rate": 1.683285966812434e-06, + "loss": 0.011861038208007813, + "step": 149105 + }, + { + "epoch": 1.2893100794632126, + "grad_norm": 1.5632596748616636, + "learning_rate": 1.6831029087436579e-06, + "loss": 0.04456024169921875, + "step": 149110 + }, + { + "epoch": 1.2893533129847559, + "grad_norm": 0.053087878163901, + "learning_rate": 1.6829198567482941e-06, + "loss": 0.14964370727539061, + "step": 149115 + }, + { + "epoch": 1.289396546506299, + "grad_norm": 4.788879007445199, + "learning_rate": 1.682736810827187e-06, + "loss": 0.03454437255859375, + "step": 149120 + }, + { + "epoch": 1.2894397800278423, + "grad_norm": 0.42493365224735, + "learning_rate": 1.6825537709811822e-06, + "loss": 0.008814048767089844, + "step": 149125 + }, + { + "epoch": 1.2894830135493858, + "grad_norm": 3.234411603099506, + "learning_rate": 1.6823707372111232e-06, + "loss": 0.071624755859375, + "step": 149130 + }, + { + "epoch": 1.289526247070929, + "grad_norm": 2.9506692889026587, + "learning_rate": 1.6821877095178526e-06, + "loss": 0.0615234375, + "step": 149135 + }, + { + "epoch": 1.2895694805924722, + "grad_norm": 0.11147909327815146, + "learning_rate": 1.6820046879022175e-06, + "loss": 0.06747570037841796, + "step": 149140 + }, + { + "epoch": 1.2896127141140155, + "grad_norm": 3.2941351466163917, + "learning_rate": 1.68182167236506e-06, + "loss": 0.0436065673828125, + "step": 149145 + }, + { + "epoch": 1.2896559476355587, + "grad_norm": 15.99249686562134, + "learning_rate": 1.6816386629072241e-06, + "loss": 0.0902984619140625, + "step": 149150 + }, + { + "epoch": 1.289699181157102, + "grad_norm": 1.4013424162797976, + "learning_rate": 1.6814556595295547e-06, + "loss": 0.0062847137451171875, + "step": 149155 + }, + { + "epoch": 1.2897424146786451, + "grad_norm": 0.6581126924386282, + "learning_rate": 1.681272662232895e-06, + "loss": 0.08025588989257812, + "step": 149160 + }, + { + "epoch": 1.2897856482001884, + "grad_norm": 1.969980366602878, + "learning_rate": 1.6810896710180884e-06, + "loss": 0.0852142333984375, + "step": 149165 + }, + { + "epoch": 1.2898288817217318, + "grad_norm": 35.38330809635001, + "learning_rate": 1.6809066858859802e-06, + "loss": 0.06583099365234375, + "step": 149170 + }, + { + "epoch": 1.289872115243275, + "grad_norm": 0.8608043445080301, + "learning_rate": 1.6807237068374133e-06, + "loss": 0.0698638916015625, + "step": 149175 + }, + { + "epoch": 1.2899153487648183, + "grad_norm": 39.39879667465865, + "learning_rate": 1.6805407338732328e-06, + "loss": 0.10054931640625, + "step": 149180 + }, + { + "epoch": 1.2899585822863615, + "grad_norm": 18.82159346544811, + "learning_rate": 1.6803577669942821e-06, + "loss": 0.09533615112304687, + "step": 149185 + }, + { + "epoch": 1.2900018158079047, + "grad_norm": 48.321361553868435, + "learning_rate": 1.6801748062014049e-06, + "loss": 0.09961681365966797, + "step": 149190 + }, + { + "epoch": 1.2900450493294482, + "grad_norm": 0.23399674712400154, + "learning_rate": 1.6799918514954435e-06, + "loss": 0.0237762451171875, + "step": 149195 + }, + { + "epoch": 1.2900882828509914, + "grad_norm": 0.6024014658449621, + "learning_rate": 1.6798089028772444e-06, + "loss": 0.059283447265625, + "step": 149200 + }, + { + "epoch": 1.2901315163725346, + "grad_norm": 1.5898188910277977, + "learning_rate": 1.6796259603476498e-06, + "loss": 0.12205467224121094, + "step": 149205 + }, + { + "epoch": 1.2901747498940779, + "grad_norm": 0.11122125377521548, + "learning_rate": 1.6794430239075035e-06, + "loss": 0.01562652587890625, + "step": 149210 + }, + { + "epoch": 1.290217983415621, + "grad_norm": 1.1771295564098287, + "learning_rate": 1.6792600935576496e-06, + "loss": 0.01090850830078125, + "step": 149215 + }, + { + "epoch": 1.2902612169371643, + "grad_norm": 5.4698830928878746, + "learning_rate": 1.6790771692989313e-06, + "loss": 0.060589599609375, + "step": 149220 + }, + { + "epoch": 1.2903044504587076, + "grad_norm": 1.9139662687673051, + "learning_rate": 1.6788942511321913e-06, + "loss": 0.02379302978515625, + "step": 149225 + }, + { + "epoch": 1.2903476839802508, + "grad_norm": 0.08016846539671159, + "learning_rate": 1.6787113390582753e-06, + "loss": 0.012099266052246094, + "step": 149230 + }, + { + "epoch": 1.2903909175017942, + "grad_norm": 10.410845054324877, + "learning_rate": 1.6785284330780245e-06, + "loss": 0.15208663940429687, + "step": 149235 + }, + { + "epoch": 1.2904341510233375, + "grad_norm": 26.684159404997928, + "learning_rate": 1.678345533192285e-06, + "loss": 0.11149406433105469, + "step": 149240 + }, + { + "epoch": 1.2904773845448807, + "grad_norm": 0.7635510220276105, + "learning_rate": 1.6781626394018995e-06, + "loss": 0.0230743408203125, + "step": 149245 + }, + { + "epoch": 1.290520618066424, + "grad_norm": 0.8733850701477635, + "learning_rate": 1.6779797517077108e-06, + "loss": 0.067877197265625, + "step": 149250 + }, + { + "epoch": 1.2905638515879674, + "grad_norm": 17.777371470436528, + "learning_rate": 1.677796870110563e-06, + "loss": 0.08293342590332031, + "step": 149255 + }, + { + "epoch": 1.2906070851095106, + "grad_norm": 6.385022011802212, + "learning_rate": 1.6776139946112975e-06, + "loss": 0.17850723266601562, + "step": 149260 + }, + { + "epoch": 1.2906503186310538, + "grad_norm": 1.2320071344427646, + "learning_rate": 1.6774311252107612e-06, + "loss": 0.060803794860839845, + "step": 149265 + }, + { + "epoch": 1.290693552152597, + "grad_norm": 0.8301557947755543, + "learning_rate": 1.6772482619097954e-06, + "loss": 0.18771209716796874, + "step": 149270 + }, + { + "epoch": 1.2907367856741403, + "grad_norm": 16.55311817700425, + "learning_rate": 1.6770654047092438e-06, + "loss": 0.11565933227539063, + "step": 149275 + }, + { + "epoch": 1.2907800191956835, + "grad_norm": 0.38837997687928655, + "learning_rate": 1.6768825536099492e-06, + "loss": 0.037091064453125, + "step": 149280 + }, + { + "epoch": 1.2908232527172268, + "grad_norm": 12.867579085039809, + "learning_rate": 1.6766997086127548e-06, + "loss": 0.03516387939453125, + "step": 149285 + }, + { + "epoch": 1.29086648623877, + "grad_norm": 10.185394831766896, + "learning_rate": 1.6765168697185053e-06, + "loss": 0.036258697509765625, + "step": 149290 + }, + { + "epoch": 1.2909097197603134, + "grad_norm": 24.2803220330225, + "learning_rate": 1.676334036928042e-06, + "loss": 0.04893207550048828, + "step": 149295 + }, + { + "epoch": 1.2909529532818567, + "grad_norm": 2.2109394834361003, + "learning_rate": 1.6761512102422101e-06, + "loss": 0.0183868408203125, + "step": 149300 + }, + { + "epoch": 1.2909961868034, + "grad_norm": 0.30591148750252045, + "learning_rate": 1.6759683896618522e-06, + "loss": 0.05784759521484375, + "step": 149305 + }, + { + "epoch": 1.2910394203249431, + "grad_norm": 0.28182518473982443, + "learning_rate": 1.6757855751878107e-06, + "loss": 0.006931877136230469, + "step": 149310 + }, + { + "epoch": 1.2910826538464863, + "grad_norm": 3.128270734480813, + "learning_rate": 1.6756027668209295e-06, + "loss": 0.016889190673828124, + "step": 149315 + }, + { + "epoch": 1.2911258873680298, + "grad_norm": 0.4593874165737644, + "learning_rate": 1.675419964562051e-06, + "loss": 0.026993942260742188, + "step": 149320 + }, + { + "epoch": 1.291169120889573, + "grad_norm": 28.0367218653634, + "learning_rate": 1.6752371684120176e-06, + "loss": 0.1890422821044922, + "step": 149325 + }, + { + "epoch": 1.2912123544111163, + "grad_norm": 2.0486263112968843, + "learning_rate": 1.6750543783716742e-06, + "loss": 0.04325294494628906, + "step": 149330 + }, + { + "epoch": 1.2912555879326595, + "grad_norm": 4.845027101899332, + "learning_rate": 1.6748715944418633e-06, + "loss": 0.060767173767089844, + "step": 149335 + }, + { + "epoch": 1.2912988214542027, + "grad_norm": 6.538884576247855, + "learning_rate": 1.6746888166234274e-06, + "loss": 0.04884796142578125, + "step": 149340 + }, + { + "epoch": 1.291342054975746, + "grad_norm": 3.9392256365718668, + "learning_rate": 1.6745060449172083e-06, + "loss": 0.061846923828125, + "step": 149345 + }, + { + "epoch": 1.2913852884972892, + "grad_norm": 0.6844118736235244, + "learning_rate": 1.6743232793240516e-06, + "loss": 0.0100738525390625, + "step": 149350 + }, + { + "epoch": 1.2914285220188324, + "grad_norm": 0.7195339202765625, + "learning_rate": 1.6741405198447977e-06, + "loss": 0.014114761352539062, + "step": 149355 + }, + { + "epoch": 1.2914717555403759, + "grad_norm": 0.6585891606985621, + "learning_rate": 1.6739577664802915e-06, + "loss": 0.03842487335205078, + "step": 149360 + }, + { + "epoch": 1.291514989061919, + "grad_norm": 22.463619335388373, + "learning_rate": 1.6737750192313745e-06, + "loss": 0.09408111572265625, + "step": 149365 + }, + { + "epoch": 1.2915582225834623, + "grad_norm": 0.9290727248642365, + "learning_rate": 1.6735922780988904e-06, + "loss": 0.07076454162597656, + "step": 149370 + }, + { + "epoch": 1.2916014561050055, + "grad_norm": 2.741815665030061, + "learning_rate": 1.6734095430836815e-06, + "loss": 0.02816009521484375, + "step": 149375 + }, + { + "epoch": 1.2916446896265488, + "grad_norm": 4.608038277658052, + "learning_rate": 1.6732268141865908e-06, + "loss": 0.1220306396484375, + "step": 149380 + }, + { + "epoch": 1.2916879231480922, + "grad_norm": 5.969143852509049, + "learning_rate": 1.6730440914084603e-06, + "loss": 0.03231620788574219, + "step": 149385 + }, + { + "epoch": 1.2917311566696354, + "grad_norm": 15.064195129961126, + "learning_rate": 1.6728613747501326e-06, + "loss": 0.071929931640625, + "step": 149390 + }, + { + "epoch": 1.2917743901911787, + "grad_norm": 0.24847042777567638, + "learning_rate": 1.6726786642124517e-06, + "loss": 0.057154273986816405, + "step": 149395 + }, + { + "epoch": 1.291817623712722, + "grad_norm": 0.9143127459346339, + "learning_rate": 1.6724959597962581e-06, + "loss": 0.00301971435546875, + "step": 149400 + }, + { + "epoch": 1.2918608572342651, + "grad_norm": 2.842613516941529, + "learning_rate": 1.6723132615023973e-06, + "loss": 0.10382843017578125, + "step": 149405 + }, + { + "epoch": 1.2919040907558084, + "grad_norm": 1.5101187646948728, + "learning_rate": 1.6721305693317105e-06, + "loss": 0.03887519836425781, + "step": 149410 + }, + { + "epoch": 1.2919473242773516, + "grad_norm": 17.102399974658088, + "learning_rate": 1.6719478832850401e-06, + "loss": 0.14943923950195312, + "step": 149415 + }, + { + "epoch": 1.2919905577988948, + "grad_norm": 6.286324457589687, + "learning_rate": 1.6717652033632277e-06, + "loss": 0.05145530700683594, + "step": 149420 + }, + { + "epoch": 1.2920337913204383, + "grad_norm": 4.122412558673932, + "learning_rate": 1.671582529567118e-06, + "loss": 0.03003387451171875, + "step": 149425 + }, + { + "epoch": 1.2920770248419815, + "grad_norm": 1.4540530911011402, + "learning_rate": 1.671399861897552e-06, + "loss": 0.148388671875, + "step": 149430 + }, + { + "epoch": 1.2921202583635247, + "grad_norm": 19.149148365806507, + "learning_rate": 1.6712172003553726e-06, + "loss": 0.065032958984375, + "step": 149435 + }, + { + "epoch": 1.292163491885068, + "grad_norm": 6.2559072021696345, + "learning_rate": 1.671034544941422e-06, + "loss": 0.0178375244140625, + "step": 149440 + }, + { + "epoch": 1.2922067254066112, + "grad_norm": 0.14933746155559868, + "learning_rate": 1.670851895656543e-06, + "loss": 0.10161895751953125, + "step": 149445 + }, + { + "epoch": 1.2922499589281546, + "grad_norm": 0.9338864967055841, + "learning_rate": 1.6706692525015759e-06, + "loss": 0.020421218872070313, + "step": 149450 + }, + { + "epoch": 1.2922931924496979, + "grad_norm": 1.4235345739044107, + "learning_rate": 1.6704866154773664e-06, + "loss": 0.0367645263671875, + "step": 149455 + }, + { + "epoch": 1.292336425971241, + "grad_norm": 6.790570307306973, + "learning_rate": 1.6703039845847534e-06, + "loss": 0.0633392333984375, + "step": 149460 + }, + { + "epoch": 1.2923796594927843, + "grad_norm": 29.3031803825965, + "learning_rate": 1.6701213598245827e-06, + "loss": 0.12097053527832032, + "step": 149465 + }, + { + "epoch": 1.2924228930143276, + "grad_norm": 0.6678145970497474, + "learning_rate": 1.6699387411976944e-06, + "loss": 0.09593658447265625, + "step": 149470 + }, + { + "epoch": 1.2924661265358708, + "grad_norm": 17.309337610395165, + "learning_rate": 1.6697561287049313e-06, + "loss": 0.10460052490234376, + "step": 149475 + }, + { + "epoch": 1.292509360057414, + "grad_norm": 0.5039500611069561, + "learning_rate": 1.6695735223471354e-06, + "loss": 0.008837890625, + "step": 149480 + }, + { + "epoch": 1.2925525935789572, + "grad_norm": 5.319327782668535, + "learning_rate": 1.6693909221251476e-06, + "loss": 0.026689910888671876, + "step": 149485 + }, + { + "epoch": 1.2925958271005007, + "grad_norm": 29.754433106074075, + "learning_rate": 1.6692083280398124e-06, + "loss": 0.10396385192871094, + "step": 149490 + }, + { + "epoch": 1.292639060622044, + "grad_norm": 1.4170647355496728, + "learning_rate": 1.6690257400919709e-06, + "loss": 0.07183837890625, + "step": 149495 + }, + { + "epoch": 1.2926822941435872, + "grad_norm": 1.3903956007866694, + "learning_rate": 1.6688431582824652e-06, + "loss": 0.023160552978515624, + "step": 149500 + }, + { + "epoch": 1.2927255276651304, + "grad_norm": 0.2141549275442664, + "learning_rate": 1.6686605826121371e-06, + "loss": 0.012697219848632812, + "step": 149505 + }, + { + "epoch": 1.2927687611866738, + "grad_norm": 11.979899691635927, + "learning_rate": 1.6684780130818274e-06, + "loss": 0.03381423950195313, + "step": 149510 + }, + { + "epoch": 1.292811994708217, + "grad_norm": 3.270428941201091, + "learning_rate": 1.6682954496923807e-06, + "loss": 0.05126800537109375, + "step": 149515 + }, + { + "epoch": 1.2928552282297603, + "grad_norm": 1.5532739629755987, + "learning_rate": 1.6681128924446367e-06, + "loss": 0.3187896728515625, + "step": 149520 + }, + { + "epoch": 1.2928984617513035, + "grad_norm": 0.2082230729258804, + "learning_rate": 1.6679303413394398e-06, + "loss": 0.0103729248046875, + "step": 149525 + }, + { + "epoch": 1.2929416952728467, + "grad_norm": 5.996743093930385, + "learning_rate": 1.66774779637763e-06, + "loss": 0.13956680297851562, + "step": 149530 + }, + { + "epoch": 1.29298492879439, + "grad_norm": 1.2242829024324875, + "learning_rate": 1.6675652575600497e-06, + "loss": 0.043338775634765625, + "step": 149535 + }, + { + "epoch": 1.2930281623159332, + "grad_norm": 4.707559437480224, + "learning_rate": 1.6673827248875407e-06, + "loss": 0.0356597900390625, + "step": 149540 + }, + { + "epoch": 1.2930713958374764, + "grad_norm": 1.8339654009939435, + "learning_rate": 1.6672001983609436e-06, + "loss": 0.018341827392578124, + "step": 149545 + }, + { + "epoch": 1.2931146293590199, + "grad_norm": 7.1192142660293225, + "learning_rate": 1.6670176779811025e-06, + "loss": 0.07557830810546876, + "step": 149550 + }, + { + "epoch": 1.2931578628805631, + "grad_norm": 1.6930576901350018, + "learning_rate": 1.6668351637488583e-06, + "loss": 0.029686737060546874, + "step": 149555 + }, + { + "epoch": 1.2932010964021063, + "grad_norm": 0.39562304565792755, + "learning_rate": 1.666652655665053e-06, + "loss": 0.2356414794921875, + "step": 149560 + }, + { + "epoch": 1.2932443299236496, + "grad_norm": 135.36309263162107, + "learning_rate": 1.666470153730527e-06, + "loss": 0.103076171875, + "step": 149565 + }, + { + "epoch": 1.2932875634451928, + "grad_norm": 21.007640176802106, + "learning_rate": 1.666287657946122e-06, + "loss": 0.110009765625, + "step": 149570 + }, + { + "epoch": 1.2933307969667363, + "grad_norm": 0.15748971136998988, + "learning_rate": 1.6661051683126815e-06, + "loss": 0.06694869995117188, + "step": 149575 + }, + { + "epoch": 1.2933740304882795, + "grad_norm": 3.52044862929435, + "learning_rate": 1.6659226848310453e-06, + "loss": 0.0923370361328125, + "step": 149580 + }, + { + "epoch": 1.2934172640098227, + "grad_norm": 2.466386097803964, + "learning_rate": 1.665740207502057e-06, + "loss": 0.011409759521484375, + "step": 149585 + }, + { + "epoch": 1.293460497531366, + "grad_norm": 7.393113483673708, + "learning_rate": 1.6655577363265566e-06, + "loss": 0.051369857788085935, + "step": 149590 + }, + { + "epoch": 1.2935037310529092, + "grad_norm": 0.37551179034678367, + "learning_rate": 1.6653752713053858e-06, + "loss": 0.08234491348266601, + "step": 149595 + }, + { + "epoch": 1.2935469645744524, + "grad_norm": 0.5097879679399167, + "learning_rate": 1.6651928124393868e-06, + "loss": 0.083392333984375, + "step": 149600 + }, + { + "epoch": 1.2935901980959956, + "grad_norm": 0.8186493804434605, + "learning_rate": 1.6650103597294001e-06, + "loss": 0.17596397399902344, + "step": 149605 + }, + { + "epoch": 1.2936334316175389, + "grad_norm": 8.490237156419271, + "learning_rate": 1.664827913176267e-06, + "loss": 0.030388259887695314, + "step": 149610 + }, + { + "epoch": 1.2936766651390823, + "grad_norm": 0.6171680173463325, + "learning_rate": 1.6646454727808307e-06, + "loss": 0.4086578369140625, + "step": 149615 + }, + { + "epoch": 1.2937198986606255, + "grad_norm": 19.390406094084828, + "learning_rate": 1.6644630385439314e-06, + "loss": 0.2940803527832031, + "step": 149620 + }, + { + "epoch": 1.2937631321821688, + "grad_norm": 5.002233669146096, + "learning_rate": 1.6642806104664092e-06, + "loss": 0.06542129516601562, + "step": 149625 + }, + { + "epoch": 1.293806365703712, + "grad_norm": 9.50403607392394, + "learning_rate": 1.664098188549108e-06, + "loss": 0.0515869140625, + "step": 149630 + }, + { + "epoch": 1.2938495992252552, + "grad_norm": 0.1949351204427896, + "learning_rate": 1.6639157727928685e-06, + "loss": 0.027924346923828124, + "step": 149635 + }, + { + "epoch": 1.2938928327467987, + "grad_norm": 0.40210769646284017, + "learning_rate": 1.6637333631985306e-06, + "loss": 0.07112884521484375, + "step": 149640 + }, + { + "epoch": 1.293936066268342, + "grad_norm": 2.6911865003493785, + "learning_rate": 1.6635509597669355e-06, + "loss": 0.10028152465820313, + "step": 149645 + }, + { + "epoch": 1.2939792997898851, + "grad_norm": 0.3318156633396331, + "learning_rate": 1.6633685624989263e-06, + "loss": 0.042462158203125, + "step": 149650 + }, + { + "epoch": 1.2940225333114284, + "grad_norm": 6.66258504743319, + "learning_rate": 1.6631861713953432e-06, + "loss": 0.09506988525390625, + "step": 149655 + }, + { + "epoch": 1.2940657668329716, + "grad_norm": 8.094680127220789, + "learning_rate": 1.663003786457028e-06, + "loss": 0.16060028076171876, + "step": 149660 + }, + { + "epoch": 1.2941090003545148, + "grad_norm": 0.7136104029655191, + "learning_rate": 1.6628214076848207e-06, + "loss": 0.18356781005859374, + "step": 149665 + }, + { + "epoch": 1.294152233876058, + "grad_norm": 15.42956695872345, + "learning_rate": 1.6626390350795629e-06, + "loss": 0.1582042694091797, + "step": 149670 + }, + { + "epoch": 1.2941954673976013, + "grad_norm": 6.287224150019356, + "learning_rate": 1.6624566686420943e-06, + "loss": 0.20195770263671875, + "step": 149675 + }, + { + "epoch": 1.2942387009191447, + "grad_norm": 7.845594966351628, + "learning_rate": 1.6622743083732588e-06, + "loss": 0.02754058837890625, + "step": 149680 + }, + { + "epoch": 1.294281934440688, + "grad_norm": 0.05602358130402427, + "learning_rate": 1.6620919542738949e-06, + "loss": 0.10691204071044921, + "step": 149685 + }, + { + "epoch": 1.2943251679622312, + "grad_norm": 6.0638293538005925, + "learning_rate": 1.6619096063448458e-06, + "loss": 0.0880218505859375, + "step": 149690 + }, + { + "epoch": 1.2943684014837744, + "grad_norm": 8.302631415818736, + "learning_rate": 1.661727264586951e-06, + "loss": 0.018494415283203124, + "step": 149695 + }, + { + "epoch": 1.2944116350053176, + "grad_norm": 0.39918253980107843, + "learning_rate": 1.6615449290010526e-06, + "loss": 0.18299179077148436, + "step": 149700 + }, + { + "epoch": 1.294454868526861, + "grad_norm": 1.4187292206902853, + "learning_rate": 1.661362599587989e-06, + "loss": 0.019884490966796876, + "step": 149705 + }, + { + "epoch": 1.2944981020484043, + "grad_norm": 1.989282489994867, + "learning_rate": 1.6611802763486035e-06, + "loss": 0.06889495849609376, + "step": 149710 + }, + { + "epoch": 1.2945413355699475, + "grad_norm": 1.077851972288405, + "learning_rate": 1.660997959283737e-06, + "loss": 0.04152679443359375, + "step": 149715 + }, + { + "epoch": 1.2945845690914908, + "grad_norm": 0.43624308362332354, + "learning_rate": 1.6608156483942293e-06, + "loss": 0.010735416412353515, + "step": 149720 + }, + { + "epoch": 1.294627802613034, + "grad_norm": 3.9326697181615753, + "learning_rate": 1.6606333436809217e-06, + "loss": 0.32304840087890624, + "step": 149725 + }, + { + "epoch": 1.2946710361345772, + "grad_norm": 0.47032433265991647, + "learning_rate": 1.6604510451446548e-06, + "loss": 0.03543434143066406, + "step": 149730 + }, + { + "epoch": 1.2947142696561205, + "grad_norm": 0.5970054683861883, + "learning_rate": 1.6602687527862678e-06, + "loss": 0.025723648071289063, + "step": 149735 + }, + { + "epoch": 1.294757503177664, + "grad_norm": 1.6640038219210656, + "learning_rate": 1.6600864666066044e-06, + "loss": 0.14097862243652343, + "step": 149740 + }, + { + "epoch": 1.2948007366992071, + "grad_norm": 6.358509909178701, + "learning_rate": 1.6599041866065026e-06, + "loss": 0.10410003662109375, + "step": 149745 + }, + { + "epoch": 1.2948439702207504, + "grad_norm": 5.528549942926756, + "learning_rate": 1.6597219127868056e-06, + "loss": 0.10851287841796875, + "step": 149750 + }, + { + "epoch": 1.2948872037422936, + "grad_norm": 28.65550774423757, + "learning_rate": 1.659539645148352e-06, + "loss": 0.08480339050292969, + "step": 149755 + }, + { + "epoch": 1.2949304372638368, + "grad_norm": 3.8755347167920373, + "learning_rate": 1.659357383691984e-06, + "loss": 0.045415496826171874, + "step": 149760 + }, + { + "epoch": 1.2949736707853803, + "grad_norm": 8.627149013193119, + "learning_rate": 1.6591751284185407e-06, + "loss": 0.04130859375, + "step": 149765 + }, + { + "epoch": 1.2950169043069235, + "grad_norm": 0.721067360497266, + "learning_rate": 1.6589928793288625e-06, + "loss": 0.009613037109375, + "step": 149770 + }, + { + "epoch": 1.2950601378284667, + "grad_norm": 2.3574664360823827, + "learning_rate": 1.6588106364237917e-06, + "loss": 0.04425334930419922, + "step": 149775 + }, + { + "epoch": 1.29510337135001, + "grad_norm": 0.4167743425978865, + "learning_rate": 1.6586283997041676e-06, + "loss": 0.07025489807128907, + "step": 149780 + }, + { + "epoch": 1.2951466048715532, + "grad_norm": 0.20282835866698698, + "learning_rate": 1.6584461691708304e-06, + "loss": 0.013607025146484375, + "step": 149785 + }, + { + "epoch": 1.2951898383930964, + "grad_norm": 4.169976241677334, + "learning_rate": 1.6582639448246212e-06, + "loss": 0.07161636352539062, + "step": 149790 + }, + { + "epoch": 1.2952330719146397, + "grad_norm": 6.929187622505194, + "learning_rate": 1.6580817266663793e-06, + "loss": 0.05369415283203125, + "step": 149795 + }, + { + "epoch": 1.2952763054361829, + "grad_norm": 6.207952077685779, + "learning_rate": 1.6578995146969455e-06, + "loss": 0.1748495101928711, + "step": 149800 + }, + { + "epoch": 1.2953195389577263, + "grad_norm": 0.6030617285694981, + "learning_rate": 1.6577173089171623e-06, + "loss": 0.04110221862792969, + "step": 149805 + }, + { + "epoch": 1.2953627724792696, + "grad_norm": 24.549867796053057, + "learning_rate": 1.6575351093278676e-06, + "loss": 0.150341796875, + "step": 149810 + }, + { + "epoch": 1.2954060060008128, + "grad_norm": 11.740376571427978, + "learning_rate": 1.6573529159299028e-06, + "loss": 0.15035476684570312, + "step": 149815 + }, + { + "epoch": 1.295449239522356, + "grad_norm": 17.541965652263368, + "learning_rate": 1.6571707287241076e-06, + "loss": 0.048561477661132814, + "step": 149820 + }, + { + "epoch": 1.2954924730438993, + "grad_norm": 3.3309700180757016, + "learning_rate": 1.6569885477113224e-06, + "loss": 0.05113983154296875, + "step": 149825 + }, + { + "epoch": 1.2955357065654427, + "grad_norm": 0.5203280579048674, + "learning_rate": 1.6568063728923873e-06, + "loss": 0.025104522705078125, + "step": 149830 + }, + { + "epoch": 1.295578940086986, + "grad_norm": 0.4247284599119918, + "learning_rate": 1.6566242042681413e-06, + "loss": 0.031960678100585935, + "step": 149835 + }, + { + "epoch": 1.2956221736085292, + "grad_norm": 0.043738859348133693, + "learning_rate": 1.6564420418394267e-06, + "loss": 0.10932693481445313, + "step": 149840 + }, + { + "epoch": 1.2956654071300724, + "grad_norm": 14.165372137009792, + "learning_rate": 1.6562598856070831e-06, + "loss": 0.15377655029296874, + "step": 149845 + }, + { + "epoch": 1.2957086406516156, + "grad_norm": 1.0085194463059508, + "learning_rate": 1.656077735571949e-06, + "loss": 0.07945480346679687, + "step": 149850 + }, + { + "epoch": 1.2957518741731588, + "grad_norm": 3.831757892219461, + "learning_rate": 1.6558955917348666e-06, + "loss": 0.01967315673828125, + "step": 149855 + }, + { + "epoch": 1.295795107694702, + "grad_norm": 1.166626224315931, + "learning_rate": 1.655713454096675e-06, + "loss": 0.024536895751953124, + "step": 149860 + }, + { + "epoch": 1.2958383412162453, + "grad_norm": 9.248403666612438, + "learning_rate": 1.6555313226582128e-06, + "loss": 0.06712570190429687, + "step": 149865 + }, + { + "epoch": 1.2958815747377888, + "grad_norm": 0.1732967929354796, + "learning_rate": 1.6553491974203226e-06, + "loss": 0.0406524658203125, + "step": 149870 + }, + { + "epoch": 1.295924808259332, + "grad_norm": 0.5038639095664912, + "learning_rate": 1.655167078383843e-06, + "loss": 0.07544212341308594, + "step": 149875 + }, + { + "epoch": 1.2959680417808752, + "grad_norm": 9.137409882943743, + "learning_rate": 1.6549849655496143e-06, + "loss": 0.054032516479492185, + "step": 149880 + }, + { + "epoch": 1.2960112753024184, + "grad_norm": 18.841993274490413, + "learning_rate": 1.6548028589184762e-06, + "loss": 0.20511627197265625, + "step": 149885 + }, + { + "epoch": 1.2960545088239617, + "grad_norm": 1.984636592249191, + "learning_rate": 1.6546207584912682e-06, + "loss": 0.075970458984375, + "step": 149890 + }, + { + "epoch": 1.2960977423455051, + "grad_norm": 7.399615215555474, + "learning_rate": 1.6544386642688293e-06, + "loss": 0.09380645751953125, + "step": 149895 + }, + { + "epoch": 1.2961409758670484, + "grad_norm": 0.21904305361125043, + "learning_rate": 1.6542565762520016e-06, + "loss": 0.051878738403320315, + "step": 149900 + }, + { + "epoch": 1.2961842093885916, + "grad_norm": 1.0876351112748175, + "learning_rate": 1.6540744944416235e-06, + "loss": 0.038383865356445314, + "step": 149905 + }, + { + "epoch": 1.2962274429101348, + "grad_norm": 3.9758988191310918, + "learning_rate": 1.6538924188385338e-06, + "loss": 0.14956531524658204, + "step": 149910 + }, + { + "epoch": 1.296270676431678, + "grad_norm": 1.7265548870272835, + "learning_rate": 1.6537103494435745e-06, + "loss": 0.042086029052734376, + "step": 149915 + }, + { + "epoch": 1.2963139099532213, + "grad_norm": 25.01801734683934, + "learning_rate": 1.653528286257584e-06, + "loss": 0.06314620971679688, + "step": 149920 + }, + { + "epoch": 1.2963571434747645, + "grad_norm": 22.513802782532007, + "learning_rate": 1.6533462292814021e-06, + "loss": 0.04583740234375, + "step": 149925 + }, + { + "epoch": 1.2964003769963077, + "grad_norm": 17.560396959616778, + "learning_rate": 1.6531641785158672e-06, + "loss": 0.23916778564453126, + "step": 149930 + }, + { + "epoch": 1.2964436105178512, + "grad_norm": 0.057736636981306884, + "learning_rate": 1.6529821339618216e-06, + "loss": 0.14104232788085938, + "step": 149935 + }, + { + "epoch": 1.2964868440393944, + "grad_norm": 0.42244143804730977, + "learning_rate": 1.652800095620103e-06, + "loss": 0.029631805419921876, + "step": 149940 + }, + { + "epoch": 1.2965300775609376, + "grad_norm": 5.279266321427542, + "learning_rate": 1.6526180634915515e-06, + "loss": 0.0516693115234375, + "step": 149945 + }, + { + "epoch": 1.2965733110824809, + "grad_norm": 12.737586419918044, + "learning_rate": 1.6524360375770065e-06, + "loss": 0.08656959533691407, + "step": 149950 + }, + { + "epoch": 1.2966165446040243, + "grad_norm": 80.86973510365658, + "learning_rate": 1.6522540178773072e-06, + "loss": 0.12755584716796875, + "step": 149955 + }, + { + "epoch": 1.2966597781255675, + "grad_norm": 1.7729635968361852, + "learning_rate": 1.6520720043932921e-06, + "loss": 0.01256704330444336, + "step": 149960 + }, + { + "epoch": 1.2967030116471108, + "grad_norm": 43.204258897017404, + "learning_rate": 1.651889997125803e-06, + "loss": 0.18450546264648438, + "step": 149965 + }, + { + "epoch": 1.296746245168654, + "grad_norm": 40.256580478833605, + "learning_rate": 1.6517079960756769e-06, + "loss": 0.5141719818115235, + "step": 149970 + }, + { + "epoch": 1.2967894786901972, + "grad_norm": 0.25965925354551317, + "learning_rate": 1.6515260012437561e-06, + "loss": 0.022234344482421876, + "step": 149975 + }, + { + "epoch": 1.2968327122117405, + "grad_norm": 1.9084054335789944, + "learning_rate": 1.651344012630878e-06, + "loss": 0.3585704803466797, + "step": 149980 + }, + { + "epoch": 1.2968759457332837, + "grad_norm": 0.17815602477248427, + "learning_rate": 1.6511620302378817e-06, + "loss": 0.0290802001953125, + "step": 149985 + }, + { + "epoch": 1.296919179254827, + "grad_norm": 0.8272493241512529, + "learning_rate": 1.6509800540656072e-06, + "loss": 0.05960655212402344, + "step": 149990 + }, + { + "epoch": 1.2969624127763704, + "grad_norm": 0.3220208017106812, + "learning_rate": 1.6507980841148922e-06, + "loss": 0.01963958740234375, + "step": 149995 + }, + { + "epoch": 1.2970056462979136, + "grad_norm": 2.0379819004531337, + "learning_rate": 1.6506161203865784e-06, + "loss": 0.12570037841796874, + "step": 150000 + }, + { + "epoch": 1.2970488798194568, + "grad_norm": 0.9896217604441461, + "learning_rate": 1.6504341628815042e-06, + "loss": 0.05236663818359375, + "step": 150005 + }, + { + "epoch": 1.297092113341, + "grad_norm": 3.4771358685173785, + "learning_rate": 1.650252211600508e-06, + "loss": 0.10167427062988281, + "step": 150010 + }, + { + "epoch": 1.2971353468625433, + "grad_norm": 12.361564857130553, + "learning_rate": 1.6500702665444296e-06, + "loss": 0.14815216064453124, + "step": 150015 + }, + { + "epoch": 1.2971785803840867, + "grad_norm": 1.4515652046045744, + "learning_rate": 1.6498883277141064e-06, + "loss": 0.08560295104980468, + "step": 150020 + }, + { + "epoch": 1.29722181390563, + "grad_norm": 1.716305089130653, + "learning_rate": 1.649706395110379e-06, + "loss": 0.03388137817382812, + "step": 150025 + }, + { + "epoch": 1.2972650474271732, + "grad_norm": 2.3257573273061793, + "learning_rate": 1.6495244687340877e-06, + "loss": 0.04341468811035156, + "step": 150030 + }, + { + "epoch": 1.2973082809487164, + "grad_norm": 6.875856623734084, + "learning_rate": 1.64934254858607e-06, + "loss": 0.039019012451171876, + "step": 150035 + }, + { + "epoch": 1.2973515144702596, + "grad_norm": 0.8592584855954809, + "learning_rate": 1.6491606346671655e-06, + "loss": 0.011513900756835938, + "step": 150040 + }, + { + "epoch": 1.2973947479918029, + "grad_norm": 1.095705442388164, + "learning_rate": 1.6489787269782124e-06, + "loss": 0.059848785400390625, + "step": 150045 + }, + { + "epoch": 1.297437981513346, + "grad_norm": 4.766745166640615, + "learning_rate": 1.64879682552005e-06, + "loss": 0.04997520446777344, + "step": 150050 + }, + { + "epoch": 1.2974812150348893, + "grad_norm": 0.34826033015610397, + "learning_rate": 1.6486149302935165e-06, + "loss": 0.15228118896484374, + "step": 150055 + }, + { + "epoch": 1.2975244485564328, + "grad_norm": 24.673305357375625, + "learning_rate": 1.6484330412994525e-06, + "loss": 0.10914554595947265, + "step": 150060 + }, + { + "epoch": 1.297567682077976, + "grad_norm": 34.123058091843966, + "learning_rate": 1.6482511585386956e-06, + "loss": 0.26895599365234374, + "step": 150065 + }, + { + "epoch": 1.2976109155995192, + "grad_norm": 11.835688406963365, + "learning_rate": 1.6480692820120854e-06, + "loss": 0.11312484741210938, + "step": 150070 + }, + { + "epoch": 1.2976541491210625, + "grad_norm": 2.499270214900433, + "learning_rate": 1.647887411720459e-06, + "loss": 0.029417800903320312, + "step": 150075 + }, + { + "epoch": 1.2976973826426057, + "grad_norm": 0.1793613669575099, + "learning_rate": 1.6477055476646575e-06, + "loss": 0.060467529296875, + "step": 150080 + }, + { + "epoch": 1.2977406161641492, + "grad_norm": 24.06028057417178, + "learning_rate": 1.647523689845518e-06, + "loss": 0.13733901977539062, + "step": 150085 + }, + { + "epoch": 1.2977838496856924, + "grad_norm": 2.0259920367619144, + "learning_rate": 1.6473418382638792e-06, + "loss": 0.076129150390625, + "step": 150090 + }, + { + "epoch": 1.2978270832072356, + "grad_norm": 0.5666037759117215, + "learning_rate": 1.6471599929205813e-06, + "loss": 0.047998809814453126, + "step": 150095 + }, + { + "epoch": 1.2978703167287788, + "grad_norm": 14.049213484658676, + "learning_rate": 1.6469781538164617e-06, + "loss": 0.06700172424316406, + "step": 150100 + }, + { + "epoch": 1.297913550250322, + "grad_norm": 75.19492685502577, + "learning_rate": 1.6467963209523594e-06, + "loss": 0.1078329086303711, + "step": 150105 + }, + { + "epoch": 1.2979567837718653, + "grad_norm": 16.194706833257328, + "learning_rate": 1.646614494329113e-06, + "loss": 0.05261306762695313, + "step": 150110 + }, + { + "epoch": 1.2980000172934085, + "grad_norm": 11.446803012488695, + "learning_rate": 1.6464326739475612e-06, + "loss": 0.023331451416015624, + "step": 150115 + }, + { + "epoch": 1.2980432508149518, + "grad_norm": 0.14138607154384167, + "learning_rate": 1.646250859808541e-06, + "loss": 0.060909080505371097, + "step": 150120 + }, + { + "epoch": 1.2980864843364952, + "grad_norm": 0.25642176722373594, + "learning_rate": 1.6460690519128932e-06, + "loss": 0.0860504150390625, + "step": 150125 + }, + { + "epoch": 1.2981297178580384, + "grad_norm": 2.0230214326218605, + "learning_rate": 1.6458872502614555e-06, + "loss": 0.041520309448242185, + "step": 150130 + }, + { + "epoch": 1.2981729513795817, + "grad_norm": 1.0007334885540577, + "learning_rate": 1.645705454855065e-06, + "loss": 0.021724700927734375, + "step": 150135 + }, + { + "epoch": 1.298216184901125, + "grad_norm": 3.7385452419220746, + "learning_rate": 1.645523665694562e-06, + "loss": 0.023792266845703125, + "step": 150140 + }, + { + "epoch": 1.2982594184226681, + "grad_norm": 13.619083491958131, + "learning_rate": 1.6453418827807846e-06, + "loss": 0.10596771240234375, + "step": 150145 + }, + { + "epoch": 1.2983026519442116, + "grad_norm": 2.2623317633720545, + "learning_rate": 1.6451601061145695e-06, + "loss": 0.06705245971679688, + "step": 150150 + }, + { + "epoch": 1.2983458854657548, + "grad_norm": 1.4807328239345683, + "learning_rate": 1.6449783356967574e-06, + "loss": 0.070416259765625, + "step": 150155 + }, + { + "epoch": 1.298389118987298, + "grad_norm": 0.39428949897128623, + "learning_rate": 1.6447965715281857e-06, + "loss": 0.01303558349609375, + "step": 150160 + }, + { + "epoch": 1.2984323525088413, + "grad_norm": 25.65204859819754, + "learning_rate": 1.644614813609692e-06, + "loss": 0.06508827209472656, + "step": 150165 + }, + { + "epoch": 1.2984755860303845, + "grad_norm": 0.3928361618217788, + "learning_rate": 1.6444330619421154e-06, + "loss": 0.11479034423828124, + "step": 150170 + }, + { + "epoch": 1.2985188195519277, + "grad_norm": 1.4927099676637077, + "learning_rate": 1.6442513165262934e-06, + "loss": 0.0539215087890625, + "step": 150175 + }, + { + "epoch": 1.298562053073471, + "grad_norm": 1.281801661385891, + "learning_rate": 1.644069577363065e-06, + "loss": 0.019527435302734375, + "step": 150180 + }, + { + "epoch": 1.2986052865950142, + "grad_norm": 0.322902562727894, + "learning_rate": 1.6438878444532664e-06, + "loss": 0.022736740112304688, + "step": 150185 + }, + { + "epoch": 1.2986485201165576, + "grad_norm": 9.773243924947305, + "learning_rate": 1.6437061177977387e-06, + "loss": 0.08095464706420899, + "step": 150190 + }, + { + "epoch": 1.2986917536381009, + "grad_norm": 13.587713395968114, + "learning_rate": 1.6435243973973166e-06, + "loss": 0.045556640625, + "step": 150195 + }, + { + "epoch": 1.298734987159644, + "grad_norm": 1.3806000674763832, + "learning_rate": 1.643342683252842e-06, + "loss": 0.03976783752441406, + "step": 150200 + }, + { + "epoch": 1.2987782206811873, + "grad_norm": 15.233824283719848, + "learning_rate": 1.643160975365151e-06, + "loss": 0.059820556640625, + "step": 150205 + }, + { + "epoch": 1.2988214542027308, + "grad_norm": 49.50252711722484, + "learning_rate": 1.6429792737350815e-06, + "loss": 0.20825653076171874, + "step": 150210 + }, + { + "epoch": 1.298864687724274, + "grad_norm": 1.7557949987454518, + "learning_rate": 1.6427975783634706e-06, + "loss": 0.05567474365234375, + "step": 150215 + }, + { + "epoch": 1.2989079212458172, + "grad_norm": 1.5051414025203487, + "learning_rate": 1.6426158892511585e-06, + "loss": 0.02553138732910156, + "step": 150220 + }, + { + "epoch": 1.2989511547673604, + "grad_norm": 6.388556105138272, + "learning_rate": 1.642434206398982e-06, + "loss": 0.0664215087890625, + "step": 150225 + }, + { + "epoch": 1.2989943882889037, + "grad_norm": 0.3948553955694232, + "learning_rate": 1.6422525298077793e-06, + "loss": 0.15475082397460938, + "step": 150230 + }, + { + "epoch": 1.299037621810447, + "grad_norm": 0.7681518325839704, + "learning_rate": 1.6420708594783872e-06, + "loss": 0.03897533416748047, + "step": 150235 + }, + { + "epoch": 1.2990808553319901, + "grad_norm": 7.4467287935416016, + "learning_rate": 1.641889195411645e-06, + "loss": 0.059917449951171875, + "step": 150240 + }, + { + "epoch": 1.2991240888535334, + "grad_norm": 2.8712324483553497, + "learning_rate": 1.6417075376083886e-06, + "loss": 0.012664794921875, + "step": 150245 + }, + { + "epoch": 1.2991673223750768, + "grad_norm": 0.2890670667771702, + "learning_rate": 1.6415258860694565e-06, + "loss": 0.3130035400390625, + "step": 150250 + }, + { + "epoch": 1.29921055589662, + "grad_norm": 0.07292695389933947, + "learning_rate": 1.6413442407956886e-06, + "loss": 0.05919036865234375, + "step": 150255 + }, + { + "epoch": 1.2992537894181633, + "grad_norm": 7.293494043475628, + "learning_rate": 1.6411626017879207e-06, + "loss": 0.02710895538330078, + "step": 150260 + }, + { + "epoch": 1.2992970229397065, + "grad_norm": 0.11483875942087612, + "learning_rate": 1.640980969046991e-06, + "loss": 0.0720733642578125, + "step": 150265 + }, + { + "epoch": 1.2993402564612497, + "grad_norm": 20.07438094200877, + "learning_rate": 1.640799342573737e-06, + "loss": 0.06905364990234375, + "step": 150270 + }, + { + "epoch": 1.2993834899827932, + "grad_norm": 2.7052646678541548, + "learning_rate": 1.6406177223689963e-06, + "loss": 0.19509925842285156, + "step": 150275 + }, + { + "epoch": 1.2994267235043364, + "grad_norm": 1.5538591173616891, + "learning_rate": 1.6404361084336054e-06, + "loss": 0.188287353515625, + "step": 150280 + }, + { + "epoch": 1.2994699570258796, + "grad_norm": 3.3445659321611245, + "learning_rate": 1.6402545007684043e-06, + "loss": 0.17649307250976562, + "step": 150285 + }, + { + "epoch": 1.2995131905474229, + "grad_norm": 1.1566990175625267, + "learning_rate": 1.6400728993742291e-06, + "loss": 0.019870758056640625, + "step": 150290 + }, + { + "epoch": 1.299556424068966, + "grad_norm": 0.6248681999110385, + "learning_rate": 1.6398913042519177e-06, + "loss": 0.025542831420898436, + "step": 150295 + }, + { + "epoch": 1.2995996575905093, + "grad_norm": 4.005560906055271, + "learning_rate": 1.639709715402306e-06, + "loss": 0.09077911376953125, + "step": 150300 + }, + { + "epoch": 1.2996428911120526, + "grad_norm": 1.1848659155271564, + "learning_rate": 1.6395281328262342e-06, + "loss": 0.03229866027832031, + "step": 150305 + }, + { + "epoch": 1.2996861246335958, + "grad_norm": 53.66127685626285, + "learning_rate": 1.6393465565245377e-06, + "loss": 0.1707395553588867, + "step": 150310 + }, + { + "epoch": 1.2997293581551392, + "grad_norm": 0.12068521555930363, + "learning_rate": 1.639164986498055e-06, + "loss": 0.0026889801025390624, + "step": 150315 + }, + { + "epoch": 1.2997725916766825, + "grad_norm": 7.259828179893134, + "learning_rate": 1.6389834227476236e-06, + "loss": 0.014138412475585938, + "step": 150320 + }, + { + "epoch": 1.2998158251982257, + "grad_norm": 23.173326245156304, + "learning_rate": 1.6388018652740806e-06, + "loss": 0.1111480712890625, + "step": 150325 + }, + { + "epoch": 1.299859058719769, + "grad_norm": 12.531780907573213, + "learning_rate": 1.6386203140782625e-06, + "loss": 0.22547683715820313, + "step": 150330 + }, + { + "epoch": 1.2999022922413122, + "grad_norm": 3.344726613104997, + "learning_rate": 1.6384387691610076e-06, + "loss": 0.029131317138671876, + "step": 150335 + }, + { + "epoch": 1.2999455257628556, + "grad_norm": 0.297449140462355, + "learning_rate": 1.6382572305231527e-06, + "loss": 0.024010467529296874, + "step": 150340 + }, + { + "epoch": 1.2999887592843988, + "grad_norm": 0.20920643649193266, + "learning_rate": 1.6380756981655341e-06, + "loss": 0.029120254516601562, + "step": 150345 + }, + { + "epoch": 1.300031992805942, + "grad_norm": 1.4182827029121992, + "learning_rate": 1.637894172088991e-06, + "loss": 0.06283798217773437, + "step": 150350 + }, + { + "epoch": 1.3000752263274853, + "grad_norm": 7.688622539910766, + "learning_rate": 1.63771265229436e-06, + "loss": 0.039075469970703124, + "step": 150355 + }, + { + "epoch": 1.3001184598490285, + "grad_norm": 1.5878120897314678, + "learning_rate": 1.6375311387824764e-06, + "loss": 0.03761100769042969, + "step": 150360 + }, + { + "epoch": 1.3001616933705717, + "grad_norm": 2.792528004216878, + "learning_rate": 1.6373496315541801e-06, + "loss": 0.06974372863769532, + "step": 150365 + }, + { + "epoch": 1.300204926892115, + "grad_norm": 0.3973245517004023, + "learning_rate": 1.637168130610307e-06, + "loss": 0.12474136352539063, + "step": 150370 + }, + { + "epoch": 1.3002481604136582, + "grad_norm": 0.15688263466783778, + "learning_rate": 1.6369866359516922e-06, + "loss": 0.126312255859375, + "step": 150375 + }, + { + "epoch": 1.3002913939352017, + "grad_norm": 0.647340570849146, + "learning_rate": 1.6368051475791766e-06, + "loss": 0.12424087524414062, + "step": 150380 + }, + { + "epoch": 1.3003346274567449, + "grad_norm": 0.0963489797372578, + "learning_rate": 1.636623665493595e-06, + "loss": 0.04253578186035156, + "step": 150385 + }, + { + "epoch": 1.3003778609782881, + "grad_norm": 1.8893147176834164, + "learning_rate": 1.6364421896957844e-06, + "loss": 0.02974700927734375, + "step": 150390 + }, + { + "epoch": 1.3004210944998313, + "grad_norm": 7.545325614432428, + "learning_rate": 1.6362607201865818e-06, + "loss": 0.06228179931640625, + "step": 150395 + }, + { + "epoch": 1.3004643280213746, + "grad_norm": 0.8342748956063645, + "learning_rate": 1.6360792569668246e-06, + "loss": 0.05385894775390625, + "step": 150400 + }, + { + "epoch": 1.300507561542918, + "grad_norm": 0.05939799369618199, + "learning_rate": 1.635897800037348e-06, + "loss": 0.048620986938476565, + "step": 150405 + }, + { + "epoch": 1.3005507950644613, + "grad_norm": 1.7250950494716308, + "learning_rate": 1.6357163493989917e-06, + "loss": 0.056201171875, + "step": 150410 + }, + { + "epoch": 1.3005940285860045, + "grad_norm": 10.528871163296541, + "learning_rate": 1.6355349050525907e-06, + "loss": 0.06981048583984376, + "step": 150415 + }, + { + "epoch": 1.3006372621075477, + "grad_norm": 31.415601319121876, + "learning_rate": 1.6353534669989807e-06, + "loss": 0.16994056701660157, + "step": 150420 + }, + { + "epoch": 1.300680495629091, + "grad_norm": 10.232846018878071, + "learning_rate": 1.6351720352390017e-06, + "loss": 0.040390396118164064, + "step": 150425 + }, + { + "epoch": 1.3007237291506342, + "grad_norm": 1.3378430534708439, + "learning_rate": 1.6349906097734882e-06, + "loss": 0.09294967651367188, + "step": 150430 + }, + { + "epoch": 1.3007669626721774, + "grad_norm": 15.32429999542437, + "learning_rate": 1.6348091906032777e-06, + "loss": 0.034853363037109376, + "step": 150435 + }, + { + "epoch": 1.3008101961937206, + "grad_norm": 1.3659954349156482, + "learning_rate": 1.6346277777292054e-06, + "loss": 0.04503021240234375, + "step": 150440 + }, + { + "epoch": 1.300853429715264, + "grad_norm": 1.2239138025031369, + "learning_rate": 1.6344463711521101e-06, + "loss": 0.017079544067382813, + "step": 150445 + }, + { + "epoch": 1.3008966632368073, + "grad_norm": 17.70894590986584, + "learning_rate": 1.6342649708728276e-06, + "loss": 0.09713954925537109, + "step": 150450 + }, + { + "epoch": 1.3009398967583505, + "grad_norm": 35.25389375142566, + "learning_rate": 1.6340835768921945e-06, + "loss": 0.0827357292175293, + "step": 150455 + }, + { + "epoch": 1.3009831302798938, + "grad_norm": 8.426857826040358, + "learning_rate": 1.633902189211047e-06, + "loss": 0.056927490234375, + "step": 150460 + }, + { + "epoch": 1.3010263638014372, + "grad_norm": 3.084029582156655, + "learning_rate": 1.6337208078302222e-06, + "loss": 0.018982505798339842, + "step": 150465 + }, + { + "epoch": 1.3010695973229804, + "grad_norm": 22.6096223504365, + "learning_rate": 1.6335394327505548e-06, + "loss": 0.055276679992675784, + "step": 150470 + }, + { + "epoch": 1.3011128308445237, + "grad_norm": 3.0607390967014423, + "learning_rate": 1.6333580639728831e-06, + "loss": 0.05587615966796875, + "step": 150475 + }, + { + "epoch": 1.301156064366067, + "grad_norm": 0.9522518080961293, + "learning_rate": 1.6331767014980443e-06, + "loss": 0.022728919982910156, + "step": 150480 + }, + { + "epoch": 1.3011992978876101, + "grad_norm": 62.95896265022272, + "learning_rate": 1.6329953453268739e-06, + "loss": 0.41491851806640623, + "step": 150485 + }, + { + "epoch": 1.3012425314091534, + "grad_norm": 2.32921885754602, + "learning_rate": 1.6328139954602081e-06, + "loss": 0.05126876831054687, + "step": 150490 + }, + { + "epoch": 1.3012857649306966, + "grad_norm": 0.9184876351116049, + "learning_rate": 1.632632651898883e-06, + "loss": 0.00803070068359375, + "step": 150495 + }, + { + "epoch": 1.3013289984522398, + "grad_norm": 0.17019019748284386, + "learning_rate": 1.6324513146437343e-06, + "loss": 0.03152179718017578, + "step": 150500 + }, + { + "epoch": 1.3013722319737833, + "grad_norm": 2.413402235956546, + "learning_rate": 1.6322699836956007e-06, + "loss": 0.08706512451171874, + "step": 150505 + }, + { + "epoch": 1.3014154654953265, + "grad_norm": 0.39962414858995504, + "learning_rate": 1.632088659055317e-06, + "loss": 0.039963150024414064, + "step": 150510 + }, + { + "epoch": 1.3014586990168697, + "grad_norm": 5.31525771909046, + "learning_rate": 1.6319073407237197e-06, + "loss": 0.04658203125, + "step": 150515 + }, + { + "epoch": 1.301501932538413, + "grad_norm": 8.032392731343718, + "learning_rate": 1.6317260287016446e-06, + "loss": 0.03959236145019531, + "step": 150520 + }, + { + "epoch": 1.3015451660599562, + "grad_norm": 4.569952838790559, + "learning_rate": 1.631544722989927e-06, + "loss": 0.05558929443359375, + "step": 150525 + }, + { + "epoch": 1.3015883995814996, + "grad_norm": 23.048124438762326, + "learning_rate": 1.6313634235894053e-06, + "loss": 0.17575836181640625, + "step": 150530 + }, + { + "epoch": 1.3016316331030429, + "grad_norm": 11.520262918911836, + "learning_rate": 1.6311821305009138e-06, + "loss": 0.15151481628417968, + "step": 150535 + }, + { + "epoch": 1.301674866624586, + "grad_norm": 0.05430976739733612, + "learning_rate": 1.6310008437252899e-06, + "loss": 0.18578453063964845, + "step": 150540 + }, + { + "epoch": 1.3017181001461293, + "grad_norm": 8.82030083558362, + "learning_rate": 1.6308195632633695e-06, + "loss": 0.0728515625, + "step": 150545 + }, + { + "epoch": 1.3017613336676725, + "grad_norm": 7.867675622962194, + "learning_rate": 1.6306382891159884e-06, + "loss": 0.06912384033203126, + "step": 150550 + }, + { + "epoch": 1.3018045671892158, + "grad_norm": 0.34913295510071685, + "learning_rate": 1.6304570212839821e-06, + "loss": 0.13809890747070314, + "step": 150555 + }, + { + "epoch": 1.301847800710759, + "grad_norm": 7.410377861357139, + "learning_rate": 1.6302757597681874e-06, + "loss": 0.0457916259765625, + "step": 150560 + }, + { + "epoch": 1.3018910342323022, + "grad_norm": 6.781275616047808, + "learning_rate": 1.6300945045694387e-06, + "loss": 0.016781425476074217, + "step": 150565 + }, + { + "epoch": 1.3019342677538457, + "grad_norm": 0.670460930853652, + "learning_rate": 1.6299132556885742e-06, + "loss": 0.011634063720703126, + "step": 150570 + }, + { + "epoch": 1.301977501275389, + "grad_norm": 0.4817449176035399, + "learning_rate": 1.6297320131264288e-06, + "loss": 0.03291816711425781, + "step": 150575 + }, + { + "epoch": 1.3020207347969321, + "grad_norm": 0.36826592201159686, + "learning_rate": 1.629550776883838e-06, + "loss": 0.03995361328125, + "step": 150580 + }, + { + "epoch": 1.3020639683184754, + "grad_norm": 1.776341806426739, + "learning_rate": 1.6293695469616368e-06, + "loss": 0.09564170837402344, + "step": 150585 + }, + { + "epoch": 1.3021072018400186, + "grad_norm": 5.09803569187428, + "learning_rate": 1.6291883233606633e-06, + "loss": 0.06446914672851563, + "step": 150590 + }, + { + "epoch": 1.302150435361562, + "grad_norm": 1.3675177382448918, + "learning_rate": 1.6290071060817522e-06, + "loss": 0.07957916259765625, + "step": 150595 + }, + { + "epoch": 1.3021936688831053, + "grad_norm": 123.82108937151284, + "learning_rate": 1.6288258951257382e-06, + "loss": 0.07635688781738281, + "step": 150600 + }, + { + "epoch": 1.3022369024046485, + "grad_norm": 0.4249546593610999, + "learning_rate": 1.628644690493459e-06, + "loss": 0.09461212158203125, + "step": 150605 + }, + { + "epoch": 1.3022801359261917, + "grad_norm": 46.99365807978653, + "learning_rate": 1.6284634921857495e-06, + "loss": 0.12617111206054688, + "step": 150610 + }, + { + "epoch": 1.302323369447735, + "grad_norm": 0.6130443553189442, + "learning_rate": 1.6282823002034447e-06, + "loss": 0.0677490234375, + "step": 150615 + }, + { + "epoch": 1.3023666029692782, + "grad_norm": 17.877147846790113, + "learning_rate": 1.628101114547381e-06, + "loss": 0.07926654815673828, + "step": 150620 + }, + { + "epoch": 1.3024098364908214, + "grad_norm": 4.487994169903329, + "learning_rate": 1.627919935218394e-06, + "loss": 0.02909860610961914, + "step": 150625 + }, + { + "epoch": 1.3024530700123647, + "grad_norm": 0.3187613453529165, + "learning_rate": 1.6277387622173176e-06, + "loss": 0.02623882293701172, + "step": 150630 + }, + { + "epoch": 1.302496303533908, + "grad_norm": 0.16301968835968325, + "learning_rate": 1.62755759554499e-06, + "loss": 0.059336090087890626, + "step": 150635 + }, + { + "epoch": 1.3025395370554513, + "grad_norm": 6.284792348866784, + "learning_rate": 1.6273764352022455e-06, + "loss": 0.04613800048828125, + "step": 150640 + }, + { + "epoch": 1.3025827705769946, + "grad_norm": 2.2525780627372787, + "learning_rate": 1.627195281189918e-06, + "loss": 0.011185264587402344, + "step": 150645 + }, + { + "epoch": 1.3026260040985378, + "grad_norm": 1.4983505192388598, + "learning_rate": 1.6270141335088463e-06, + "loss": 0.024909210205078126, + "step": 150650 + }, + { + "epoch": 1.3026692376200812, + "grad_norm": 2.7338825171699934, + "learning_rate": 1.6268329921598642e-06, + "loss": 0.0373992919921875, + "step": 150655 + }, + { + "epoch": 1.3027124711416245, + "grad_norm": 0.7322553606426144, + "learning_rate": 1.6266518571438052e-06, + "loss": 0.066815185546875, + "step": 150660 + }, + { + "epoch": 1.3027557046631677, + "grad_norm": 0.28701769414341805, + "learning_rate": 1.6264707284615084e-06, + "loss": 0.014434432983398438, + "step": 150665 + }, + { + "epoch": 1.302798938184711, + "grad_norm": 8.261336241262201, + "learning_rate": 1.6262896061138065e-06, + "loss": 0.051678466796875, + "step": 150670 + }, + { + "epoch": 1.3028421717062542, + "grad_norm": 0.40606362749620944, + "learning_rate": 1.626108490101536e-06, + "loss": 0.02104949951171875, + "step": 150675 + }, + { + "epoch": 1.3028854052277974, + "grad_norm": 8.581303669491536, + "learning_rate": 1.6259273804255318e-06, + "loss": 0.07374801635742187, + "step": 150680 + }, + { + "epoch": 1.3029286387493406, + "grad_norm": 0.8778849866506074, + "learning_rate": 1.625746277086629e-06, + "loss": 0.2746837615966797, + "step": 150685 + }, + { + "epoch": 1.3029718722708838, + "grad_norm": 3.2072879378576618, + "learning_rate": 1.6255651800856626e-06, + "loss": 0.152130126953125, + "step": 150690 + }, + { + "epoch": 1.3030151057924273, + "grad_norm": 1.7284216796922325, + "learning_rate": 1.6253840894234673e-06, + "loss": 0.02232227325439453, + "step": 150695 + }, + { + "epoch": 1.3030583393139705, + "grad_norm": 24.158241935836603, + "learning_rate": 1.625203005100879e-06, + "loss": 0.1239349365234375, + "step": 150700 + }, + { + "epoch": 1.3031015728355138, + "grad_norm": 0.17965444074413878, + "learning_rate": 1.6250219271187343e-06, + "loss": 0.07420234680175782, + "step": 150705 + }, + { + "epoch": 1.303144806357057, + "grad_norm": 9.565513456995731, + "learning_rate": 1.6248408554778667e-06, + "loss": 0.026857757568359376, + "step": 150710 + }, + { + "epoch": 1.3031880398786002, + "grad_norm": 1.3141267328955697, + "learning_rate": 1.624659790179112e-06, + "loss": 0.0090057373046875, + "step": 150715 + }, + { + "epoch": 1.3032312734001437, + "grad_norm": 10.8183655859375, + "learning_rate": 1.6244787312233045e-06, + "loss": 0.15337181091308594, + "step": 150720 + }, + { + "epoch": 1.303274506921687, + "grad_norm": 3.5603812055917934, + "learning_rate": 1.6242976786112784e-06, + "loss": 0.014378547668457031, + "step": 150725 + }, + { + "epoch": 1.3033177404432301, + "grad_norm": 0.2994591109422034, + "learning_rate": 1.6241166323438713e-06, + "loss": 0.3855918884277344, + "step": 150730 + }, + { + "epoch": 1.3033609739647734, + "grad_norm": 6.240544165844606, + "learning_rate": 1.6239355924219162e-06, + "loss": 0.055578994750976565, + "step": 150735 + }, + { + "epoch": 1.3034042074863166, + "grad_norm": 4.250007411472807, + "learning_rate": 1.6237545588462485e-06, + "loss": 0.023237228393554688, + "step": 150740 + }, + { + "epoch": 1.3034474410078598, + "grad_norm": 1.386733235542441, + "learning_rate": 1.6235735316177039e-06, + "loss": 0.1215606689453125, + "step": 150745 + }, + { + "epoch": 1.303490674529403, + "grad_norm": 0.7563624046506019, + "learning_rate": 1.6233925107371149e-06, + "loss": 0.11542816162109375, + "step": 150750 + }, + { + "epoch": 1.3035339080509463, + "grad_norm": 6.481228214027696, + "learning_rate": 1.6232114962053193e-06, + "loss": 0.1228729248046875, + "step": 150755 + }, + { + "epoch": 1.3035771415724897, + "grad_norm": 35.8702070141912, + "learning_rate": 1.6230304880231492e-06, + "loss": 0.08644294738769531, + "step": 150760 + }, + { + "epoch": 1.303620375094033, + "grad_norm": 16.867085418124265, + "learning_rate": 1.6228494861914425e-06, + "loss": 0.0425994873046875, + "step": 150765 + }, + { + "epoch": 1.3036636086155762, + "grad_norm": 5.047403340318243, + "learning_rate": 1.6226684907110318e-06, + "loss": 0.018694305419921876, + "step": 150770 + }, + { + "epoch": 1.3037068421371194, + "grad_norm": 4.931214903632101, + "learning_rate": 1.6224875015827527e-06, + "loss": 0.022095680236816406, + "step": 150775 + }, + { + "epoch": 1.3037500756586626, + "grad_norm": 25.028492049720814, + "learning_rate": 1.622306518807439e-06, + "loss": 0.14488134384155274, + "step": 150780 + }, + { + "epoch": 1.303793309180206, + "grad_norm": 2.7118310464831596, + "learning_rate": 1.6221255423859262e-06, + "loss": 0.023859405517578126, + "step": 150785 + }, + { + "epoch": 1.3038365427017493, + "grad_norm": 0.6001251918459378, + "learning_rate": 1.6219445723190477e-06, + "loss": 0.04475860595703125, + "step": 150790 + }, + { + "epoch": 1.3038797762232925, + "grad_norm": 1.1662030741093181, + "learning_rate": 1.6217636086076396e-06, + "loss": 0.018450164794921876, + "step": 150795 + }, + { + "epoch": 1.3039230097448358, + "grad_norm": 12.826689523528643, + "learning_rate": 1.6215826512525367e-06, + "loss": 0.061551666259765624, + "step": 150800 + }, + { + "epoch": 1.303966243266379, + "grad_norm": 0.4154522703601008, + "learning_rate": 1.6214017002545724e-06, + "loss": 0.04074249267578125, + "step": 150805 + }, + { + "epoch": 1.3040094767879222, + "grad_norm": 5.228928312756887, + "learning_rate": 1.6212207556145807e-06, + "loss": 0.0420501708984375, + "step": 150810 + }, + { + "epoch": 1.3040527103094655, + "grad_norm": 0.3737618289446209, + "learning_rate": 1.621039817333398e-06, + "loss": 0.06310806274414063, + "step": 150815 + }, + { + "epoch": 1.3040959438310087, + "grad_norm": 2.0385456504870154, + "learning_rate": 1.6208588854118567e-06, + "loss": 0.1012786865234375, + "step": 150820 + }, + { + "epoch": 1.3041391773525521, + "grad_norm": 37.28878909810849, + "learning_rate": 1.6206779598507938e-06, + "loss": 0.2127532958984375, + "step": 150825 + }, + { + "epoch": 1.3041824108740954, + "grad_norm": 1.8480487619451151, + "learning_rate": 1.6204970406510419e-06, + "loss": 0.06512451171875, + "step": 150830 + }, + { + "epoch": 1.3042256443956386, + "grad_norm": 2.11293702836034, + "learning_rate": 1.620316127813436e-06, + "loss": 0.10369110107421875, + "step": 150835 + }, + { + "epoch": 1.3042688779171818, + "grad_norm": 23.60148377451537, + "learning_rate": 1.62013522133881e-06, + "loss": 0.07659454345703125, + "step": 150840 + }, + { + "epoch": 1.304312111438725, + "grad_norm": 20.099524886814404, + "learning_rate": 1.6199543212279986e-06, + "loss": 0.09384899139404297, + "step": 150845 + }, + { + "epoch": 1.3043553449602685, + "grad_norm": 0.7153358684368795, + "learning_rate": 1.6197734274818347e-06, + "loss": 0.22989959716796876, + "step": 150850 + }, + { + "epoch": 1.3043985784818117, + "grad_norm": 41.30754472788755, + "learning_rate": 1.619592540101155e-06, + "loss": 0.16332778930664063, + "step": 150855 + }, + { + "epoch": 1.304441812003355, + "grad_norm": 0.7273264669109547, + "learning_rate": 1.6194116590867922e-06, + "loss": 0.007501983642578125, + "step": 150860 + }, + { + "epoch": 1.3044850455248982, + "grad_norm": 1.00368446885181, + "learning_rate": 1.619230784439581e-06, + "loss": 0.025785446166992188, + "step": 150865 + }, + { + "epoch": 1.3045282790464414, + "grad_norm": 6.483731366296159, + "learning_rate": 1.6190499161603547e-06, + "loss": 0.047140884399414065, + "step": 150870 + }, + { + "epoch": 1.3045715125679846, + "grad_norm": 0.19549469765757904, + "learning_rate": 1.6188690542499489e-06, + "loss": 0.1739673614501953, + "step": 150875 + }, + { + "epoch": 1.3046147460895279, + "grad_norm": 1.910773717628271, + "learning_rate": 1.618688198709197e-06, + "loss": 0.016082763671875, + "step": 150880 + }, + { + "epoch": 1.304657979611071, + "grad_norm": 0.32224110055592126, + "learning_rate": 1.618507349538932e-06, + "loss": 0.10434417724609375, + "step": 150885 + }, + { + "epoch": 1.3047012131326146, + "grad_norm": 1.5695439516870384, + "learning_rate": 1.6183265067399903e-06, + "loss": 0.16285247802734376, + "step": 150890 + }, + { + "epoch": 1.3047444466541578, + "grad_norm": 2.119622451620612, + "learning_rate": 1.6181456703132043e-06, + "loss": 0.0587921142578125, + "step": 150895 + }, + { + "epoch": 1.304787680175701, + "grad_norm": 1.4504936944466043, + "learning_rate": 1.6179648402594087e-06, + "loss": 0.026702880859375, + "step": 150900 + }, + { + "epoch": 1.3048309136972442, + "grad_norm": 46.40169490661402, + "learning_rate": 1.6177840165794373e-06, + "loss": 0.23432235717773436, + "step": 150905 + }, + { + "epoch": 1.3048741472187877, + "grad_norm": 7.463841893490926, + "learning_rate": 1.6176031992741234e-06, + "loss": 0.0777679443359375, + "step": 150910 + }, + { + "epoch": 1.304917380740331, + "grad_norm": 3.243945676448923, + "learning_rate": 1.6174223883443005e-06, + "loss": 0.0879547119140625, + "step": 150915 + }, + { + "epoch": 1.3049606142618742, + "grad_norm": 2.989737412430117, + "learning_rate": 1.6172415837908045e-06, + "loss": 0.090521240234375, + "step": 150920 + }, + { + "epoch": 1.3050038477834174, + "grad_norm": 2.593966282306085, + "learning_rate": 1.617060785614467e-06, + "loss": 0.056917762756347655, + "step": 150925 + }, + { + "epoch": 1.3050470813049606, + "grad_norm": 1.3875707159168238, + "learning_rate": 1.6168799938161244e-06, + "loss": 0.01155853271484375, + "step": 150930 + }, + { + "epoch": 1.3050903148265038, + "grad_norm": 3.120862623067854, + "learning_rate": 1.6166992083966084e-06, + "loss": 0.0618682861328125, + "step": 150935 + }, + { + "epoch": 1.305133548348047, + "grad_norm": 0.7584184716339644, + "learning_rate": 1.616518429356754e-06, + "loss": 0.04011688232421875, + "step": 150940 + }, + { + "epoch": 1.3051767818695903, + "grad_norm": 68.03172031010482, + "learning_rate": 1.6163376566973939e-06, + "loss": 0.0503570556640625, + "step": 150945 + }, + { + "epoch": 1.3052200153911337, + "grad_norm": 59.216863669490095, + "learning_rate": 1.6161568904193613e-06, + "loss": 0.15907859802246094, + "step": 150950 + }, + { + "epoch": 1.305263248912677, + "grad_norm": 1.5489600396635652, + "learning_rate": 1.6159761305234919e-06, + "loss": 0.0734375, + "step": 150955 + }, + { + "epoch": 1.3053064824342202, + "grad_norm": 14.56165251640919, + "learning_rate": 1.6157953770106187e-06, + "loss": 0.11630172729492187, + "step": 150960 + }, + { + "epoch": 1.3053497159557634, + "grad_norm": 0.27152722057506246, + "learning_rate": 1.6156146298815746e-06, + "loss": 0.19750938415527344, + "step": 150965 + }, + { + "epoch": 1.3053929494773067, + "grad_norm": 2.090402454271415, + "learning_rate": 1.6154338891371935e-06, + "loss": 0.0678802490234375, + "step": 150970 + }, + { + "epoch": 1.3054361829988501, + "grad_norm": 0.2778362018626032, + "learning_rate": 1.6152531547783076e-06, + "loss": 0.09564056396484374, + "step": 150975 + }, + { + "epoch": 1.3054794165203933, + "grad_norm": 13.323463722347498, + "learning_rate": 1.6150724268057536e-06, + "loss": 0.05477752685546875, + "step": 150980 + }, + { + "epoch": 1.3055226500419366, + "grad_norm": 9.197302305819301, + "learning_rate": 1.6148917052203616e-06, + "loss": 0.03632850646972656, + "step": 150985 + }, + { + "epoch": 1.3055658835634798, + "grad_norm": 1.0488164780607911, + "learning_rate": 1.6147109900229678e-06, + "loss": 0.0600071907043457, + "step": 150990 + }, + { + "epoch": 1.305609117085023, + "grad_norm": 3.57653941853616, + "learning_rate": 1.6145302812144044e-06, + "loss": 0.012046623229980468, + "step": 150995 + }, + { + "epoch": 1.3056523506065663, + "grad_norm": 0.03438108668089431, + "learning_rate": 1.6143495787955052e-06, + "loss": 0.051924514770507815, + "step": 151000 + }, + { + "epoch": 1.3056955841281095, + "grad_norm": 10.334760868420272, + "learning_rate": 1.614168882767103e-06, + "loss": 0.055769729614257815, + "step": 151005 + }, + { + "epoch": 1.3057388176496527, + "grad_norm": 3.9028078869671288, + "learning_rate": 1.6139881931300303e-06, + "loss": 0.045926666259765624, + "step": 151010 + }, + { + "epoch": 1.3057820511711962, + "grad_norm": 6.7053375266139685, + "learning_rate": 1.6138075098851233e-06, + "loss": 0.0547393798828125, + "step": 151015 + }, + { + "epoch": 1.3058252846927394, + "grad_norm": 28.57265083772587, + "learning_rate": 1.613626833033213e-06, + "loss": 0.27319183349609377, + "step": 151020 + }, + { + "epoch": 1.3058685182142826, + "grad_norm": 0.8988451038796128, + "learning_rate": 1.6134461625751333e-06, + "loss": 0.016594696044921874, + "step": 151025 + }, + { + "epoch": 1.3059117517358259, + "grad_norm": 4.37243979247223, + "learning_rate": 1.6132654985117179e-06, + "loss": 0.0600006103515625, + "step": 151030 + }, + { + "epoch": 1.305954985257369, + "grad_norm": 15.612786828765914, + "learning_rate": 1.6130848408437978e-06, + "loss": 0.05211181640625, + "step": 151035 + }, + { + "epoch": 1.3059982187789125, + "grad_norm": 0.18070796555078986, + "learning_rate": 1.6129041895722094e-06, + "loss": 0.11561870574951172, + "step": 151040 + }, + { + "epoch": 1.3060414523004558, + "grad_norm": 0.9818841452689661, + "learning_rate": 1.6127235446977827e-06, + "loss": 0.014447402954101563, + "step": 151045 + }, + { + "epoch": 1.306084685821999, + "grad_norm": 4.614110112477666, + "learning_rate": 1.6125429062213539e-06, + "loss": 0.12794952392578124, + "step": 151050 + }, + { + "epoch": 1.3061279193435422, + "grad_norm": 2.773654751417588, + "learning_rate": 1.6123622741437548e-06, + "loss": 0.05580062866210937, + "step": 151055 + }, + { + "epoch": 1.3061711528650854, + "grad_norm": 1.5413771941919372, + "learning_rate": 1.6121816484658176e-06, + "loss": 0.017913818359375, + "step": 151060 + }, + { + "epoch": 1.3062143863866287, + "grad_norm": 34.62715596913033, + "learning_rate": 1.6120010291883765e-06, + "loss": 0.17231979370117187, + "step": 151065 + }, + { + "epoch": 1.306257619908172, + "grad_norm": 3.6981886657726153, + "learning_rate": 1.611820416312264e-06, + "loss": 0.12223358154296875, + "step": 151070 + }, + { + "epoch": 1.3063008534297151, + "grad_norm": 12.652968229414286, + "learning_rate": 1.6116398098383115e-06, + "loss": 0.03045654296875, + "step": 151075 + }, + { + "epoch": 1.3063440869512586, + "grad_norm": 47.31548718299482, + "learning_rate": 1.6114592097673554e-06, + "loss": 0.057830810546875, + "step": 151080 + }, + { + "epoch": 1.3063873204728018, + "grad_norm": 1.3222316002143948, + "learning_rate": 1.611278616100226e-06, + "loss": 0.09449539184570313, + "step": 151085 + }, + { + "epoch": 1.306430553994345, + "grad_norm": 3.3890640265519707, + "learning_rate": 1.6110980288377577e-06, + "loss": 0.17758216857910156, + "step": 151090 + }, + { + "epoch": 1.3064737875158883, + "grad_norm": 3.4654643890152323, + "learning_rate": 1.6109174479807807e-06, + "loss": 0.04648857116699219, + "step": 151095 + }, + { + "epoch": 1.3065170210374315, + "grad_norm": 0.5715573810395919, + "learning_rate": 1.6107368735301312e-06, + "loss": 0.12956085205078124, + "step": 151100 + }, + { + "epoch": 1.306560254558975, + "grad_norm": 4.002579668051004, + "learning_rate": 1.6105563054866394e-06, + "loss": 0.04668617248535156, + "step": 151105 + }, + { + "epoch": 1.3066034880805182, + "grad_norm": 4.497088743911791, + "learning_rate": 1.61037574385114e-06, + "loss": 0.05596771240234375, + "step": 151110 + }, + { + "epoch": 1.3066467216020614, + "grad_norm": 21.358592230127698, + "learning_rate": 1.6101951886244653e-06, + "loss": 0.02241172790527344, + "step": 151115 + }, + { + "epoch": 1.3066899551236046, + "grad_norm": 3.22379633198422, + "learning_rate": 1.6100146398074472e-06, + "loss": 0.08241424560546876, + "step": 151120 + }, + { + "epoch": 1.3067331886451479, + "grad_norm": 2.704783136017928, + "learning_rate": 1.6098340974009186e-06, + "loss": 0.10725860595703125, + "step": 151125 + }, + { + "epoch": 1.306776422166691, + "grad_norm": 0.31532920261622327, + "learning_rate": 1.6096535614057127e-06, + "loss": 0.04160232543945312, + "step": 151130 + }, + { + "epoch": 1.3068196556882343, + "grad_norm": 7.069255756082889, + "learning_rate": 1.6094730318226616e-06, + "loss": 0.06071281433105469, + "step": 151135 + }, + { + "epoch": 1.3068628892097776, + "grad_norm": 7.86752639958653, + "learning_rate": 1.6092925086525969e-06, + "loss": 0.024553489685058594, + "step": 151140 + }, + { + "epoch": 1.306906122731321, + "grad_norm": 17.74406828149946, + "learning_rate": 1.6091119918963533e-06, + "loss": 0.050292587280273436, + "step": 151145 + }, + { + "epoch": 1.3069493562528642, + "grad_norm": 0.44090499850799364, + "learning_rate": 1.6089314815547614e-06, + "loss": 0.0174224853515625, + "step": 151150 + }, + { + "epoch": 1.3069925897744075, + "grad_norm": 3.8439899142821523, + "learning_rate": 1.6087509776286557e-06, + "loss": 0.15739059448242188, + "step": 151155 + }, + { + "epoch": 1.3070358232959507, + "grad_norm": 2.83895744867919, + "learning_rate": 1.608570480118867e-06, + "loss": 0.05735931396484375, + "step": 151160 + }, + { + "epoch": 1.3070790568174941, + "grad_norm": 0.7832620782850162, + "learning_rate": 1.6083899890262287e-06, + "loss": 0.0804443359375, + "step": 151165 + }, + { + "epoch": 1.3071222903390374, + "grad_norm": 3.14590166096122, + "learning_rate": 1.608209504351572e-06, + "loss": 0.05526123046875, + "step": 151170 + }, + { + "epoch": 1.3071655238605806, + "grad_norm": 2.2373287500849055, + "learning_rate": 1.6080290260957312e-06, + "loss": 0.03639678955078125, + "step": 151175 + }, + { + "epoch": 1.3072087573821238, + "grad_norm": 9.924062711566453, + "learning_rate": 1.607848554259537e-06, + "loss": 0.1390848159790039, + "step": 151180 + }, + { + "epoch": 1.307251990903667, + "grad_norm": 0.6174803925992987, + "learning_rate": 1.6076680888438226e-06, + "loss": 0.036285400390625, + "step": 151185 + }, + { + "epoch": 1.3072952244252103, + "grad_norm": 4.370314473733095, + "learning_rate": 1.6074876298494197e-06, + "loss": 0.0602874755859375, + "step": 151190 + }, + { + "epoch": 1.3073384579467535, + "grad_norm": 0.9957077622632741, + "learning_rate": 1.6073071772771608e-06, + "loss": 0.01224517822265625, + "step": 151195 + }, + { + "epoch": 1.3073816914682967, + "grad_norm": 0.2420895521114394, + "learning_rate": 1.6071267311278775e-06, + "loss": 0.0388458251953125, + "step": 151200 + }, + { + "epoch": 1.3074249249898402, + "grad_norm": 0.7570189159096458, + "learning_rate": 1.6069462914024032e-06, + "loss": 0.196319580078125, + "step": 151205 + }, + { + "epoch": 1.3074681585113834, + "grad_norm": 3.4781318903520777, + "learning_rate": 1.6067658581015686e-06, + "loss": 0.3320159912109375, + "step": 151210 + }, + { + "epoch": 1.3075113920329267, + "grad_norm": 2.3080217932466227, + "learning_rate": 1.606585431226208e-06, + "loss": 0.1372213363647461, + "step": 151215 + }, + { + "epoch": 1.3075546255544699, + "grad_norm": 0.7966890970495139, + "learning_rate": 1.6064050107771523e-06, + "loss": 0.01371612548828125, + "step": 151220 + }, + { + "epoch": 1.3075978590760131, + "grad_norm": 0.3874271035945159, + "learning_rate": 1.6062245967552333e-06, + "loss": 0.07860984802246093, + "step": 151225 + }, + { + "epoch": 1.3076410925975566, + "grad_norm": 0.9498420025779583, + "learning_rate": 1.6060441891612832e-06, + "loss": 0.06559333801269532, + "step": 151230 + }, + { + "epoch": 1.3076843261190998, + "grad_norm": 2.0157822544931814, + "learning_rate": 1.6058637879961336e-06, + "loss": 0.05008697509765625, + "step": 151235 + }, + { + "epoch": 1.307727559640643, + "grad_norm": 7.364672660533332, + "learning_rate": 1.6056833932606174e-06, + "loss": 0.05461044311523437, + "step": 151240 + }, + { + "epoch": 1.3077707931621863, + "grad_norm": 3.224726551494698, + "learning_rate": 1.6055030049555669e-06, + "loss": 0.0298614501953125, + "step": 151245 + }, + { + "epoch": 1.3078140266837295, + "grad_norm": 1.5963001225522446, + "learning_rate": 1.605322623081813e-06, + "loss": 0.02995891571044922, + "step": 151250 + }, + { + "epoch": 1.3078572602052727, + "grad_norm": 0.10307248128418854, + "learning_rate": 1.6051422476401876e-06, + "loss": 0.035259246826171875, + "step": 151255 + }, + { + "epoch": 1.307900493726816, + "grad_norm": 1.3820999815705237, + "learning_rate": 1.6049618786315218e-06, + "loss": 0.06750278472900391, + "step": 151260 + }, + { + "epoch": 1.3079437272483592, + "grad_norm": 34.44411362768398, + "learning_rate": 1.6047815160566501e-06, + "loss": 0.2260843276977539, + "step": 151265 + }, + { + "epoch": 1.3079869607699026, + "grad_norm": 3.7741859818946804, + "learning_rate": 1.6046011599164015e-06, + "loss": 0.02202606201171875, + "step": 151270 + }, + { + "epoch": 1.3080301942914458, + "grad_norm": 2.2112708938402514, + "learning_rate": 1.6044208102116104e-06, + "loss": 0.066986083984375, + "step": 151275 + }, + { + "epoch": 1.308073427812989, + "grad_norm": 4.340365347675398, + "learning_rate": 1.604240466943107e-06, + "loss": 0.0576019287109375, + "step": 151280 + }, + { + "epoch": 1.3081166613345323, + "grad_norm": 5.765430919323493, + "learning_rate": 1.604060130111723e-06, + "loss": 0.0256683349609375, + "step": 151285 + }, + { + "epoch": 1.3081598948560755, + "grad_norm": 0.09473618988401868, + "learning_rate": 1.6038797997182906e-06, + "loss": 0.13529567718505858, + "step": 151290 + }, + { + "epoch": 1.308203128377619, + "grad_norm": 2.1364909047094387, + "learning_rate": 1.6036994757636414e-06, + "loss": 0.039325714111328125, + "step": 151295 + }, + { + "epoch": 1.3082463618991622, + "grad_norm": 1.29992843883233, + "learning_rate": 1.6035191582486052e-06, + "loss": 0.13358154296875, + "step": 151300 + }, + { + "epoch": 1.3082895954207054, + "grad_norm": 1.0292860016758174, + "learning_rate": 1.603338847174017e-06, + "loss": 0.006549835205078125, + "step": 151305 + }, + { + "epoch": 1.3083328289422487, + "grad_norm": 2.228495341368066, + "learning_rate": 1.6031585425407057e-06, + "loss": 0.0426788330078125, + "step": 151310 + }, + { + "epoch": 1.308376062463792, + "grad_norm": 0.8754559203452636, + "learning_rate": 1.6029782443495036e-06, + "loss": 0.14989051818847657, + "step": 151315 + }, + { + "epoch": 1.3084192959853351, + "grad_norm": 4.3927750981576, + "learning_rate": 1.6027979526012427e-06, + "loss": 0.06921920776367188, + "step": 151320 + }, + { + "epoch": 1.3084625295068784, + "grad_norm": 0.21827140031784315, + "learning_rate": 1.602617667296755e-06, + "loss": 0.0582183837890625, + "step": 151325 + }, + { + "epoch": 1.3085057630284216, + "grad_norm": 0.1816164360754049, + "learning_rate": 1.6024373884368693e-06, + "loss": 0.09812202453613281, + "step": 151330 + }, + { + "epoch": 1.308548996549965, + "grad_norm": 4.374987939698495, + "learning_rate": 1.6022571160224205e-06, + "loss": 0.03185958862304687, + "step": 151335 + }, + { + "epoch": 1.3085922300715083, + "grad_norm": 18.19342985751828, + "learning_rate": 1.6020768500542384e-06, + "loss": 0.05804367065429687, + "step": 151340 + }, + { + "epoch": 1.3086354635930515, + "grad_norm": 0.3170393790126797, + "learning_rate": 1.6018965905331539e-06, + "loss": 0.040726661682128906, + "step": 151345 + }, + { + "epoch": 1.3086786971145947, + "grad_norm": 0.6492471610518752, + "learning_rate": 1.601716337459999e-06, + "loss": 0.008047103881835938, + "step": 151350 + }, + { + "epoch": 1.308721930636138, + "grad_norm": 6.7663627492889225, + "learning_rate": 1.6015360908356049e-06, + "loss": 0.038208389282226564, + "step": 151355 + }, + { + "epoch": 1.3087651641576814, + "grad_norm": 3.055039510996253, + "learning_rate": 1.6013558506608017e-06, + "loss": 0.0545135498046875, + "step": 151360 + }, + { + "epoch": 1.3088083976792246, + "grad_norm": 4.058920279754956, + "learning_rate": 1.6011756169364227e-06, + "loss": 0.036133956909179685, + "step": 151365 + }, + { + "epoch": 1.3088516312007679, + "grad_norm": 4.804349609930499, + "learning_rate": 1.6009953896632983e-06, + "loss": 0.0198760986328125, + "step": 151370 + }, + { + "epoch": 1.308894864722311, + "grad_norm": 10.48311441507501, + "learning_rate": 1.6008151688422582e-06, + "loss": 0.6080337524414062, + "step": 151375 + }, + { + "epoch": 1.3089380982438543, + "grad_norm": 3.8172418426029515, + "learning_rate": 1.6006349544741361e-06, + "loss": 0.09991111755371093, + "step": 151380 + }, + { + "epoch": 1.3089813317653975, + "grad_norm": 4.237970666575575, + "learning_rate": 1.6004547465597619e-06, + "loss": 0.04264812469482422, + "step": 151385 + }, + { + "epoch": 1.3090245652869408, + "grad_norm": 2.209397668590351, + "learning_rate": 1.6002745450999669e-06, + "loss": 0.09562835693359376, + "step": 151390 + }, + { + "epoch": 1.3090677988084842, + "grad_norm": 0.32300335525796836, + "learning_rate": 1.6000943500955807e-06, + "loss": 0.10752410888671875, + "step": 151395 + }, + { + "epoch": 1.3091110323300275, + "grad_norm": 1.3214656974631012, + "learning_rate": 1.5999141615474371e-06, + "loss": 0.08355941772460937, + "step": 151400 + }, + { + "epoch": 1.3091542658515707, + "grad_norm": 0.21331063844737785, + "learning_rate": 1.5997339794563653e-06, + "loss": 0.6226108551025391, + "step": 151405 + }, + { + "epoch": 1.309197499373114, + "grad_norm": 3.0377429170431616, + "learning_rate": 1.5995538038231972e-06, + "loss": 0.03983383178710938, + "step": 151410 + }, + { + "epoch": 1.3092407328946571, + "grad_norm": 15.031662482346043, + "learning_rate": 1.5993736346487626e-06, + "loss": 0.2759437561035156, + "step": 151415 + }, + { + "epoch": 1.3092839664162006, + "grad_norm": 3.362535480585031, + "learning_rate": 1.5991934719338934e-06, + "loss": 0.06575088500976563, + "step": 151420 + }, + { + "epoch": 1.3093271999377438, + "grad_norm": 3.605684574114557, + "learning_rate": 1.5990133156794189e-06, + "loss": 0.092578125, + "step": 151425 + }, + { + "epoch": 1.309370433459287, + "grad_norm": 8.310358178953802, + "learning_rate": 1.598833165886172e-06, + "loss": 0.03687267303466797, + "step": 151430 + }, + { + "epoch": 1.3094136669808303, + "grad_norm": 25.840479456253362, + "learning_rate": 1.5986530225549818e-06, + "loss": 0.07878189086914063, + "step": 151435 + }, + { + "epoch": 1.3094569005023735, + "grad_norm": 2.988543906486208, + "learning_rate": 1.5984728856866812e-06, + "loss": 0.0437255859375, + "step": 151440 + }, + { + "epoch": 1.3095001340239167, + "grad_norm": 20.01847716443029, + "learning_rate": 1.5982927552821002e-06, + "loss": 0.051794815063476565, + "step": 151445 + }, + { + "epoch": 1.30954336754546, + "grad_norm": 1.906300360604537, + "learning_rate": 1.598112631342069e-06, + "loss": 0.04039955139160156, + "step": 151450 + }, + { + "epoch": 1.3095866010670032, + "grad_norm": 12.713375827763146, + "learning_rate": 1.597932513867417e-06, + "loss": 0.15527496337890626, + "step": 151455 + }, + { + "epoch": 1.3096298345885466, + "grad_norm": 1.628343321565928, + "learning_rate": 1.5977524028589777e-06, + "loss": 0.10699615478515626, + "step": 151460 + }, + { + "epoch": 1.3096730681100899, + "grad_norm": 47.02044430923352, + "learning_rate": 1.5975722983175802e-06, + "loss": 0.13260421752929688, + "step": 151465 + }, + { + "epoch": 1.309716301631633, + "grad_norm": 2.0078357481657925, + "learning_rate": 1.5973922002440556e-06, + "loss": 0.28376617431640627, + "step": 151470 + }, + { + "epoch": 1.3097595351531763, + "grad_norm": 1.2334984487847138, + "learning_rate": 1.5972121086392342e-06, + "loss": 0.076873779296875, + "step": 151475 + }, + { + "epoch": 1.3098027686747196, + "grad_norm": 0.17496464364358008, + "learning_rate": 1.5970320235039466e-06, + "loss": 0.02318229675292969, + "step": 151480 + }, + { + "epoch": 1.309846002196263, + "grad_norm": 4.613919154275265, + "learning_rate": 1.5968519448390222e-06, + "loss": 0.06610107421875, + "step": 151485 + }, + { + "epoch": 1.3098892357178062, + "grad_norm": 10.561010828047527, + "learning_rate": 1.5966718726452938e-06, + "loss": 0.12246932983398437, + "step": 151490 + }, + { + "epoch": 1.3099324692393495, + "grad_norm": 4.609422859776215, + "learning_rate": 1.5964918069235895e-06, + "loss": 0.054426383972167966, + "step": 151495 + }, + { + "epoch": 1.3099757027608927, + "grad_norm": 0.2890224536009811, + "learning_rate": 1.596311747674742e-06, + "loss": 0.027016067504882814, + "step": 151500 + }, + { + "epoch": 1.310018936282436, + "grad_norm": 13.317700497176425, + "learning_rate": 1.596131694899581e-06, + "loss": 0.06106243133544922, + "step": 151505 + }, + { + "epoch": 1.3100621698039792, + "grad_norm": 0.594204882906749, + "learning_rate": 1.5959516485989366e-06, + "loss": 0.012603759765625, + "step": 151510 + }, + { + "epoch": 1.3101054033255224, + "grad_norm": 5.321321304920421, + "learning_rate": 1.5957716087736386e-06, + "loss": 0.023484516143798827, + "step": 151515 + }, + { + "epoch": 1.3101486368470656, + "grad_norm": 2.078518449080969, + "learning_rate": 1.5955915754245169e-06, + "loss": 0.017476367950439452, + "step": 151520 + }, + { + "epoch": 1.310191870368609, + "grad_norm": 10.040124503889754, + "learning_rate": 1.5954115485524042e-06, + "loss": 0.03264923095703125, + "step": 151525 + }, + { + "epoch": 1.3102351038901523, + "grad_norm": 1.2128522082656052, + "learning_rate": 1.5952315281581292e-06, + "loss": 0.019483184814453124, + "step": 151530 + }, + { + "epoch": 1.3102783374116955, + "grad_norm": 3.680843958715573, + "learning_rate": 1.5950515142425223e-06, + "loss": 0.03412933349609375, + "step": 151535 + }, + { + "epoch": 1.3103215709332388, + "grad_norm": 7.882822131823875, + "learning_rate": 1.5948715068064124e-06, + "loss": 0.06630706787109375, + "step": 151540 + }, + { + "epoch": 1.310364804454782, + "grad_norm": 1.3769596790734546, + "learning_rate": 1.5946915058506324e-06, + "loss": 0.0164154052734375, + "step": 151545 + }, + { + "epoch": 1.3104080379763254, + "grad_norm": 2.720635156209762, + "learning_rate": 1.5945115113760107e-06, + "loss": 0.023099899291992188, + "step": 151550 + }, + { + "epoch": 1.3104512714978687, + "grad_norm": 54.32868868961584, + "learning_rate": 1.5943315233833767e-06, + "loss": 0.20586681365966797, + "step": 151555 + }, + { + "epoch": 1.310494505019412, + "grad_norm": 11.023951143844657, + "learning_rate": 1.5941515418735624e-06, + "loss": 0.04578742980957031, + "step": 151560 + }, + { + "epoch": 1.3105377385409551, + "grad_norm": 20.009746238461325, + "learning_rate": 1.5939715668473973e-06, + "loss": 0.032358741760253905, + "step": 151565 + }, + { + "epoch": 1.3105809720624984, + "grad_norm": 2.650530976254585, + "learning_rate": 1.5937915983057108e-06, + "loss": 0.07065773010253906, + "step": 151570 + }, + { + "epoch": 1.3106242055840416, + "grad_norm": 6.8668225021581675, + "learning_rate": 1.5936116362493334e-06, + "loss": 0.1080850601196289, + "step": 151575 + }, + { + "epoch": 1.3106674391055848, + "grad_norm": 15.130019160242762, + "learning_rate": 1.593431680679095e-06, + "loss": 0.0753265380859375, + "step": 151580 + }, + { + "epoch": 1.310710672627128, + "grad_norm": 0.32749582189140247, + "learning_rate": 1.5932517315958241e-06, + "loss": 0.023952484130859375, + "step": 151585 + }, + { + "epoch": 1.3107539061486715, + "grad_norm": 29.9219018345403, + "learning_rate": 1.5930717890003527e-06, + "loss": 0.05236968994140625, + "step": 151590 + }, + { + "epoch": 1.3107971396702147, + "grad_norm": 0.080135822024403, + "learning_rate": 1.5928918528935104e-06, + "loss": 0.06029796600341797, + "step": 151595 + }, + { + "epoch": 1.310840373191758, + "grad_norm": 11.228753197474395, + "learning_rate": 1.5927119232761253e-06, + "loss": 0.0350189208984375, + "step": 151600 + }, + { + "epoch": 1.3108836067133012, + "grad_norm": 0.37751419421179544, + "learning_rate": 1.5925320001490293e-06, + "loss": 0.01734466552734375, + "step": 151605 + }, + { + "epoch": 1.3109268402348446, + "grad_norm": 37.25476475372405, + "learning_rate": 1.5923520835130516e-06, + "loss": 0.22205581665039062, + "step": 151610 + }, + { + "epoch": 1.3109700737563879, + "grad_norm": 42.188959541363836, + "learning_rate": 1.5921721733690204e-06, + "loss": 0.1262969970703125, + "step": 151615 + }, + { + "epoch": 1.311013307277931, + "grad_norm": 1.4739874062795983, + "learning_rate": 1.5919922697177683e-06, + "loss": 0.021468734741210936, + "step": 151620 + }, + { + "epoch": 1.3110565407994743, + "grad_norm": 0.12191799234082257, + "learning_rate": 1.591812372560123e-06, + "loss": 0.003964614868164062, + "step": 151625 + }, + { + "epoch": 1.3110997743210175, + "grad_norm": 0.1811506425499748, + "learning_rate": 1.5916324818969148e-06, + "loss": 0.010439300537109375, + "step": 151630 + }, + { + "epoch": 1.3111430078425608, + "grad_norm": 4.157077918819581, + "learning_rate": 1.5914525977289733e-06, + "loss": 0.11413745880126953, + "step": 151635 + }, + { + "epoch": 1.311186241364104, + "grad_norm": 44.2458282805503, + "learning_rate": 1.5912727200571276e-06, + "loss": 0.2645378112792969, + "step": 151640 + }, + { + "epoch": 1.3112294748856472, + "grad_norm": 0.4857162882606747, + "learning_rate": 1.591092848882208e-06, + "loss": 0.061956024169921874, + "step": 151645 + }, + { + "epoch": 1.3112727084071907, + "grad_norm": 0.19319483785648744, + "learning_rate": 1.590912984205042e-06, + "loss": 0.12308502197265625, + "step": 151650 + }, + { + "epoch": 1.311315941928734, + "grad_norm": 30.94253812643786, + "learning_rate": 1.5907331260264625e-06, + "loss": 0.21147289276123046, + "step": 151655 + }, + { + "epoch": 1.3113591754502771, + "grad_norm": 11.731055850949074, + "learning_rate": 1.590553274347296e-06, + "loss": 0.14264602661132814, + "step": 151660 + }, + { + "epoch": 1.3114024089718204, + "grad_norm": 0.5781760277604457, + "learning_rate": 1.5903734291683744e-06, + "loss": 0.1863616943359375, + "step": 151665 + }, + { + "epoch": 1.3114456424933636, + "grad_norm": 6.770833974145914, + "learning_rate": 1.590193590490526e-06, + "loss": 0.05878238677978516, + "step": 151670 + }, + { + "epoch": 1.311488876014907, + "grad_norm": 11.907366350419132, + "learning_rate": 1.5900137583145802e-06, + "loss": 0.21862106323242186, + "step": 151675 + }, + { + "epoch": 1.3115321095364503, + "grad_norm": 2.403796148206565, + "learning_rate": 1.589833932641365e-06, + "loss": 0.07560520172119141, + "step": 151680 + }, + { + "epoch": 1.3115753430579935, + "grad_norm": 2.6482283830444153, + "learning_rate": 1.5896541134717123e-06, + "loss": 0.009873199462890624, + "step": 151685 + }, + { + "epoch": 1.3116185765795367, + "grad_norm": 1.6043638941093825, + "learning_rate": 1.5894743008064503e-06, + "loss": 0.024767684936523437, + "step": 151690 + }, + { + "epoch": 1.31166181010108, + "grad_norm": 0.7860462074950532, + "learning_rate": 1.5892944946464084e-06, + "loss": 0.0437957763671875, + "step": 151695 + }, + { + "epoch": 1.3117050436226232, + "grad_norm": 0.7221407454792204, + "learning_rate": 1.589114694992415e-06, + "loss": 0.01945152282714844, + "step": 151700 + }, + { + "epoch": 1.3117482771441664, + "grad_norm": 0.30080401685554564, + "learning_rate": 1.5889349018453008e-06, + "loss": 0.01224365234375, + "step": 151705 + }, + { + "epoch": 1.3117915106657096, + "grad_norm": 14.81816367080631, + "learning_rate": 1.5887551152058924e-06, + "loss": 0.1436767578125, + "step": 151710 + }, + { + "epoch": 1.311834744187253, + "grad_norm": 0.13003053685054564, + "learning_rate": 1.5885753350750212e-06, + "loss": 0.037335968017578124, + "step": 151715 + }, + { + "epoch": 1.3118779777087963, + "grad_norm": 20.65297497680979, + "learning_rate": 1.5883955614535167e-06, + "loss": 0.04430580139160156, + "step": 151720 + }, + { + "epoch": 1.3119212112303396, + "grad_norm": 1.8087259512154898, + "learning_rate": 1.588215794342207e-06, + "loss": 0.062188720703125, + "step": 151725 + }, + { + "epoch": 1.3119644447518828, + "grad_norm": 3.940975207953724, + "learning_rate": 1.5880360337419213e-06, + "loss": 0.12493820190429687, + "step": 151730 + }, + { + "epoch": 1.312007678273426, + "grad_norm": 1.4562432212465624, + "learning_rate": 1.5878562796534889e-06, + "loss": 0.04122848510742187, + "step": 151735 + }, + { + "epoch": 1.3120509117949695, + "grad_norm": 0.7909025687038072, + "learning_rate": 1.5876765320777382e-06, + "loss": 0.041259765625, + "step": 151740 + }, + { + "epoch": 1.3120941453165127, + "grad_norm": 1.2184315349119414, + "learning_rate": 1.5874967910154977e-06, + "loss": 0.08976306915283203, + "step": 151745 + }, + { + "epoch": 1.312137378838056, + "grad_norm": 4.438779605892292, + "learning_rate": 1.5873170564675985e-06, + "loss": 0.07066650390625, + "step": 151750 + }, + { + "epoch": 1.3121806123595992, + "grad_norm": 0.5112328876789247, + "learning_rate": 1.5871373284348679e-06, + "loss": 0.029346466064453125, + "step": 151755 + }, + { + "epoch": 1.3122238458811424, + "grad_norm": 23.0264177455198, + "learning_rate": 1.5869576069181352e-06, + "loss": 0.0612091064453125, + "step": 151760 + }, + { + "epoch": 1.3122670794026856, + "grad_norm": 34.43952478129223, + "learning_rate": 1.586777891918228e-06, + "loss": 0.2069478988647461, + "step": 151765 + }, + { + "epoch": 1.3123103129242288, + "grad_norm": 2.1131971430418584, + "learning_rate": 1.5865981834359772e-06, + "loss": 0.05022125244140625, + "step": 151770 + }, + { + "epoch": 1.312353546445772, + "grad_norm": 2.4352419769409526, + "learning_rate": 1.5864184814722098e-06, + "loss": 0.0432464599609375, + "step": 151775 + }, + { + "epoch": 1.3123967799673155, + "grad_norm": 0.8786207683586917, + "learning_rate": 1.5862387860277567e-06, + "loss": 0.041469669342041014, + "step": 151780 + }, + { + "epoch": 1.3124400134888587, + "grad_norm": 36.90623674145658, + "learning_rate": 1.5860590971034455e-06, + "loss": 0.08445167541503906, + "step": 151785 + }, + { + "epoch": 1.312483247010402, + "grad_norm": 2.5543365329541747, + "learning_rate": 1.5858794147001045e-06, + "loss": 0.016294479370117188, + "step": 151790 + }, + { + "epoch": 1.3125264805319452, + "grad_norm": 2.6844360832426175, + "learning_rate": 1.5856997388185627e-06, + "loss": 0.04178314208984375, + "step": 151795 + }, + { + "epoch": 1.3125697140534884, + "grad_norm": 0.8199620882734382, + "learning_rate": 1.5855200694596487e-06, + "loss": 0.035884857177734375, + "step": 151800 + }, + { + "epoch": 1.3126129475750319, + "grad_norm": 21.50193693590633, + "learning_rate": 1.5853404066241901e-06, + "loss": 0.06779212951660156, + "step": 151805 + }, + { + "epoch": 1.3126561810965751, + "grad_norm": 1.9876144939696834, + "learning_rate": 1.5851607503130179e-06, + "loss": 0.049782943725585935, + "step": 151810 + }, + { + "epoch": 1.3126994146181183, + "grad_norm": 3.015901876572468, + "learning_rate": 1.5849811005269588e-06, + "loss": 0.06024093627929687, + "step": 151815 + }, + { + "epoch": 1.3127426481396616, + "grad_norm": 184.7329416879093, + "learning_rate": 1.5848014572668422e-06, + "loss": 0.1129547119140625, + "step": 151820 + }, + { + "epoch": 1.3127858816612048, + "grad_norm": 1.5130522044155594, + "learning_rate": 1.5846218205334952e-06, + "loss": 0.0150054931640625, + "step": 151825 + }, + { + "epoch": 1.312829115182748, + "grad_norm": 1.0470652841668988, + "learning_rate": 1.5844421903277481e-06, + "loss": 0.047135353088378906, + "step": 151830 + }, + { + "epoch": 1.3128723487042913, + "grad_norm": 5.975491111104145, + "learning_rate": 1.5842625666504287e-06, + "loss": 0.11672286987304688, + "step": 151835 + }, + { + "epoch": 1.3129155822258345, + "grad_norm": 0.09190122469481786, + "learning_rate": 1.5840829495023643e-06, + "loss": 0.007984066009521484, + "step": 151840 + }, + { + "epoch": 1.312958815747378, + "grad_norm": 1.147344564864233, + "learning_rate": 1.583903338884385e-06, + "loss": 0.024808502197265624, + "step": 151845 + }, + { + "epoch": 1.3130020492689212, + "grad_norm": 5.420384548697312, + "learning_rate": 1.5837237347973183e-06, + "loss": 0.03432159423828125, + "step": 151850 + }, + { + "epoch": 1.3130452827904644, + "grad_norm": 2.2523412812089014, + "learning_rate": 1.5835441372419932e-06, + "loss": 0.10586929321289062, + "step": 151855 + }, + { + "epoch": 1.3130885163120076, + "grad_norm": 2.1520989454422077, + "learning_rate": 1.583364546219237e-06, + "loss": 0.020029830932617187, + "step": 151860 + }, + { + "epoch": 1.313131749833551, + "grad_norm": 0.3086725883443723, + "learning_rate": 1.5831849617298785e-06, + "loss": 0.13280754089355468, + "step": 151865 + }, + { + "epoch": 1.3131749833550943, + "grad_norm": 1.5386453478888948, + "learning_rate": 1.5830053837747443e-06, + "loss": 0.384869384765625, + "step": 151870 + }, + { + "epoch": 1.3132182168766375, + "grad_norm": 1.018705074506805, + "learning_rate": 1.5828258123546653e-06, + "loss": 0.069915771484375, + "step": 151875 + }, + { + "epoch": 1.3132614503981808, + "grad_norm": 0.12919299694338196, + "learning_rate": 1.5826462474704684e-06, + "loss": 0.03248672485351563, + "step": 151880 + }, + { + "epoch": 1.313304683919724, + "grad_norm": 0.36603384544732026, + "learning_rate": 1.5824666891229808e-06, + "loss": 0.05302848815917969, + "step": 151885 + }, + { + "epoch": 1.3133479174412672, + "grad_norm": 1.8496549956181234, + "learning_rate": 1.5822871373130328e-06, + "loss": 0.067706298828125, + "step": 151890 + }, + { + "epoch": 1.3133911509628104, + "grad_norm": 2.487890839408293, + "learning_rate": 1.5821075920414511e-06, + "loss": 0.0299102783203125, + "step": 151895 + }, + { + "epoch": 1.3134343844843537, + "grad_norm": 27.681766671476137, + "learning_rate": 1.5819280533090638e-06, + "loss": 0.13193511962890625, + "step": 151900 + }, + { + "epoch": 1.3134776180058971, + "grad_norm": 0.028300819883810977, + "learning_rate": 1.5817485211166978e-06, + "loss": 0.022272300720214844, + "step": 151905 + }, + { + "epoch": 1.3135208515274404, + "grad_norm": 5.616684407176967, + "learning_rate": 1.5815689954651836e-06, + "loss": 0.0280853271484375, + "step": 151910 + }, + { + "epoch": 1.3135640850489836, + "grad_norm": 5.995880755548782, + "learning_rate": 1.581389476355348e-06, + "loss": 0.045836257934570315, + "step": 151915 + }, + { + "epoch": 1.3136073185705268, + "grad_norm": 0.718333142445733, + "learning_rate": 1.5812099637880187e-06, + "loss": 0.0871124267578125, + "step": 151920 + }, + { + "epoch": 1.31365055209207, + "grad_norm": 7.158100927149557, + "learning_rate": 1.5810304577640238e-06, + "loss": 0.0728912353515625, + "step": 151925 + }, + { + "epoch": 1.3136937856136135, + "grad_norm": 0.5448606698567299, + "learning_rate": 1.5808509582841907e-06, + "loss": 0.006397056579589844, + "step": 151930 + }, + { + "epoch": 1.3137370191351567, + "grad_norm": 9.48100776252044, + "learning_rate": 1.5806714653493468e-06, + "loss": 0.0274627685546875, + "step": 151935 + }, + { + "epoch": 1.3137802526567, + "grad_norm": 1.5345764305892289, + "learning_rate": 1.58049197896032e-06, + "loss": 0.024095535278320312, + "step": 151940 + }, + { + "epoch": 1.3138234861782432, + "grad_norm": 1.115758508603782, + "learning_rate": 1.5803124991179405e-06, + "loss": 0.014604949951171875, + "step": 151945 + }, + { + "epoch": 1.3138667196997864, + "grad_norm": 1.4906223703177262, + "learning_rate": 1.5801330258230341e-06, + "loss": 0.03775634765625, + "step": 151950 + }, + { + "epoch": 1.3139099532213296, + "grad_norm": 3.5855465179138344, + "learning_rate": 1.5799535590764292e-06, + "loss": 0.024735260009765624, + "step": 151955 + }, + { + "epoch": 1.3139531867428729, + "grad_norm": 0.2782994510895442, + "learning_rate": 1.5797740988789527e-06, + "loss": 0.1625732421875, + "step": 151960 + }, + { + "epoch": 1.313996420264416, + "grad_norm": 11.16164062201688, + "learning_rate": 1.5795946452314315e-06, + "loss": 0.07446517944335937, + "step": 151965 + }, + { + "epoch": 1.3140396537859595, + "grad_norm": 2.7129538369169106, + "learning_rate": 1.5794151981346953e-06, + "loss": 0.10825576782226562, + "step": 151970 + }, + { + "epoch": 1.3140828873075028, + "grad_norm": 121.6108316419711, + "learning_rate": 1.5792357575895706e-06, + "loss": 0.18534622192382813, + "step": 151975 + }, + { + "epoch": 1.314126120829046, + "grad_norm": 1.2551121477299463, + "learning_rate": 1.5790563235968852e-06, + "loss": 0.0905181884765625, + "step": 151980 + }, + { + "epoch": 1.3141693543505892, + "grad_norm": 23.013865568742233, + "learning_rate": 1.5788768961574663e-06, + "loss": 0.10165510177612305, + "step": 151985 + }, + { + "epoch": 1.3142125878721325, + "grad_norm": 2.4054435187673855, + "learning_rate": 1.5786974752721405e-06, + "loss": 0.28734779357910156, + "step": 151990 + }, + { + "epoch": 1.314255821393676, + "grad_norm": 54.1256447135161, + "learning_rate": 1.5785180609417375e-06, + "loss": 0.16319961547851564, + "step": 151995 + }, + { + "epoch": 1.3142990549152191, + "grad_norm": 5.2452702258064825, + "learning_rate": 1.5783386531670825e-06, + "loss": 0.034978485107421874, + "step": 152000 + }, + { + "epoch": 1.3143422884367624, + "grad_norm": 1.6504130081830555, + "learning_rate": 1.5781592519490054e-06, + "loss": 0.09116744995117188, + "step": 152005 + }, + { + "epoch": 1.3143855219583056, + "grad_norm": 8.539636770086602, + "learning_rate": 1.5779798572883318e-06, + "loss": 0.3071174621582031, + "step": 152010 + }, + { + "epoch": 1.3144287554798488, + "grad_norm": 0.6670616092757611, + "learning_rate": 1.5778004691858897e-06, + "loss": 0.1625091552734375, + "step": 152015 + }, + { + "epoch": 1.314471989001392, + "grad_norm": 30.526457778167515, + "learning_rate": 1.577621087642506e-06, + "loss": 0.1322540283203125, + "step": 152020 + }, + { + "epoch": 1.3145152225229353, + "grad_norm": 2.732678394133529, + "learning_rate": 1.5774417126590084e-06, + "loss": 0.09359016418457031, + "step": 152025 + }, + { + "epoch": 1.3145584560444785, + "grad_norm": 0.2032633346152163, + "learning_rate": 1.5772623442362224e-06, + "loss": 0.019998931884765626, + "step": 152030 + }, + { + "epoch": 1.314601689566022, + "grad_norm": 0.39821809251296997, + "learning_rate": 1.577082982374978e-06, + "loss": 0.1924114227294922, + "step": 152035 + }, + { + "epoch": 1.3146449230875652, + "grad_norm": 0.5397559226182749, + "learning_rate": 1.5769036270761013e-06, + "loss": 0.049325180053710935, + "step": 152040 + }, + { + "epoch": 1.3146881566091084, + "grad_norm": 2.4205701429840305, + "learning_rate": 1.5767242783404194e-06, + "loss": 0.20328617095947266, + "step": 152045 + }, + { + "epoch": 1.3147313901306517, + "grad_norm": 0.1417699847495697, + "learning_rate": 1.5765449361687582e-06, + "loss": 0.3419347763061523, + "step": 152050 + }, + { + "epoch": 1.3147746236521949, + "grad_norm": 2.3107600591352595, + "learning_rate": 1.5763656005619469e-06, + "loss": 0.048067474365234376, + "step": 152055 + }, + { + "epoch": 1.3148178571737383, + "grad_norm": 0.5822963482878619, + "learning_rate": 1.5761862715208108e-06, + "loss": 0.06541824340820312, + "step": 152060 + }, + { + "epoch": 1.3148610906952816, + "grad_norm": 19.949681666567056, + "learning_rate": 1.576006949046179e-06, + "loss": 0.2583930969238281, + "step": 152065 + }, + { + "epoch": 1.3149043242168248, + "grad_norm": 2.298082999740221, + "learning_rate": 1.5758276331388768e-06, + "loss": 0.0617828369140625, + "step": 152070 + }, + { + "epoch": 1.314947557738368, + "grad_norm": 0.14892575517833048, + "learning_rate": 1.5756483237997321e-06, + "loss": 0.02427825927734375, + "step": 152075 + }, + { + "epoch": 1.3149907912599113, + "grad_norm": 0.4215803714375237, + "learning_rate": 1.5754690210295716e-06, + "loss": 0.012052345275878906, + "step": 152080 + }, + { + "epoch": 1.3150340247814545, + "grad_norm": 2.2663305606592927, + "learning_rate": 1.5752897248292215e-06, + "loss": 0.01739654541015625, + "step": 152085 + }, + { + "epoch": 1.3150772583029977, + "grad_norm": 1.679160053097355, + "learning_rate": 1.5751104351995095e-06, + "loss": 0.02400970458984375, + "step": 152090 + }, + { + "epoch": 1.315120491824541, + "grad_norm": 20.53387850308098, + "learning_rate": 1.5749311521412617e-06, + "loss": 0.07822589874267578, + "step": 152095 + }, + { + "epoch": 1.3151637253460844, + "grad_norm": 6.21970359872083, + "learning_rate": 1.5747518756553062e-06, + "loss": 0.07083587646484375, + "step": 152100 + }, + { + "epoch": 1.3152069588676276, + "grad_norm": 5.736375721000998, + "learning_rate": 1.5745726057424695e-06, + "loss": 0.0177093505859375, + "step": 152105 + }, + { + "epoch": 1.3152501923891708, + "grad_norm": 1.8874117177508989, + "learning_rate": 1.5743933424035764e-06, + "loss": 0.043710517883300784, + "step": 152110 + }, + { + "epoch": 1.315293425910714, + "grad_norm": 6.184523651272415, + "learning_rate": 1.5742140856394564e-06, + "loss": 0.13819808959960939, + "step": 152115 + }, + { + "epoch": 1.3153366594322575, + "grad_norm": 0.19607012690787565, + "learning_rate": 1.574034835450935e-06, + "loss": 0.13453407287597657, + "step": 152120 + }, + { + "epoch": 1.3153798929538008, + "grad_norm": 0.40520840695037774, + "learning_rate": 1.5738555918388381e-06, + "loss": 0.053424644470214847, + "step": 152125 + }, + { + "epoch": 1.315423126475344, + "grad_norm": 0.3985327383284561, + "learning_rate": 1.573676354803994e-06, + "loss": 0.03902626037597656, + "step": 152130 + }, + { + "epoch": 1.3154663599968872, + "grad_norm": 5.438702826092252, + "learning_rate": 1.5734971243472287e-06, + "loss": 0.027715301513671874, + "step": 152135 + }, + { + "epoch": 1.3155095935184304, + "grad_norm": 3.0303978722185696, + "learning_rate": 1.5733179004693687e-06, + "loss": 0.027649307250976564, + "step": 152140 + }, + { + "epoch": 1.3155528270399737, + "grad_norm": 21.43231858137849, + "learning_rate": 1.5731386831712405e-06, + "loss": 0.07090263366699219, + "step": 152145 + }, + { + "epoch": 1.315596060561517, + "grad_norm": 12.05509249313325, + "learning_rate": 1.5729594724536704e-06, + "loss": 0.06162109375, + "step": 152150 + }, + { + "epoch": 1.3156392940830601, + "grad_norm": 1.1628293820378215, + "learning_rate": 1.572780268317484e-06, + "loss": 0.016829681396484376, + "step": 152155 + }, + { + "epoch": 1.3156825276046036, + "grad_norm": 9.256418549489243, + "learning_rate": 1.5726010707635105e-06, + "loss": 0.0509552001953125, + "step": 152160 + }, + { + "epoch": 1.3157257611261468, + "grad_norm": 6.312014671531323, + "learning_rate": 1.5724218797925732e-06, + "loss": 0.12855987548828124, + "step": 152165 + }, + { + "epoch": 1.31576899464769, + "grad_norm": 0.6512589456389655, + "learning_rate": 1.572242695405501e-06, + "loss": 0.03226776123046875, + "step": 152170 + }, + { + "epoch": 1.3158122281692333, + "grad_norm": 1.6572799896283468, + "learning_rate": 1.5720635176031197e-06, + "loss": 0.09043655395507813, + "step": 152175 + }, + { + "epoch": 1.3158554616907765, + "grad_norm": 0.3048648174166387, + "learning_rate": 1.571884346386255e-06, + "loss": 0.05089302062988281, + "step": 152180 + }, + { + "epoch": 1.31589869521232, + "grad_norm": 0.037701200068251076, + "learning_rate": 1.571705181755734e-06, + "loss": 0.031035327911376955, + "step": 152185 + }, + { + "epoch": 1.3159419287338632, + "grad_norm": 10.915677243956795, + "learning_rate": 1.5715260237123805e-06, + "loss": 0.0608184814453125, + "step": 152190 + }, + { + "epoch": 1.3159851622554064, + "grad_norm": 5.658567190343147, + "learning_rate": 1.5713468722570246e-06, + "loss": 0.07476091384887695, + "step": 152195 + }, + { + "epoch": 1.3160283957769496, + "grad_norm": 0.7338144117446045, + "learning_rate": 1.5711677273904905e-06, + "loss": 0.06176128387451172, + "step": 152200 + }, + { + "epoch": 1.3160716292984929, + "grad_norm": 0.25130304747366894, + "learning_rate": 1.5709885891136044e-06, + "loss": 0.034906005859375, + "step": 152205 + }, + { + "epoch": 1.316114862820036, + "grad_norm": 1.5959507298211333, + "learning_rate": 1.570809457427193e-06, + "loss": 0.025309371948242187, + "step": 152210 + }, + { + "epoch": 1.3161580963415793, + "grad_norm": 1.0863623340581845, + "learning_rate": 1.570630332332081e-06, + "loss": 0.03702545166015625, + "step": 152215 + }, + { + "epoch": 1.3162013298631225, + "grad_norm": 0.4724276052707621, + "learning_rate": 1.5704512138290966e-06, + "loss": 0.06441421508789062, + "step": 152220 + }, + { + "epoch": 1.316244563384666, + "grad_norm": 13.623942914564298, + "learning_rate": 1.5702721019190633e-06, + "loss": 0.03204822540283203, + "step": 152225 + }, + { + "epoch": 1.3162877969062092, + "grad_norm": 1.5312151976835366, + "learning_rate": 1.57009299660281e-06, + "loss": 0.1181793212890625, + "step": 152230 + }, + { + "epoch": 1.3163310304277525, + "grad_norm": 0.31581407482369356, + "learning_rate": 1.569913897881162e-06, + "loss": 0.11917457580566407, + "step": 152235 + }, + { + "epoch": 1.3163742639492957, + "grad_norm": 29.98281285030237, + "learning_rate": 1.5697348057549445e-06, + "loss": 0.19539966583251953, + "step": 152240 + }, + { + "epoch": 1.316417497470839, + "grad_norm": 0.09491938032492418, + "learning_rate": 1.5695557202249837e-06, + "loss": 0.27411575317382814, + "step": 152245 + }, + { + "epoch": 1.3164607309923824, + "grad_norm": 15.22522166006912, + "learning_rate": 1.569376641292105e-06, + "loss": 0.026416015625, + "step": 152250 + }, + { + "epoch": 1.3165039645139256, + "grad_norm": 79.2284089792237, + "learning_rate": 1.5691975689571343e-06, + "loss": 0.3258544921875, + "step": 152255 + }, + { + "epoch": 1.3165471980354688, + "grad_norm": 3.3249770579105506, + "learning_rate": 1.5690185032208992e-06, + "loss": 0.13208236694335937, + "step": 152260 + }, + { + "epoch": 1.316590431557012, + "grad_norm": 5.743940014733337, + "learning_rate": 1.568839444084224e-06, + "loss": 0.1973175048828125, + "step": 152265 + }, + { + "epoch": 1.3166336650785553, + "grad_norm": 4.516853413593825, + "learning_rate": 1.568660391547935e-06, + "loss": 0.02650146484375, + "step": 152270 + }, + { + "epoch": 1.3166768986000985, + "grad_norm": 4.909301449458443, + "learning_rate": 1.5684813456128562e-06, + "loss": 0.08056125640869141, + "step": 152275 + }, + { + "epoch": 1.3167201321216417, + "grad_norm": 1.7599733024686752, + "learning_rate": 1.5683023062798167e-06, + "loss": 0.029415988922119142, + "step": 152280 + }, + { + "epoch": 1.316763365643185, + "grad_norm": 0.8745029793850505, + "learning_rate": 1.5681232735496387e-06, + "loss": 0.02644500732421875, + "step": 152285 + }, + { + "epoch": 1.3168065991647284, + "grad_norm": 16.919914547996434, + "learning_rate": 1.5679442474231513e-06, + "loss": 0.1284820556640625, + "step": 152290 + }, + { + "epoch": 1.3168498326862716, + "grad_norm": 0.6045086176647204, + "learning_rate": 1.5677652279011783e-06, + "loss": 0.045951461791992186, + "step": 152295 + }, + { + "epoch": 1.3168930662078149, + "grad_norm": 21.07752066730287, + "learning_rate": 1.5675862149845454e-06, + "loss": 0.08455581665039062, + "step": 152300 + }, + { + "epoch": 1.316936299729358, + "grad_norm": 0.9227351603302895, + "learning_rate": 1.5674072086740789e-06, + "loss": 0.022568511962890624, + "step": 152305 + }, + { + "epoch": 1.3169795332509016, + "grad_norm": 58.9349306596266, + "learning_rate": 1.5672282089706035e-06, + "loss": 0.263037109375, + "step": 152310 + }, + { + "epoch": 1.3170227667724448, + "grad_norm": 22.2223902583581, + "learning_rate": 1.5670492158749434e-06, + "loss": 0.18143081665039062, + "step": 152315 + }, + { + "epoch": 1.317066000293988, + "grad_norm": 3.3350748145194062, + "learning_rate": 1.5668702293879272e-06, + "loss": 0.036569976806640626, + "step": 152320 + }, + { + "epoch": 1.3171092338155312, + "grad_norm": 0.32211416472566434, + "learning_rate": 1.5666912495103785e-06, + "loss": 0.04156150817871094, + "step": 152325 + }, + { + "epoch": 1.3171524673370745, + "grad_norm": 25.964524952811807, + "learning_rate": 1.5665122762431235e-06, + "loss": 0.20359649658203124, + "step": 152330 + }, + { + "epoch": 1.3171957008586177, + "grad_norm": 1.7154109541026283, + "learning_rate": 1.5663333095869862e-06, + "loss": 0.0492462158203125, + "step": 152335 + }, + { + "epoch": 1.317238934380161, + "grad_norm": 2.3009186949789147, + "learning_rate": 1.5661543495427938e-06, + "loss": 0.06526393890380859, + "step": 152340 + }, + { + "epoch": 1.3172821679017042, + "grad_norm": 0.43980786170398073, + "learning_rate": 1.5659753961113709e-06, + "loss": 0.07907905578613281, + "step": 152345 + }, + { + "epoch": 1.3173254014232476, + "grad_norm": 19.664197887568232, + "learning_rate": 1.5657964492935416e-06, + "loss": 0.5332122802734375, + "step": 152350 + }, + { + "epoch": 1.3173686349447908, + "grad_norm": 5.635033078414079, + "learning_rate": 1.5656175090901336e-06, + "loss": 0.0415191650390625, + "step": 152355 + }, + { + "epoch": 1.317411868466334, + "grad_norm": 1.4453674772539669, + "learning_rate": 1.565438575501971e-06, + "loss": 0.06419677734375, + "step": 152360 + }, + { + "epoch": 1.3174551019878773, + "grad_norm": 2.0945319958955455, + "learning_rate": 1.5652596485298786e-06, + "loss": 0.14501953125, + "step": 152365 + }, + { + "epoch": 1.3174983355094205, + "grad_norm": 6.19817166223782, + "learning_rate": 1.565080728174682e-06, + "loss": 0.017782211303710938, + "step": 152370 + }, + { + "epoch": 1.317541569030964, + "grad_norm": 27.964587414059626, + "learning_rate": 1.5649018144372067e-06, + "loss": 0.04012374877929688, + "step": 152375 + }, + { + "epoch": 1.3175848025525072, + "grad_norm": 13.24249716242571, + "learning_rate": 1.5647229073182758e-06, + "loss": 0.034923553466796875, + "step": 152380 + }, + { + "epoch": 1.3176280360740504, + "grad_norm": 6.00442800198598, + "learning_rate": 1.5645440068187173e-06, + "loss": 0.13099594116210939, + "step": 152385 + }, + { + "epoch": 1.3176712695955937, + "grad_norm": 5.8693252856168465, + "learning_rate": 1.564365112939354e-06, + "loss": 0.057787322998046876, + "step": 152390 + }, + { + "epoch": 1.317714503117137, + "grad_norm": 6.326596809962504, + "learning_rate": 1.564186225681013e-06, + "loss": 0.12438087463378907, + "step": 152395 + }, + { + "epoch": 1.3177577366386801, + "grad_norm": 2.157911796798849, + "learning_rate": 1.5640073450445182e-06, + "loss": 0.020705413818359376, + "step": 152400 + }, + { + "epoch": 1.3178009701602234, + "grad_norm": 1.1683997290574593, + "learning_rate": 1.5638284710306948e-06, + "loss": 0.02581920623779297, + "step": 152405 + }, + { + "epoch": 1.3178442036817666, + "grad_norm": 3.6960111061424445, + "learning_rate": 1.5636496036403662e-06, + "loss": 0.017249584197998047, + "step": 152410 + }, + { + "epoch": 1.31788743720331, + "grad_norm": 2.4506197115907162, + "learning_rate": 1.5634707428743602e-06, + "loss": 0.03304862976074219, + "step": 152415 + }, + { + "epoch": 1.3179306707248533, + "grad_norm": 0.262687808010096, + "learning_rate": 1.5632918887335e-06, + "loss": 0.004334926605224609, + "step": 152420 + }, + { + "epoch": 1.3179739042463965, + "grad_norm": 2.297716291309709, + "learning_rate": 1.5631130412186104e-06, + "loss": 0.07421340942382812, + "step": 152425 + }, + { + "epoch": 1.3180171377679397, + "grad_norm": 0.5239883093984141, + "learning_rate": 1.5629342003305168e-06, + "loss": 0.00486907958984375, + "step": 152430 + }, + { + "epoch": 1.318060371289483, + "grad_norm": 0.7433431146674693, + "learning_rate": 1.5627553660700437e-06, + "loss": 0.015561676025390625, + "step": 152435 + }, + { + "epoch": 1.3181036048110264, + "grad_norm": 2.5994187379076936, + "learning_rate": 1.5625765384380146e-06, + "loss": 0.15190467834472657, + "step": 152440 + }, + { + "epoch": 1.3181468383325696, + "grad_norm": 3.1618114412185565, + "learning_rate": 1.5623977174352567e-06, + "loss": 0.02584228515625, + "step": 152445 + }, + { + "epoch": 1.3181900718541129, + "grad_norm": 1.4177727453305593, + "learning_rate": 1.5622189030625925e-06, + "loss": 0.11842117309570313, + "step": 152450 + }, + { + "epoch": 1.318233305375656, + "grad_norm": 9.75022359950523, + "learning_rate": 1.5620400953208488e-06, + "loss": 0.2000865936279297, + "step": 152455 + }, + { + "epoch": 1.3182765388971993, + "grad_norm": 0.0858453850252436, + "learning_rate": 1.5618612942108488e-06, + "loss": 0.013673782348632812, + "step": 152460 + }, + { + "epoch": 1.3183197724187425, + "grad_norm": 2.2958619424650077, + "learning_rate": 1.5616824997334174e-06, + "loss": 0.22938995361328124, + "step": 152465 + }, + { + "epoch": 1.3183630059402858, + "grad_norm": 0.27202020915047626, + "learning_rate": 1.5615037118893795e-06, + "loss": 0.02124481201171875, + "step": 152470 + }, + { + "epoch": 1.318406239461829, + "grad_norm": 5.628440479793541, + "learning_rate": 1.561324930679558e-06, + "loss": 0.03455429077148438, + "step": 152475 + }, + { + "epoch": 1.3184494729833725, + "grad_norm": 36.39015393227431, + "learning_rate": 1.5611461561047802e-06, + "loss": 0.30242919921875, + "step": 152480 + }, + { + "epoch": 1.3184927065049157, + "grad_norm": 6.557602016630547, + "learning_rate": 1.5609673881658692e-06, + "loss": 0.02700386047363281, + "step": 152485 + }, + { + "epoch": 1.318535940026459, + "grad_norm": 1.176608037447066, + "learning_rate": 1.5607886268636494e-06, + "loss": 0.04820747375488281, + "step": 152490 + }, + { + "epoch": 1.3185791735480021, + "grad_norm": 8.19715380069865, + "learning_rate": 1.5606098721989451e-06, + "loss": 0.022072601318359374, + "step": 152495 + }, + { + "epoch": 1.3186224070695454, + "grad_norm": 12.46457793959423, + "learning_rate": 1.56043112417258e-06, + "loss": 0.05503578186035156, + "step": 152500 + }, + { + "epoch": 1.3186656405910888, + "grad_norm": 1.0272262960474807, + "learning_rate": 1.5602523827853801e-06, + "loss": 0.02639350891113281, + "step": 152505 + }, + { + "epoch": 1.318708874112632, + "grad_norm": 0.7553259455772673, + "learning_rate": 1.5600736480381682e-06, + "loss": 0.12000656127929688, + "step": 152510 + }, + { + "epoch": 1.3187521076341753, + "grad_norm": 8.629937570732272, + "learning_rate": 1.5598949199317703e-06, + "loss": 0.09207019805908204, + "step": 152515 + }, + { + "epoch": 1.3187953411557185, + "grad_norm": 15.943380917642306, + "learning_rate": 1.5597161984670099e-06, + "loss": 0.024298095703125, + "step": 152520 + }, + { + "epoch": 1.3188385746772617, + "grad_norm": 1.426896965315559, + "learning_rate": 1.5595374836447113e-06, + "loss": 0.14613723754882812, + "step": 152525 + }, + { + "epoch": 1.318881808198805, + "grad_norm": 2.004588457961694, + "learning_rate": 1.5593587754656986e-06, + "loss": 0.14477157592773438, + "step": 152530 + }, + { + "epoch": 1.3189250417203482, + "grad_norm": 0.051036501651860106, + "learning_rate": 1.5591800739307955e-06, + "loss": 0.06764183044433594, + "step": 152535 + }, + { + "epoch": 1.3189682752418914, + "grad_norm": 2.6521305817563685, + "learning_rate": 1.5590013790408258e-06, + "loss": 0.03925952911376953, + "step": 152540 + }, + { + "epoch": 1.3190115087634349, + "grad_norm": 4.565830698990238, + "learning_rate": 1.5588226907966152e-06, + "loss": 0.01755943298339844, + "step": 152545 + }, + { + "epoch": 1.319054742284978, + "grad_norm": 12.202730171637977, + "learning_rate": 1.5586440091989874e-06, + "loss": 0.126617431640625, + "step": 152550 + }, + { + "epoch": 1.3190979758065213, + "grad_norm": 7.178231046536973, + "learning_rate": 1.558465334248766e-06, + "loss": 0.03969764709472656, + "step": 152555 + }, + { + "epoch": 1.3191412093280646, + "grad_norm": 15.712032524250661, + "learning_rate": 1.5582866659467738e-06, + "loss": 0.07881927490234375, + "step": 152560 + }, + { + "epoch": 1.319184442849608, + "grad_norm": 14.077098340849577, + "learning_rate": 1.5581080042938374e-06, + "loss": 0.08099899291992188, + "step": 152565 + }, + { + "epoch": 1.3192276763711512, + "grad_norm": 55.794326481500846, + "learning_rate": 1.557929349290778e-06, + "loss": 0.1520862579345703, + "step": 152570 + }, + { + "epoch": 1.3192709098926945, + "grad_norm": 30.167472845970366, + "learning_rate": 1.5577507009384225e-06, + "loss": 0.13230438232421876, + "step": 152575 + }, + { + "epoch": 1.3193141434142377, + "grad_norm": 1.1758744330165196, + "learning_rate": 1.5575720592375933e-06, + "loss": 0.0276397705078125, + "step": 152580 + }, + { + "epoch": 1.319357376935781, + "grad_norm": 1.6145117485704188, + "learning_rate": 1.5573934241891143e-06, + "loss": 0.03637237548828125, + "step": 152585 + }, + { + "epoch": 1.3194006104573242, + "grad_norm": 8.526833408579497, + "learning_rate": 1.5572147957938093e-06, + "loss": 0.04436569213867188, + "step": 152590 + }, + { + "epoch": 1.3194438439788674, + "grad_norm": 0.31670501855379146, + "learning_rate": 1.5570361740525019e-06, + "loss": 0.03557090759277344, + "step": 152595 + }, + { + "epoch": 1.3194870775004106, + "grad_norm": 1.024916267215275, + "learning_rate": 1.5568575589660166e-06, + "loss": 0.0644287109375, + "step": 152600 + }, + { + "epoch": 1.319530311021954, + "grad_norm": 0.6970819978893847, + "learning_rate": 1.5566789505351753e-06, + "loss": 0.04480438232421875, + "step": 152605 + }, + { + "epoch": 1.3195735445434973, + "grad_norm": 0.6380679637068443, + "learning_rate": 1.5565003487608046e-06, + "loss": 0.07563600540161133, + "step": 152610 + }, + { + "epoch": 1.3196167780650405, + "grad_norm": 6.076061442202149, + "learning_rate": 1.5563217536437256e-06, + "loss": 0.04620027542114258, + "step": 152615 + }, + { + "epoch": 1.3196600115865837, + "grad_norm": 5.5328481348206955, + "learning_rate": 1.5561431651847637e-06, + "loss": 0.03959465026855469, + "step": 152620 + }, + { + "epoch": 1.319703245108127, + "grad_norm": 17.453681889387234, + "learning_rate": 1.5559645833847424e-06, + "loss": 0.15285797119140626, + "step": 152625 + }, + { + "epoch": 1.3197464786296704, + "grad_norm": 1.017919924789706, + "learning_rate": 1.5557860082444847e-06, + "loss": 0.00920257568359375, + "step": 152630 + }, + { + "epoch": 1.3197897121512137, + "grad_norm": 8.777349050591702, + "learning_rate": 1.5556074397648131e-06, + "loss": 0.14060211181640625, + "step": 152635 + }, + { + "epoch": 1.3198329456727569, + "grad_norm": 15.253352941182206, + "learning_rate": 1.5554288779465536e-06, + "loss": 0.22054882049560548, + "step": 152640 + }, + { + "epoch": 1.3198761791943001, + "grad_norm": 61.969052595354356, + "learning_rate": 1.5552503227905286e-06, + "loss": 0.15147552490234376, + "step": 152645 + }, + { + "epoch": 1.3199194127158433, + "grad_norm": 0.14702538802244244, + "learning_rate": 1.5550717742975612e-06, + "loss": 0.022515487670898438, + "step": 152650 + }, + { + "epoch": 1.3199626462373866, + "grad_norm": 0.18841301159250223, + "learning_rate": 1.5548932324684754e-06, + "loss": 0.07511634826660156, + "step": 152655 + }, + { + "epoch": 1.3200058797589298, + "grad_norm": 0.7990665196435461, + "learning_rate": 1.5547146973040941e-06, + "loss": 0.0358154296875, + "step": 152660 + }, + { + "epoch": 1.320049113280473, + "grad_norm": 11.507511982183217, + "learning_rate": 1.55453616880524e-06, + "loss": 0.11559600830078125, + "step": 152665 + }, + { + "epoch": 1.3200923468020165, + "grad_norm": 9.86119193123559, + "learning_rate": 1.5543576469727381e-06, + "loss": 0.304986572265625, + "step": 152670 + }, + { + "epoch": 1.3201355803235597, + "grad_norm": 0.20026724987327132, + "learning_rate": 1.55417913180741e-06, + "loss": 0.23333892822265626, + "step": 152675 + }, + { + "epoch": 1.320178813845103, + "grad_norm": 2.0184916554062937, + "learning_rate": 1.5540006233100813e-06, + "loss": 0.03837890625, + "step": 152680 + }, + { + "epoch": 1.3202220473666462, + "grad_norm": 0.8707762256735817, + "learning_rate": 1.553822121481574e-06, + "loss": 0.010417556762695313, + "step": 152685 + }, + { + "epoch": 1.3202652808881894, + "grad_norm": 44.130229498778874, + "learning_rate": 1.5536436263227112e-06, + "loss": 0.22360153198242189, + "step": 152690 + }, + { + "epoch": 1.3203085144097328, + "grad_norm": 1.5399761231718347, + "learning_rate": 1.553465137834316e-06, + "loss": 0.016607666015625, + "step": 152695 + }, + { + "epoch": 1.320351747931276, + "grad_norm": 3.3155689441907774, + "learning_rate": 1.5532866560172108e-06, + "loss": 0.01085042953491211, + "step": 152700 + }, + { + "epoch": 1.3203949814528193, + "grad_norm": 50.89404303241499, + "learning_rate": 1.5531081808722208e-06, + "loss": 0.09222278594970704, + "step": 152705 + }, + { + "epoch": 1.3204382149743625, + "grad_norm": 0.6972015625750438, + "learning_rate": 1.5529297124001682e-06, + "loss": 0.19018020629882812, + "step": 152710 + }, + { + "epoch": 1.3204814484959058, + "grad_norm": 2.221225542288808, + "learning_rate": 1.5527512506018755e-06, + "loss": 0.172515869140625, + "step": 152715 + }, + { + "epoch": 1.320524682017449, + "grad_norm": 18.7240596657642, + "learning_rate": 1.5525727954781663e-06, + "loss": 0.07178678512573242, + "step": 152720 + }, + { + "epoch": 1.3205679155389922, + "grad_norm": 42.31368668603859, + "learning_rate": 1.5523943470298627e-06, + "loss": 0.1260395050048828, + "step": 152725 + }, + { + "epoch": 1.3206111490605354, + "grad_norm": 12.776718907471729, + "learning_rate": 1.5522159052577891e-06, + "loss": 0.022127342224121094, + "step": 152730 + }, + { + "epoch": 1.320654382582079, + "grad_norm": 19.726320899942237, + "learning_rate": 1.5520374701627668e-06, + "loss": 0.07715377807617188, + "step": 152735 + }, + { + "epoch": 1.3206976161036221, + "grad_norm": 3.458506199969005, + "learning_rate": 1.5518590417456216e-06, + "loss": 0.02572479248046875, + "step": 152740 + }, + { + "epoch": 1.3207408496251654, + "grad_norm": 4.690507927001124, + "learning_rate": 1.5516806200071738e-06, + "loss": 0.10495071411132813, + "step": 152745 + }, + { + "epoch": 1.3207840831467086, + "grad_norm": 0.816517872116034, + "learning_rate": 1.551502204948247e-06, + "loss": 0.01616363525390625, + "step": 152750 + }, + { + "epoch": 1.3208273166682518, + "grad_norm": 1.0044859723882922, + "learning_rate": 1.5513237965696642e-06, + "loss": 0.05395793914794922, + "step": 152755 + }, + { + "epoch": 1.3208705501897953, + "grad_norm": 0.41872528739294784, + "learning_rate": 1.551145394872247e-06, + "loss": 0.021931838989257813, + "step": 152760 + }, + { + "epoch": 1.3209137837113385, + "grad_norm": 1.5178213072558617, + "learning_rate": 1.55096699985682e-06, + "loss": 0.02534027099609375, + "step": 152765 + }, + { + "epoch": 1.3209570172328817, + "grad_norm": 0.8610376843990324, + "learning_rate": 1.5507886115242053e-06, + "loss": 0.0254180908203125, + "step": 152770 + }, + { + "epoch": 1.321000250754425, + "grad_norm": 1.9998513781772405, + "learning_rate": 1.5506102298752258e-06, + "loss": 0.02740325927734375, + "step": 152775 + }, + { + "epoch": 1.3210434842759682, + "grad_norm": 20.347161014908725, + "learning_rate": 1.5504318549107037e-06, + "loss": 0.049028778076171876, + "step": 152780 + }, + { + "epoch": 1.3210867177975114, + "grad_norm": 9.614329649285212, + "learning_rate": 1.5502534866314604e-06, + "loss": 0.11060829162597656, + "step": 152785 + }, + { + "epoch": 1.3211299513190546, + "grad_norm": 0.8424321901508295, + "learning_rate": 1.5500751250383213e-06, + "loss": 0.009237289428710938, + "step": 152790 + }, + { + "epoch": 1.3211731848405979, + "grad_norm": 26.088102946992095, + "learning_rate": 1.5498967701321061e-06, + "loss": 0.18087501525878907, + "step": 152795 + }, + { + "epoch": 1.3212164183621413, + "grad_norm": 0.5886695510105039, + "learning_rate": 1.5497184219136404e-06, + "loss": 0.019448280334472656, + "step": 152800 + }, + { + "epoch": 1.3212596518836845, + "grad_norm": 25.17663289439202, + "learning_rate": 1.5495400803837452e-06, + "loss": 0.03967971801757812, + "step": 152805 + }, + { + "epoch": 1.3213028854052278, + "grad_norm": 1.3182017839699314, + "learning_rate": 1.5493617455432429e-06, + "loss": 0.15793266296386718, + "step": 152810 + }, + { + "epoch": 1.321346118926771, + "grad_norm": 1.623430986450361, + "learning_rate": 1.5491834173929557e-06, + "loss": 0.0534423828125, + "step": 152815 + }, + { + "epoch": 1.3213893524483145, + "grad_norm": 0.6379952592309583, + "learning_rate": 1.549005095933707e-06, + "loss": 0.18452835083007812, + "step": 152820 + }, + { + "epoch": 1.3214325859698577, + "grad_norm": 8.659305206898155, + "learning_rate": 1.5488267811663172e-06, + "loss": 0.03643989562988281, + "step": 152825 + }, + { + "epoch": 1.321475819491401, + "grad_norm": 2.748064340885023, + "learning_rate": 1.5486484730916113e-06, + "loss": 0.1172454833984375, + "step": 152830 + }, + { + "epoch": 1.3215190530129441, + "grad_norm": 0.5370131733386214, + "learning_rate": 1.5484701717104101e-06, + "loss": 0.05399169921875, + "step": 152835 + }, + { + "epoch": 1.3215622865344874, + "grad_norm": 5.6814756847282, + "learning_rate": 1.5482918770235352e-06, + "loss": 0.023886871337890626, + "step": 152840 + }, + { + "epoch": 1.3216055200560306, + "grad_norm": 1.485901640179087, + "learning_rate": 1.5481135890318109e-06, + "loss": 0.09782943725585938, + "step": 152845 + }, + { + "epoch": 1.3216487535775738, + "grad_norm": 1.5123065263181918, + "learning_rate": 1.5479353077360586e-06, + "loss": 0.02829456329345703, + "step": 152850 + }, + { + "epoch": 1.321691987099117, + "grad_norm": 0.15666009400041137, + "learning_rate": 1.5477570331370992e-06, + "loss": 0.04832115173339844, + "step": 152855 + }, + { + "epoch": 1.3217352206206605, + "grad_norm": 8.374335903327943, + "learning_rate": 1.5475787652357572e-06, + "loss": 0.07229347229003906, + "step": 152860 + }, + { + "epoch": 1.3217784541422037, + "grad_norm": 0.22312270805309078, + "learning_rate": 1.5474005040328536e-06, + "loss": 0.05551300048828125, + "step": 152865 + }, + { + "epoch": 1.321821687663747, + "grad_norm": 22.335782492910557, + "learning_rate": 1.5472222495292106e-06, + "loss": 0.1848724365234375, + "step": 152870 + }, + { + "epoch": 1.3218649211852902, + "grad_norm": 2.651571514180417, + "learning_rate": 1.54704400172565e-06, + "loss": 0.10564498901367188, + "step": 152875 + }, + { + "epoch": 1.3219081547068334, + "grad_norm": 14.484003272415237, + "learning_rate": 1.5468657606229943e-06, + "loss": 0.08565826416015625, + "step": 152880 + }, + { + "epoch": 1.3219513882283769, + "grad_norm": 3.3330304908117205, + "learning_rate": 1.546687526222065e-06, + "loss": 0.024423599243164062, + "step": 152885 + }, + { + "epoch": 1.32199462174992, + "grad_norm": 1.3525206352884191, + "learning_rate": 1.5465092985236832e-06, + "loss": 0.018304824829101562, + "step": 152890 + }, + { + "epoch": 1.3220378552714633, + "grad_norm": 7.271690496320895, + "learning_rate": 1.5463310775286738e-06, + "loss": 0.05911750793457031, + "step": 152895 + }, + { + "epoch": 1.3220810887930066, + "grad_norm": 7.197564286359323, + "learning_rate": 1.5461528632378554e-06, + "loss": 0.13899078369140624, + "step": 152900 + }, + { + "epoch": 1.3221243223145498, + "grad_norm": 39.2836675333205, + "learning_rate": 1.545974655652053e-06, + "loss": 0.13672027587890626, + "step": 152905 + }, + { + "epoch": 1.322167555836093, + "grad_norm": 4.355086430418073, + "learning_rate": 1.545796454772087e-06, + "loss": 0.026558303833007814, + "step": 152910 + }, + { + "epoch": 1.3222107893576363, + "grad_norm": 1.1396017797153997, + "learning_rate": 1.5456182605987788e-06, + "loss": 0.030021286010742186, + "step": 152915 + }, + { + "epoch": 1.3222540228791795, + "grad_norm": 1.4915653605209658, + "learning_rate": 1.54544007313295e-06, + "loss": 0.07476654052734374, + "step": 152920 + }, + { + "epoch": 1.322297256400723, + "grad_norm": 0.7350245062073909, + "learning_rate": 1.5452618923754237e-06, + "loss": 0.027513504028320312, + "step": 152925 + }, + { + "epoch": 1.3223404899222662, + "grad_norm": 0.34595980258757497, + "learning_rate": 1.545083718327022e-06, + "loss": 0.15535850524902345, + "step": 152930 + }, + { + "epoch": 1.3223837234438094, + "grad_norm": 32.7359178917641, + "learning_rate": 1.5449055509885646e-06, + "loss": 0.13914260864257813, + "step": 152935 + }, + { + "epoch": 1.3224269569653526, + "grad_norm": 12.510661685306513, + "learning_rate": 1.5447273903608744e-06, + "loss": 0.08705024719238282, + "step": 152940 + }, + { + "epoch": 1.3224701904868958, + "grad_norm": 29.127670983426974, + "learning_rate": 1.5445492364447735e-06, + "loss": 0.3252056121826172, + "step": 152945 + }, + { + "epoch": 1.3225134240084393, + "grad_norm": 13.903881039967882, + "learning_rate": 1.5443710892410813e-06, + "loss": 0.08038864135742188, + "step": 152950 + }, + { + "epoch": 1.3225566575299825, + "grad_norm": 0.18179550813428183, + "learning_rate": 1.5441929487506222e-06, + "loss": 0.018462371826171876, + "step": 152955 + }, + { + "epoch": 1.3225998910515258, + "grad_norm": 0.8481174317949128, + "learning_rate": 1.5440148149742153e-06, + "loss": 0.13921852111816407, + "step": 152960 + }, + { + "epoch": 1.322643124573069, + "grad_norm": 0.22055901530419333, + "learning_rate": 1.5438366879126851e-06, + "loss": 0.14294157028198243, + "step": 152965 + }, + { + "epoch": 1.3226863580946122, + "grad_norm": 0.24673213032397176, + "learning_rate": 1.5436585675668508e-06, + "loss": 0.03125, + "step": 152970 + }, + { + "epoch": 1.3227295916161554, + "grad_norm": 0.5662168449024917, + "learning_rate": 1.5434804539375347e-06, + "loss": 0.022694778442382813, + "step": 152975 + }, + { + "epoch": 1.3227728251376987, + "grad_norm": 1.451957896481104, + "learning_rate": 1.5433023470255582e-06, + "loss": 0.18695831298828125, + "step": 152980 + }, + { + "epoch": 1.322816058659242, + "grad_norm": 2.117063876811621, + "learning_rate": 1.543124246831741e-06, + "loss": 0.014298534393310547, + "step": 152985 + }, + { + "epoch": 1.3228592921807854, + "grad_norm": 3.488882525663137, + "learning_rate": 1.5429461533569076e-06, + "loss": 0.05434837341308594, + "step": 152990 + }, + { + "epoch": 1.3229025257023286, + "grad_norm": 28.286356899491818, + "learning_rate": 1.5427680666018778e-06, + "loss": 0.13121414184570312, + "step": 152995 + }, + { + "epoch": 1.3229457592238718, + "grad_norm": 1.4579446417043842, + "learning_rate": 1.5425899865674728e-06, + "loss": 0.09334945678710938, + "step": 153000 + }, + { + "epoch": 1.322988992745415, + "grad_norm": 19.518064906705664, + "learning_rate": 1.5424119132545143e-06, + "loss": 0.06629486083984375, + "step": 153005 + }, + { + "epoch": 1.3230322262669583, + "grad_norm": 14.715020977967823, + "learning_rate": 1.5422338466638218e-06, + "loss": 0.27045440673828125, + "step": 153010 + }, + { + "epoch": 1.3230754597885017, + "grad_norm": 0.16274336627335775, + "learning_rate": 1.542055786796218e-06, + "loss": 0.032085418701171875, + "step": 153015 + }, + { + "epoch": 1.323118693310045, + "grad_norm": 2.2495006417320726, + "learning_rate": 1.541877733652525e-06, + "loss": 0.033912277221679686, + "step": 153020 + }, + { + "epoch": 1.3231619268315882, + "grad_norm": 0.45794069038679797, + "learning_rate": 1.5416996872335634e-06, + "loss": 0.0062652587890625, + "step": 153025 + }, + { + "epoch": 1.3232051603531314, + "grad_norm": 0.5096196822762497, + "learning_rate": 1.5415216475401538e-06, + "loss": 0.07885208129882812, + "step": 153030 + }, + { + "epoch": 1.3232483938746746, + "grad_norm": 4.4458340489542705, + "learning_rate": 1.5413436145731175e-06, + "loss": 0.031444549560546875, + "step": 153035 + }, + { + "epoch": 1.3232916273962179, + "grad_norm": 6.137219577375443, + "learning_rate": 1.5411655883332758e-06, + "loss": 0.0624847412109375, + "step": 153040 + }, + { + "epoch": 1.323334860917761, + "grad_norm": 0.30140640323226264, + "learning_rate": 1.5409875688214493e-06, + "loss": 0.0367401123046875, + "step": 153045 + }, + { + "epoch": 1.3233780944393045, + "grad_norm": 0.36568956952366427, + "learning_rate": 1.5408095560384581e-06, + "loss": 0.01506805419921875, + "step": 153050 + }, + { + "epoch": 1.3234213279608478, + "grad_norm": 0.14548234781946986, + "learning_rate": 1.5406315499851253e-06, + "loss": 0.0988250732421875, + "step": 153055 + }, + { + "epoch": 1.323464561482391, + "grad_norm": 11.304638235544822, + "learning_rate": 1.540453550662271e-06, + "loss": 0.045781707763671874, + "step": 153060 + }, + { + "epoch": 1.3235077950039342, + "grad_norm": 5.295755284350345, + "learning_rate": 1.5402755580707151e-06, + "loss": 0.233929443359375, + "step": 153065 + }, + { + "epoch": 1.3235510285254775, + "grad_norm": 9.176020086018966, + "learning_rate": 1.5400975722112798e-06, + "loss": 0.07273101806640625, + "step": 153070 + }, + { + "epoch": 1.323594262047021, + "grad_norm": 4.573018353241741, + "learning_rate": 1.5399195930847862e-06, + "loss": 0.08069629669189453, + "step": 153075 + }, + { + "epoch": 1.3236374955685641, + "grad_norm": 7.363465975300969, + "learning_rate": 1.5397416206920529e-06, + "loss": 0.028348541259765624, + "step": 153080 + }, + { + "epoch": 1.3236807290901074, + "grad_norm": 2.573115736571992, + "learning_rate": 1.5395636550339034e-06, + "loss": 0.0067535400390625, + "step": 153085 + }, + { + "epoch": 1.3237239626116506, + "grad_norm": 16.674300357312884, + "learning_rate": 1.539385696111157e-06, + "loss": 0.04853038787841797, + "step": 153090 + }, + { + "epoch": 1.3237671961331938, + "grad_norm": 0.9810100196806669, + "learning_rate": 1.5392077439246352e-06, + "loss": 0.02301311492919922, + "step": 153095 + }, + { + "epoch": 1.323810429654737, + "grad_norm": 1.9172340847576805, + "learning_rate": 1.5390297984751578e-06, + "loss": 0.0102508544921875, + "step": 153100 + }, + { + "epoch": 1.3238536631762803, + "grad_norm": 6.285244325431569, + "learning_rate": 1.5388518597635462e-06, + "loss": 0.08039875030517578, + "step": 153105 + }, + { + "epoch": 1.3238968966978235, + "grad_norm": 2.160149511954359, + "learning_rate": 1.5386739277906197e-06, + "loss": 0.0441192626953125, + "step": 153110 + }, + { + "epoch": 1.323940130219367, + "grad_norm": 5.403740293664527, + "learning_rate": 1.5384960025572006e-06, + "loss": 0.02183361053466797, + "step": 153115 + }, + { + "epoch": 1.3239833637409102, + "grad_norm": 0.8288692832356134, + "learning_rate": 1.538318084064109e-06, + "loss": 0.07773590087890625, + "step": 153120 + }, + { + "epoch": 1.3240265972624534, + "grad_norm": 17.538696482193725, + "learning_rate": 1.538140172312164e-06, + "loss": 0.06129875183105469, + "step": 153125 + }, + { + "epoch": 1.3240698307839966, + "grad_norm": 4.089154952494509, + "learning_rate": 1.537962267302189e-06, + "loss": 0.03570404052734375, + "step": 153130 + }, + { + "epoch": 1.3241130643055399, + "grad_norm": 3.6115517734462195, + "learning_rate": 1.5377843690350022e-06, + "loss": 0.10149154663085938, + "step": 153135 + }, + { + "epoch": 1.3241562978270833, + "grad_norm": 7.517766248921559, + "learning_rate": 1.5376064775114251e-06, + "loss": 0.1089385986328125, + "step": 153140 + }, + { + "epoch": 1.3241995313486266, + "grad_norm": 0.18351141300711662, + "learning_rate": 1.5374285927322761e-06, + "loss": 0.037348175048828126, + "step": 153145 + }, + { + "epoch": 1.3242427648701698, + "grad_norm": 1.38302815982523, + "learning_rate": 1.5372507146983787e-06, + "loss": 0.03813877105712891, + "step": 153150 + }, + { + "epoch": 1.324285998391713, + "grad_norm": 1.2361137796495378, + "learning_rate": 1.5370728434105517e-06, + "loss": 0.17302360534667968, + "step": 153155 + }, + { + "epoch": 1.3243292319132562, + "grad_norm": 0.16743826044613203, + "learning_rate": 1.5368949788696155e-06, + "loss": 0.05686235427856445, + "step": 153160 + }, + { + "epoch": 1.3243724654347995, + "grad_norm": 1.972208121230099, + "learning_rate": 1.5367171210763903e-06, + "loss": 0.029798507690429688, + "step": 153165 + }, + { + "epoch": 1.3244156989563427, + "grad_norm": 3.3217286572244094, + "learning_rate": 1.5365392700316966e-06, + "loss": 0.015826416015625, + "step": 153170 + }, + { + "epoch": 1.324458932477886, + "grad_norm": 1.3687974747960847, + "learning_rate": 1.536361425736353e-06, + "loss": 0.0534637451171875, + "step": 153175 + }, + { + "epoch": 1.3245021659994294, + "grad_norm": 1.4617057599454548, + "learning_rate": 1.5361835881911827e-06, + "loss": 0.2300537109375, + "step": 153180 + }, + { + "epoch": 1.3245453995209726, + "grad_norm": 0.22551090837420967, + "learning_rate": 1.5360057573970029e-06, + "loss": 0.3300056457519531, + "step": 153185 + }, + { + "epoch": 1.3245886330425158, + "grad_norm": 11.557006904805561, + "learning_rate": 1.5358279333546364e-06, + "loss": 0.11405715942382813, + "step": 153190 + }, + { + "epoch": 1.324631866564059, + "grad_norm": 0.9827091247342106, + "learning_rate": 1.535650116064902e-06, + "loss": 0.21887741088867188, + "step": 153195 + }, + { + "epoch": 1.3246751000856023, + "grad_norm": 30.389952660537496, + "learning_rate": 1.5354723055286203e-06, + "loss": 0.096234130859375, + "step": 153200 + }, + { + "epoch": 1.3247183336071457, + "grad_norm": 4.614922926784919, + "learning_rate": 1.5352945017466106e-06, + "loss": 0.06632919311523437, + "step": 153205 + }, + { + "epoch": 1.324761567128689, + "grad_norm": 3.1884692068754754, + "learning_rate": 1.5351167047196923e-06, + "loss": 0.0542205810546875, + "step": 153210 + }, + { + "epoch": 1.3248048006502322, + "grad_norm": 57.67636039069822, + "learning_rate": 1.534938914448687e-06, + "loss": 0.16937179565429689, + "step": 153215 + }, + { + "epoch": 1.3248480341717754, + "grad_norm": 5.485030522124122, + "learning_rate": 1.5347611309344145e-06, + "loss": 0.10878925323486328, + "step": 153220 + }, + { + "epoch": 1.3248912676933187, + "grad_norm": 2.053651250055462, + "learning_rate": 1.5345833541776936e-06, + "loss": 0.030457305908203124, + "step": 153225 + }, + { + "epoch": 1.324934501214862, + "grad_norm": 6.011863080385255, + "learning_rate": 1.5344055841793442e-06, + "loss": 0.05770721435546875, + "step": 153230 + }, + { + "epoch": 1.3249777347364051, + "grad_norm": 2.912427412691498, + "learning_rate": 1.534227820940188e-06, + "loss": 0.14876861572265626, + "step": 153235 + }, + { + "epoch": 1.3250209682579484, + "grad_norm": 0.24081186321071033, + "learning_rate": 1.5340500644610417e-06, + "loss": 0.0146636962890625, + "step": 153240 + }, + { + "epoch": 1.3250642017794918, + "grad_norm": 13.594390026357564, + "learning_rate": 1.5338723147427287e-06, + "loss": 0.3975128173828125, + "step": 153245 + }, + { + "epoch": 1.325107435301035, + "grad_norm": 12.619052594197392, + "learning_rate": 1.5336945717860666e-06, + "loss": 0.04400348663330078, + "step": 153250 + }, + { + "epoch": 1.3251506688225783, + "grad_norm": 0.9806540195847236, + "learning_rate": 1.5335168355918763e-06, + "loss": 0.026232147216796876, + "step": 153255 + }, + { + "epoch": 1.3251939023441215, + "grad_norm": 57.93723776501511, + "learning_rate": 1.5333391061609763e-06, + "loss": 0.5218307495117187, + "step": 153260 + }, + { + "epoch": 1.325237135865665, + "grad_norm": 1.6836553144643398, + "learning_rate": 1.5331613834941867e-06, + "loss": 0.09278068542480469, + "step": 153265 + }, + { + "epoch": 1.3252803693872082, + "grad_norm": 6.4273440701404585, + "learning_rate": 1.532983667592326e-06, + "loss": 0.02205047607421875, + "step": 153270 + }, + { + "epoch": 1.3253236029087514, + "grad_norm": 47.83962789171917, + "learning_rate": 1.5328059584562162e-06, + "loss": 0.154656982421875, + "step": 153275 + }, + { + "epoch": 1.3253668364302946, + "grad_norm": 20.892617754376968, + "learning_rate": 1.5326282560866756e-06, + "loss": 0.055496597290039064, + "step": 153280 + }, + { + "epoch": 1.3254100699518379, + "grad_norm": 0.3951091938315099, + "learning_rate": 1.5324505604845242e-06, + "loss": 0.025465774536132812, + "step": 153285 + }, + { + "epoch": 1.325453303473381, + "grad_norm": 1.3370262272781004, + "learning_rate": 1.5322728716505796e-06, + "loss": 0.01973247528076172, + "step": 153290 + }, + { + "epoch": 1.3254965369949243, + "grad_norm": 1.968406252894912, + "learning_rate": 1.5320951895856643e-06, + "loss": 0.028011322021484375, + "step": 153295 + }, + { + "epoch": 1.3255397705164675, + "grad_norm": 25.498773779640977, + "learning_rate": 1.5319175142905957e-06, + "loss": 0.06065511703491211, + "step": 153300 + }, + { + "epoch": 1.325583004038011, + "grad_norm": 1.095911912501784, + "learning_rate": 1.531739845766193e-06, + "loss": 0.0848114013671875, + "step": 153305 + }, + { + "epoch": 1.3256262375595542, + "grad_norm": 16.094229387822466, + "learning_rate": 1.5315621840132778e-06, + "loss": 0.054149627685546875, + "step": 153310 + }, + { + "epoch": 1.3256694710810975, + "grad_norm": 0.2007486418982078, + "learning_rate": 1.5313845290326681e-06, + "loss": 0.1242898941040039, + "step": 153315 + }, + { + "epoch": 1.3257127046026407, + "grad_norm": 1.6038284645682452, + "learning_rate": 1.531206880825183e-06, + "loss": 0.05048370361328125, + "step": 153320 + }, + { + "epoch": 1.325755938124184, + "grad_norm": 7.501905288571798, + "learning_rate": 1.5310292393916415e-06, + "loss": 0.05077362060546875, + "step": 153325 + }, + { + "epoch": 1.3257991716457274, + "grad_norm": 1.056075666759724, + "learning_rate": 1.530851604732864e-06, + "loss": 0.07628173828125, + "step": 153330 + }, + { + "epoch": 1.3258424051672706, + "grad_norm": 0.7825355261885897, + "learning_rate": 1.5306739768496673e-06, + "loss": 0.027968978881835936, + "step": 153335 + }, + { + "epoch": 1.3258856386888138, + "grad_norm": 28.574634124189533, + "learning_rate": 1.530496355742874e-06, + "loss": 0.22996578216552735, + "step": 153340 + }, + { + "epoch": 1.325928872210357, + "grad_norm": 1.1720529234371675, + "learning_rate": 1.530318741413302e-06, + "loss": 0.052520370483398436, + "step": 153345 + }, + { + "epoch": 1.3259721057319003, + "grad_norm": 2.4187582134145442, + "learning_rate": 1.5301411338617681e-06, + "loss": 0.01807861328125, + "step": 153350 + }, + { + "epoch": 1.3260153392534435, + "grad_norm": 1.445717039825211, + "learning_rate": 1.529963533089095e-06, + "loss": 0.1340362548828125, + "step": 153355 + }, + { + "epoch": 1.3260585727749867, + "grad_norm": 0.9681509977442302, + "learning_rate": 1.5297859390961002e-06, + "loss": 0.031304931640625, + "step": 153360 + }, + { + "epoch": 1.32610180629653, + "grad_norm": 6.551861097201005, + "learning_rate": 1.5296083518836015e-06, + "loss": 0.030214309692382812, + "step": 153365 + }, + { + "epoch": 1.3261450398180734, + "grad_norm": 28.095236288424047, + "learning_rate": 1.5294307714524207e-06, + "loss": 0.22952861785888673, + "step": 153370 + }, + { + "epoch": 1.3261882733396166, + "grad_norm": 2.9574175302665258, + "learning_rate": 1.5292531978033746e-06, + "loss": 0.07210540771484375, + "step": 153375 + }, + { + "epoch": 1.3262315068611599, + "grad_norm": 5.4819845648477905, + "learning_rate": 1.529075630937283e-06, + "loss": 0.083941650390625, + "step": 153380 + }, + { + "epoch": 1.326274740382703, + "grad_norm": 1.5738267909231796, + "learning_rate": 1.528898070854965e-06, + "loss": 0.05930023193359375, + "step": 153385 + }, + { + "epoch": 1.3263179739042463, + "grad_norm": 6.774142786286407, + "learning_rate": 1.5287205175572387e-06, + "loss": 0.06987724304199219, + "step": 153390 + }, + { + "epoch": 1.3263612074257898, + "grad_norm": 18.588402166530987, + "learning_rate": 1.5285429710449239e-06, + "loss": 0.10036487579345703, + "step": 153395 + }, + { + "epoch": 1.326404440947333, + "grad_norm": 1.1118110074804892, + "learning_rate": 1.5283654313188373e-06, + "loss": 0.0341949462890625, + "step": 153400 + }, + { + "epoch": 1.3264476744688762, + "grad_norm": 5.363545572994041, + "learning_rate": 1.5281878983798002e-06, + "loss": 0.0204193115234375, + "step": 153405 + }, + { + "epoch": 1.3264909079904195, + "grad_norm": 1.9470680008381154, + "learning_rate": 1.5280103722286296e-06, + "loss": 0.029816055297851564, + "step": 153410 + }, + { + "epoch": 1.3265341415119627, + "grad_norm": 8.230286807478052, + "learning_rate": 1.5278328528661464e-06, + "loss": 0.115362548828125, + "step": 153415 + }, + { + "epoch": 1.326577375033506, + "grad_norm": 0.720797568579249, + "learning_rate": 1.5276553402931676e-06, + "loss": 0.02724876403808594, + "step": 153420 + }, + { + "epoch": 1.3266206085550492, + "grad_norm": 0.4245605559152116, + "learning_rate": 1.5274778345105127e-06, + "loss": 0.0225982666015625, + "step": 153425 + }, + { + "epoch": 1.3266638420765924, + "grad_norm": 6.563061137866838, + "learning_rate": 1.527300335518999e-06, + "loss": 0.058250808715820314, + "step": 153430 + }, + { + "epoch": 1.3267070755981358, + "grad_norm": 2.963868016169811, + "learning_rate": 1.5271228433194463e-06, + "loss": 0.03507919311523437, + "step": 153435 + }, + { + "epoch": 1.326750309119679, + "grad_norm": 0.7014101857564657, + "learning_rate": 1.5269453579126736e-06, + "loss": 0.08049545288085938, + "step": 153440 + }, + { + "epoch": 1.3267935426412223, + "grad_norm": 0.8069195613865667, + "learning_rate": 1.5267678792994983e-06, + "loss": 0.1055877685546875, + "step": 153445 + }, + { + "epoch": 1.3268367761627655, + "grad_norm": 6.196111230598891, + "learning_rate": 1.5265904074807394e-06, + "loss": 0.0439361572265625, + "step": 153450 + }, + { + "epoch": 1.3268800096843087, + "grad_norm": 8.636455632073174, + "learning_rate": 1.5264129424572147e-06, + "loss": 0.1429075241088867, + "step": 153455 + }, + { + "epoch": 1.3269232432058522, + "grad_norm": 1.195473829326737, + "learning_rate": 1.5262354842297444e-06, + "loss": 0.01696434020996094, + "step": 153460 + }, + { + "epoch": 1.3269664767273954, + "grad_norm": 0.2230754863153913, + "learning_rate": 1.526058032799144e-06, + "loss": 0.04346771240234375, + "step": 153465 + }, + { + "epoch": 1.3270097102489387, + "grad_norm": 1.049228141399678, + "learning_rate": 1.5258805881662355e-06, + "loss": 0.03542041778564453, + "step": 153470 + }, + { + "epoch": 1.3270529437704819, + "grad_norm": 17.23571411939535, + "learning_rate": 1.5257031503318354e-06, + "loss": 0.200177001953125, + "step": 153475 + }, + { + "epoch": 1.3270961772920251, + "grad_norm": 3.3051088581011947, + "learning_rate": 1.5255257192967614e-06, + "loss": 0.02825775146484375, + "step": 153480 + }, + { + "epoch": 1.3271394108135683, + "grad_norm": 2.4790203256447536, + "learning_rate": 1.5253482950618332e-06, + "loss": 0.08621902465820312, + "step": 153485 + }, + { + "epoch": 1.3271826443351116, + "grad_norm": 14.85966927991959, + "learning_rate": 1.5251708776278682e-06, + "loss": 0.12917022705078124, + "step": 153490 + }, + { + "epoch": 1.3272258778566548, + "grad_norm": 0.9220064213575022, + "learning_rate": 1.5249934669956837e-06, + "loss": 0.09240646362304687, + "step": 153495 + }, + { + "epoch": 1.3272691113781983, + "grad_norm": 3.4888168783280133, + "learning_rate": 1.5248160631661003e-06, + "loss": 0.034136962890625, + "step": 153500 + }, + { + "epoch": 1.3273123448997415, + "grad_norm": 7.2352908973487065, + "learning_rate": 1.5246386661399346e-06, + "loss": 0.02341766357421875, + "step": 153505 + }, + { + "epoch": 1.3273555784212847, + "grad_norm": 0.6359369092306513, + "learning_rate": 1.524461275918005e-06, + "loss": 0.009459686279296876, + "step": 153510 + }, + { + "epoch": 1.327398811942828, + "grad_norm": 3.7100364507031403, + "learning_rate": 1.5242838925011286e-06, + "loss": 0.06058006286621094, + "step": 153515 + }, + { + "epoch": 1.3274420454643714, + "grad_norm": 4.572621189950719, + "learning_rate": 1.5241065158901255e-06, + "loss": 0.0480560302734375, + "step": 153520 + }, + { + "epoch": 1.3274852789859146, + "grad_norm": 32.24160237423337, + "learning_rate": 1.5239291460858116e-06, + "loss": 0.18934135437011718, + "step": 153525 + }, + { + "epoch": 1.3275285125074578, + "grad_norm": 0.20816630795181737, + "learning_rate": 1.523751783089007e-06, + "loss": 0.020804595947265626, + "step": 153530 + }, + { + "epoch": 1.327571746029001, + "grad_norm": 1.2318592962113515, + "learning_rate": 1.5235744269005293e-06, + "loss": 0.02618064880371094, + "step": 153535 + }, + { + "epoch": 1.3276149795505443, + "grad_norm": 2.7003106942761357, + "learning_rate": 1.5233970775211954e-06, + "loss": 0.030568313598632813, + "step": 153540 + }, + { + "epoch": 1.3276582130720875, + "grad_norm": 4.096274043283599, + "learning_rate": 1.5232197349518241e-06, + "loss": 0.08293533325195312, + "step": 153545 + }, + { + "epoch": 1.3277014465936308, + "grad_norm": 0.8761410051242822, + "learning_rate": 1.5230423991932326e-06, + "loss": 0.04794464111328125, + "step": 153550 + }, + { + "epoch": 1.327744680115174, + "grad_norm": 0.23425574566302115, + "learning_rate": 1.522865070246239e-06, + "loss": 0.130523681640625, + "step": 153555 + }, + { + "epoch": 1.3277879136367174, + "grad_norm": 16.749235227308194, + "learning_rate": 1.5226877481116604e-06, + "loss": 0.037921142578125, + "step": 153560 + }, + { + "epoch": 1.3278311471582607, + "grad_norm": 1.0148528038522133, + "learning_rate": 1.5225104327903158e-06, + "loss": 0.02272186279296875, + "step": 153565 + }, + { + "epoch": 1.327874380679804, + "grad_norm": 1.9886400808044506, + "learning_rate": 1.5223331242830235e-06, + "loss": 0.07417678833007812, + "step": 153570 + }, + { + "epoch": 1.3279176142013471, + "grad_norm": 2.130171043938672, + "learning_rate": 1.5221558225905982e-06, + "loss": 0.047010040283203124, + "step": 153575 + }, + { + "epoch": 1.3279608477228904, + "grad_norm": 11.32492172896484, + "learning_rate": 1.521978527713861e-06, + "loss": 0.064044189453125, + "step": 153580 + }, + { + "epoch": 1.3280040812444338, + "grad_norm": 6.991060064988096, + "learning_rate": 1.5218012396536288e-06, + "loss": 0.10847702026367187, + "step": 153585 + }, + { + "epoch": 1.328047314765977, + "grad_norm": 5.355350346428732, + "learning_rate": 1.521623958410717e-06, + "loss": 0.037396240234375, + "step": 153590 + }, + { + "epoch": 1.3280905482875203, + "grad_norm": 16.009732227799763, + "learning_rate": 1.5214466839859463e-06, + "loss": 0.10503692626953125, + "step": 153595 + }, + { + "epoch": 1.3281337818090635, + "grad_norm": 31.12832537442552, + "learning_rate": 1.5212694163801327e-06, + "loss": 0.18903961181640624, + "step": 153600 + }, + { + "epoch": 1.3281770153306067, + "grad_norm": 3.247290953500932, + "learning_rate": 1.5210921555940943e-06, + "loss": 0.009611892700195312, + "step": 153605 + }, + { + "epoch": 1.32822024885215, + "grad_norm": 0.3298317129056736, + "learning_rate": 1.5209149016286474e-06, + "loss": 0.039813995361328125, + "step": 153610 + }, + { + "epoch": 1.3282634823736932, + "grad_norm": 0.19968106202028782, + "learning_rate": 1.5207376544846107e-06, + "loss": 0.046387100219726564, + "step": 153615 + }, + { + "epoch": 1.3283067158952364, + "grad_norm": 2.6480379233468336, + "learning_rate": 1.5205604141628002e-06, + "loss": 0.018910598754882813, + "step": 153620 + }, + { + "epoch": 1.3283499494167799, + "grad_norm": 79.53562897854503, + "learning_rate": 1.5203831806640351e-06, + "loss": 0.037212371826171875, + "step": 153625 + }, + { + "epoch": 1.328393182938323, + "grad_norm": 0.07272385984854203, + "learning_rate": 1.5202059539891316e-06, + "loss": 0.00980377197265625, + "step": 153630 + }, + { + "epoch": 1.3284364164598663, + "grad_norm": 46.661399833299555, + "learning_rate": 1.520028734138908e-06, + "loss": 0.12668533325195314, + "step": 153635 + }, + { + "epoch": 1.3284796499814095, + "grad_norm": 0.4222329614707868, + "learning_rate": 1.5198515211141814e-06, + "loss": 0.11612281799316407, + "step": 153640 + }, + { + "epoch": 1.3285228835029528, + "grad_norm": 4.469101864671914, + "learning_rate": 1.5196743149157684e-06, + "loss": 0.046377944946289065, + "step": 153645 + }, + { + "epoch": 1.3285661170244962, + "grad_norm": 12.180815610065574, + "learning_rate": 1.5194971155444872e-06, + "loss": 0.06791610717773437, + "step": 153650 + }, + { + "epoch": 1.3286093505460395, + "grad_norm": 3.949469399882159, + "learning_rate": 1.5193199230011528e-06, + "loss": 0.05848236083984375, + "step": 153655 + }, + { + "epoch": 1.3286525840675827, + "grad_norm": 4.401521553122873, + "learning_rate": 1.5191427372865857e-06, + "loss": 0.045654296875, + "step": 153660 + }, + { + "epoch": 1.328695817589126, + "grad_norm": 15.156419182540054, + "learning_rate": 1.5189655584016014e-06, + "loss": 0.05524024963378906, + "step": 153665 + }, + { + "epoch": 1.3287390511106691, + "grad_norm": 1.3355569439580395, + "learning_rate": 1.5187883863470167e-06, + "loss": 0.035532379150390626, + "step": 153670 + }, + { + "epoch": 1.3287822846322124, + "grad_norm": 3.6605548780767294, + "learning_rate": 1.5186112211236496e-06, + "loss": 0.09083976745605468, + "step": 153675 + }, + { + "epoch": 1.3288255181537556, + "grad_norm": 16.951345777042675, + "learning_rate": 1.5184340627323149e-06, + "loss": 0.05099411010742187, + "step": 153680 + }, + { + "epoch": 1.3288687516752988, + "grad_norm": 47.26133163928967, + "learning_rate": 1.518256911173833e-06, + "loss": 0.31214752197265627, + "step": 153685 + }, + { + "epoch": 1.3289119851968423, + "grad_norm": 0.8981008899714035, + "learning_rate": 1.518079766449018e-06, + "loss": 0.036038780212402345, + "step": 153690 + }, + { + "epoch": 1.3289552187183855, + "grad_norm": 1.0691157772951698, + "learning_rate": 1.5179026285586895e-06, + "loss": 0.02906646728515625, + "step": 153695 + }, + { + "epoch": 1.3289984522399287, + "grad_norm": 6.913271179567352, + "learning_rate": 1.5177254975036625e-06, + "loss": 0.023842239379882814, + "step": 153700 + }, + { + "epoch": 1.329041685761472, + "grad_norm": 1.187168203750374, + "learning_rate": 1.5175483732847552e-06, + "loss": 0.04741668701171875, + "step": 153705 + }, + { + "epoch": 1.3290849192830152, + "grad_norm": 12.23603004385438, + "learning_rate": 1.5173712559027837e-06, + "loss": 0.19625930786132811, + "step": 153710 + }, + { + "epoch": 1.3291281528045586, + "grad_norm": 2.170407606275472, + "learning_rate": 1.5171941453585635e-06, + "loss": 0.20490188598632814, + "step": 153715 + }, + { + "epoch": 1.3291713863261019, + "grad_norm": 0.683634248111826, + "learning_rate": 1.5170170416529143e-06, + "loss": 0.010210800170898437, + "step": 153720 + }, + { + "epoch": 1.329214619847645, + "grad_norm": 17.720284001776722, + "learning_rate": 1.5168399447866512e-06, + "loss": 0.06984405517578125, + "step": 153725 + }, + { + "epoch": 1.3292578533691883, + "grad_norm": 2.875222372982151, + "learning_rate": 1.5166628547605915e-06, + "loss": 0.0588623046875, + "step": 153730 + }, + { + "epoch": 1.3293010868907316, + "grad_norm": 2.5233977125479092, + "learning_rate": 1.5164857715755516e-06, + "loss": 0.02636260986328125, + "step": 153735 + }, + { + "epoch": 1.3293443204122748, + "grad_norm": 0.9957966748835838, + "learning_rate": 1.5163086952323472e-06, + "loss": 0.06948013305664062, + "step": 153740 + }, + { + "epoch": 1.329387553933818, + "grad_norm": 26.793717618918816, + "learning_rate": 1.516131625731797e-06, + "loss": 0.09941940307617188, + "step": 153745 + }, + { + "epoch": 1.3294307874553613, + "grad_norm": 27.55725835113865, + "learning_rate": 1.5159545630747153e-06, + "loss": 0.133551025390625, + "step": 153750 + }, + { + "epoch": 1.3294740209769047, + "grad_norm": 13.507104923551971, + "learning_rate": 1.5157775072619212e-06, + "loss": 0.20963172912597655, + "step": 153755 + }, + { + "epoch": 1.329517254498448, + "grad_norm": 0.3050481769419768, + "learning_rate": 1.5156004582942301e-06, + "loss": 0.036998748779296875, + "step": 153760 + }, + { + "epoch": 1.3295604880199912, + "grad_norm": 0.22490126076273967, + "learning_rate": 1.5154234161724588e-06, + "loss": 0.0374664306640625, + "step": 153765 + }, + { + "epoch": 1.3296037215415344, + "grad_norm": 1.806783846615591, + "learning_rate": 1.5152463808974234e-06, + "loss": 0.04953079223632813, + "step": 153770 + }, + { + "epoch": 1.3296469550630778, + "grad_norm": 0.14446033251875706, + "learning_rate": 1.5150693524699406e-06, + "loss": 0.10175018310546875, + "step": 153775 + }, + { + "epoch": 1.329690188584621, + "grad_norm": 1.1078602235412243, + "learning_rate": 1.5148923308908254e-06, + "loss": 0.07197723388671876, + "step": 153780 + }, + { + "epoch": 1.3297334221061643, + "grad_norm": 6.4204799928281995, + "learning_rate": 1.5147153161608966e-06, + "loss": 0.06644821166992188, + "step": 153785 + }, + { + "epoch": 1.3297766556277075, + "grad_norm": 0.22411048037188563, + "learning_rate": 1.5145383082809698e-06, + "loss": 0.038361740112304685, + "step": 153790 + }, + { + "epoch": 1.3298198891492508, + "grad_norm": 9.97964439801211, + "learning_rate": 1.514361307251861e-06, + "loss": 0.13999290466308595, + "step": 153795 + }, + { + "epoch": 1.329863122670794, + "grad_norm": 10.245349347444824, + "learning_rate": 1.5141843130743854e-06, + "loss": 0.020547103881835938, + "step": 153800 + }, + { + "epoch": 1.3299063561923372, + "grad_norm": 18.00389759118056, + "learning_rate": 1.5140073257493614e-06, + "loss": 0.186724853515625, + "step": 153805 + }, + { + "epoch": 1.3299495897138804, + "grad_norm": 2.078876152690025, + "learning_rate": 1.5138303452776033e-06, + "loss": 0.041058349609375, + "step": 153810 + }, + { + "epoch": 1.329992823235424, + "grad_norm": 1.196980094206103, + "learning_rate": 1.5136533716599295e-06, + "loss": 0.08608245849609375, + "step": 153815 + }, + { + "epoch": 1.3300360567569671, + "grad_norm": 25.010484071020624, + "learning_rate": 1.513476404897155e-06, + "loss": 0.14558792114257812, + "step": 153820 + }, + { + "epoch": 1.3300792902785104, + "grad_norm": 6.678373975086585, + "learning_rate": 1.5132994449900958e-06, + "loss": 0.061008453369140625, + "step": 153825 + }, + { + "epoch": 1.3301225238000536, + "grad_norm": 11.851938629731844, + "learning_rate": 1.5131224919395687e-06, + "loss": 0.039171600341796876, + "step": 153830 + }, + { + "epoch": 1.3301657573215968, + "grad_norm": 2.4793826867379094, + "learning_rate": 1.512945545746389e-06, + "loss": 0.052342700958251956, + "step": 153835 + }, + { + "epoch": 1.3302089908431403, + "grad_norm": 0.3163471229957937, + "learning_rate": 1.5127686064113725e-06, + "loss": 0.018248748779296876, + "step": 153840 + }, + { + "epoch": 1.3302522243646835, + "grad_norm": 2.12886829499327, + "learning_rate": 1.5125916739353356e-06, + "loss": 0.026589202880859374, + "step": 153845 + }, + { + "epoch": 1.3302954578862267, + "grad_norm": 40.375234930563174, + "learning_rate": 1.512414748319095e-06, + "loss": 0.14654293060302734, + "step": 153850 + }, + { + "epoch": 1.33033869140777, + "grad_norm": 9.678295714595613, + "learning_rate": 1.5122378295634652e-06, + "loss": 0.024263572692871094, + "step": 153855 + }, + { + "epoch": 1.3303819249293132, + "grad_norm": 1.503283055556335, + "learning_rate": 1.5120609176692641e-06, + "loss": 0.07875595092773438, + "step": 153860 + }, + { + "epoch": 1.3304251584508564, + "grad_norm": 0.09313936965207562, + "learning_rate": 1.5118840126373064e-06, + "loss": 0.01284637451171875, + "step": 153865 + }, + { + "epoch": 1.3304683919723996, + "grad_norm": 0.0872292414027696, + "learning_rate": 1.5117071144684084e-06, + "loss": 0.024358367919921874, + "step": 153870 + }, + { + "epoch": 1.3305116254939429, + "grad_norm": 0.7402275075351641, + "learning_rate": 1.5115302231633847e-06, + "loss": 0.25926895141601564, + "step": 153875 + }, + { + "epoch": 1.3305548590154863, + "grad_norm": 0.2940480354606005, + "learning_rate": 1.5113533387230531e-06, + "loss": 0.10696029663085938, + "step": 153880 + }, + { + "epoch": 1.3305980925370295, + "grad_norm": 18.45891704534537, + "learning_rate": 1.5111764611482283e-06, + "loss": 0.08922538757324219, + "step": 153885 + }, + { + "epoch": 1.3306413260585728, + "grad_norm": 45.34802241982177, + "learning_rate": 1.5109995904397257e-06, + "loss": 0.07801513671875, + "step": 153890 + }, + { + "epoch": 1.330684559580116, + "grad_norm": 0.761342878023663, + "learning_rate": 1.5108227265983621e-06, + "loss": 0.1137298583984375, + "step": 153895 + }, + { + "epoch": 1.3307277931016592, + "grad_norm": 0.8734854704799662, + "learning_rate": 1.5106458696249522e-06, + "loss": 0.0191619873046875, + "step": 153900 + }, + { + "epoch": 1.3307710266232027, + "grad_norm": 2.5730296795817895, + "learning_rate": 1.5104690195203109e-06, + "loss": 0.021502685546875, + "step": 153905 + }, + { + "epoch": 1.330814260144746, + "grad_norm": 0.6000652759906747, + "learning_rate": 1.5102921762852559e-06, + "loss": 0.023626708984375, + "step": 153910 + }, + { + "epoch": 1.3308574936662891, + "grad_norm": 36.029404744485056, + "learning_rate": 1.5101153399206007e-06, + "loss": 0.19572677612304687, + "step": 153915 + }, + { + "epoch": 1.3309007271878324, + "grad_norm": 0.27271361920201265, + "learning_rate": 1.5099385104271628e-06, + "loss": 0.02842559814453125, + "step": 153920 + }, + { + "epoch": 1.3309439607093756, + "grad_norm": 0.10004467396848242, + "learning_rate": 1.5097616878057572e-06, + "loss": 0.03453369140625, + "step": 153925 + }, + { + "epoch": 1.3309871942309188, + "grad_norm": 35.877417286330804, + "learning_rate": 1.5095848720571985e-06, + "loss": 0.09937992095947265, + "step": 153930 + }, + { + "epoch": 1.331030427752462, + "grad_norm": 7.393398623629917, + "learning_rate": 1.5094080631823031e-06, + "loss": 0.05233478546142578, + "step": 153935 + }, + { + "epoch": 1.3310736612740053, + "grad_norm": 12.632875586542704, + "learning_rate": 1.5092312611818848e-06, + "loss": 0.14473514556884765, + "step": 153940 + }, + { + "epoch": 1.3311168947955487, + "grad_norm": 1.8309326850563428, + "learning_rate": 1.5090544660567615e-06, + "loss": 0.01845073699951172, + "step": 153945 + }, + { + "epoch": 1.331160128317092, + "grad_norm": 40.922442586891215, + "learning_rate": 1.508877677807747e-06, + "loss": 0.26105804443359376, + "step": 153950 + }, + { + "epoch": 1.3312033618386352, + "grad_norm": 3.8856307074354413, + "learning_rate": 1.5087008964356567e-06, + "loss": 0.01076507568359375, + "step": 153955 + }, + { + "epoch": 1.3312465953601784, + "grad_norm": 0.9096924982179792, + "learning_rate": 1.5085241219413064e-06, + "loss": 0.018122482299804687, + "step": 153960 + }, + { + "epoch": 1.3312898288817216, + "grad_norm": 0.34241123941027757, + "learning_rate": 1.50834735432551e-06, + "loss": 0.027406692504882812, + "step": 153965 + }, + { + "epoch": 1.331333062403265, + "grad_norm": 0.5509985357195599, + "learning_rate": 1.508170593589085e-06, + "loss": 0.038421249389648436, + "step": 153970 + }, + { + "epoch": 1.3313762959248083, + "grad_norm": 1.6281993999661455, + "learning_rate": 1.507993839732844e-06, + "loss": 0.030514907836914063, + "step": 153975 + }, + { + "epoch": 1.3314195294463516, + "grad_norm": 91.90804997160132, + "learning_rate": 1.5078170927576044e-06, + "loss": 0.022439956665039062, + "step": 153980 + }, + { + "epoch": 1.3314627629678948, + "grad_norm": 7.273705307824817, + "learning_rate": 1.5076403526641812e-06, + "loss": 0.09468841552734375, + "step": 153985 + }, + { + "epoch": 1.331505996489438, + "grad_norm": 6.704458533565849, + "learning_rate": 1.5074636194533886e-06, + "loss": 0.045463180541992186, + "step": 153990 + }, + { + "epoch": 1.3315492300109812, + "grad_norm": 0.17225586207494636, + "learning_rate": 1.5072868931260418e-06, + "loss": 0.019073867797851564, + "step": 153995 + }, + { + "epoch": 1.3315924635325245, + "grad_norm": 0.3769181265802722, + "learning_rate": 1.507110173682956e-06, + "loss": 0.03246803283691406, + "step": 154000 + }, + { + "epoch": 1.331635697054068, + "grad_norm": 0.4771508005032464, + "learning_rate": 1.5069334611249448e-06, + "loss": 0.029842376708984375, + "step": 154005 + }, + { + "epoch": 1.3316789305756112, + "grad_norm": 1.5504404745418436, + "learning_rate": 1.506756755452826e-06, + "loss": 0.024833297729492186, + "step": 154010 + }, + { + "epoch": 1.3317221640971544, + "grad_norm": 3.680859186712284, + "learning_rate": 1.5065800566674132e-06, + "loss": 0.41749114990234376, + "step": 154015 + }, + { + "epoch": 1.3317653976186976, + "grad_norm": 11.481650992666918, + "learning_rate": 1.506403364769521e-06, + "loss": 0.10538558959960938, + "step": 154020 + }, + { + "epoch": 1.3318086311402408, + "grad_norm": 0.6698478902261996, + "learning_rate": 1.5062266797599632e-06, + "loss": 0.07127761840820312, + "step": 154025 + }, + { + "epoch": 1.3318518646617843, + "grad_norm": 20.7264215044209, + "learning_rate": 1.5060500016395576e-06, + "loss": 0.10850753784179687, + "step": 154030 + }, + { + "epoch": 1.3318950981833275, + "grad_norm": 8.617079627979761, + "learning_rate": 1.5058733304091157e-06, + "loss": 0.0590728759765625, + "step": 154035 + }, + { + "epoch": 1.3319383317048707, + "grad_norm": 0.045593089305657905, + "learning_rate": 1.505696666069455e-06, + "loss": 0.09391307830810547, + "step": 154040 + }, + { + "epoch": 1.331981565226414, + "grad_norm": 5.960791544039895, + "learning_rate": 1.50552000862139e-06, + "loss": 0.03685054779052734, + "step": 154045 + }, + { + "epoch": 1.3320247987479572, + "grad_norm": 0.9174356974837302, + "learning_rate": 1.505343358065734e-06, + "loss": 0.10533599853515625, + "step": 154050 + }, + { + "epoch": 1.3320680322695004, + "grad_norm": 0.5432206596161527, + "learning_rate": 1.5051667144033026e-06, + "loss": 0.08588829040527343, + "step": 154055 + }, + { + "epoch": 1.3321112657910437, + "grad_norm": 3.554228673391443, + "learning_rate": 1.5049900776349106e-06, + "loss": 0.018822860717773438, + "step": 154060 + }, + { + "epoch": 1.332154499312587, + "grad_norm": 0.04041265575060118, + "learning_rate": 1.5048134477613705e-06, + "loss": 0.01224517822265625, + "step": 154065 + }, + { + "epoch": 1.3321977328341303, + "grad_norm": 8.709196089077246, + "learning_rate": 1.5046368247835e-06, + "loss": 0.05068283081054688, + "step": 154070 + }, + { + "epoch": 1.3322409663556736, + "grad_norm": 2.7286943977704934, + "learning_rate": 1.5044602087021122e-06, + "loss": 0.0493194580078125, + "step": 154075 + }, + { + "epoch": 1.3322841998772168, + "grad_norm": 12.767852335896938, + "learning_rate": 1.504283599518021e-06, + "loss": 0.04248619079589844, + "step": 154080 + }, + { + "epoch": 1.33232743339876, + "grad_norm": 11.689008025971132, + "learning_rate": 1.5041069972320428e-06, + "loss": 0.1084320068359375, + "step": 154085 + }, + { + "epoch": 1.3323706669203033, + "grad_norm": 0.5050660408859156, + "learning_rate": 1.5039304018449907e-06, + "loss": 0.018231582641601563, + "step": 154090 + }, + { + "epoch": 1.3324139004418467, + "grad_norm": 7.191626044625786, + "learning_rate": 1.5037538133576795e-06, + "loss": 0.04182281494140625, + "step": 154095 + }, + { + "epoch": 1.33245713396339, + "grad_norm": 29.837372329813558, + "learning_rate": 1.5035772317709221e-06, + "loss": 0.035348701477050784, + "step": 154100 + }, + { + "epoch": 1.3325003674849332, + "grad_norm": 4.496964625452378, + "learning_rate": 1.5034006570855357e-06, + "loss": 0.22185115814208983, + "step": 154105 + }, + { + "epoch": 1.3325436010064764, + "grad_norm": 0.42340492648892114, + "learning_rate": 1.5032240893023333e-06, + "loss": 0.020043563842773438, + "step": 154110 + }, + { + "epoch": 1.3325868345280196, + "grad_norm": 0.4907718315204619, + "learning_rate": 1.503047528422129e-06, + "loss": 0.0531463623046875, + "step": 154115 + }, + { + "epoch": 1.3326300680495629, + "grad_norm": 2.156795409139491, + "learning_rate": 1.5028709744457373e-06, + "loss": 0.323150634765625, + "step": 154120 + }, + { + "epoch": 1.332673301571106, + "grad_norm": 9.1384668654419, + "learning_rate": 1.5026944273739725e-06, + "loss": 0.10375633239746093, + "step": 154125 + }, + { + "epoch": 1.3327165350926493, + "grad_norm": 54.24142217262909, + "learning_rate": 1.5025178872076476e-06, + "loss": 0.29833526611328126, + "step": 154130 + }, + { + "epoch": 1.3327597686141928, + "grad_norm": 2.5446060282745284, + "learning_rate": 1.5023413539475785e-06, + "loss": 0.02516632080078125, + "step": 154135 + }, + { + "epoch": 1.332803002135736, + "grad_norm": 1.992023329163269, + "learning_rate": 1.5021648275945777e-06, + "loss": 0.03056793212890625, + "step": 154140 + }, + { + "epoch": 1.3328462356572792, + "grad_norm": 4.3062256779004775, + "learning_rate": 1.501988308149462e-06, + "loss": 0.07427139282226562, + "step": 154145 + }, + { + "epoch": 1.3328894691788225, + "grad_norm": 25.21816868780618, + "learning_rate": 1.5018117956130436e-06, + "loss": 0.09435882568359374, + "step": 154150 + }, + { + "epoch": 1.3329327027003657, + "grad_norm": 2.2141085679339207, + "learning_rate": 1.5016352899861365e-06, + "loss": 0.033519744873046875, + "step": 154155 + }, + { + "epoch": 1.3329759362219091, + "grad_norm": 6.531776947468478, + "learning_rate": 1.5014587912695545e-06, + "loss": 0.03130035400390625, + "step": 154160 + }, + { + "epoch": 1.3330191697434524, + "grad_norm": 1.25191839502031, + "learning_rate": 1.5012822994641128e-06, + "loss": 0.045511817932128905, + "step": 154165 + }, + { + "epoch": 1.3330624032649956, + "grad_norm": 0.5241339292597922, + "learning_rate": 1.501105814570625e-06, + "loss": 0.02612590789794922, + "step": 154170 + }, + { + "epoch": 1.3331056367865388, + "grad_norm": 0.9918893375336519, + "learning_rate": 1.5009293365899047e-06, + "loss": 0.34767837524414064, + "step": 154175 + }, + { + "epoch": 1.333148870308082, + "grad_norm": 10.855875567192935, + "learning_rate": 1.5007528655227658e-06, + "loss": 0.0669158935546875, + "step": 154180 + }, + { + "epoch": 1.3331921038296253, + "grad_norm": 2.972394430225806, + "learning_rate": 1.5005764013700218e-06, + "loss": 0.0263763427734375, + "step": 154185 + }, + { + "epoch": 1.3332353373511685, + "grad_norm": 0.7843319287224373, + "learning_rate": 1.5003999441324864e-06, + "loss": 0.03238983154296875, + "step": 154190 + }, + { + "epoch": 1.3332785708727117, + "grad_norm": 2.602253670953705, + "learning_rate": 1.5002234938109746e-06, + "loss": 0.023597526550292968, + "step": 154195 + }, + { + "epoch": 1.3333218043942552, + "grad_norm": 2.0398238097185404, + "learning_rate": 1.500047050406299e-06, + "loss": 0.06490859985351563, + "step": 154200 + }, + { + "epoch": 1.3333650379157984, + "grad_norm": 1.1783899730797769, + "learning_rate": 1.4998706139192745e-06, + "loss": 0.0128509521484375, + "step": 154205 + }, + { + "epoch": 1.3334082714373416, + "grad_norm": 45.1272478995974, + "learning_rate": 1.4996941843507143e-06, + "loss": 0.1361988067626953, + "step": 154210 + }, + { + "epoch": 1.3334515049588849, + "grad_norm": 3.6579550191645738, + "learning_rate": 1.4995177617014322e-06, + "loss": 0.060747528076171876, + "step": 154215 + }, + { + "epoch": 1.3334947384804283, + "grad_norm": 0.07154957850553124, + "learning_rate": 1.4993413459722413e-06, + "loss": 0.02466754913330078, + "step": 154220 + }, + { + "epoch": 1.3335379720019715, + "grad_norm": 17.57883273169394, + "learning_rate": 1.4991649371639546e-06, + "loss": 0.11867179870605468, + "step": 154225 + }, + { + "epoch": 1.3335812055235148, + "grad_norm": 0.2897491716321119, + "learning_rate": 1.4989885352773876e-06, + "loss": 0.008595657348632813, + "step": 154230 + }, + { + "epoch": 1.333624439045058, + "grad_norm": 0.7331393887270999, + "learning_rate": 1.4988121403133528e-06, + "loss": 0.6341289520263672, + "step": 154235 + }, + { + "epoch": 1.3336676725666012, + "grad_norm": 0.8955987961752875, + "learning_rate": 1.4986357522726635e-06, + "loss": 0.04168853759765625, + "step": 154240 + }, + { + "epoch": 1.3337109060881445, + "grad_norm": 1.5539380349419853, + "learning_rate": 1.498459371156134e-06, + "loss": 0.08086509704589843, + "step": 154245 + }, + { + "epoch": 1.3337541396096877, + "grad_norm": 0.1673747847744616, + "learning_rate": 1.4982829969645756e-06, + "loss": 0.008821868896484375, + "step": 154250 + }, + { + "epoch": 1.333797373131231, + "grad_norm": 11.482325968973345, + "learning_rate": 1.4981066296988048e-06, + "loss": 0.11534614562988281, + "step": 154255 + }, + { + "epoch": 1.3338406066527744, + "grad_norm": 0.148772388091711, + "learning_rate": 1.497930269359632e-06, + "loss": 0.19204330444335938, + "step": 154260 + }, + { + "epoch": 1.3338838401743176, + "grad_norm": 0.9405779157878089, + "learning_rate": 1.4977539159478736e-06, + "loss": 0.0715372085571289, + "step": 154265 + }, + { + "epoch": 1.3339270736958608, + "grad_norm": 1.6765768540580492, + "learning_rate": 1.4975775694643415e-06, + "loss": 0.155706787109375, + "step": 154270 + }, + { + "epoch": 1.333970307217404, + "grad_norm": 4.2158000770090185, + "learning_rate": 1.4974012299098487e-06, + "loss": 0.027581024169921874, + "step": 154275 + }, + { + "epoch": 1.3340135407389473, + "grad_norm": 3.1911163870985093, + "learning_rate": 1.4972248972852085e-06, + "loss": 0.240301513671875, + "step": 154280 + }, + { + "epoch": 1.3340567742604907, + "grad_norm": 6.882665073404321, + "learning_rate": 1.4970485715912344e-06, + "loss": 0.034366607666015625, + "step": 154285 + }, + { + "epoch": 1.334100007782034, + "grad_norm": 13.127391180954955, + "learning_rate": 1.496872252828738e-06, + "loss": 0.14827728271484375, + "step": 154290 + }, + { + "epoch": 1.3341432413035772, + "grad_norm": 3.4879929514882986, + "learning_rate": 1.496695940998536e-06, + "loss": 0.05786972045898438, + "step": 154295 + }, + { + "epoch": 1.3341864748251204, + "grad_norm": 0.2860018387930128, + "learning_rate": 1.4965196361014386e-06, + "loss": 0.019550132751464843, + "step": 154300 + }, + { + "epoch": 1.3342297083466637, + "grad_norm": 4.971866432349843, + "learning_rate": 1.496343338138259e-06, + "loss": 0.15086097717285157, + "step": 154305 + }, + { + "epoch": 1.3342729418682069, + "grad_norm": 3.9926799736848766, + "learning_rate": 1.4961670471098123e-06, + "loss": 0.0232421875, + "step": 154310 + }, + { + "epoch": 1.3343161753897501, + "grad_norm": 6.205798733564392, + "learning_rate": 1.4959907630169102e-06, + "loss": 0.10832557678222657, + "step": 154315 + }, + { + "epoch": 1.3343594089112933, + "grad_norm": 0.5478477777819095, + "learning_rate": 1.4958144858603645e-06, + "loss": 0.0305267333984375, + "step": 154320 + }, + { + "epoch": 1.3344026424328368, + "grad_norm": 0.4186932237083513, + "learning_rate": 1.4956382156409906e-06, + "loss": 0.08507614135742188, + "step": 154325 + }, + { + "epoch": 1.33444587595438, + "grad_norm": 1.5226341624499053, + "learning_rate": 1.4954619523596006e-06, + "loss": 0.052032852172851564, + "step": 154330 + }, + { + "epoch": 1.3344891094759233, + "grad_norm": 3.319165725379521, + "learning_rate": 1.4952856960170068e-06, + "loss": 0.03784027099609375, + "step": 154335 + }, + { + "epoch": 1.3345323429974665, + "grad_norm": 4.322692752576187, + "learning_rate": 1.4951094466140225e-06, + "loss": 0.032237815856933597, + "step": 154340 + }, + { + "epoch": 1.3345755765190097, + "grad_norm": 4.470176136100723, + "learning_rate": 1.4949332041514605e-06, + "loss": 0.032086944580078124, + "step": 154345 + }, + { + "epoch": 1.3346188100405532, + "grad_norm": 1.523957285620746, + "learning_rate": 1.4947569686301339e-06, + "loss": 0.040390396118164064, + "step": 154350 + }, + { + "epoch": 1.3346620435620964, + "grad_norm": 0.21634135433486423, + "learning_rate": 1.4945807400508538e-06, + "loss": 0.05765380859375, + "step": 154355 + }, + { + "epoch": 1.3347052770836396, + "grad_norm": 0.6517800249507719, + "learning_rate": 1.4944045184144353e-06, + "loss": 0.12837982177734375, + "step": 154360 + }, + { + "epoch": 1.3347485106051828, + "grad_norm": 0.804820007498172, + "learning_rate": 1.4942283037216891e-06, + "loss": 0.1313568115234375, + "step": 154365 + }, + { + "epoch": 1.334791744126726, + "grad_norm": 5.476177633366165, + "learning_rate": 1.4940520959734302e-06, + "loss": 0.040330123901367185, + "step": 154370 + }, + { + "epoch": 1.3348349776482693, + "grad_norm": 14.949867029020938, + "learning_rate": 1.4938758951704695e-06, + "loss": 0.08298587799072266, + "step": 154375 + }, + { + "epoch": 1.3348782111698125, + "grad_norm": 0.1601401341092215, + "learning_rate": 1.4936997013136204e-06, + "loss": 0.24593048095703124, + "step": 154380 + }, + { + "epoch": 1.3349214446913558, + "grad_norm": 19.056703934250518, + "learning_rate": 1.4935235144036944e-06, + "loss": 0.1047576904296875, + "step": 154385 + }, + { + "epoch": 1.3349646782128992, + "grad_norm": 19.065387227927978, + "learning_rate": 1.4933473344415055e-06, + "loss": 0.04359283447265625, + "step": 154390 + }, + { + "epoch": 1.3350079117344424, + "grad_norm": 3.2197347101987255, + "learning_rate": 1.4931711614278654e-06, + "loss": 0.0930419921875, + "step": 154395 + }, + { + "epoch": 1.3350511452559857, + "grad_norm": 0.502381467976837, + "learning_rate": 1.492994995363587e-06, + "loss": 0.04354095458984375, + "step": 154400 + }, + { + "epoch": 1.335094378777529, + "grad_norm": 4.920357976194676, + "learning_rate": 1.4928188362494826e-06, + "loss": 0.035405731201171874, + "step": 154405 + }, + { + "epoch": 1.3351376122990721, + "grad_norm": 3.321282206896038, + "learning_rate": 1.4926426840863647e-06, + "loss": 0.2402057647705078, + "step": 154410 + }, + { + "epoch": 1.3351808458206156, + "grad_norm": 6.005005047907991, + "learning_rate": 1.492466538875044e-06, + "loss": 0.029328155517578124, + "step": 154415 + }, + { + "epoch": 1.3352240793421588, + "grad_norm": 31.955809514615662, + "learning_rate": 1.492290400616336e-06, + "loss": 0.2931316375732422, + "step": 154420 + }, + { + "epoch": 1.335267312863702, + "grad_norm": 9.53924902748506, + "learning_rate": 1.49211426931105e-06, + "loss": 0.16492538452148436, + "step": 154425 + }, + { + "epoch": 1.3353105463852453, + "grad_norm": 11.717151429860966, + "learning_rate": 1.4919381449600008e-06, + "loss": 0.03864898681640625, + "step": 154430 + }, + { + "epoch": 1.3353537799067885, + "grad_norm": 6.387770294697392, + "learning_rate": 1.4917620275640001e-06, + "loss": 0.18163604736328126, + "step": 154435 + }, + { + "epoch": 1.3353970134283317, + "grad_norm": 0.311772824690002, + "learning_rate": 1.4915859171238594e-06, + "loss": 0.13384590148925782, + "step": 154440 + }, + { + "epoch": 1.335440246949875, + "grad_norm": 2.4303222184988305, + "learning_rate": 1.491409813640391e-06, + "loss": 0.1548320770263672, + "step": 154445 + }, + { + "epoch": 1.3354834804714182, + "grad_norm": 4.529250170722712, + "learning_rate": 1.4912337171144062e-06, + "loss": 0.04521598815917969, + "step": 154450 + }, + { + "epoch": 1.3355267139929616, + "grad_norm": 2.4927110447694787, + "learning_rate": 1.4910576275467192e-06, + "loss": 0.15088882446289062, + "step": 154455 + }, + { + "epoch": 1.3355699475145049, + "grad_norm": 0.6971764780137693, + "learning_rate": 1.4908815449381412e-06, + "loss": 0.0219390869140625, + "step": 154460 + }, + { + "epoch": 1.335613181036048, + "grad_norm": 34.529629712881636, + "learning_rate": 1.4907054692894843e-06, + "loss": 0.10481452941894531, + "step": 154465 + }, + { + "epoch": 1.3356564145575913, + "grad_norm": 4.620103874152867, + "learning_rate": 1.4905294006015604e-06, + "loss": 0.12135162353515624, + "step": 154470 + }, + { + "epoch": 1.3356996480791348, + "grad_norm": 0.49344315226781393, + "learning_rate": 1.4903533388751802e-06, + "loss": 0.03179931640625, + "step": 154475 + }, + { + "epoch": 1.335742881600678, + "grad_norm": 3.3350603476124245, + "learning_rate": 1.490177284111157e-06, + "loss": 0.023823928833007813, + "step": 154480 + }, + { + "epoch": 1.3357861151222212, + "grad_norm": 5.171935955443788, + "learning_rate": 1.490001236310304e-06, + "loss": 0.027690887451171875, + "step": 154485 + }, + { + "epoch": 1.3358293486437645, + "grad_norm": 13.757523984728346, + "learning_rate": 1.4898251954734318e-06, + "loss": 0.12071113586425782, + "step": 154490 + }, + { + "epoch": 1.3358725821653077, + "grad_norm": 39.41976938808743, + "learning_rate": 1.4896491616013528e-06, + "loss": 0.2436288833618164, + "step": 154495 + }, + { + "epoch": 1.335915815686851, + "grad_norm": 0.33791131151133547, + "learning_rate": 1.4894731346948777e-06, + "loss": 0.024784088134765625, + "step": 154500 + }, + { + "epoch": 1.3359590492083941, + "grad_norm": 10.47149940807726, + "learning_rate": 1.4892971147548194e-06, + "loss": 0.02978363037109375, + "step": 154505 + }, + { + "epoch": 1.3360022827299374, + "grad_norm": 11.13482606935822, + "learning_rate": 1.4891211017819893e-06, + "loss": 0.082318115234375, + "step": 154510 + }, + { + "epoch": 1.3360455162514808, + "grad_norm": 7.036729700428086, + "learning_rate": 1.488945095777198e-06, + "loss": 0.051715087890625, + "step": 154515 + }, + { + "epoch": 1.336088749773024, + "grad_norm": 0.6876329360056052, + "learning_rate": 1.4887690967412597e-06, + "loss": 0.0038166046142578125, + "step": 154520 + }, + { + "epoch": 1.3361319832945673, + "grad_norm": 1.9118793489025625, + "learning_rate": 1.4885931046749843e-06, + "loss": 0.06270599365234375, + "step": 154525 + }, + { + "epoch": 1.3361752168161105, + "grad_norm": 2.1371550548486242, + "learning_rate": 1.4884171195791833e-06, + "loss": 0.2053913116455078, + "step": 154530 + }, + { + "epoch": 1.3362184503376537, + "grad_norm": 0.14212615677144935, + "learning_rate": 1.48824114145467e-06, + "loss": 0.22019615173339843, + "step": 154535 + }, + { + "epoch": 1.3362616838591972, + "grad_norm": 1.206222576334264, + "learning_rate": 1.488065170302255e-06, + "loss": 0.008356094360351562, + "step": 154540 + }, + { + "epoch": 1.3363049173807404, + "grad_norm": 2.3789223092180745, + "learning_rate": 1.4878892061227486e-06, + "loss": 0.06374979019165039, + "step": 154545 + }, + { + "epoch": 1.3363481509022836, + "grad_norm": 1.3746709137194426, + "learning_rate": 1.4877132489169646e-06, + "loss": 0.24777870178222655, + "step": 154550 + }, + { + "epoch": 1.3363913844238269, + "grad_norm": 1.6770165731132052, + "learning_rate": 1.4875372986857138e-06, + "loss": 0.01068563461303711, + "step": 154555 + }, + { + "epoch": 1.33643461794537, + "grad_norm": 5.19627935183762, + "learning_rate": 1.4873613554298072e-06, + "loss": 0.031490516662597653, + "step": 154560 + }, + { + "epoch": 1.3364778514669133, + "grad_norm": 16.27319152398594, + "learning_rate": 1.4871854191500564e-06, + "loss": 0.059468841552734374, + "step": 154565 + }, + { + "epoch": 1.3365210849884566, + "grad_norm": 1.8404438713111224, + "learning_rate": 1.4870094898472726e-06, + "loss": 0.012003707885742187, + "step": 154570 + }, + { + "epoch": 1.3365643185099998, + "grad_norm": 6.034607069551665, + "learning_rate": 1.4868335675222663e-06, + "loss": 0.04560546875, + "step": 154575 + }, + { + "epoch": 1.3366075520315432, + "grad_norm": 5.5026955602128815, + "learning_rate": 1.4866576521758508e-06, + "loss": 0.04385528564453125, + "step": 154580 + }, + { + "epoch": 1.3366507855530865, + "grad_norm": 4.121703045759026, + "learning_rate": 1.4864817438088367e-06, + "loss": 0.02152252197265625, + "step": 154585 + }, + { + "epoch": 1.3366940190746297, + "grad_norm": 6.639179152343139, + "learning_rate": 1.486305842422034e-06, + "loss": 0.05113067626953125, + "step": 154590 + }, + { + "epoch": 1.336737252596173, + "grad_norm": 0.07169253111173661, + "learning_rate": 1.486129948016256e-06, + "loss": 0.03246116638183594, + "step": 154595 + }, + { + "epoch": 1.3367804861177162, + "grad_norm": 0.2817609320103208, + "learning_rate": 1.4859540605923133e-06, + "loss": 0.03642349243164063, + "step": 154600 + }, + { + "epoch": 1.3368237196392596, + "grad_norm": 4.466064897228114, + "learning_rate": 1.4857781801510163e-06, + "loss": 0.037685394287109375, + "step": 154605 + }, + { + "epoch": 1.3368669531608028, + "grad_norm": 2.492381899381708, + "learning_rate": 1.4856023066931755e-06, + "loss": 0.04269905090332031, + "step": 154610 + }, + { + "epoch": 1.336910186682346, + "grad_norm": 0.3531452803760253, + "learning_rate": 1.485426440219604e-06, + "loss": 0.11563262939453126, + "step": 154615 + }, + { + "epoch": 1.3369534202038893, + "grad_norm": 1.7572230837967902, + "learning_rate": 1.4852505807311121e-06, + "loss": 0.03402862548828125, + "step": 154620 + }, + { + "epoch": 1.3369966537254325, + "grad_norm": 1.480579650039574, + "learning_rate": 1.4850747282285106e-06, + "loss": 0.05415496826171875, + "step": 154625 + }, + { + "epoch": 1.3370398872469758, + "grad_norm": 3.4349655790371085, + "learning_rate": 1.4848988827126109e-06, + "loss": 0.008905029296875, + "step": 154630 + }, + { + "epoch": 1.337083120768519, + "grad_norm": 1.9180653073383334, + "learning_rate": 1.4847230441842232e-06, + "loss": 0.022117996215820314, + "step": 154635 + }, + { + "epoch": 1.3371263542900622, + "grad_norm": 40.21520171107032, + "learning_rate": 1.4845472126441578e-06, + "loss": 0.14779434204101563, + "step": 154640 + }, + { + "epoch": 1.3371695878116057, + "grad_norm": 1.6701562177483784, + "learning_rate": 1.4843713880932282e-06, + "loss": 0.027615737915039063, + "step": 154645 + }, + { + "epoch": 1.337212821333149, + "grad_norm": 0.19391811686018137, + "learning_rate": 1.4841955705322428e-06, + "loss": 0.28924999237060545, + "step": 154650 + }, + { + "epoch": 1.3372560548546921, + "grad_norm": 7.927366382111063, + "learning_rate": 1.4840197599620144e-06, + "loss": 0.13800621032714844, + "step": 154655 + }, + { + "epoch": 1.3372992883762354, + "grad_norm": 3.7340497697648387, + "learning_rate": 1.483843956383353e-06, + "loss": 0.035953330993652347, + "step": 154660 + }, + { + "epoch": 1.3373425218977786, + "grad_norm": 23.28863259759145, + "learning_rate": 1.4836681597970693e-06, + "loss": 0.1410888671875, + "step": 154665 + }, + { + "epoch": 1.337385755419322, + "grad_norm": 0.458268353517065, + "learning_rate": 1.4834923702039729e-06, + "loss": 0.10491790771484374, + "step": 154670 + }, + { + "epoch": 1.3374289889408653, + "grad_norm": 0.5874587452144818, + "learning_rate": 1.4833165876048766e-06, + "loss": 0.04404144287109375, + "step": 154675 + }, + { + "epoch": 1.3374722224624085, + "grad_norm": 29.529115321683978, + "learning_rate": 1.4831408120005907e-06, + "loss": 0.08048667907714843, + "step": 154680 + }, + { + "epoch": 1.3375154559839517, + "grad_norm": 41.349504335451655, + "learning_rate": 1.4829650433919246e-06, + "loss": 0.16887779235839845, + "step": 154685 + }, + { + "epoch": 1.337558689505495, + "grad_norm": 2.1696895963163954, + "learning_rate": 1.4827892817796902e-06, + "loss": 0.10202178955078126, + "step": 154690 + }, + { + "epoch": 1.3376019230270382, + "grad_norm": 0.8320540356742417, + "learning_rate": 1.4826135271646978e-06, + "loss": 0.04380779266357422, + "step": 154695 + }, + { + "epoch": 1.3376451565485814, + "grad_norm": 36.216255456249776, + "learning_rate": 1.4824377795477558e-06, + "loss": 0.09051437377929687, + "step": 154700 + }, + { + "epoch": 1.3376883900701246, + "grad_norm": 4.0048117842192905, + "learning_rate": 1.4822620389296772e-06, + "loss": 0.1702850341796875, + "step": 154705 + }, + { + "epoch": 1.337731623591668, + "grad_norm": 0.3208554275973777, + "learning_rate": 1.4820863053112733e-06, + "loss": 0.03528289794921875, + "step": 154710 + }, + { + "epoch": 1.3377748571132113, + "grad_norm": 41.9476418149857, + "learning_rate": 1.4819105786933526e-06, + "loss": 0.1868682861328125, + "step": 154715 + }, + { + "epoch": 1.3378180906347545, + "grad_norm": 2.1952406730617486, + "learning_rate": 1.4817348590767269e-06, + "loss": 0.03572998046875, + "step": 154720 + }, + { + "epoch": 1.3378613241562978, + "grad_norm": 6.373131259132169, + "learning_rate": 1.4815591464622053e-06, + "loss": 0.144732666015625, + "step": 154725 + }, + { + "epoch": 1.3379045576778412, + "grad_norm": 0.23772866224505534, + "learning_rate": 1.4813834408505991e-06, + "loss": 0.07615509033203124, + "step": 154730 + }, + { + "epoch": 1.3379477911993845, + "grad_norm": 0.16353300785255073, + "learning_rate": 1.4812077422427171e-06, + "loss": 0.059252166748046876, + "step": 154735 + }, + { + "epoch": 1.3379910247209277, + "grad_norm": 23.53091282006315, + "learning_rate": 1.4810320506393717e-06, + "loss": 0.09421615600585938, + "step": 154740 + }, + { + "epoch": 1.338034258242471, + "grad_norm": 1.0312539934435085, + "learning_rate": 1.4808563660413723e-06, + "loss": 0.04340705871582031, + "step": 154745 + }, + { + "epoch": 1.3380774917640141, + "grad_norm": 0.258346426729721, + "learning_rate": 1.4806806884495294e-06, + "loss": 0.03585662841796875, + "step": 154750 + }, + { + "epoch": 1.3381207252855574, + "grad_norm": 0.969230775807896, + "learning_rate": 1.4805050178646514e-06, + "loss": 0.38551025390625, + "step": 154755 + }, + { + "epoch": 1.3381639588071006, + "grad_norm": 14.843555557700215, + "learning_rate": 1.4803293542875512e-06, + "loss": 0.05178632736206055, + "step": 154760 + }, + { + "epoch": 1.3382071923286438, + "grad_norm": 0.25100068675076653, + "learning_rate": 1.4801536977190368e-06, + "loss": 0.44011611938476564, + "step": 154765 + }, + { + "epoch": 1.3382504258501873, + "grad_norm": 1.4316680304524623, + "learning_rate": 1.4799780481599203e-06, + "loss": 0.025581741333007814, + "step": 154770 + }, + { + "epoch": 1.3382936593717305, + "grad_norm": 7.105362099589503, + "learning_rate": 1.4798024056110108e-06, + "loss": 0.20441436767578125, + "step": 154775 + }, + { + "epoch": 1.3383368928932737, + "grad_norm": 2.4142983107513976, + "learning_rate": 1.4796267700731181e-06, + "loss": 0.17441368103027344, + "step": 154780 + }, + { + "epoch": 1.338380126414817, + "grad_norm": 4.666249629393732, + "learning_rate": 1.4794511415470525e-06, + "loss": 0.02039642333984375, + "step": 154785 + }, + { + "epoch": 1.3384233599363602, + "grad_norm": 15.709838830999397, + "learning_rate": 1.479275520033624e-06, + "loss": 0.06494331359863281, + "step": 154790 + }, + { + "epoch": 1.3384665934579036, + "grad_norm": 6.74704666379422, + "learning_rate": 1.4790999055336423e-06, + "loss": 0.2584390640258789, + "step": 154795 + }, + { + "epoch": 1.3385098269794469, + "grad_norm": 2.837654247714479, + "learning_rate": 1.4789242980479164e-06, + "loss": 0.021439361572265624, + "step": 154800 + }, + { + "epoch": 1.33855306050099, + "grad_norm": 10.940340298989398, + "learning_rate": 1.478748697577258e-06, + "loss": 0.06381568908691407, + "step": 154805 + }, + { + "epoch": 1.3385962940225333, + "grad_norm": 0.21664236760037134, + "learning_rate": 1.4785731041224767e-06, + "loss": 0.045428466796875, + "step": 154810 + }, + { + "epoch": 1.3386395275440766, + "grad_norm": 0.3119497627812495, + "learning_rate": 1.4783975176843803e-06, + "loss": 0.0388458251953125, + "step": 154815 + }, + { + "epoch": 1.3386827610656198, + "grad_norm": 0.3836493292823851, + "learning_rate": 1.4782219382637814e-06, + "loss": 0.015858268737792967, + "step": 154820 + }, + { + "epoch": 1.338725994587163, + "grad_norm": 11.131995670757332, + "learning_rate": 1.4780463658614884e-06, + "loss": 0.0468048095703125, + "step": 154825 + }, + { + "epoch": 1.3387692281087062, + "grad_norm": 16.990583462985324, + "learning_rate": 1.47787080047831e-06, + "loss": 0.13541183471679688, + "step": 154830 + }, + { + "epoch": 1.3388124616302497, + "grad_norm": 33.98136856121851, + "learning_rate": 1.4776952421150582e-06, + "loss": 0.230047607421875, + "step": 154835 + }, + { + "epoch": 1.338855695151793, + "grad_norm": 10.00671083411242, + "learning_rate": 1.4775196907725414e-06, + "loss": 0.1692403793334961, + "step": 154840 + }, + { + "epoch": 1.3388989286733362, + "grad_norm": 3.0719271328366515, + "learning_rate": 1.4773441464515688e-06, + "loss": 0.08949928283691407, + "step": 154845 + }, + { + "epoch": 1.3389421621948794, + "grad_norm": 5.928956977687067, + "learning_rate": 1.4771686091529512e-06, + "loss": 0.08334407806396485, + "step": 154850 + }, + { + "epoch": 1.3389853957164226, + "grad_norm": 0.970939889927882, + "learning_rate": 1.4769930788774968e-06, + "loss": 0.04299583435058594, + "step": 154855 + }, + { + "epoch": 1.339028629237966, + "grad_norm": 25.149735480431524, + "learning_rate": 1.4768175556260163e-06, + "loss": 0.10296192169189453, + "step": 154860 + }, + { + "epoch": 1.3390718627595093, + "grad_norm": 6.362917313527253, + "learning_rate": 1.476642039399317e-06, + "loss": 0.05310535430908203, + "step": 154865 + }, + { + "epoch": 1.3391150962810525, + "grad_norm": 3.484905389530127, + "learning_rate": 1.4764665301982111e-06, + "loss": 0.12776336669921876, + "step": 154870 + }, + { + "epoch": 1.3391583298025957, + "grad_norm": 43.79685270317767, + "learning_rate": 1.4762910280235062e-06, + "loss": 0.212200927734375, + "step": 154875 + }, + { + "epoch": 1.339201563324139, + "grad_norm": 44.64092610503977, + "learning_rate": 1.4761155328760135e-06, + "loss": 0.14923858642578125, + "step": 154880 + }, + { + "epoch": 1.3392447968456822, + "grad_norm": 7.12923200612016, + "learning_rate": 1.4759400447565412e-06, + "loss": 0.0726531982421875, + "step": 154885 + }, + { + "epoch": 1.3392880303672254, + "grad_norm": 2.3267466549413096, + "learning_rate": 1.4757645636658986e-06, + "loss": 0.0357940673828125, + "step": 154890 + }, + { + "epoch": 1.3393312638887687, + "grad_norm": 10.045029044438854, + "learning_rate": 1.4755890896048944e-06, + "loss": 0.015412521362304688, + "step": 154895 + }, + { + "epoch": 1.3393744974103121, + "grad_norm": 1.466435575148169, + "learning_rate": 1.4754136225743398e-06, + "loss": 0.08686599731445313, + "step": 154900 + }, + { + "epoch": 1.3394177309318553, + "grad_norm": 4.6454058489797685, + "learning_rate": 1.4752381625750422e-06, + "loss": 0.01990509033203125, + "step": 154905 + }, + { + "epoch": 1.3394609644533986, + "grad_norm": 3.119567108846554, + "learning_rate": 1.475062709607812e-06, + "loss": 0.09128875732421875, + "step": 154910 + }, + { + "epoch": 1.3395041979749418, + "grad_norm": 0.09646182454475859, + "learning_rate": 1.4748872636734578e-06, + "loss": 0.057195281982421874, + "step": 154915 + }, + { + "epoch": 1.3395474314964853, + "grad_norm": 1.939365469710864, + "learning_rate": 1.4747118247727883e-06, + "loss": 0.06698951721191407, + "step": 154920 + }, + { + "epoch": 1.3395906650180285, + "grad_norm": 2.022635374070198, + "learning_rate": 1.4745363929066125e-06, + "loss": 0.01697196960449219, + "step": 154925 + }, + { + "epoch": 1.3396338985395717, + "grad_norm": 1.9819707753118165, + "learning_rate": 1.4743609680757399e-06, + "loss": 0.028018951416015625, + "step": 154930 + }, + { + "epoch": 1.339677132061115, + "grad_norm": 5.2020720723475, + "learning_rate": 1.4741855502809808e-06, + "loss": 0.07740144729614258, + "step": 154935 + }, + { + "epoch": 1.3397203655826582, + "grad_norm": 2.800288802133129, + "learning_rate": 1.4740101395231432e-06, + "loss": 0.013615036010742187, + "step": 154940 + }, + { + "epoch": 1.3397635991042014, + "grad_norm": 13.009132277252707, + "learning_rate": 1.4738347358030358e-06, + "loss": 0.036791229248046876, + "step": 154945 + }, + { + "epoch": 1.3398068326257446, + "grad_norm": 1.230794525897408, + "learning_rate": 1.473659339121468e-06, + "loss": 0.011458301544189453, + "step": 154950 + }, + { + "epoch": 1.3398500661472879, + "grad_norm": 1.6204536866239991, + "learning_rate": 1.4734839494792486e-06, + "loss": 0.1780078887939453, + "step": 154955 + }, + { + "epoch": 1.3398932996688313, + "grad_norm": 25.795654275524207, + "learning_rate": 1.4733085668771849e-06, + "loss": 0.09168510437011719, + "step": 154960 + }, + { + "epoch": 1.3399365331903745, + "grad_norm": 31.114783884132738, + "learning_rate": 1.4731331913160883e-06, + "loss": 0.2102447509765625, + "step": 154965 + }, + { + "epoch": 1.3399797667119178, + "grad_norm": 1.5662572996475683, + "learning_rate": 1.4729578227967666e-06, + "loss": 0.025554656982421875, + "step": 154970 + }, + { + "epoch": 1.340023000233461, + "grad_norm": 0.8756038595575003, + "learning_rate": 1.4727824613200284e-06, + "loss": 0.09767951965332031, + "step": 154975 + }, + { + "epoch": 1.3400662337550042, + "grad_norm": 7.60533713794251, + "learning_rate": 1.4726071068866815e-06, + "loss": 0.0352783203125, + "step": 154980 + }, + { + "epoch": 1.3401094672765477, + "grad_norm": 0.3351894226554814, + "learning_rate": 1.472431759497537e-06, + "loss": 0.017325210571289062, + "step": 154985 + }, + { + "epoch": 1.340152700798091, + "grad_norm": 3.8757736158583405, + "learning_rate": 1.472256419153401e-06, + "loss": 0.2689788818359375, + "step": 154990 + }, + { + "epoch": 1.3401959343196341, + "grad_norm": 6.068123487081106, + "learning_rate": 1.4720810858550842e-06, + "loss": 0.05219821929931641, + "step": 154995 + }, + { + "epoch": 1.3402391678411774, + "grad_norm": 26.59615202995806, + "learning_rate": 1.4719057596033946e-06, + "loss": 0.19564895629882811, + "step": 155000 + }, + { + "epoch": 1.3402824013627206, + "grad_norm": 5.122522381887554, + "learning_rate": 1.471730440399141e-06, + "loss": 0.03131732940673828, + "step": 155005 + }, + { + "epoch": 1.3403256348842638, + "grad_norm": 10.572371284874846, + "learning_rate": 1.471555128243131e-06, + "loss": 0.045270538330078124, + "step": 155010 + }, + { + "epoch": 1.340368868405807, + "grad_norm": 0.19899893166136545, + "learning_rate": 1.4713798231361739e-06, + "loss": 0.2320720672607422, + "step": 155015 + }, + { + "epoch": 1.3404121019273503, + "grad_norm": 3.8078896955804793, + "learning_rate": 1.471204525079077e-06, + "loss": 0.08816909790039062, + "step": 155020 + }, + { + "epoch": 1.3404553354488937, + "grad_norm": 0.42718973089257983, + "learning_rate": 1.4710292340726506e-06, + "loss": 0.17553234100341797, + "step": 155025 + }, + { + "epoch": 1.340498568970437, + "grad_norm": 0.7268948644432727, + "learning_rate": 1.4708539501177024e-06, + "loss": 0.04511184692382812, + "step": 155030 + }, + { + "epoch": 1.3405418024919802, + "grad_norm": 0.5752163966710295, + "learning_rate": 1.470678673215041e-06, + "loss": 0.024797916412353516, + "step": 155035 + }, + { + "epoch": 1.3405850360135234, + "grad_norm": 11.377255973752602, + "learning_rate": 1.4705034033654728e-06, + "loss": 0.1037567138671875, + "step": 155040 + }, + { + "epoch": 1.3406282695350666, + "grad_norm": 2.2342665545292064, + "learning_rate": 1.470328140569809e-06, + "loss": 0.1914997100830078, + "step": 155045 + }, + { + "epoch": 1.34067150305661, + "grad_norm": 1.4541640382280903, + "learning_rate": 1.4701528848288568e-06, + "loss": 0.19435806274414064, + "step": 155050 + }, + { + "epoch": 1.3407147365781533, + "grad_norm": 4.590077409353259, + "learning_rate": 1.469977636143423e-06, + "loss": 0.02777099609375, + "step": 155055 + }, + { + "epoch": 1.3407579700996965, + "grad_norm": 1.4376697462620551, + "learning_rate": 1.4698023945143185e-06, + "loss": 0.01396484375, + "step": 155060 + }, + { + "epoch": 1.3408012036212398, + "grad_norm": 3.9757157632909648, + "learning_rate": 1.4696271599423498e-06, + "loss": 0.19019088745117188, + "step": 155065 + }, + { + "epoch": 1.340844437142783, + "grad_norm": 6.714312054222894, + "learning_rate": 1.4694519324283257e-06, + "loss": 0.03302278518676758, + "step": 155070 + }, + { + "epoch": 1.3408876706643262, + "grad_norm": 9.255280498024065, + "learning_rate": 1.4692767119730543e-06, + "loss": 0.03257827758789063, + "step": 155075 + }, + { + "epoch": 1.3409309041858695, + "grad_norm": 1.7549256664135846, + "learning_rate": 1.469101498577343e-06, + "loss": 0.024259185791015624, + "step": 155080 + }, + { + "epoch": 1.3409741377074127, + "grad_norm": 0.08971447830009148, + "learning_rate": 1.4689262922419993e-06, + "loss": 0.026309967041015625, + "step": 155085 + }, + { + "epoch": 1.3410173712289561, + "grad_norm": 0.5322231633781214, + "learning_rate": 1.4687510929678335e-06, + "loss": 0.014638328552246093, + "step": 155090 + }, + { + "epoch": 1.3410606047504994, + "grad_norm": 3.4663053924408405, + "learning_rate": 1.4685759007556521e-06, + "loss": 0.043777847290039064, + "step": 155095 + }, + { + "epoch": 1.3411038382720426, + "grad_norm": 1.3833365398797508, + "learning_rate": 1.4684007156062625e-06, + "loss": 0.01724395751953125, + "step": 155100 + }, + { + "epoch": 1.3411470717935858, + "grad_norm": 24.31894924107183, + "learning_rate": 1.4682255375204744e-06, + "loss": 0.06325721740722656, + "step": 155105 + }, + { + "epoch": 1.341190305315129, + "grad_norm": 2.097629416661523, + "learning_rate": 1.468050366499095e-06, + "loss": 0.024582672119140624, + "step": 155110 + }, + { + "epoch": 1.3412335388366725, + "grad_norm": 2.5448765508546467, + "learning_rate": 1.4678752025429305e-06, + "loss": 0.04485244750976562, + "step": 155115 + }, + { + "epoch": 1.3412767723582157, + "grad_norm": 2.838870827939093, + "learning_rate": 1.4677000456527916e-06, + "loss": 0.050507736206054685, + "step": 155120 + }, + { + "epoch": 1.341320005879759, + "grad_norm": 15.636562450606972, + "learning_rate": 1.4675248958294842e-06, + "loss": 0.030926513671875, + "step": 155125 + }, + { + "epoch": 1.3413632394013022, + "grad_norm": 0.6911973897289487, + "learning_rate": 1.467349753073817e-06, + "loss": 0.018953323364257812, + "step": 155130 + }, + { + "epoch": 1.3414064729228454, + "grad_norm": 1.882386337316868, + "learning_rate": 1.4671746173865971e-06, + "loss": 0.024182891845703124, + "step": 155135 + }, + { + "epoch": 1.3414497064443887, + "grad_norm": 2.7644445790673102, + "learning_rate": 1.4669994887686328e-06, + "loss": 0.03501548767089844, + "step": 155140 + }, + { + "epoch": 1.3414929399659319, + "grad_norm": 89.40187267493674, + "learning_rate": 1.4668243672207311e-06, + "loss": 0.23227767944335936, + "step": 155145 + }, + { + "epoch": 1.3415361734874751, + "grad_norm": 7.51608551148216, + "learning_rate": 1.466649252743699e-06, + "loss": 0.041623687744140624, + "step": 155150 + }, + { + "epoch": 1.3415794070090186, + "grad_norm": 9.1317315389785, + "learning_rate": 1.4664741453383447e-06, + "loss": 0.05605945587158203, + "step": 155155 + }, + { + "epoch": 1.3416226405305618, + "grad_norm": 0.17164217603786674, + "learning_rate": 1.4662990450054776e-06, + "loss": 0.04854736328125, + "step": 155160 + }, + { + "epoch": 1.341665874052105, + "grad_norm": 2.3058455177009773, + "learning_rate": 1.4661239517459038e-06, + "loss": 0.0311279296875, + "step": 155165 + }, + { + "epoch": 1.3417091075736483, + "grad_norm": 3.869840716142461, + "learning_rate": 1.4659488655604306e-06, + "loss": 0.0367767333984375, + "step": 155170 + }, + { + "epoch": 1.3417523410951917, + "grad_norm": 1.1712162822884182, + "learning_rate": 1.4657737864498655e-06, + "loss": 0.21886749267578126, + "step": 155175 + }, + { + "epoch": 1.341795574616735, + "grad_norm": 1.4698403118906835, + "learning_rate": 1.4655987144150154e-06, + "loss": 0.04334564208984375, + "step": 155180 + }, + { + "epoch": 1.3418388081382782, + "grad_norm": 15.1752249955005, + "learning_rate": 1.4654236494566896e-06, + "loss": 0.08425788879394532, + "step": 155185 + }, + { + "epoch": 1.3418820416598214, + "grad_norm": 1.618020973506277, + "learning_rate": 1.465248591575694e-06, + "loss": 0.17403640747070312, + "step": 155190 + }, + { + "epoch": 1.3419252751813646, + "grad_norm": 0.2639907845292632, + "learning_rate": 1.4650735407728366e-06, + "loss": 0.0048615455627441405, + "step": 155195 + }, + { + "epoch": 1.3419685087029078, + "grad_norm": 0.7704087938480493, + "learning_rate": 1.4648984970489243e-06, + "loss": 0.08020782470703125, + "step": 155200 + }, + { + "epoch": 1.342011742224451, + "grad_norm": 1.4898296067566246, + "learning_rate": 1.464723460404763e-06, + "loss": 0.0416534423828125, + "step": 155205 + }, + { + "epoch": 1.3420549757459943, + "grad_norm": 5.500141014935733, + "learning_rate": 1.464548430841163e-06, + "loss": 0.0814849853515625, + "step": 155210 + }, + { + "epoch": 1.3420982092675378, + "grad_norm": 1.7350605917759523, + "learning_rate": 1.464373408358929e-06, + "loss": 0.17657852172851562, + "step": 155215 + }, + { + "epoch": 1.342141442789081, + "grad_norm": 0.8499588298641019, + "learning_rate": 1.4641983929588699e-06, + "loss": 0.006201553344726563, + "step": 155220 + }, + { + "epoch": 1.3421846763106242, + "grad_norm": 0.06975850608791898, + "learning_rate": 1.464023384641792e-06, + "loss": 0.0041103363037109375, + "step": 155225 + }, + { + "epoch": 1.3422279098321674, + "grad_norm": 4.274087501508649, + "learning_rate": 1.4638483834085027e-06, + "loss": 0.09263191223144532, + "step": 155230 + }, + { + "epoch": 1.3422711433537107, + "grad_norm": 46.28836719404179, + "learning_rate": 1.463673389259809e-06, + "loss": 0.09174232482910157, + "step": 155235 + }, + { + "epoch": 1.3423143768752541, + "grad_norm": 27.561995366180454, + "learning_rate": 1.463498402196518e-06, + "loss": 0.23924331665039061, + "step": 155240 + }, + { + "epoch": 1.3423576103967974, + "grad_norm": 0.46561389083479227, + "learning_rate": 1.463323422219435e-06, + "loss": 0.20140495300292968, + "step": 155245 + }, + { + "epoch": 1.3424008439183406, + "grad_norm": 5.141903179757771, + "learning_rate": 1.4631484493293701e-06, + "loss": 0.09100799560546875, + "step": 155250 + }, + { + "epoch": 1.3424440774398838, + "grad_norm": 3.878661684830766, + "learning_rate": 1.4629734835271286e-06, + "loss": 0.05660552978515625, + "step": 155255 + }, + { + "epoch": 1.342487310961427, + "grad_norm": 13.7264951070626, + "learning_rate": 1.462798524813518e-06, + "loss": 0.13433685302734374, + "step": 155260 + }, + { + "epoch": 1.3425305444829703, + "grad_norm": 0.7215278315152592, + "learning_rate": 1.462623573189343e-06, + "loss": 0.02885284423828125, + "step": 155265 + }, + { + "epoch": 1.3425737780045135, + "grad_norm": 9.223646708195586, + "learning_rate": 1.462448628655414e-06, + "loss": 0.165447998046875, + "step": 155270 + }, + { + "epoch": 1.3426170115260567, + "grad_norm": 4.25449664112737, + "learning_rate": 1.4622736912125343e-06, + "loss": 0.07498741149902344, + "step": 155275 + }, + { + "epoch": 1.3426602450476002, + "grad_norm": 2.2240200232882885, + "learning_rate": 1.4620987608615141e-06, + "loss": 0.07643814086914062, + "step": 155280 + }, + { + "epoch": 1.3427034785691434, + "grad_norm": 38.24221042677467, + "learning_rate": 1.4619238376031583e-06, + "loss": 0.100244140625, + "step": 155285 + }, + { + "epoch": 1.3427467120906866, + "grad_norm": 0.6358481838389552, + "learning_rate": 1.4617489214382737e-06, + "loss": 0.024917221069335936, + "step": 155290 + }, + { + "epoch": 1.3427899456122299, + "grad_norm": 6.291315766851727, + "learning_rate": 1.4615740123676674e-06, + "loss": 0.15595664978027343, + "step": 155295 + }, + { + "epoch": 1.342833179133773, + "grad_norm": 0.5856831284315649, + "learning_rate": 1.4613991103921458e-06, + "loss": 0.037969970703125, + "step": 155300 + }, + { + "epoch": 1.3428764126553165, + "grad_norm": 3.811942382120248, + "learning_rate": 1.4612242155125154e-06, + "loss": 0.14951629638671876, + "step": 155305 + }, + { + "epoch": 1.3429196461768598, + "grad_norm": 2.8181674913581163, + "learning_rate": 1.461049327729582e-06, + "loss": 0.09039306640625, + "step": 155310 + }, + { + "epoch": 1.342962879698403, + "grad_norm": 1.5164693996871847, + "learning_rate": 1.4608744470441541e-06, + "loss": 0.08781204223632813, + "step": 155315 + }, + { + "epoch": 1.3430061132199462, + "grad_norm": 8.826365100114325, + "learning_rate": 1.4606995734570375e-06, + "loss": 0.0931304931640625, + "step": 155320 + }, + { + "epoch": 1.3430493467414895, + "grad_norm": 0.9536799413675406, + "learning_rate": 1.4605247069690371e-06, + "loss": 0.019961166381835937, + "step": 155325 + }, + { + "epoch": 1.3430925802630327, + "grad_norm": 0.28509718891706715, + "learning_rate": 1.4603498475809616e-06, + "loss": 0.02768402099609375, + "step": 155330 + }, + { + "epoch": 1.343135813784576, + "grad_norm": 3.002522632443125, + "learning_rate": 1.460174995293617e-06, + "loss": 0.2669258117675781, + "step": 155335 + }, + { + "epoch": 1.3431790473061191, + "grad_norm": 0.9098209378739559, + "learning_rate": 1.460000150107808e-06, + "loss": 0.015524673461914062, + "step": 155340 + }, + { + "epoch": 1.3432222808276626, + "grad_norm": 3.20238789299272, + "learning_rate": 1.4598253120243434e-06, + "loss": 0.029474258422851562, + "step": 155345 + }, + { + "epoch": 1.3432655143492058, + "grad_norm": 0.2642009462357757, + "learning_rate": 1.4596504810440282e-06, + "loss": 0.05457305908203125, + "step": 155350 + }, + { + "epoch": 1.343308747870749, + "grad_norm": 25.668576937153716, + "learning_rate": 1.459475657167669e-06, + "loss": 0.05272598266601562, + "step": 155355 + }, + { + "epoch": 1.3433519813922923, + "grad_norm": 0.08090449483626234, + "learning_rate": 1.459300840396072e-06, + "loss": 0.03706226348876953, + "step": 155360 + }, + { + "epoch": 1.3433952149138355, + "grad_norm": 1.3949018851732633, + "learning_rate": 1.459126030730043e-06, + "loss": 0.09282951354980469, + "step": 155365 + }, + { + "epoch": 1.343438448435379, + "grad_norm": 3.062574713778476, + "learning_rate": 1.458951228170388e-06, + "loss": 0.09016265869140624, + "step": 155370 + }, + { + "epoch": 1.3434816819569222, + "grad_norm": 11.680098108204406, + "learning_rate": 1.4587764327179148e-06, + "loss": 0.06045608520507813, + "step": 155375 + }, + { + "epoch": 1.3435249154784654, + "grad_norm": 2.8101122955770097, + "learning_rate": 1.4586016443734271e-06, + "loss": 0.19049644470214844, + "step": 155380 + }, + { + "epoch": 1.3435681490000086, + "grad_norm": 7.37246809084961, + "learning_rate": 1.4584268631377335e-06, + "loss": 0.0737466812133789, + "step": 155385 + }, + { + "epoch": 1.3436113825215519, + "grad_norm": 65.433369108004, + "learning_rate": 1.4582520890116393e-06, + "loss": 0.11549606323242187, + "step": 155390 + }, + { + "epoch": 1.343654616043095, + "grad_norm": 1.5060862818245961, + "learning_rate": 1.45807732199595e-06, + "loss": 0.0408233642578125, + "step": 155395 + }, + { + "epoch": 1.3436978495646383, + "grad_norm": 0.3146672595664707, + "learning_rate": 1.457902562091472e-06, + "loss": 0.09134674072265625, + "step": 155400 + }, + { + "epoch": 1.3437410830861816, + "grad_norm": 1.4161362446910717, + "learning_rate": 1.45772780929901e-06, + "loss": 0.012513351440429688, + "step": 155405 + }, + { + "epoch": 1.343784316607725, + "grad_norm": 5.231137849243639, + "learning_rate": 1.4575530636193722e-06, + "loss": 0.052121734619140624, + "step": 155410 + }, + { + "epoch": 1.3438275501292682, + "grad_norm": 22.02217996399098, + "learning_rate": 1.4573783250533631e-06, + "loss": 0.05687255859375, + "step": 155415 + }, + { + "epoch": 1.3438707836508115, + "grad_norm": 5.699704509014391, + "learning_rate": 1.4572035936017892e-06, + "loss": 0.028211212158203124, + "step": 155420 + }, + { + "epoch": 1.3439140171723547, + "grad_norm": 3.6742936836266087, + "learning_rate": 1.4570288692654559e-06, + "loss": 0.0875762939453125, + "step": 155425 + }, + { + "epoch": 1.3439572506938982, + "grad_norm": 1.5648883474005104, + "learning_rate": 1.4568541520451681e-06, + "loss": 0.018366241455078126, + "step": 155430 + }, + { + "epoch": 1.3440004842154414, + "grad_norm": 2.409820871530532, + "learning_rate": 1.4566794419417334e-06, + "loss": 0.17462158203125, + "step": 155435 + }, + { + "epoch": 1.3440437177369846, + "grad_norm": 51.55807003845047, + "learning_rate": 1.4565047389559559e-06, + "loss": 0.16931838989257814, + "step": 155440 + }, + { + "epoch": 1.3440869512585278, + "grad_norm": 0.42379890727996394, + "learning_rate": 1.456330043088643e-06, + "loss": 0.008240890502929688, + "step": 155445 + }, + { + "epoch": 1.344130184780071, + "grad_norm": 0.09942060396787686, + "learning_rate": 1.4561553543406002e-06, + "loss": 0.039612960815429685, + "step": 155450 + }, + { + "epoch": 1.3441734183016143, + "grad_norm": 3.4963690449878935, + "learning_rate": 1.4559806727126318e-06, + "loss": 0.015898609161376955, + "step": 155455 + }, + { + "epoch": 1.3442166518231575, + "grad_norm": 0.15932112561176454, + "learning_rate": 1.4558059982055442e-06, + "loss": 0.023184585571289062, + "step": 155460 + }, + { + "epoch": 1.3442598853447008, + "grad_norm": 4.272000534620238, + "learning_rate": 1.4556313308201418e-06, + "loss": 0.0175628662109375, + "step": 155465 + }, + { + "epoch": 1.3443031188662442, + "grad_norm": 11.093273097148076, + "learning_rate": 1.4554566705572325e-06, + "loss": 0.15036582946777344, + "step": 155470 + }, + { + "epoch": 1.3443463523877874, + "grad_norm": 32.76716077670276, + "learning_rate": 1.4552820174176209e-06, + "loss": 0.06781902313232421, + "step": 155475 + }, + { + "epoch": 1.3443895859093307, + "grad_norm": 6.412984637884578, + "learning_rate": 1.4551073714021117e-06, + "loss": 0.14253387451171876, + "step": 155480 + }, + { + "epoch": 1.344432819430874, + "grad_norm": 4.203403127471222, + "learning_rate": 1.4549327325115107e-06, + "loss": 0.05191364288330078, + "step": 155485 + }, + { + "epoch": 1.3444760529524171, + "grad_norm": 0.27607004507201105, + "learning_rate": 1.4547581007466226e-06, + "loss": 0.013033294677734375, + "step": 155490 + }, + { + "epoch": 1.3445192864739606, + "grad_norm": 2.239229519184976, + "learning_rate": 1.4545834761082544e-06, + "loss": 0.10178413391113281, + "step": 155495 + }, + { + "epoch": 1.3445625199955038, + "grad_norm": 1.0456619113846206, + "learning_rate": 1.4544088585972098e-06, + "loss": 0.020685577392578126, + "step": 155500 + }, + { + "epoch": 1.344605753517047, + "grad_norm": 3.8550386822937557, + "learning_rate": 1.4542342482142959e-06, + "loss": 0.04527549743652344, + "step": 155505 + }, + { + "epoch": 1.3446489870385903, + "grad_norm": 40.952960481825045, + "learning_rate": 1.4540596449603172e-06, + "loss": 0.2148153305053711, + "step": 155510 + }, + { + "epoch": 1.3446922205601335, + "grad_norm": 0.5734992552606002, + "learning_rate": 1.4538850488360788e-06, + "loss": 0.07876510620117187, + "step": 155515 + }, + { + "epoch": 1.3447354540816767, + "grad_norm": 0.3418406292562355, + "learning_rate": 1.4537104598423858e-06, + "loss": 0.07349853515625, + "step": 155520 + }, + { + "epoch": 1.34477868760322, + "grad_norm": 0.5507373196150749, + "learning_rate": 1.4535358779800438e-06, + "loss": 0.13591327667236328, + "step": 155525 + }, + { + "epoch": 1.3448219211247632, + "grad_norm": 0.14886457205943224, + "learning_rate": 1.4533613032498564e-06, + "loss": 0.04022674560546875, + "step": 155530 + }, + { + "epoch": 1.3448651546463066, + "grad_norm": 20.616130972429644, + "learning_rate": 1.453186735652631e-06, + "loss": 0.048080825805664064, + "step": 155535 + }, + { + "epoch": 1.3449083881678499, + "grad_norm": 7.023562409143462, + "learning_rate": 1.4530121751891718e-06, + "loss": 0.0372039794921875, + "step": 155540 + }, + { + "epoch": 1.344951621689393, + "grad_norm": 6.518853041145882, + "learning_rate": 1.4528376218602836e-06, + "loss": 0.25177764892578125, + "step": 155545 + }, + { + "epoch": 1.3449948552109363, + "grad_norm": 11.094484127605126, + "learning_rate": 1.4526630756667709e-06, + "loss": 0.01940498352050781, + "step": 155550 + }, + { + "epoch": 1.3450380887324795, + "grad_norm": 12.375958317000205, + "learning_rate": 1.4524885366094402e-06, + "loss": 0.10800628662109375, + "step": 155555 + }, + { + "epoch": 1.345081322254023, + "grad_norm": 2.1744865174026944, + "learning_rate": 1.4523140046890956e-06, + "loss": 0.03875274658203125, + "step": 155560 + }, + { + "epoch": 1.3451245557755662, + "grad_norm": 13.435627441463376, + "learning_rate": 1.452139479906541e-06, + "loss": 0.1304485321044922, + "step": 155565 + }, + { + "epoch": 1.3451677892971095, + "grad_norm": 9.984709129719889, + "learning_rate": 1.4519649622625836e-06, + "loss": 0.1001739501953125, + "step": 155570 + }, + { + "epoch": 1.3452110228186527, + "grad_norm": 0.6325870168763176, + "learning_rate": 1.4517904517580265e-06, + "loss": 0.2294281005859375, + "step": 155575 + }, + { + "epoch": 1.345254256340196, + "grad_norm": 1.185032022356395, + "learning_rate": 1.4516159483936758e-06, + "loss": 0.09781341552734375, + "step": 155580 + }, + { + "epoch": 1.3452974898617391, + "grad_norm": 0.15562615981439742, + "learning_rate": 1.451441452170335e-06, + "loss": 0.06186370849609375, + "step": 155585 + }, + { + "epoch": 1.3453407233832824, + "grad_norm": 0.6043314650241305, + "learning_rate": 1.4512669630888094e-06, + "loss": 0.01828346252441406, + "step": 155590 + }, + { + "epoch": 1.3453839569048256, + "grad_norm": 3.4497182032028, + "learning_rate": 1.451092481149903e-06, + "loss": 0.027928924560546874, + "step": 155595 + }, + { + "epoch": 1.345427190426369, + "grad_norm": 45.427994395325165, + "learning_rate": 1.4509180063544217e-06, + "loss": 0.17142410278320314, + "step": 155600 + }, + { + "epoch": 1.3454704239479123, + "grad_norm": 0.7914321750974873, + "learning_rate": 1.450743538703169e-06, + "loss": 0.005176162719726563, + "step": 155605 + }, + { + "epoch": 1.3455136574694555, + "grad_norm": 0.6921965299590708, + "learning_rate": 1.450569078196951e-06, + "loss": 0.0168701171875, + "step": 155610 + }, + { + "epoch": 1.3455568909909987, + "grad_norm": 2.4677171073908095, + "learning_rate": 1.4503946248365718e-06, + "loss": 0.031258773803710935, + "step": 155615 + }, + { + "epoch": 1.345600124512542, + "grad_norm": 50.54235413227002, + "learning_rate": 1.4502201786228359e-06, + "loss": 0.249993896484375, + "step": 155620 + }, + { + "epoch": 1.3456433580340854, + "grad_norm": 0.2093924172742376, + "learning_rate": 1.4500457395565458e-06, + "loss": 0.03466639518737793, + "step": 155625 + }, + { + "epoch": 1.3456865915556286, + "grad_norm": 0.5243464193049665, + "learning_rate": 1.4498713076385094e-06, + "loss": 0.057680511474609376, + "step": 155630 + }, + { + "epoch": 1.3457298250771719, + "grad_norm": 51.51947660630902, + "learning_rate": 1.4496968828695295e-06, + "loss": 0.37370147705078127, + "step": 155635 + }, + { + "epoch": 1.345773058598715, + "grad_norm": 0.4691650133849136, + "learning_rate": 1.4495224652504106e-06, + "loss": 0.0352020263671875, + "step": 155640 + }, + { + "epoch": 1.3458162921202583, + "grad_norm": 1.8350342452040564, + "learning_rate": 1.449348054781957e-06, + "loss": 0.05872039794921875, + "step": 155645 + }, + { + "epoch": 1.3458595256418016, + "grad_norm": 4.141987604785201, + "learning_rate": 1.449173651464973e-06, + "loss": 0.14155349731445313, + "step": 155650 + }, + { + "epoch": 1.3459027591633448, + "grad_norm": 3.922186318859183, + "learning_rate": 1.4489992553002621e-06, + "loss": 0.089971923828125, + "step": 155655 + }, + { + "epoch": 1.3459459926848882, + "grad_norm": 17.99389205610604, + "learning_rate": 1.448824866288631e-06, + "loss": 0.21168746948242187, + "step": 155660 + }, + { + "epoch": 1.3459892262064315, + "grad_norm": 11.534692297166904, + "learning_rate": 1.4486504844308812e-06, + "loss": 0.03849334716796875, + "step": 155665 + }, + { + "epoch": 1.3460324597279747, + "grad_norm": 1.1601552915295956, + "learning_rate": 1.4484761097278192e-06, + "loss": 0.0636138916015625, + "step": 155670 + }, + { + "epoch": 1.346075693249518, + "grad_norm": 1.9186140748196103, + "learning_rate": 1.4483017421802485e-06, + "loss": 0.19377899169921875, + "step": 155675 + }, + { + "epoch": 1.3461189267710612, + "grad_norm": 0.27302922348393915, + "learning_rate": 1.448127381788973e-06, + "loss": 0.021624183654785155, + "step": 155680 + }, + { + "epoch": 1.3461621602926046, + "grad_norm": 2.6184396434320436, + "learning_rate": 1.447953028554797e-06, + "loss": 0.0657989501953125, + "step": 155685 + }, + { + "epoch": 1.3462053938141478, + "grad_norm": 0.4805750673509976, + "learning_rate": 1.4477786824785233e-06, + "loss": 0.06151142120361328, + "step": 155690 + }, + { + "epoch": 1.346248627335691, + "grad_norm": 0.597114535295981, + "learning_rate": 1.4476043435609585e-06, + "loss": 0.08788986206054687, + "step": 155695 + }, + { + "epoch": 1.3462918608572343, + "grad_norm": 0.33923920671381863, + "learning_rate": 1.4474300118029047e-06, + "loss": 0.040996551513671875, + "step": 155700 + }, + { + "epoch": 1.3463350943787775, + "grad_norm": 6.28658402707268, + "learning_rate": 1.447255687205167e-06, + "loss": 0.013574981689453125, + "step": 155705 + }, + { + "epoch": 1.3463783279003207, + "grad_norm": 17.96507386828242, + "learning_rate": 1.4470813697685487e-06, + "loss": 0.1199981689453125, + "step": 155710 + }, + { + "epoch": 1.346421561421864, + "grad_norm": 0.16167207802206532, + "learning_rate": 1.4469070594938528e-06, + "loss": 0.04875106811523437, + "step": 155715 + }, + { + "epoch": 1.3464647949434072, + "grad_norm": 8.119179545090375, + "learning_rate": 1.4467327563818855e-06, + "loss": 0.02725067138671875, + "step": 155720 + }, + { + "epoch": 1.3465080284649507, + "grad_norm": 3.0695590312022523, + "learning_rate": 1.446558460433448e-06, + "loss": 0.03483848571777344, + "step": 155725 + }, + { + "epoch": 1.3465512619864939, + "grad_norm": 3.658270675059359, + "learning_rate": 1.4463841716493472e-06, + "loss": 0.08386955261230469, + "step": 155730 + }, + { + "epoch": 1.3465944955080371, + "grad_norm": 1.1341624620170199, + "learning_rate": 1.4462098900303853e-06, + "loss": 0.04460906982421875, + "step": 155735 + }, + { + "epoch": 1.3466377290295803, + "grad_norm": 13.590839116791141, + "learning_rate": 1.446035615577366e-06, + "loss": 0.09955291748046875, + "step": 155740 + }, + { + "epoch": 1.3466809625511236, + "grad_norm": 14.890164230336802, + "learning_rate": 1.4458613482910928e-06, + "loss": 0.104583740234375, + "step": 155745 + }, + { + "epoch": 1.346724196072667, + "grad_norm": 34.90014438334132, + "learning_rate": 1.4456870881723703e-06, + "loss": 0.09326629638671875, + "step": 155750 + }, + { + "epoch": 1.3467674295942103, + "grad_norm": 0.5020729461167348, + "learning_rate": 1.445512835222e-06, + "loss": 0.01902456283569336, + "step": 155755 + }, + { + "epoch": 1.3468106631157535, + "grad_norm": 9.09736419787464, + "learning_rate": 1.4453385894407886e-06, + "loss": 0.11192474365234376, + "step": 155760 + }, + { + "epoch": 1.3468538966372967, + "grad_norm": 0.560618672889655, + "learning_rate": 1.4451643508295381e-06, + "loss": 0.072149658203125, + "step": 155765 + }, + { + "epoch": 1.34689713015884, + "grad_norm": 0.8296029818202888, + "learning_rate": 1.444990119389051e-06, + "loss": 0.056597900390625, + "step": 155770 + }, + { + "epoch": 1.3469403636803832, + "grad_norm": 2.8385566758135172, + "learning_rate": 1.4448158951201333e-06, + "loss": 0.02060546875, + "step": 155775 + }, + { + "epoch": 1.3469835972019264, + "grad_norm": 17.057737730786435, + "learning_rate": 1.4446416780235874e-06, + "loss": 0.02996959686279297, + "step": 155780 + }, + { + "epoch": 1.3470268307234696, + "grad_norm": 4.696915148224212, + "learning_rate": 1.4444674681002155e-06, + "loss": 0.06728363037109375, + "step": 155785 + }, + { + "epoch": 1.347070064245013, + "grad_norm": 0.9149391876366149, + "learning_rate": 1.4442932653508232e-06, + "loss": 0.15898971557617186, + "step": 155790 + }, + { + "epoch": 1.3471132977665563, + "grad_norm": 1.3553527215382537, + "learning_rate": 1.444119069776213e-06, + "loss": 0.03764076232910156, + "step": 155795 + }, + { + "epoch": 1.3471565312880995, + "grad_norm": 0.3475334756883575, + "learning_rate": 1.443944881377188e-06, + "loss": 0.0135467529296875, + "step": 155800 + }, + { + "epoch": 1.3471997648096428, + "grad_norm": 2.9072580749539805, + "learning_rate": 1.4437707001545516e-06, + "loss": 0.016819000244140625, + "step": 155805 + }, + { + "epoch": 1.347242998331186, + "grad_norm": 9.524304229281299, + "learning_rate": 1.4435965261091077e-06, + "loss": 0.12653121948242188, + "step": 155810 + }, + { + "epoch": 1.3472862318527294, + "grad_norm": 16.117640452349566, + "learning_rate": 1.4434223592416588e-06, + "loss": 0.04639854431152344, + "step": 155815 + }, + { + "epoch": 1.3473294653742727, + "grad_norm": 79.66034195832238, + "learning_rate": 1.443248199553007e-06, + "loss": 0.20740242004394532, + "step": 155820 + }, + { + "epoch": 1.347372698895816, + "grad_norm": 0.2762947399093382, + "learning_rate": 1.4430740470439587e-06, + "loss": 0.08024959564208985, + "step": 155825 + }, + { + "epoch": 1.3474159324173591, + "grad_norm": 0.6622698746185857, + "learning_rate": 1.442899901715314e-06, + "loss": 0.034225082397460936, + "step": 155830 + }, + { + "epoch": 1.3474591659389024, + "grad_norm": 6.019624402212063, + "learning_rate": 1.4427257635678781e-06, + "loss": 0.0254974365234375, + "step": 155835 + }, + { + "epoch": 1.3475023994604456, + "grad_norm": 62.310987036406296, + "learning_rate": 1.442551632602454e-06, + "loss": 0.18245887756347656, + "step": 155840 + }, + { + "epoch": 1.3475456329819888, + "grad_norm": 30.027547847710423, + "learning_rate": 1.4423775088198437e-06, + "loss": 0.1178924560546875, + "step": 155845 + }, + { + "epoch": 1.347588866503532, + "grad_norm": 2.8054866306901993, + "learning_rate": 1.4422033922208496e-06, + "loss": 0.049273681640625, + "step": 155850 + }, + { + "epoch": 1.3476321000250755, + "grad_norm": 0.41406980702968266, + "learning_rate": 1.442029282806277e-06, + "loss": 0.02557029724121094, + "step": 155855 + }, + { + "epoch": 1.3476753335466187, + "grad_norm": 0.18130596238076027, + "learning_rate": 1.4418551805769279e-06, + "loss": 0.046899032592773435, + "step": 155860 + }, + { + "epoch": 1.347718567068162, + "grad_norm": 0.09370359717733, + "learning_rate": 1.4416810855336047e-06, + "loss": 0.012368011474609374, + "step": 155865 + }, + { + "epoch": 1.3477618005897052, + "grad_norm": 1.5255988540437635, + "learning_rate": 1.4415069976771107e-06, + "loss": 0.101861572265625, + "step": 155870 + }, + { + "epoch": 1.3478050341112486, + "grad_norm": 6.547077754076271, + "learning_rate": 1.441332917008249e-06, + "loss": 0.045925140380859375, + "step": 155875 + }, + { + "epoch": 1.3478482676327919, + "grad_norm": 0.09211399370598072, + "learning_rate": 1.4411588435278205e-06, + "loss": 0.0266754150390625, + "step": 155880 + }, + { + "epoch": 1.347891501154335, + "grad_norm": 1.301267109360711, + "learning_rate": 1.4409847772366314e-06, + "loss": 0.011910247802734374, + "step": 155885 + }, + { + "epoch": 1.3479347346758783, + "grad_norm": 9.459984136554771, + "learning_rate": 1.4408107181354814e-06, + "loss": 0.03511123657226563, + "step": 155890 + }, + { + "epoch": 1.3479779681974215, + "grad_norm": 4.109227099123729, + "learning_rate": 1.4406366662251753e-06, + "loss": 0.010574722290039062, + "step": 155895 + }, + { + "epoch": 1.3480212017189648, + "grad_norm": 0.6511068493806015, + "learning_rate": 1.4404626215065156e-06, + "loss": 0.03953399658203125, + "step": 155900 + }, + { + "epoch": 1.348064435240508, + "grad_norm": 4.2515362740884335, + "learning_rate": 1.4402885839803045e-06, + "loss": 0.09789581298828125, + "step": 155905 + }, + { + "epoch": 1.3481076687620512, + "grad_norm": 6.333284998238092, + "learning_rate": 1.4401145536473447e-06, + "loss": 0.03280487060546875, + "step": 155910 + }, + { + "epoch": 1.3481509022835947, + "grad_norm": 3.1426268067662857, + "learning_rate": 1.439940530508437e-06, + "loss": 0.035968780517578125, + "step": 155915 + }, + { + "epoch": 1.348194135805138, + "grad_norm": 18.60251941868777, + "learning_rate": 1.4397665145643874e-06, + "loss": 0.08859710693359375, + "step": 155920 + }, + { + "epoch": 1.3482373693266811, + "grad_norm": 19.219520191375544, + "learning_rate": 1.4395925058159966e-06, + "loss": 0.1812774658203125, + "step": 155925 + }, + { + "epoch": 1.3482806028482244, + "grad_norm": 1.2855244045961767, + "learning_rate": 1.4394185042640673e-06, + "loss": 0.12817344665527344, + "step": 155930 + }, + { + "epoch": 1.3483238363697676, + "grad_norm": 1.017998806106859, + "learning_rate": 1.439244509909402e-06, + "loss": 0.022312164306640625, + "step": 155935 + }, + { + "epoch": 1.348367069891311, + "grad_norm": 0.6434971605331717, + "learning_rate": 1.439070522752802e-06, + "loss": 0.12423210144042969, + "step": 155940 + }, + { + "epoch": 1.3484103034128543, + "grad_norm": 11.211404103636639, + "learning_rate": 1.4388965427950707e-06, + "loss": 0.046509933471679685, + "step": 155945 + }, + { + "epoch": 1.3484535369343975, + "grad_norm": 1.1760913215529458, + "learning_rate": 1.4387225700370118e-06, + "loss": 0.013078022003173827, + "step": 155950 + }, + { + "epoch": 1.3484967704559407, + "grad_norm": 4.337176186458729, + "learning_rate": 1.4385486044794268e-06, + "loss": 0.13049545288085937, + "step": 155955 + }, + { + "epoch": 1.348540003977484, + "grad_norm": 15.295285653150009, + "learning_rate": 1.4383746461231173e-06, + "loss": 0.12444114685058594, + "step": 155960 + }, + { + "epoch": 1.3485832374990272, + "grad_norm": 0.6047897315322013, + "learning_rate": 1.4382006949688857e-06, + "loss": 0.045825958251953125, + "step": 155965 + }, + { + "epoch": 1.3486264710205704, + "grad_norm": 5.234806804589089, + "learning_rate": 1.438026751017535e-06, + "loss": 0.025272941589355467, + "step": 155970 + }, + { + "epoch": 1.3486697045421137, + "grad_norm": 0.449081286752375, + "learning_rate": 1.4378528142698653e-06, + "loss": 0.06265449523925781, + "step": 155975 + }, + { + "epoch": 1.348712938063657, + "grad_norm": 2.624713800056733, + "learning_rate": 1.4376788847266817e-06, + "loss": 0.1838043212890625, + "step": 155980 + }, + { + "epoch": 1.3487561715852003, + "grad_norm": 1.0183986460076226, + "learning_rate": 1.4375049623887852e-06, + "loss": 0.008119583129882812, + "step": 155985 + }, + { + "epoch": 1.3487994051067436, + "grad_norm": 20.706713050576273, + "learning_rate": 1.4373310472569776e-06, + "loss": 0.10962905883789062, + "step": 155990 + }, + { + "epoch": 1.3488426386282868, + "grad_norm": 22.103340056861885, + "learning_rate": 1.4371571393320602e-06, + "loss": 0.1055419921875, + "step": 155995 + }, + { + "epoch": 1.34888587214983, + "grad_norm": 3.095187044660056, + "learning_rate": 1.4369832386148368e-06, + "loss": 0.0277740478515625, + "step": 156000 + }, + { + "epoch": 1.3489291056713735, + "grad_norm": 0.7246442610803548, + "learning_rate": 1.436809345106109e-06, + "loss": 0.24455223083496094, + "step": 156005 + }, + { + "epoch": 1.3489723391929167, + "grad_norm": 1.3957538752855552, + "learning_rate": 1.436635458806677e-06, + "loss": 0.020180892944335938, + "step": 156010 + }, + { + "epoch": 1.34901557271446, + "grad_norm": 12.17429538245212, + "learning_rate": 1.436461579717345e-06, + "loss": 0.04048576354980469, + "step": 156015 + }, + { + "epoch": 1.3490588062360032, + "grad_norm": 0.10365904212589974, + "learning_rate": 1.4362877078389145e-06, + "loss": 0.02315177917480469, + "step": 156020 + }, + { + "epoch": 1.3491020397575464, + "grad_norm": 1.4061819982520507, + "learning_rate": 1.4361138431721866e-06, + "loss": 0.05465621948242187, + "step": 156025 + }, + { + "epoch": 1.3491452732790896, + "grad_norm": 0.16518540109598517, + "learning_rate": 1.4359399857179635e-06, + "loss": 0.03296737670898438, + "step": 156030 + }, + { + "epoch": 1.3491885068006328, + "grad_norm": 1.0077283492952749, + "learning_rate": 1.435766135477047e-06, + "loss": 0.32077560424804685, + "step": 156035 + }, + { + "epoch": 1.349231740322176, + "grad_norm": 6.977663365841081, + "learning_rate": 1.435592292450238e-06, + "loss": 0.06290817260742188, + "step": 156040 + }, + { + "epoch": 1.3492749738437195, + "grad_norm": 0.7846862574873527, + "learning_rate": 1.4354184566383398e-06, + "loss": 0.05248565673828125, + "step": 156045 + }, + { + "epoch": 1.3493182073652628, + "grad_norm": 0.5135638769824082, + "learning_rate": 1.435244628042154e-06, + "loss": 0.04709548950195312, + "step": 156050 + }, + { + "epoch": 1.349361440886806, + "grad_norm": 1.9882040592869739, + "learning_rate": 1.4350708066624801e-06, + "loss": 0.18170700073242188, + "step": 156055 + }, + { + "epoch": 1.3494046744083492, + "grad_norm": 1.390230936268437, + "learning_rate": 1.4348969925001225e-06, + "loss": 0.22628936767578126, + "step": 156060 + }, + { + "epoch": 1.3494479079298924, + "grad_norm": 2.0646877620220216, + "learning_rate": 1.4347231855558818e-06, + "loss": 0.03731842041015625, + "step": 156065 + }, + { + "epoch": 1.349491141451436, + "grad_norm": 1.9632639973579886, + "learning_rate": 1.4345493858305582e-06, + "loss": 0.030901336669921876, + "step": 156070 + }, + { + "epoch": 1.3495343749729791, + "grad_norm": 17.182560086602816, + "learning_rate": 1.434375593324956e-06, + "loss": 0.09660682678222657, + "step": 156075 + }, + { + "epoch": 1.3495776084945224, + "grad_norm": 10.214502954728648, + "learning_rate": 1.434201808039875e-06, + "loss": 0.013299942016601562, + "step": 156080 + }, + { + "epoch": 1.3496208420160656, + "grad_norm": 2.7851423340791057, + "learning_rate": 1.4340280299761168e-06, + "loss": 0.07163925170898437, + "step": 156085 + }, + { + "epoch": 1.3496640755376088, + "grad_norm": 127.1321267651293, + "learning_rate": 1.4338542591344831e-06, + "loss": 0.1955780029296875, + "step": 156090 + }, + { + "epoch": 1.349707309059152, + "grad_norm": 5.177849383614127, + "learning_rate": 1.4336804955157753e-06, + "loss": 0.028354644775390625, + "step": 156095 + }, + { + "epoch": 1.3497505425806953, + "grad_norm": 1.3123489812241576, + "learning_rate": 1.4335067391207944e-06, + "loss": 0.054169845581054685, + "step": 156100 + }, + { + "epoch": 1.3497937761022385, + "grad_norm": 0.9595477149046756, + "learning_rate": 1.433332989950341e-06, + "loss": 0.0219879150390625, + "step": 156105 + }, + { + "epoch": 1.349837009623782, + "grad_norm": 0.29914578366426564, + "learning_rate": 1.4331592480052184e-06, + "loss": 0.0030301094055175783, + "step": 156110 + }, + { + "epoch": 1.3498802431453252, + "grad_norm": 0.4311105603506939, + "learning_rate": 1.432985513286226e-06, + "loss": 0.020229911804199217, + "step": 156115 + }, + { + "epoch": 1.3499234766668684, + "grad_norm": 8.20111086851946, + "learning_rate": 1.4328117857941668e-06, + "loss": 0.06322021484375, + "step": 156120 + }, + { + "epoch": 1.3499667101884116, + "grad_norm": 1.7845208197471545, + "learning_rate": 1.4326380655298413e-06, + "loss": 0.2333892822265625, + "step": 156125 + }, + { + "epoch": 1.350009943709955, + "grad_norm": 0.7653662219221871, + "learning_rate": 1.4324643524940507e-06, + "loss": 0.02868499755859375, + "step": 156130 + }, + { + "epoch": 1.3500531772314983, + "grad_norm": 0.5809370725035744, + "learning_rate": 1.4322906466875946e-06, + "loss": 0.03996429443359375, + "step": 156135 + }, + { + "epoch": 1.3500964107530415, + "grad_norm": 17.148273485268213, + "learning_rate": 1.432116948111277e-06, + "loss": 0.16356658935546875, + "step": 156140 + }, + { + "epoch": 1.3501396442745848, + "grad_norm": 15.494989532064771, + "learning_rate": 1.4319432567658971e-06, + "loss": 0.08202037811279297, + "step": 156145 + }, + { + "epoch": 1.350182877796128, + "grad_norm": 3.8130670532031727, + "learning_rate": 1.4317695726522564e-06, + "loss": 0.10484619140625, + "step": 156150 + }, + { + "epoch": 1.3502261113176712, + "grad_norm": 3.8746694425428645, + "learning_rate": 1.4315958957711562e-06, + "loss": 0.021345901489257812, + "step": 156155 + }, + { + "epoch": 1.3502693448392145, + "grad_norm": 5.309368131788732, + "learning_rate": 1.4314222261233967e-06, + "loss": 0.031743621826171874, + "step": 156160 + }, + { + "epoch": 1.3503125783607577, + "grad_norm": 3.4602732740644013, + "learning_rate": 1.4312485637097785e-06, + "loss": 0.04646987915039062, + "step": 156165 + }, + { + "epoch": 1.3503558118823011, + "grad_norm": 4.0438685043163805, + "learning_rate": 1.431074908531103e-06, + "loss": 0.06691436767578125, + "step": 156170 + }, + { + "epoch": 1.3503990454038444, + "grad_norm": 0.4888196110447014, + "learning_rate": 1.4309012605881727e-06, + "loss": 0.02147331237792969, + "step": 156175 + }, + { + "epoch": 1.3504422789253876, + "grad_norm": 2.461190487196845, + "learning_rate": 1.4307276198817874e-06, + "loss": 0.0388824462890625, + "step": 156180 + }, + { + "epoch": 1.3504855124469308, + "grad_norm": 3.574735326282843, + "learning_rate": 1.4305539864127474e-06, + "loss": 0.053453826904296876, + "step": 156185 + }, + { + "epoch": 1.350528745968474, + "grad_norm": 0.20818751513164632, + "learning_rate": 1.4303803601818535e-06, + "loss": 0.010297203063964843, + "step": 156190 + }, + { + "epoch": 1.3505719794900175, + "grad_norm": 3.7298259771105435, + "learning_rate": 1.430206741189907e-06, + "loss": 0.09401798248291016, + "step": 156195 + }, + { + "epoch": 1.3506152130115607, + "grad_norm": 1.158717550473783, + "learning_rate": 1.4300331294377073e-06, + "loss": 0.033893585205078125, + "step": 156200 + }, + { + "epoch": 1.350658446533104, + "grad_norm": 0.05559643821856018, + "learning_rate": 1.4298595249260567e-06, + "loss": 0.01275177001953125, + "step": 156205 + }, + { + "epoch": 1.3507016800546472, + "grad_norm": 12.717195271109071, + "learning_rate": 1.4296859276557555e-06, + "loss": 0.07364730834960938, + "step": 156210 + }, + { + "epoch": 1.3507449135761904, + "grad_norm": 0.6108941301839396, + "learning_rate": 1.4295123376276044e-06, + "loss": 0.03809967041015625, + "step": 156215 + }, + { + "epoch": 1.3507881470977336, + "grad_norm": 0.4634816961501864, + "learning_rate": 1.429338754842402e-06, + "loss": 0.008055877685546876, + "step": 156220 + }, + { + "epoch": 1.3508313806192769, + "grad_norm": 10.11836027484657, + "learning_rate": 1.4291651793009517e-06, + "loss": 0.042274856567382814, + "step": 156225 + }, + { + "epoch": 1.35087461414082, + "grad_norm": 0.7138780465966997, + "learning_rate": 1.428991611004052e-06, + "loss": 0.015249252319335938, + "step": 156230 + }, + { + "epoch": 1.3509178476623636, + "grad_norm": 3.1104916682412393, + "learning_rate": 1.4288180499525048e-06, + "loss": 0.015629959106445313, + "step": 156235 + }, + { + "epoch": 1.3509610811839068, + "grad_norm": 2.02576008921516, + "learning_rate": 1.4286444961471102e-06, + "loss": 0.014938735961914062, + "step": 156240 + }, + { + "epoch": 1.35100431470545, + "grad_norm": 0.7318644678121804, + "learning_rate": 1.4284709495886683e-06, + "loss": 0.009070587158203126, + "step": 156245 + }, + { + "epoch": 1.3510475482269932, + "grad_norm": 1.0542383979909407, + "learning_rate": 1.4282974102779794e-06, + "loss": 0.04286956787109375, + "step": 156250 + }, + { + "epoch": 1.3510907817485365, + "grad_norm": 8.774782489248835, + "learning_rate": 1.4281238782158443e-06, + "loss": 0.05399761199951172, + "step": 156255 + }, + { + "epoch": 1.35113401527008, + "grad_norm": 50.32477701095313, + "learning_rate": 1.4279503534030629e-06, + "loss": 0.2510650634765625, + "step": 156260 + }, + { + "epoch": 1.3511772487916232, + "grad_norm": 101.51261912887706, + "learning_rate": 1.4277768358404342e-06, + "loss": 0.30936737060546876, + "step": 156265 + }, + { + "epoch": 1.3512204823131664, + "grad_norm": 5.111992157621282, + "learning_rate": 1.4276033255287612e-06, + "loss": 0.027533721923828126, + "step": 156270 + }, + { + "epoch": 1.3512637158347096, + "grad_norm": 11.461414737626228, + "learning_rate": 1.4274298224688425e-06, + "loss": 0.10140914916992187, + "step": 156275 + }, + { + "epoch": 1.3513069493562528, + "grad_norm": 5.592538217345249, + "learning_rate": 1.4272563266614775e-06, + "loss": 0.06583232879638672, + "step": 156280 + }, + { + "epoch": 1.351350182877796, + "grad_norm": 3.018978920470273, + "learning_rate": 1.4270828381074682e-06, + "loss": 0.01836700439453125, + "step": 156285 + }, + { + "epoch": 1.3513934163993393, + "grad_norm": 13.483002976315403, + "learning_rate": 1.426909356807614e-06, + "loss": 0.0728973388671875, + "step": 156290 + }, + { + "epoch": 1.3514366499208825, + "grad_norm": 0.3577904825840476, + "learning_rate": 1.426735882762714e-06, + "loss": 0.19634170532226564, + "step": 156295 + }, + { + "epoch": 1.351479883442426, + "grad_norm": 0.7226890311460595, + "learning_rate": 1.4265624159735697e-06, + "loss": 0.039997100830078125, + "step": 156300 + }, + { + "epoch": 1.3515231169639692, + "grad_norm": 19.96570781448831, + "learning_rate": 1.4263889564409808e-06, + "loss": 0.11593780517578126, + "step": 156305 + }, + { + "epoch": 1.3515663504855124, + "grad_norm": 1.7753025536149747, + "learning_rate": 1.4262155041657471e-06, + "loss": 0.020863151550292967, + "step": 156310 + }, + { + "epoch": 1.3516095840070557, + "grad_norm": 0.6418090836679353, + "learning_rate": 1.4260420591486685e-06, + "loss": 0.1269052505493164, + "step": 156315 + }, + { + "epoch": 1.351652817528599, + "grad_norm": 0.6634461939839068, + "learning_rate": 1.4258686213905448e-06, + "loss": 0.014154052734375, + "step": 156320 + }, + { + "epoch": 1.3516960510501423, + "grad_norm": 44.52838744650398, + "learning_rate": 1.4256951908921746e-06, + "loss": 0.12683486938476562, + "step": 156325 + }, + { + "epoch": 1.3517392845716856, + "grad_norm": 1.1265945183121109, + "learning_rate": 1.4255217676543603e-06, + "loss": 0.005716705322265625, + "step": 156330 + }, + { + "epoch": 1.3517825180932288, + "grad_norm": 17.365686655810336, + "learning_rate": 1.4253483516779005e-06, + "loss": 0.074920654296875, + "step": 156335 + }, + { + "epoch": 1.351825751614772, + "grad_norm": 1.2808366083573848, + "learning_rate": 1.4251749429635939e-06, + "loss": 0.0530792236328125, + "step": 156340 + }, + { + "epoch": 1.3518689851363153, + "grad_norm": 1.313118918138762, + "learning_rate": 1.4250015415122422e-06, + "loss": 0.13763427734375, + "step": 156345 + }, + { + "epoch": 1.3519122186578585, + "grad_norm": 17.324672140010662, + "learning_rate": 1.4248281473246445e-06, + "loss": 0.13486404418945314, + "step": 156350 + }, + { + "epoch": 1.3519554521794017, + "grad_norm": 4.856346371513083, + "learning_rate": 1.4246547604016e-06, + "loss": 0.020794677734375, + "step": 156355 + }, + { + "epoch": 1.351998685700945, + "grad_norm": 7.27765061433345, + "learning_rate": 1.4244813807439074e-06, + "loss": 0.044896697998046874, + "step": 156360 + }, + { + "epoch": 1.3520419192224884, + "grad_norm": 2.23358335507175, + "learning_rate": 1.4243080083523686e-06, + "loss": 0.03744163513183594, + "step": 156365 + }, + { + "epoch": 1.3520851527440316, + "grad_norm": 0.7428812520200881, + "learning_rate": 1.4241346432277822e-06, + "loss": 0.19693145751953126, + "step": 156370 + }, + { + "epoch": 1.3521283862655749, + "grad_norm": 22.938468820550476, + "learning_rate": 1.4239612853709476e-06, + "loss": 0.07464218139648438, + "step": 156375 + }, + { + "epoch": 1.352171619787118, + "grad_norm": 0.48769845073673634, + "learning_rate": 1.4237879347826643e-06, + "loss": 0.0935760498046875, + "step": 156380 + }, + { + "epoch": 1.3522148533086615, + "grad_norm": 0.7126486817430325, + "learning_rate": 1.4236145914637314e-06, + "loss": 0.04668712615966797, + "step": 156385 + }, + { + "epoch": 1.3522580868302048, + "grad_norm": 0.15650288972035017, + "learning_rate": 1.4234412554149477e-06, + "loss": 0.04797439575195313, + "step": 156390 + }, + { + "epoch": 1.352301320351748, + "grad_norm": 7.921682466949649, + "learning_rate": 1.423267926637114e-06, + "loss": 0.0425506591796875, + "step": 156395 + }, + { + "epoch": 1.3523445538732912, + "grad_norm": 6.027004384648272, + "learning_rate": 1.4230946051310303e-06, + "loss": 0.0213043212890625, + "step": 156400 + }, + { + "epoch": 1.3523877873948345, + "grad_norm": 0.021023453616194225, + "learning_rate": 1.4229212908974946e-06, + "loss": 0.02660083770751953, + "step": 156405 + }, + { + "epoch": 1.3524310209163777, + "grad_norm": 17.500989798308513, + "learning_rate": 1.4227479839373068e-06, + "loss": 0.04375267028808594, + "step": 156410 + }, + { + "epoch": 1.352474254437921, + "grad_norm": 1.0320945151538439, + "learning_rate": 1.422574684251266e-06, + "loss": 0.01358795166015625, + "step": 156415 + }, + { + "epoch": 1.3525174879594641, + "grad_norm": 1.4005430445770126, + "learning_rate": 1.42240139184017e-06, + "loss": 0.05147705078125, + "step": 156420 + }, + { + "epoch": 1.3525607214810076, + "grad_norm": 0.152333618515783, + "learning_rate": 1.4222281067048208e-06, + "loss": 0.012314987182617188, + "step": 156425 + }, + { + "epoch": 1.3526039550025508, + "grad_norm": 4.3374853373022635, + "learning_rate": 1.422054828846016e-06, + "loss": 0.04682235717773438, + "step": 156430 + }, + { + "epoch": 1.352647188524094, + "grad_norm": 0.5947476168843131, + "learning_rate": 1.421881558264555e-06, + "loss": 0.03673553466796875, + "step": 156435 + }, + { + "epoch": 1.3526904220456373, + "grad_norm": 4.039995933840361, + "learning_rate": 1.4217082949612366e-06, + "loss": 0.05064125061035156, + "step": 156440 + }, + { + "epoch": 1.3527336555671805, + "grad_norm": 26.36563474073407, + "learning_rate": 1.4215350389368594e-06, + "loss": 0.15758132934570312, + "step": 156445 + }, + { + "epoch": 1.352776889088724, + "grad_norm": 0.41137836692555657, + "learning_rate": 1.4213617901922235e-06, + "loss": 0.021062469482421874, + "step": 156450 + }, + { + "epoch": 1.3528201226102672, + "grad_norm": 19.60507456561361, + "learning_rate": 1.421188548728127e-06, + "loss": 0.05755691528320313, + "step": 156455 + }, + { + "epoch": 1.3528633561318104, + "grad_norm": 0.8104923725484493, + "learning_rate": 1.4210153145453705e-06, + "loss": 0.082379150390625, + "step": 156460 + }, + { + "epoch": 1.3529065896533536, + "grad_norm": 2.580912702059445, + "learning_rate": 1.4208420876447519e-06, + "loss": 0.022875213623046876, + "step": 156465 + }, + { + "epoch": 1.3529498231748969, + "grad_norm": 6.363399040952443, + "learning_rate": 1.4206688680270694e-06, + "loss": 0.0944127082824707, + "step": 156470 + }, + { + "epoch": 1.35299305669644, + "grad_norm": 38.66108586365609, + "learning_rate": 1.4204956556931233e-06, + "loss": 0.1041499137878418, + "step": 156475 + }, + { + "epoch": 1.3530362902179833, + "grad_norm": 4.4166740075295925, + "learning_rate": 1.4203224506437113e-06, + "loss": 0.09234466552734374, + "step": 156480 + }, + { + "epoch": 1.3530795237395266, + "grad_norm": 73.505577125458, + "learning_rate": 1.4201492528796315e-06, + "loss": 0.15580596923828124, + "step": 156485 + }, + { + "epoch": 1.35312275726107, + "grad_norm": 4.527427521713847, + "learning_rate": 1.419976062401685e-06, + "loss": 0.02730560302734375, + "step": 156490 + }, + { + "epoch": 1.3531659907826132, + "grad_norm": 3.5254800592835194, + "learning_rate": 1.419802879210669e-06, + "loss": 0.024057960510253905, + "step": 156495 + }, + { + "epoch": 1.3532092243041565, + "grad_norm": 0.03908390707434053, + "learning_rate": 1.419629703307383e-06, + "loss": 0.07185783386230468, + "step": 156500 + }, + { + "epoch": 1.3532524578256997, + "grad_norm": 3.2846309299226304, + "learning_rate": 1.4194565346926235e-06, + "loss": 0.08996429443359374, + "step": 156505 + }, + { + "epoch": 1.353295691347243, + "grad_norm": 8.788441579084484, + "learning_rate": 1.4192833733671924e-06, + "loss": 0.1167633056640625, + "step": 156510 + }, + { + "epoch": 1.3533389248687864, + "grad_norm": 18.59527332330989, + "learning_rate": 1.4191102193318864e-06, + "loss": 0.07008094787597656, + "step": 156515 + }, + { + "epoch": 1.3533821583903296, + "grad_norm": 0.534295103683512, + "learning_rate": 1.4189370725875032e-06, + "loss": 0.08908615112304688, + "step": 156520 + }, + { + "epoch": 1.3534253919118728, + "grad_norm": 27.020702609177913, + "learning_rate": 1.4187639331348438e-06, + "loss": 0.21623878479003905, + "step": 156525 + }, + { + "epoch": 1.353468625433416, + "grad_norm": 6.377752621877169, + "learning_rate": 1.4185908009747055e-06, + "loss": 0.058626556396484376, + "step": 156530 + }, + { + "epoch": 1.3535118589549593, + "grad_norm": 28.416084066753676, + "learning_rate": 1.4184176761078864e-06, + "loss": 0.07643508911132812, + "step": 156535 + }, + { + "epoch": 1.3535550924765025, + "grad_norm": 37.978218999602895, + "learning_rate": 1.4182445585351854e-06, + "loss": 0.06900539398193359, + "step": 156540 + }, + { + "epoch": 1.3535983259980457, + "grad_norm": 4.467416132260693, + "learning_rate": 1.4180714482574008e-06, + "loss": 0.030451583862304687, + "step": 156545 + }, + { + "epoch": 1.353641559519589, + "grad_norm": 10.801732000412363, + "learning_rate": 1.4178983452753294e-06, + "loss": 0.07705230712890625, + "step": 156550 + }, + { + "epoch": 1.3536847930411324, + "grad_norm": 0.43679682766210887, + "learning_rate": 1.4177252495897727e-06, + "loss": 0.14072723388671876, + "step": 156555 + }, + { + "epoch": 1.3537280265626757, + "grad_norm": 21.900544990798274, + "learning_rate": 1.4175521612015271e-06, + "loss": 0.1931488037109375, + "step": 156560 + }, + { + "epoch": 1.3537712600842189, + "grad_norm": 5.398613619718705, + "learning_rate": 1.41737908011139e-06, + "loss": 0.07624979019165039, + "step": 156565 + }, + { + "epoch": 1.3538144936057621, + "grad_norm": 0.9564221084251039, + "learning_rate": 1.4172060063201617e-06, + "loss": 0.02889108657836914, + "step": 156570 + }, + { + "epoch": 1.3538577271273056, + "grad_norm": 1.6231453129638322, + "learning_rate": 1.4170329398286395e-06, + "loss": 0.06462783813476562, + "step": 156575 + }, + { + "epoch": 1.3539009606488488, + "grad_norm": 0.9429522858053223, + "learning_rate": 1.4168598806376207e-06, + "loss": 0.07285690307617188, + "step": 156580 + }, + { + "epoch": 1.353944194170392, + "grad_norm": 0.07617088644008273, + "learning_rate": 1.4166868287479053e-06, + "loss": 0.14546279907226561, + "step": 156585 + }, + { + "epoch": 1.3539874276919353, + "grad_norm": 112.72128574091951, + "learning_rate": 1.4165137841602901e-06, + "loss": 0.2412811279296875, + "step": 156590 + }, + { + "epoch": 1.3540306612134785, + "grad_norm": 0.5811473861123058, + "learning_rate": 1.4163407468755732e-06, + "loss": 0.1019287109375, + "step": 156595 + }, + { + "epoch": 1.3540738947350217, + "grad_norm": 0.5072456803197478, + "learning_rate": 1.4161677168945533e-06, + "loss": 0.026844024658203125, + "step": 156600 + }, + { + "epoch": 1.354117128256565, + "grad_norm": 28.76627095086047, + "learning_rate": 1.4159946942180279e-06, + "loss": 0.39221649169921874, + "step": 156605 + }, + { + "epoch": 1.3541603617781082, + "grad_norm": 9.648517652201635, + "learning_rate": 1.4158216788467946e-06, + "loss": 0.0627471923828125, + "step": 156610 + }, + { + "epoch": 1.3542035952996516, + "grad_norm": 0.33357497794511076, + "learning_rate": 1.4156486707816509e-06, + "loss": 0.03920822143554688, + "step": 156615 + }, + { + "epoch": 1.3542468288211948, + "grad_norm": 0.9511842688907053, + "learning_rate": 1.4154756700233956e-06, + "loss": 0.11006813049316407, + "step": 156620 + }, + { + "epoch": 1.354290062342738, + "grad_norm": 0.8645859944110287, + "learning_rate": 1.4153026765728274e-06, + "loss": 0.064404296875, + "step": 156625 + }, + { + "epoch": 1.3543332958642813, + "grad_norm": 15.251080779916608, + "learning_rate": 1.4151296904307433e-06, + "loss": 0.064239501953125, + "step": 156630 + }, + { + "epoch": 1.3543765293858245, + "grad_norm": 0.9475532048994819, + "learning_rate": 1.414956711597941e-06, + "loss": 0.2152679443359375, + "step": 156635 + }, + { + "epoch": 1.354419762907368, + "grad_norm": 13.41331808127437, + "learning_rate": 1.4147837400752185e-06, + "loss": 0.04793472290039062, + "step": 156640 + }, + { + "epoch": 1.3544629964289112, + "grad_norm": 6.961223140546635, + "learning_rate": 1.414610775863372e-06, + "loss": 0.05361480712890625, + "step": 156645 + }, + { + "epoch": 1.3545062299504544, + "grad_norm": 3.689173852080939, + "learning_rate": 1.414437818963202e-06, + "loss": 0.01812744140625, + "step": 156650 + }, + { + "epoch": 1.3545494634719977, + "grad_norm": 6.206696527722843, + "learning_rate": 1.4142648693755044e-06, + "loss": 0.07816162109375, + "step": 156655 + }, + { + "epoch": 1.354592696993541, + "grad_norm": 0.8711266019176623, + "learning_rate": 1.414091927101077e-06, + "loss": 0.10722427368164063, + "step": 156660 + }, + { + "epoch": 1.3546359305150841, + "grad_norm": 1.494032247842764, + "learning_rate": 1.4139189921407176e-06, + "loss": 0.0248321533203125, + "step": 156665 + }, + { + "epoch": 1.3546791640366274, + "grad_norm": 9.197018573748538, + "learning_rate": 1.4137460644952224e-06, + "loss": 0.0697662353515625, + "step": 156670 + }, + { + "epoch": 1.3547223975581706, + "grad_norm": 6.216319789261038, + "learning_rate": 1.4135731441653918e-06, + "loss": 0.10181961059570313, + "step": 156675 + }, + { + "epoch": 1.354765631079714, + "grad_norm": 9.18537955989359, + "learning_rate": 1.4134002311520202e-06, + "loss": 0.04058914184570313, + "step": 156680 + }, + { + "epoch": 1.3548088646012573, + "grad_norm": 28.027919962193412, + "learning_rate": 1.4132273254559076e-06, + "loss": 0.05154972076416016, + "step": 156685 + }, + { + "epoch": 1.3548520981228005, + "grad_norm": 1.4646720622658076, + "learning_rate": 1.4130544270778502e-06, + "loss": 0.05284194946289063, + "step": 156690 + }, + { + "epoch": 1.3548953316443437, + "grad_norm": 0.6214752427298879, + "learning_rate": 1.4128815360186457e-06, + "loss": 0.043242263793945315, + "step": 156695 + }, + { + "epoch": 1.354938565165887, + "grad_norm": 0.49146557243654587, + "learning_rate": 1.4127086522790915e-06, + "loss": 0.01693115234375, + "step": 156700 + }, + { + "epoch": 1.3549817986874304, + "grad_norm": 17.989418121483777, + "learning_rate": 1.4125357758599848e-06, + "loss": 0.05504341125488281, + "step": 156705 + }, + { + "epoch": 1.3550250322089736, + "grad_norm": 0.9150774460928535, + "learning_rate": 1.4123629067621214e-06, + "loss": 0.056468963623046875, + "step": 156710 + }, + { + "epoch": 1.3550682657305169, + "grad_norm": 11.720146926049377, + "learning_rate": 1.4121900449863014e-06, + "loss": 0.12618637084960938, + "step": 156715 + }, + { + "epoch": 1.35511149925206, + "grad_norm": 0.5699876359134014, + "learning_rate": 1.4120171905333206e-06, + "loss": 0.008292770385742188, + "step": 156720 + }, + { + "epoch": 1.3551547327736033, + "grad_norm": 0.22147739901871902, + "learning_rate": 1.4118443434039758e-06, + "loss": 0.022239112854003908, + "step": 156725 + }, + { + "epoch": 1.3551979662951465, + "grad_norm": 14.519592662670082, + "learning_rate": 1.4116715035990639e-06, + "loss": 0.17014312744140625, + "step": 156730 + }, + { + "epoch": 1.3552411998166898, + "grad_norm": 2.126180694519004, + "learning_rate": 1.4114986711193835e-06, + "loss": 0.019980621337890626, + "step": 156735 + }, + { + "epoch": 1.355284433338233, + "grad_norm": 0.1426499325774764, + "learning_rate": 1.4113258459657298e-06, + "loss": 0.18765716552734374, + "step": 156740 + }, + { + "epoch": 1.3553276668597765, + "grad_norm": 6.398663752357912, + "learning_rate": 1.4111530281389022e-06, + "loss": 0.15601844787597657, + "step": 156745 + }, + { + "epoch": 1.3553709003813197, + "grad_norm": 10.991935549801463, + "learning_rate": 1.4109802176396963e-06, + "loss": 0.0283782958984375, + "step": 156750 + }, + { + "epoch": 1.355414133902863, + "grad_norm": 6.420444102259172, + "learning_rate": 1.4108074144689093e-06, + "loss": 0.0382537841796875, + "step": 156755 + }, + { + "epoch": 1.3554573674244061, + "grad_norm": 1.4542456665190897, + "learning_rate": 1.410634618627338e-06, + "loss": 0.02892723083496094, + "step": 156760 + }, + { + "epoch": 1.3555006009459494, + "grad_norm": 4.929304271406574, + "learning_rate": 1.4104618301157796e-06, + "loss": 0.028566741943359376, + "step": 156765 + }, + { + "epoch": 1.3555438344674928, + "grad_norm": 0.5464597739965933, + "learning_rate": 1.4102890489350295e-06, + "loss": 0.1414276123046875, + "step": 156770 + }, + { + "epoch": 1.355587067989036, + "grad_norm": 0.41009106832487696, + "learning_rate": 1.410116275085887e-06, + "loss": 0.06279449462890625, + "step": 156775 + }, + { + "epoch": 1.3556303015105793, + "grad_norm": 0.9898950618642689, + "learning_rate": 1.4099435085691474e-06, + "loss": 0.021640777587890625, + "step": 156780 + }, + { + "epoch": 1.3556735350321225, + "grad_norm": 7.31486564038768, + "learning_rate": 1.4097707493856083e-06, + "loss": 0.03290290832519531, + "step": 156785 + }, + { + "epoch": 1.3557167685536657, + "grad_norm": 0.18263395075327946, + "learning_rate": 1.4095979975360648e-06, + "loss": 0.287553596496582, + "step": 156790 + }, + { + "epoch": 1.355760002075209, + "grad_norm": 2.1201720453226187, + "learning_rate": 1.4094252530213158e-06, + "loss": 0.023590087890625, + "step": 156795 + }, + { + "epoch": 1.3558032355967522, + "grad_norm": 5.715476810000515, + "learning_rate": 1.409252515842157e-06, + "loss": 0.07593345642089844, + "step": 156800 + }, + { + "epoch": 1.3558464691182954, + "grad_norm": 0.2913243161428976, + "learning_rate": 1.4090797859993841e-06, + "loss": 0.010237312316894532, + "step": 156805 + }, + { + "epoch": 1.3558897026398389, + "grad_norm": 0.13609146482993623, + "learning_rate": 1.4089070634937955e-06, + "loss": 0.03696155548095703, + "step": 156810 + }, + { + "epoch": 1.355932936161382, + "grad_norm": 2.003886130062237, + "learning_rate": 1.4087343483261871e-06, + "loss": 0.06438560485839843, + "step": 156815 + }, + { + "epoch": 1.3559761696829253, + "grad_norm": 37.58397699609673, + "learning_rate": 1.4085616404973554e-06, + "loss": 0.056125640869140625, + "step": 156820 + }, + { + "epoch": 1.3560194032044686, + "grad_norm": 0.5661909215386929, + "learning_rate": 1.4083889400080964e-06, + "loss": 0.06385955810546876, + "step": 156825 + }, + { + "epoch": 1.356062636726012, + "grad_norm": 63.00167025179402, + "learning_rate": 1.4082162468592075e-06, + "loss": 0.022253799438476562, + "step": 156830 + }, + { + "epoch": 1.3561058702475552, + "grad_norm": 1.4433027751820702, + "learning_rate": 1.4080435610514831e-06, + "loss": 0.03650035858154297, + "step": 156835 + }, + { + "epoch": 1.3561491037690985, + "grad_norm": 17.42123409490907, + "learning_rate": 1.4078708825857226e-06, + "loss": 0.09466514587402344, + "step": 156840 + }, + { + "epoch": 1.3561923372906417, + "grad_norm": 14.588277170570027, + "learning_rate": 1.4076982114627193e-06, + "loss": 0.03411178588867188, + "step": 156845 + }, + { + "epoch": 1.356235570812185, + "grad_norm": 1.9751372133483627, + "learning_rate": 1.4075255476832724e-06, + "loss": 0.06951990127563476, + "step": 156850 + }, + { + "epoch": 1.3562788043337282, + "grad_norm": 0.1856538745464009, + "learning_rate": 1.407352891248177e-06, + "loss": 0.04478530883789063, + "step": 156855 + }, + { + "epoch": 1.3563220378552714, + "grad_norm": 6.896164780735114, + "learning_rate": 1.4071802421582295e-06, + "loss": 0.0358123779296875, + "step": 156860 + }, + { + "epoch": 1.3563652713768146, + "grad_norm": 0.4835013166103519, + "learning_rate": 1.4070076004142259e-06, + "loss": 0.038514328002929685, + "step": 156865 + }, + { + "epoch": 1.356408504898358, + "grad_norm": 0.13925847735478417, + "learning_rate": 1.4068349660169614e-06, + "loss": 0.08549919128417968, + "step": 156870 + }, + { + "epoch": 1.3564517384199013, + "grad_norm": 5.376581797760437, + "learning_rate": 1.4066623389672343e-06, + "loss": 0.0429840087890625, + "step": 156875 + }, + { + "epoch": 1.3564949719414445, + "grad_norm": 0.24509714009186537, + "learning_rate": 1.4064897192658397e-06, + "loss": 0.008436965942382812, + "step": 156880 + }, + { + "epoch": 1.3565382054629878, + "grad_norm": 2.440660776746134, + "learning_rate": 1.4063171069135737e-06, + "loss": 0.033596038818359375, + "step": 156885 + }, + { + "epoch": 1.356581438984531, + "grad_norm": 4.3289083497620044, + "learning_rate": 1.4061445019112325e-06, + "loss": 0.027765274047851562, + "step": 156890 + }, + { + "epoch": 1.3566246725060744, + "grad_norm": 0.08527441722903545, + "learning_rate": 1.4059719042596108e-06, + "loss": 0.006499862670898438, + "step": 156895 + }, + { + "epoch": 1.3566679060276177, + "grad_norm": 20.052722030738753, + "learning_rate": 1.405799313959507e-06, + "loss": 0.19648056030273436, + "step": 156900 + }, + { + "epoch": 1.356711139549161, + "grad_norm": 3.0365934383396107, + "learning_rate": 1.405626731011715e-06, + "loss": 0.0835235595703125, + "step": 156905 + }, + { + "epoch": 1.3567543730707041, + "grad_norm": 0.42431802477958136, + "learning_rate": 1.4054541554170322e-06, + "loss": 0.00798969268798828, + "step": 156910 + }, + { + "epoch": 1.3567976065922474, + "grad_norm": 0.4375228460574905, + "learning_rate": 1.4052815871762544e-06, + "loss": 0.030194091796875, + "step": 156915 + }, + { + "epoch": 1.3568408401137906, + "grad_norm": 12.350192715773092, + "learning_rate": 1.4051090262901768e-06, + "loss": 0.05306797027587891, + "step": 156920 + }, + { + "epoch": 1.3568840736353338, + "grad_norm": 2.0353787365813383, + "learning_rate": 1.4049364727595957e-06, + "loss": 0.08223342895507812, + "step": 156925 + }, + { + "epoch": 1.356927307156877, + "grad_norm": 2.5609239679583338, + "learning_rate": 1.4047639265853052e-06, + "loss": 0.17188549041748047, + "step": 156930 + }, + { + "epoch": 1.3569705406784205, + "grad_norm": 2.3336163755053003, + "learning_rate": 1.404591387768104e-06, + "loss": 0.018389511108398437, + "step": 156935 + }, + { + "epoch": 1.3570137741999637, + "grad_norm": 0.4108720929786738, + "learning_rate": 1.404418856308786e-06, + "loss": 0.021503448486328125, + "step": 156940 + }, + { + "epoch": 1.357057007721507, + "grad_norm": 19.158877136209988, + "learning_rate": 1.4042463322081473e-06, + "loss": 0.04937744140625, + "step": 156945 + }, + { + "epoch": 1.3571002412430502, + "grad_norm": 3.6574810854684543, + "learning_rate": 1.4040738154669837e-06, + "loss": 0.03221893310546875, + "step": 156950 + }, + { + "epoch": 1.3571434747645934, + "grad_norm": 7.363274279275321, + "learning_rate": 1.4039013060860891e-06, + "loss": 0.03616905212402344, + "step": 156955 + }, + { + "epoch": 1.3571867082861369, + "grad_norm": 1.2288330075699416, + "learning_rate": 1.4037288040662622e-06, + "loss": 0.04056625366210938, + "step": 156960 + }, + { + "epoch": 1.35722994180768, + "grad_norm": 13.103653031197474, + "learning_rate": 1.4035563094082957e-06, + "loss": 0.14533462524414062, + "step": 156965 + }, + { + "epoch": 1.3572731753292233, + "grad_norm": 0.43453255634224003, + "learning_rate": 1.4033838221129876e-06, + "loss": 0.045223236083984375, + "step": 156970 + }, + { + "epoch": 1.3573164088507665, + "grad_norm": 5.112503254573854, + "learning_rate": 1.4032113421811323e-06, + "loss": 0.024484825134277344, + "step": 156975 + }, + { + "epoch": 1.3573596423723098, + "grad_norm": 3.7039667085644976, + "learning_rate": 1.4030388696135253e-06, + "loss": 0.01149444580078125, + "step": 156980 + }, + { + "epoch": 1.357402875893853, + "grad_norm": 62.18763255902824, + "learning_rate": 1.4028664044109618e-06, + "loss": 0.20282459259033203, + "step": 156985 + }, + { + "epoch": 1.3574461094153962, + "grad_norm": 30.55001985255835, + "learning_rate": 1.402693946574237e-06, + "loss": 0.06386947631835938, + "step": 156990 + }, + { + "epoch": 1.3574893429369395, + "grad_norm": 0.5730814278886216, + "learning_rate": 1.4025214961041458e-06, + "loss": 0.04558486938476562, + "step": 156995 + }, + { + "epoch": 1.357532576458483, + "grad_norm": 1.998921140148945, + "learning_rate": 1.4023490530014856e-06, + "loss": 0.023646736145019533, + "step": 157000 + }, + { + "epoch": 1.3575758099800261, + "grad_norm": 1.5617657381114645, + "learning_rate": 1.4021766172670499e-06, + "loss": 0.0407989501953125, + "step": 157005 + }, + { + "epoch": 1.3576190435015694, + "grad_norm": 13.160636870971697, + "learning_rate": 1.4020041889016346e-06, + "loss": 0.10374908447265625, + "step": 157010 + }, + { + "epoch": 1.3576622770231126, + "grad_norm": 3.732255701184321, + "learning_rate": 1.4018317679060339e-06, + "loss": 0.049333953857421876, + "step": 157015 + }, + { + "epoch": 1.3577055105446558, + "grad_norm": 0.9021828126517567, + "learning_rate": 1.401659354281045e-06, + "loss": 0.01346282958984375, + "step": 157020 + }, + { + "epoch": 1.3577487440661993, + "grad_norm": 0.6468688390326959, + "learning_rate": 1.4014869480274607e-06, + "loss": 0.06809539794921875, + "step": 157025 + }, + { + "epoch": 1.3577919775877425, + "grad_norm": 0.7937222493733921, + "learning_rate": 1.4013145491460784e-06, + "loss": 0.00480499267578125, + "step": 157030 + }, + { + "epoch": 1.3578352111092857, + "grad_norm": 23.95806465317631, + "learning_rate": 1.401142157637692e-06, + "loss": 0.06307754516601563, + "step": 157035 + }, + { + "epoch": 1.357878444630829, + "grad_norm": 0.2932502007890648, + "learning_rate": 1.4009697735030973e-06, + "loss": 0.007022857666015625, + "step": 157040 + }, + { + "epoch": 1.3579216781523722, + "grad_norm": 117.26780311660748, + "learning_rate": 1.400797396743088e-06, + "loss": 0.0771575927734375, + "step": 157045 + }, + { + "epoch": 1.3579649116739154, + "grad_norm": 1.153416602710059, + "learning_rate": 1.40062502735846e-06, + "loss": 0.044859695434570315, + "step": 157050 + }, + { + "epoch": 1.3580081451954586, + "grad_norm": 3.5830402144913776, + "learning_rate": 1.4004526653500083e-06, + "loss": 0.03412322998046875, + "step": 157055 + }, + { + "epoch": 1.3580513787170019, + "grad_norm": 13.697086025288574, + "learning_rate": 1.400280310718526e-06, + "loss": 0.050449371337890625, + "step": 157060 + }, + { + "epoch": 1.3580946122385453, + "grad_norm": 9.274209555882697, + "learning_rate": 1.4001079634648114e-06, + "loss": 0.113421630859375, + "step": 157065 + }, + { + "epoch": 1.3581378457600886, + "grad_norm": 0.3160207764463575, + "learning_rate": 1.3999356235896555e-06, + "loss": 0.07327346801757813, + "step": 157070 + }, + { + "epoch": 1.3581810792816318, + "grad_norm": 3.829228114051776, + "learning_rate": 1.3997632910938568e-06, + "loss": 0.056249237060546874, + "step": 157075 + }, + { + "epoch": 1.358224312803175, + "grad_norm": 7.762272489895046, + "learning_rate": 1.3995909659782086e-06, + "loss": 0.03199615478515625, + "step": 157080 + }, + { + "epoch": 1.3582675463247185, + "grad_norm": 1.5002139746596799, + "learning_rate": 1.3994186482435048e-06, + "loss": 0.01232757568359375, + "step": 157085 + }, + { + "epoch": 1.3583107798462617, + "grad_norm": 8.5118449148085, + "learning_rate": 1.39924633789054e-06, + "loss": 0.08828277587890625, + "step": 157090 + }, + { + "epoch": 1.358354013367805, + "grad_norm": 6.850982380278806, + "learning_rate": 1.3990740349201106e-06, + "loss": 0.0252410888671875, + "step": 157095 + }, + { + "epoch": 1.3583972468893482, + "grad_norm": 15.733321180498939, + "learning_rate": 1.3989017393330105e-06, + "loss": 0.04411106109619141, + "step": 157100 + }, + { + "epoch": 1.3584404804108914, + "grad_norm": 21.380458815973945, + "learning_rate": 1.3987294511300342e-06, + "loss": 0.20155181884765624, + "step": 157105 + }, + { + "epoch": 1.3584837139324346, + "grad_norm": 3.7092004567939223, + "learning_rate": 1.3985571703119761e-06, + "loss": 0.05424957275390625, + "step": 157110 + }, + { + "epoch": 1.3585269474539778, + "grad_norm": 10.726079140548668, + "learning_rate": 1.3983848968796305e-06, + "loss": 0.029289627075195314, + "step": 157115 + }, + { + "epoch": 1.358570180975521, + "grad_norm": 3.3073275160894116, + "learning_rate": 1.3982126308337914e-06, + "loss": 0.03963623046875, + "step": 157120 + }, + { + "epoch": 1.3586134144970645, + "grad_norm": 1.1375811271505913, + "learning_rate": 1.398040372175255e-06, + "loss": 0.03126029968261719, + "step": 157125 + }, + { + "epoch": 1.3586566480186077, + "grad_norm": 27.03535942614486, + "learning_rate": 1.3978681209048143e-06, + "loss": 0.06309051513671875, + "step": 157130 + }, + { + "epoch": 1.358699881540151, + "grad_norm": 0.15895173974139046, + "learning_rate": 1.397695877023265e-06, + "loss": 0.3937530517578125, + "step": 157135 + }, + { + "epoch": 1.3587431150616942, + "grad_norm": 6.16866075378716, + "learning_rate": 1.3975236405314005e-06, + "loss": 0.074261474609375, + "step": 157140 + }, + { + "epoch": 1.3587863485832374, + "grad_norm": 1.1371487214602993, + "learning_rate": 1.3973514114300155e-06, + "loss": 0.031678962707519534, + "step": 157145 + }, + { + "epoch": 1.3588295821047809, + "grad_norm": 6.4993803084891395, + "learning_rate": 1.3971791897199045e-06, + "loss": 0.016182327270507814, + "step": 157150 + }, + { + "epoch": 1.3588728156263241, + "grad_norm": 2.707937632439858, + "learning_rate": 1.39700697540186e-06, + "loss": 0.054656600952148436, + "step": 157155 + }, + { + "epoch": 1.3589160491478673, + "grad_norm": 0.012207506202885455, + "learning_rate": 1.396834768476679e-06, + "loss": 0.008085918426513673, + "step": 157160 + }, + { + "epoch": 1.3589592826694106, + "grad_norm": 0.7619402236978561, + "learning_rate": 1.3966625689451544e-06, + "loss": 0.03720550537109375, + "step": 157165 + }, + { + "epoch": 1.3590025161909538, + "grad_norm": 14.93113607438345, + "learning_rate": 1.3964903768080801e-06, + "loss": 0.05225296020507812, + "step": 157170 + }, + { + "epoch": 1.359045749712497, + "grad_norm": 0.6162091527070833, + "learning_rate": 1.3963181920662505e-06, + "loss": 0.2757392883300781, + "step": 157175 + }, + { + "epoch": 1.3590889832340403, + "grad_norm": 20.5525279901679, + "learning_rate": 1.3961460147204588e-06, + "loss": 0.05630035400390625, + "step": 157180 + }, + { + "epoch": 1.3591322167555835, + "grad_norm": 4.474136547370204, + "learning_rate": 1.3959738447715012e-06, + "loss": 0.07784576416015625, + "step": 157185 + }, + { + "epoch": 1.359175450277127, + "grad_norm": 12.702933581788917, + "learning_rate": 1.395801682220169e-06, + "loss": 0.0858978271484375, + "step": 157190 + }, + { + "epoch": 1.3592186837986702, + "grad_norm": 4.492713837933464, + "learning_rate": 1.3956295270672592e-06, + "loss": 0.32870941162109374, + "step": 157195 + }, + { + "epoch": 1.3592619173202134, + "grad_norm": 22.224464914804013, + "learning_rate": 1.395457379313564e-06, + "loss": 0.10921707153320312, + "step": 157200 + }, + { + "epoch": 1.3593051508417566, + "grad_norm": 0.6450801662502618, + "learning_rate": 1.3952852389598777e-06, + "loss": 0.05809974670410156, + "step": 157205 + }, + { + "epoch": 1.3593483843632999, + "grad_norm": 4.7872099573476365, + "learning_rate": 1.3951131060069938e-06, + "loss": 0.05695610046386719, + "step": 157210 + }, + { + "epoch": 1.3593916178848433, + "grad_norm": 5.760650054432404, + "learning_rate": 1.3949409804557069e-06, + "loss": 0.0544830322265625, + "step": 157215 + }, + { + "epoch": 1.3594348514063865, + "grad_norm": 13.32028995544274, + "learning_rate": 1.3947688623068092e-06, + "loss": 0.05243988037109375, + "step": 157220 + }, + { + "epoch": 1.3594780849279298, + "grad_norm": 59.06184395063685, + "learning_rate": 1.3945967515610964e-06, + "loss": 0.13175201416015625, + "step": 157225 + }, + { + "epoch": 1.359521318449473, + "grad_norm": 1.3656351860097513, + "learning_rate": 1.3944246482193618e-06, + "loss": 0.016565704345703126, + "step": 157230 + }, + { + "epoch": 1.3595645519710162, + "grad_norm": 1.2185075247534183, + "learning_rate": 1.3942525522823989e-06, + "loss": 0.275274658203125, + "step": 157235 + }, + { + "epoch": 1.3596077854925595, + "grad_norm": 0.3023480254073363, + "learning_rate": 1.3940804637509998e-06, + "loss": 0.060002899169921874, + "step": 157240 + }, + { + "epoch": 1.3596510190141027, + "grad_norm": 1.696515524604246, + "learning_rate": 1.3939083826259612e-06, + "loss": 0.0798126220703125, + "step": 157245 + }, + { + "epoch": 1.359694252535646, + "grad_norm": 0.5647603053968763, + "learning_rate": 1.3937363089080743e-06, + "loss": 0.13186187744140626, + "step": 157250 + }, + { + "epoch": 1.3597374860571894, + "grad_norm": 0.32955841868709074, + "learning_rate": 1.3935642425981344e-06, + "loss": 0.051720046997070314, + "step": 157255 + }, + { + "epoch": 1.3597807195787326, + "grad_norm": 17.723315815327343, + "learning_rate": 1.3933921836969343e-06, + "loss": 0.04311676025390625, + "step": 157260 + }, + { + "epoch": 1.3598239531002758, + "grad_norm": 0.06168095055097102, + "learning_rate": 1.3932201322052672e-06, + "loss": 0.19025650024414062, + "step": 157265 + }, + { + "epoch": 1.359867186621819, + "grad_norm": 5.158073602899367, + "learning_rate": 1.3930480881239273e-06, + "loss": 0.050739097595214847, + "step": 157270 + }, + { + "epoch": 1.3599104201433623, + "grad_norm": 26.774335694462717, + "learning_rate": 1.3928760514537078e-06, + "loss": 0.2742462158203125, + "step": 157275 + }, + { + "epoch": 1.3599536536649057, + "grad_norm": 4.958495672877267, + "learning_rate": 1.3927040221954003e-06, + "loss": 0.2827434539794922, + "step": 157280 + }, + { + "epoch": 1.359996887186449, + "grad_norm": 2.9922759848268483, + "learning_rate": 1.3925320003498011e-06, + "loss": 0.104022216796875, + "step": 157285 + }, + { + "epoch": 1.3600401207079922, + "grad_norm": 1.000480099837662, + "learning_rate": 1.3923599859177026e-06, + "loss": 0.10476341247558593, + "step": 157290 + }, + { + "epoch": 1.3600833542295354, + "grad_norm": 0.7160472909296346, + "learning_rate": 1.3921879788998963e-06, + "loss": 0.073150634765625, + "step": 157295 + }, + { + "epoch": 1.3601265877510786, + "grad_norm": 3.818715658055302, + "learning_rate": 1.3920159792971784e-06, + "loss": 0.04156646728515625, + "step": 157300 + }, + { + "epoch": 1.3601698212726219, + "grad_norm": 0.37615963017465714, + "learning_rate": 1.3918439871103404e-06, + "loss": 0.03946342468261719, + "step": 157305 + }, + { + "epoch": 1.360213054794165, + "grad_norm": 12.127578360238111, + "learning_rate": 1.391672002340176e-06, + "loss": 0.11124420166015625, + "step": 157310 + }, + { + "epoch": 1.3602562883157086, + "grad_norm": 5.0245090018439456, + "learning_rate": 1.3915000249874772e-06, + "loss": 0.0399017333984375, + "step": 157315 + }, + { + "epoch": 1.3602995218372518, + "grad_norm": 7.801119363099413, + "learning_rate": 1.391328055053039e-06, + "loss": 0.11884880065917969, + "step": 157320 + }, + { + "epoch": 1.360342755358795, + "grad_norm": 5.967406377118007, + "learning_rate": 1.3911560925376538e-06, + "loss": 0.052534866333007815, + "step": 157325 + }, + { + "epoch": 1.3603859888803382, + "grad_norm": 2.468652800820133, + "learning_rate": 1.3909841374421144e-06, + "loss": 0.04042129516601563, + "step": 157330 + }, + { + "epoch": 1.3604292224018815, + "grad_norm": 3.975210782464851, + "learning_rate": 1.3908121897672144e-06, + "loss": 0.11008071899414062, + "step": 157335 + }, + { + "epoch": 1.360472455923425, + "grad_norm": 1.8165735240358685, + "learning_rate": 1.390640249513746e-06, + "loss": 0.010822296142578125, + "step": 157340 + }, + { + "epoch": 1.3605156894449681, + "grad_norm": 0.48373410076678647, + "learning_rate": 1.3904683166825016e-06, + "loss": 0.0499176025390625, + "step": 157345 + }, + { + "epoch": 1.3605589229665114, + "grad_norm": 1.580348662444265, + "learning_rate": 1.3902963912742762e-06, + "loss": 0.0275054931640625, + "step": 157350 + }, + { + "epoch": 1.3606021564880546, + "grad_norm": 10.285954594697643, + "learning_rate": 1.3901244732898604e-06, + "loss": 0.027581024169921874, + "step": 157355 + }, + { + "epoch": 1.3606453900095978, + "grad_norm": 1.696336273919987, + "learning_rate": 1.3899525627300498e-06, + "loss": 0.030309677124023438, + "step": 157360 + }, + { + "epoch": 1.360688623531141, + "grad_norm": 0.11612609120969809, + "learning_rate": 1.3897806595956351e-06, + "loss": 0.025732421875, + "step": 157365 + }, + { + "epoch": 1.3607318570526843, + "grad_norm": 1.3386444491050102, + "learning_rate": 1.38960876388741e-06, + "loss": 0.017439651489257812, + "step": 157370 + }, + { + "epoch": 1.3607750905742275, + "grad_norm": 4.611534112975828, + "learning_rate": 1.3894368756061658e-06, + "loss": 0.037401580810546876, + "step": 157375 + }, + { + "epoch": 1.360818324095771, + "grad_norm": 2.30748983038104, + "learning_rate": 1.3892649947526977e-06, + "loss": 0.07128562927246093, + "step": 157380 + }, + { + "epoch": 1.3608615576173142, + "grad_norm": 5.056330737556388, + "learning_rate": 1.389093121327797e-06, + "loss": 0.07474784851074219, + "step": 157385 + }, + { + "epoch": 1.3609047911388574, + "grad_norm": 14.90221115703736, + "learning_rate": 1.3889212553322566e-06, + "loss": 0.1910778045654297, + "step": 157390 + }, + { + "epoch": 1.3609480246604007, + "grad_norm": 0.1870539049217402, + "learning_rate": 1.3887493967668689e-06, + "loss": 0.16163387298583984, + "step": 157395 + }, + { + "epoch": 1.3609912581819439, + "grad_norm": 5.355997973235915, + "learning_rate": 1.3885775456324267e-06, + "loss": 0.028863906860351562, + "step": 157400 + }, + { + "epoch": 1.3610344917034873, + "grad_norm": 2.7552012769766847, + "learning_rate": 1.3884057019297213e-06, + "loss": 0.105670166015625, + "step": 157405 + }, + { + "epoch": 1.3610777252250306, + "grad_norm": 4.374404061858212, + "learning_rate": 1.3882338656595477e-06, + "loss": 0.04835186004638672, + "step": 157410 + }, + { + "epoch": 1.3611209587465738, + "grad_norm": 1.3738833503244474, + "learning_rate": 1.3880620368226956e-06, + "loss": 0.01099700927734375, + "step": 157415 + }, + { + "epoch": 1.361164192268117, + "grad_norm": 8.842372683065658, + "learning_rate": 1.3878902154199603e-06, + "loss": 0.11256561279296876, + "step": 157420 + }, + { + "epoch": 1.3612074257896603, + "grad_norm": 1.165725380911932, + "learning_rate": 1.3877184014521329e-06, + "loss": 0.11601486206054687, + "step": 157425 + }, + { + "epoch": 1.3612506593112035, + "grad_norm": 1.57288147037892, + "learning_rate": 1.3875465949200056e-06, + "loss": 0.008576202392578124, + "step": 157430 + }, + { + "epoch": 1.3612938928327467, + "grad_norm": 0.5901091052749283, + "learning_rate": 1.3873747958243708e-06, + "loss": 0.0199005126953125, + "step": 157435 + }, + { + "epoch": 1.36133712635429, + "grad_norm": 11.419543484519384, + "learning_rate": 1.3872030041660205e-06, + "loss": 0.030029296875, + "step": 157440 + }, + { + "epoch": 1.3613803598758334, + "grad_norm": 7.711299351086576, + "learning_rate": 1.3870312199457477e-06, + "loss": 0.0295989990234375, + "step": 157445 + }, + { + "epoch": 1.3614235933973766, + "grad_norm": 15.525277514232265, + "learning_rate": 1.3868594431643449e-06, + "loss": 0.05389556884765625, + "step": 157450 + }, + { + "epoch": 1.3614668269189198, + "grad_norm": 13.66437834485074, + "learning_rate": 1.3866876738226037e-06, + "loss": 0.0596588134765625, + "step": 157455 + }, + { + "epoch": 1.361510060440463, + "grad_norm": 11.497714114171687, + "learning_rate": 1.3865159119213165e-06, + "loss": 0.13172607421875, + "step": 157460 + }, + { + "epoch": 1.3615532939620063, + "grad_norm": 3.832725506155735, + "learning_rate": 1.386344157461274e-06, + "loss": 0.03236713409423828, + "step": 157465 + }, + { + "epoch": 1.3615965274835498, + "grad_norm": 30.803813028183047, + "learning_rate": 1.386172410443271e-06, + "loss": 0.06912384033203126, + "step": 157470 + }, + { + "epoch": 1.361639761005093, + "grad_norm": 1.2153986816226212, + "learning_rate": 1.386000670868097e-06, + "loss": 0.08626174926757812, + "step": 157475 + }, + { + "epoch": 1.3616829945266362, + "grad_norm": 0.8219990028138768, + "learning_rate": 1.3858289387365467e-06, + "loss": 0.02758941650390625, + "step": 157480 + }, + { + "epoch": 1.3617262280481794, + "grad_norm": 2.1149363963822245, + "learning_rate": 1.3856572140494101e-06, + "loss": 0.04078598022460937, + "step": 157485 + }, + { + "epoch": 1.3617694615697227, + "grad_norm": 4.095228993693015, + "learning_rate": 1.3854854968074803e-06, + "loss": 0.1513885498046875, + "step": 157490 + }, + { + "epoch": 1.361812695091266, + "grad_norm": 15.697105281887099, + "learning_rate": 1.3853137870115487e-06, + "loss": 0.1571685791015625, + "step": 157495 + }, + { + "epoch": 1.3618559286128091, + "grad_norm": 0.1866107040537911, + "learning_rate": 1.385142084662407e-06, + "loss": 0.012372398376464843, + "step": 157500 + }, + { + "epoch": 1.3618991621343524, + "grad_norm": 4.16872080183594, + "learning_rate": 1.3849703897608464e-06, + "loss": 0.05964508056640625, + "step": 157505 + }, + { + "epoch": 1.3619423956558958, + "grad_norm": 8.020623146366226, + "learning_rate": 1.3847987023076604e-06, + "loss": 0.0353057861328125, + "step": 157510 + }, + { + "epoch": 1.361985629177439, + "grad_norm": 11.251926081065113, + "learning_rate": 1.3846270223036404e-06, + "loss": 0.06321754455566406, + "step": 157515 + }, + { + "epoch": 1.3620288626989823, + "grad_norm": 2.0001082560961554, + "learning_rate": 1.3844553497495767e-06, + "loss": 0.038547515869140625, + "step": 157520 + }, + { + "epoch": 1.3620720962205255, + "grad_norm": 29.819249369885014, + "learning_rate": 1.3842836846462634e-06, + "loss": 0.07532997131347656, + "step": 157525 + }, + { + "epoch": 1.362115329742069, + "grad_norm": 2.3841175482523473, + "learning_rate": 1.3841120269944908e-06, + "loss": 0.029286956787109374, + "step": 157530 + }, + { + "epoch": 1.3621585632636122, + "grad_norm": 1.9660152798960813, + "learning_rate": 1.3839403767950496e-06, + "loss": 0.14777641296386718, + "step": 157535 + }, + { + "epoch": 1.3622017967851554, + "grad_norm": 30.756736177051966, + "learning_rate": 1.383768734048734e-06, + "loss": 0.21625213623046874, + "step": 157540 + }, + { + "epoch": 1.3622450303066986, + "grad_norm": 0.05540310338368466, + "learning_rate": 1.3835970987563335e-06, + "loss": 0.06222972869873047, + "step": 157545 + }, + { + "epoch": 1.3622882638282419, + "grad_norm": 1.90758136242865, + "learning_rate": 1.3834254709186411e-06, + "loss": 0.10593185424804688, + "step": 157550 + }, + { + "epoch": 1.362331497349785, + "grad_norm": 0.2550359702229836, + "learning_rate": 1.3832538505364472e-06, + "loss": 0.02699127197265625, + "step": 157555 + }, + { + "epoch": 1.3623747308713283, + "grad_norm": 0.15507612808523552, + "learning_rate": 1.3830822376105437e-06, + "loss": 0.03485870361328125, + "step": 157560 + }, + { + "epoch": 1.3624179643928715, + "grad_norm": 0.36022829018040636, + "learning_rate": 1.382910632141722e-06, + "loss": 0.00802764892578125, + "step": 157565 + }, + { + "epoch": 1.362461197914415, + "grad_norm": 2.87515210911316, + "learning_rate": 1.3827390341307726e-06, + "loss": 0.13695144653320312, + "step": 157570 + }, + { + "epoch": 1.3625044314359582, + "grad_norm": 0.814179130735101, + "learning_rate": 1.382567443578489e-06, + "loss": 0.06000518798828125, + "step": 157575 + }, + { + "epoch": 1.3625476649575015, + "grad_norm": 0.9428957850459653, + "learning_rate": 1.3823958604856604e-06, + "loss": 0.060207366943359375, + "step": 157580 + }, + { + "epoch": 1.3625908984790447, + "grad_norm": 4.997901991577486, + "learning_rate": 1.3822242848530802e-06, + "loss": 0.024579429626464845, + "step": 157585 + }, + { + "epoch": 1.362634132000588, + "grad_norm": 0.19328099086690767, + "learning_rate": 1.3820527166815385e-06, + "loss": 0.010103607177734375, + "step": 157590 + }, + { + "epoch": 1.3626773655221314, + "grad_norm": 2.4314894433379166, + "learning_rate": 1.381881155971827e-06, + "loss": 0.01821746826171875, + "step": 157595 + }, + { + "epoch": 1.3627205990436746, + "grad_norm": 1.138489862178894, + "learning_rate": 1.381709602724735e-06, + "loss": 0.012877655029296876, + "step": 157600 + }, + { + "epoch": 1.3627638325652178, + "grad_norm": 0.6645201904704698, + "learning_rate": 1.381538056941057e-06, + "loss": 0.030574798583984375, + "step": 157605 + }, + { + "epoch": 1.362807066086761, + "grad_norm": 0.28988623676258163, + "learning_rate": 1.3813665186215821e-06, + "loss": 0.09008331298828125, + "step": 157610 + }, + { + "epoch": 1.3628502996083043, + "grad_norm": 0.16255662260910284, + "learning_rate": 1.381194987767102e-06, + "loss": 0.016776275634765626, + "step": 157615 + }, + { + "epoch": 1.3628935331298475, + "grad_norm": 0.23234511496678836, + "learning_rate": 1.3810234643784075e-06, + "loss": 0.046441841125488284, + "step": 157620 + }, + { + "epoch": 1.3629367666513907, + "grad_norm": 0.23151394459891733, + "learning_rate": 1.3808519484562894e-06, + "loss": 0.031069564819335937, + "step": 157625 + }, + { + "epoch": 1.362980000172934, + "grad_norm": 1.1769319340035844, + "learning_rate": 1.3806804400015386e-06, + "loss": 0.011516952514648437, + "step": 157630 + }, + { + "epoch": 1.3630232336944774, + "grad_norm": 1.593435675247795, + "learning_rate": 1.380508939014947e-06, + "loss": 0.3120269775390625, + "step": 157635 + }, + { + "epoch": 1.3630664672160206, + "grad_norm": 1.2964435255703854, + "learning_rate": 1.3803374454973044e-06, + "loss": 0.09654817581176758, + "step": 157640 + }, + { + "epoch": 1.3631097007375639, + "grad_norm": 4.967466113541895, + "learning_rate": 1.3801659594494033e-06, + "loss": 0.042024993896484376, + "step": 157645 + }, + { + "epoch": 1.363152934259107, + "grad_norm": 1.4145554667533413, + "learning_rate": 1.3799944808720336e-06, + "loss": 0.09121551513671874, + "step": 157650 + }, + { + "epoch": 1.3631961677806503, + "grad_norm": 19.398276349628244, + "learning_rate": 1.3798230097659865e-06, + "loss": 0.0467219352722168, + "step": 157655 + }, + { + "epoch": 1.3632394013021938, + "grad_norm": 61.90615121386049, + "learning_rate": 1.3796515461320523e-06, + "loss": 0.24905242919921874, + "step": 157660 + }, + { + "epoch": 1.363282634823737, + "grad_norm": 9.606593346318537, + "learning_rate": 1.3794800899710208e-06, + "loss": 0.020928955078125, + "step": 157665 + }, + { + "epoch": 1.3633258683452802, + "grad_norm": 2.7746845185458495, + "learning_rate": 1.3793086412836853e-06, + "loss": 0.11880416870117187, + "step": 157670 + }, + { + "epoch": 1.3633691018668235, + "grad_norm": 14.790929811998678, + "learning_rate": 1.3791372000708351e-06, + "loss": 0.06159534454345703, + "step": 157675 + }, + { + "epoch": 1.3634123353883667, + "grad_norm": 9.870218146960468, + "learning_rate": 1.3789657663332606e-06, + "loss": 0.04526653289794922, + "step": 157680 + }, + { + "epoch": 1.36345556890991, + "grad_norm": 0.9494782121357619, + "learning_rate": 1.378794340071752e-06, + "loss": 0.031682586669921874, + "step": 157685 + }, + { + "epoch": 1.3634988024314532, + "grad_norm": 0.24976119005575195, + "learning_rate": 1.3786229212871014e-06, + "loss": 0.03353958129882813, + "step": 157690 + }, + { + "epoch": 1.3635420359529964, + "grad_norm": 5.422087619052385, + "learning_rate": 1.378451509980098e-06, + "loss": 0.03882732391357422, + "step": 157695 + }, + { + "epoch": 1.3635852694745398, + "grad_norm": 0.7112306165170559, + "learning_rate": 1.3782801061515336e-06, + "loss": 0.02481689453125, + "step": 157700 + }, + { + "epoch": 1.363628502996083, + "grad_norm": 0.7842202200384035, + "learning_rate": 1.3781087098021984e-06, + "loss": 0.027007675170898436, + "step": 157705 + }, + { + "epoch": 1.3636717365176263, + "grad_norm": 10.86427812593486, + "learning_rate": 1.3779373209328823e-06, + "loss": 0.05888938903808594, + "step": 157710 + }, + { + "epoch": 1.3637149700391695, + "grad_norm": 0.39976675698981107, + "learning_rate": 1.377765939544376e-06, + "loss": 0.03223562240600586, + "step": 157715 + }, + { + "epoch": 1.3637582035607128, + "grad_norm": 15.416143975332277, + "learning_rate": 1.3775945656374698e-06, + "loss": 0.2553285598754883, + "step": 157720 + }, + { + "epoch": 1.3638014370822562, + "grad_norm": 1.0618618555623633, + "learning_rate": 1.3774231992129533e-06, + "loss": 0.01621589660644531, + "step": 157725 + }, + { + "epoch": 1.3638446706037994, + "grad_norm": 40.10775233698186, + "learning_rate": 1.3772518402716184e-06, + "loss": 0.1811840057373047, + "step": 157730 + }, + { + "epoch": 1.3638879041253427, + "grad_norm": 3.8400146444496213, + "learning_rate": 1.3770804888142545e-06, + "loss": 0.08682365417480468, + "step": 157735 + }, + { + "epoch": 1.363931137646886, + "grad_norm": 1.6672977271699116, + "learning_rate": 1.3769091448416522e-06, + "loss": 0.01726245880126953, + "step": 157740 + }, + { + "epoch": 1.3639743711684291, + "grad_norm": 1.6268088134690533, + "learning_rate": 1.3767378083546004e-06, + "loss": 0.19112892150878907, + "step": 157745 + }, + { + "epoch": 1.3640176046899724, + "grad_norm": 1.7094375685658068, + "learning_rate": 1.3765664793538914e-06, + "loss": 0.06276779174804688, + "step": 157750 + }, + { + "epoch": 1.3640608382115156, + "grad_norm": 14.442687098263, + "learning_rate": 1.3763951578403147e-06, + "loss": 0.04539031982421875, + "step": 157755 + }, + { + "epoch": 1.3641040717330588, + "grad_norm": 0.2113313723194795, + "learning_rate": 1.3762238438146588e-06, + "loss": 0.21935768127441407, + "step": 157760 + }, + { + "epoch": 1.3641473052546023, + "grad_norm": 31.933627605219506, + "learning_rate": 1.376052537277716e-06, + "loss": 0.12601394653320314, + "step": 157765 + }, + { + "epoch": 1.3641905387761455, + "grad_norm": 3.9037415972264693, + "learning_rate": 1.3758812382302752e-06, + "loss": 0.016217422485351563, + "step": 157770 + }, + { + "epoch": 1.3642337722976887, + "grad_norm": 1.6470471773910367, + "learning_rate": 1.3757099466731272e-06, + "loss": 0.03819198608398437, + "step": 157775 + }, + { + "epoch": 1.364277005819232, + "grad_norm": 25.87000047315682, + "learning_rate": 1.375538662607061e-06, + "loss": 0.07169418334960938, + "step": 157780 + }, + { + "epoch": 1.3643202393407754, + "grad_norm": 4.161621047920493, + "learning_rate": 1.375367386032867e-06, + "loss": 0.036895751953125, + "step": 157785 + }, + { + "epoch": 1.3643634728623186, + "grad_norm": 8.714214512185128, + "learning_rate": 1.375196116951334e-06, + "loss": 0.3222625732421875, + "step": 157790 + }, + { + "epoch": 1.3644067063838619, + "grad_norm": 1.1412232505456117, + "learning_rate": 1.3750248553632541e-06, + "loss": 0.07546348571777343, + "step": 157795 + }, + { + "epoch": 1.364449939905405, + "grad_norm": 30.47774287446912, + "learning_rate": 1.3748536012694159e-06, + "loss": 0.0862457275390625, + "step": 157800 + }, + { + "epoch": 1.3644931734269483, + "grad_norm": 1.8961734320189103, + "learning_rate": 1.3746823546706081e-06, + "loss": 0.06525535583496093, + "step": 157805 + }, + { + "epoch": 1.3645364069484915, + "grad_norm": 4.937545731372457, + "learning_rate": 1.374511115567623e-06, + "loss": 0.03918914794921875, + "step": 157810 + }, + { + "epoch": 1.3645796404700348, + "grad_norm": 0.612179260020173, + "learning_rate": 1.3743398839612488e-06, + "loss": 0.013194465637207031, + "step": 157815 + }, + { + "epoch": 1.364622873991578, + "grad_norm": 4.42849134239654, + "learning_rate": 1.3741686598522755e-06, + "loss": 0.0511077880859375, + "step": 157820 + }, + { + "epoch": 1.3646661075131215, + "grad_norm": 0.18349750503737106, + "learning_rate": 1.3739974432414917e-06, + "loss": 0.08977203369140625, + "step": 157825 + }, + { + "epoch": 1.3647093410346647, + "grad_norm": 0.1186646725753824, + "learning_rate": 1.373826234129689e-06, + "loss": 0.030255889892578124, + "step": 157830 + }, + { + "epoch": 1.364752574556208, + "grad_norm": 0.9761350643072517, + "learning_rate": 1.373655032517656e-06, + "loss": 0.18814988136291505, + "step": 157835 + }, + { + "epoch": 1.3647958080777511, + "grad_norm": 8.18849399542558, + "learning_rate": 1.373483838406182e-06, + "loss": 0.18237724304199218, + "step": 157840 + }, + { + "epoch": 1.3648390415992944, + "grad_norm": 0.36942285308718076, + "learning_rate": 1.3733126517960571e-06, + "loss": 0.059028244018554686, + "step": 157845 + }, + { + "epoch": 1.3648822751208378, + "grad_norm": 7.649410333803453, + "learning_rate": 1.3731414726880708e-06, + "loss": 0.012659072875976562, + "step": 157850 + }, + { + "epoch": 1.364925508642381, + "grad_norm": 1.941923673278642, + "learning_rate": 1.3729703010830107e-06, + "loss": 0.09442214965820313, + "step": 157855 + }, + { + "epoch": 1.3649687421639243, + "grad_norm": 0.5021916879808621, + "learning_rate": 1.3727991369816693e-06, + "loss": 0.15762977600097655, + "step": 157860 + }, + { + "epoch": 1.3650119756854675, + "grad_norm": 3.4433691850555816, + "learning_rate": 1.3726279803848332e-06, + "loss": 0.026181602478027345, + "step": 157865 + }, + { + "epoch": 1.3650552092070107, + "grad_norm": 3.608040108235913, + "learning_rate": 1.3724568312932938e-06, + "loss": 0.05281562805175781, + "step": 157870 + }, + { + "epoch": 1.365098442728554, + "grad_norm": 1.011367054540597, + "learning_rate": 1.3722856897078403e-06, + "loss": 0.15600814819335937, + "step": 157875 + }, + { + "epoch": 1.3651416762500972, + "grad_norm": 0.5707339771084927, + "learning_rate": 1.3721145556292611e-06, + "loss": 0.023626708984375, + "step": 157880 + }, + { + "epoch": 1.3651849097716404, + "grad_norm": 1.0046884021376095, + "learning_rate": 1.3719434290583447e-06, + "loss": 0.5217720031738281, + "step": 157885 + }, + { + "epoch": 1.3652281432931839, + "grad_norm": 40.205449631972144, + "learning_rate": 1.3717723099958822e-06, + "loss": 0.10377655029296876, + "step": 157890 + }, + { + "epoch": 1.365271376814727, + "grad_norm": 9.535165538788881, + "learning_rate": 1.3716011984426622e-06, + "loss": 0.03100433349609375, + "step": 157895 + }, + { + "epoch": 1.3653146103362703, + "grad_norm": 0.40911730269101104, + "learning_rate": 1.3714300943994732e-06, + "loss": 0.05736846923828125, + "step": 157900 + }, + { + "epoch": 1.3653578438578136, + "grad_norm": 8.466084152654554, + "learning_rate": 1.3712589978671052e-06, + "loss": 0.07572288513183593, + "step": 157905 + }, + { + "epoch": 1.3654010773793568, + "grad_norm": 0.1413677304663297, + "learning_rate": 1.3710879088463456e-06, + "loss": 0.06518745422363281, + "step": 157910 + }, + { + "epoch": 1.3654443109009002, + "grad_norm": 10.368969922191923, + "learning_rate": 1.3709168273379857e-06, + "loss": 0.08035964965820312, + "step": 157915 + }, + { + "epoch": 1.3654875444224435, + "grad_norm": 0.40851058597240797, + "learning_rate": 1.3707457533428121e-06, + "loss": 0.029216194152832033, + "step": 157920 + }, + { + "epoch": 1.3655307779439867, + "grad_norm": 8.233460549923757, + "learning_rate": 1.3705746868616162e-06, + "loss": 0.06313629150390625, + "step": 157925 + }, + { + "epoch": 1.36557401146553, + "grad_norm": 0.1224273404904853, + "learning_rate": 1.3704036278951863e-06, + "loss": 0.005996036529541016, + "step": 157930 + }, + { + "epoch": 1.3656172449870732, + "grad_norm": 61.38008103919972, + "learning_rate": 1.3702325764443105e-06, + "loss": 0.46502227783203126, + "step": 157935 + }, + { + "epoch": 1.3656604785086164, + "grad_norm": 6.040470134646063, + "learning_rate": 1.3700615325097781e-06, + "loss": 0.08377208709716796, + "step": 157940 + }, + { + "epoch": 1.3657037120301596, + "grad_norm": 1.0435647798154866, + "learning_rate": 1.369890496092378e-06, + "loss": 0.006362152099609375, + "step": 157945 + }, + { + "epoch": 1.3657469455517028, + "grad_norm": 2.464908439579328, + "learning_rate": 1.3697194671928978e-06, + "loss": 0.11142940521240234, + "step": 157950 + }, + { + "epoch": 1.3657901790732463, + "grad_norm": 1.3163397844106355, + "learning_rate": 1.3695484458121283e-06, + "loss": 0.05016021728515625, + "step": 157955 + }, + { + "epoch": 1.3658334125947895, + "grad_norm": 24.1413362891499, + "learning_rate": 1.3693774319508575e-06, + "loss": 0.14711456298828124, + "step": 157960 + }, + { + "epoch": 1.3658766461163327, + "grad_norm": 4.5022660025605745, + "learning_rate": 1.369206425609874e-06, + "loss": 0.0246826171875, + "step": 157965 + }, + { + "epoch": 1.365919879637876, + "grad_norm": 0.8434519942725418, + "learning_rate": 1.369035426789965e-06, + "loss": 0.042665863037109376, + "step": 157970 + }, + { + "epoch": 1.3659631131594192, + "grad_norm": 0.33017370301046756, + "learning_rate": 1.3688644354919216e-06, + "loss": 0.11911640167236329, + "step": 157975 + }, + { + "epoch": 1.3660063466809627, + "grad_norm": 8.098703853487068, + "learning_rate": 1.3686934517165304e-06, + "loss": 0.09359283447265625, + "step": 157980 + }, + { + "epoch": 1.3660495802025059, + "grad_norm": 0.06834819337251888, + "learning_rate": 1.3685224754645817e-06, + "loss": 0.09487228393554688, + "step": 157985 + }, + { + "epoch": 1.3660928137240491, + "grad_norm": 23.999065320872138, + "learning_rate": 1.3683515067368636e-06, + "loss": 0.0899810791015625, + "step": 157990 + }, + { + "epoch": 1.3661360472455923, + "grad_norm": 4.618155893296768, + "learning_rate": 1.3681805455341637e-06, + "loss": 0.08807849884033203, + "step": 157995 + }, + { + "epoch": 1.3661792807671356, + "grad_norm": 2.9820885379088597, + "learning_rate": 1.368009591857271e-06, + "loss": 0.20302276611328124, + "step": 158000 + }, + { + "epoch": 1.3662225142886788, + "grad_norm": 3.4602824745251177, + "learning_rate": 1.367838645706974e-06, + "loss": 0.02539215087890625, + "step": 158005 + }, + { + "epoch": 1.366265747810222, + "grad_norm": 1.2884623398232131, + "learning_rate": 1.367667707084061e-06, + "loss": 0.010165786743164063, + "step": 158010 + }, + { + "epoch": 1.3663089813317653, + "grad_norm": 0.5441485646014439, + "learning_rate": 1.3674967759893191e-06, + "loss": 0.020722579956054688, + "step": 158015 + }, + { + "epoch": 1.3663522148533087, + "grad_norm": 5.419737277227587, + "learning_rate": 1.367325852423539e-06, + "loss": 0.06425933837890625, + "step": 158020 + }, + { + "epoch": 1.366395448374852, + "grad_norm": 2.164912867017548, + "learning_rate": 1.3671549363875079e-06, + "loss": 0.06012458801269531, + "step": 158025 + }, + { + "epoch": 1.3664386818963952, + "grad_norm": 0.6406618434937839, + "learning_rate": 1.3669840278820124e-06, + "loss": 0.08887176513671875, + "step": 158030 + }, + { + "epoch": 1.3664819154179384, + "grad_norm": 15.921118063386858, + "learning_rate": 1.3668131269078437e-06, + "loss": 0.11873970031738282, + "step": 158035 + }, + { + "epoch": 1.3665251489394818, + "grad_norm": 0.5781668089805804, + "learning_rate": 1.3666422334657887e-06, + "loss": 0.007846832275390625, + "step": 158040 + }, + { + "epoch": 1.366568382461025, + "grad_norm": 4.99809419371283, + "learning_rate": 1.3664713475566342e-06, + "loss": 0.041253662109375, + "step": 158045 + }, + { + "epoch": 1.3666116159825683, + "grad_norm": 21.510121643029155, + "learning_rate": 1.3663004691811705e-06, + "loss": 0.054794692993164064, + "step": 158050 + }, + { + "epoch": 1.3666548495041115, + "grad_norm": 1.3242012624304114, + "learning_rate": 1.3661295983401847e-06, + "loss": 0.10000839233398437, + "step": 158055 + }, + { + "epoch": 1.3666980830256548, + "grad_norm": 0.1546960849642121, + "learning_rate": 1.3659587350344646e-06, + "loss": 0.017761993408203124, + "step": 158060 + }, + { + "epoch": 1.366741316547198, + "grad_norm": 1.6232936651558267, + "learning_rate": 1.3657878792647987e-06, + "loss": 0.0224365234375, + "step": 158065 + }, + { + "epoch": 1.3667845500687412, + "grad_norm": 0.17252209354599013, + "learning_rate": 1.3656170310319746e-06, + "loss": 0.07231521606445312, + "step": 158070 + }, + { + "epoch": 1.3668277835902845, + "grad_norm": 1.2565706747604588, + "learning_rate": 1.3654461903367794e-06, + "loss": 0.03483657836914063, + "step": 158075 + }, + { + "epoch": 1.366871017111828, + "grad_norm": 0.6281416922410599, + "learning_rate": 1.365275357180003e-06, + "loss": 0.10078125, + "step": 158080 + }, + { + "epoch": 1.3669142506333711, + "grad_norm": 6.742114429846426, + "learning_rate": 1.3651045315624308e-06, + "loss": 0.01538543701171875, + "step": 158085 + }, + { + "epoch": 1.3669574841549144, + "grad_norm": 0.5022968932729022, + "learning_rate": 1.3649337134848534e-06, + "loss": 0.02297210693359375, + "step": 158090 + }, + { + "epoch": 1.3670007176764576, + "grad_norm": 4.648732162109755, + "learning_rate": 1.3647629029480572e-06, + "loss": 0.059659385681152345, + "step": 158095 + }, + { + "epoch": 1.3670439511980008, + "grad_norm": 0.11535526296171315, + "learning_rate": 1.3645920999528302e-06, + "loss": 0.08939971923828124, + "step": 158100 + }, + { + "epoch": 1.3670871847195443, + "grad_norm": 9.651683385389854, + "learning_rate": 1.3644213044999595e-06, + "loss": 0.037880325317382814, + "step": 158105 + }, + { + "epoch": 1.3671304182410875, + "grad_norm": 0.06573044381780956, + "learning_rate": 1.3642505165902325e-06, + "loss": 0.018128585815429688, + "step": 158110 + }, + { + "epoch": 1.3671736517626307, + "grad_norm": 0.7019961462417398, + "learning_rate": 1.3640797362244385e-06, + "loss": 0.084832763671875, + "step": 158115 + }, + { + "epoch": 1.367216885284174, + "grad_norm": 0.16810138854624337, + "learning_rate": 1.3639089634033643e-06, + "loss": 0.23518142700195313, + "step": 158120 + }, + { + "epoch": 1.3672601188057172, + "grad_norm": 0.6683194704411969, + "learning_rate": 1.3637381981277972e-06, + "loss": 0.04434356689453125, + "step": 158125 + }, + { + "epoch": 1.3673033523272604, + "grad_norm": 10.420013878920344, + "learning_rate": 1.3635674403985252e-06, + "loss": 0.04707260131835937, + "step": 158130 + }, + { + "epoch": 1.3673465858488036, + "grad_norm": 1.0276858052097986, + "learning_rate": 1.3633966902163343e-06, + "loss": 0.048343658447265625, + "step": 158135 + }, + { + "epoch": 1.3673898193703469, + "grad_norm": 0.7909927416489808, + "learning_rate": 1.3632259475820145e-06, + "loss": 0.068719482421875, + "step": 158140 + }, + { + "epoch": 1.3674330528918903, + "grad_norm": 5.8589456722874, + "learning_rate": 1.3630552124963506e-06, + "loss": 0.027837753295898438, + "step": 158145 + }, + { + "epoch": 1.3674762864134336, + "grad_norm": 0.26120613142012045, + "learning_rate": 1.362884484960133e-06, + "loss": 0.44858589172363283, + "step": 158150 + }, + { + "epoch": 1.3675195199349768, + "grad_norm": 33.98398336002449, + "learning_rate": 1.362713764974147e-06, + "loss": 0.05988006591796875, + "step": 158155 + }, + { + "epoch": 1.36756275345652, + "grad_norm": 1.9055433840582854, + "learning_rate": 1.3625430525391805e-06, + "loss": 0.0612030029296875, + "step": 158160 + }, + { + "epoch": 1.3676059869780632, + "grad_norm": 0.09219770049499083, + "learning_rate": 1.3623723476560213e-06, + "loss": 0.06253433227539062, + "step": 158165 + }, + { + "epoch": 1.3676492204996067, + "grad_norm": 4.908068389912188, + "learning_rate": 1.3622016503254556e-06, + "loss": 0.01535491943359375, + "step": 158170 + }, + { + "epoch": 1.36769245402115, + "grad_norm": 0.4382792190456775, + "learning_rate": 1.36203096054827e-06, + "loss": 0.008538055419921874, + "step": 158175 + }, + { + "epoch": 1.3677356875426931, + "grad_norm": 0.4205340292896141, + "learning_rate": 1.3618602783252542e-06, + "loss": 0.028952789306640626, + "step": 158180 + }, + { + "epoch": 1.3677789210642364, + "grad_norm": 6.291836901341886, + "learning_rate": 1.3616896036571943e-06, + "loss": 0.048918533325195315, + "step": 158185 + }, + { + "epoch": 1.3678221545857796, + "grad_norm": 1.790510119639229, + "learning_rate": 1.361518936544877e-06, + "loss": 0.028226852416992188, + "step": 158190 + }, + { + "epoch": 1.3678653881073228, + "grad_norm": 6.3058640195249485, + "learning_rate": 1.361348276989088e-06, + "loss": 0.06832046508789062, + "step": 158195 + }, + { + "epoch": 1.367908621628866, + "grad_norm": 0.5822738029274525, + "learning_rate": 1.3611776249906177e-06, + "loss": 0.06033649444580078, + "step": 158200 + }, + { + "epoch": 1.3679518551504093, + "grad_norm": 1.7542038209664197, + "learning_rate": 1.3610069805502498e-06, + "loss": 0.19233016967773436, + "step": 158205 + }, + { + "epoch": 1.3679950886719527, + "grad_norm": 8.75135251474119, + "learning_rate": 1.3608363436687746e-06, + "loss": 0.07019805908203125, + "step": 158210 + }, + { + "epoch": 1.368038322193496, + "grad_norm": 8.455695426129283, + "learning_rate": 1.3606657143469768e-06, + "loss": 0.07031974792480469, + "step": 158215 + }, + { + "epoch": 1.3680815557150392, + "grad_norm": 0.3996674881567622, + "learning_rate": 1.3604950925856442e-06, + "loss": 0.03418121337890625, + "step": 158220 + }, + { + "epoch": 1.3681247892365824, + "grad_norm": 0.5510027210944046, + "learning_rate": 1.360324478385563e-06, + "loss": 0.0571746826171875, + "step": 158225 + }, + { + "epoch": 1.3681680227581259, + "grad_norm": 0.44764088570210736, + "learning_rate": 1.3601538717475207e-06, + "loss": 0.011173248291015625, + "step": 158230 + }, + { + "epoch": 1.368211256279669, + "grad_norm": 0.5005278887978376, + "learning_rate": 1.3599832726723025e-06, + "loss": 0.026160049438476562, + "step": 158235 + }, + { + "epoch": 1.3682544898012123, + "grad_norm": 3.4016523711504143, + "learning_rate": 1.359812681160698e-06, + "loss": 0.012038612365722656, + "step": 158240 + }, + { + "epoch": 1.3682977233227556, + "grad_norm": 0.5251323743087468, + "learning_rate": 1.3596420972134923e-06, + "loss": 0.013494873046875, + "step": 158245 + }, + { + "epoch": 1.3683409568442988, + "grad_norm": 13.100907573362655, + "learning_rate": 1.3594715208314724e-06, + "loss": 0.11202392578125, + "step": 158250 + }, + { + "epoch": 1.368384190365842, + "grad_norm": 0.447979967938239, + "learning_rate": 1.3593009520154233e-06, + "loss": 0.11002960205078124, + "step": 158255 + }, + { + "epoch": 1.3684274238873853, + "grad_norm": 23.150234372059092, + "learning_rate": 1.359130390766135e-06, + "loss": 0.11695175170898438, + "step": 158260 + }, + { + "epoch": 1.3684706574089285, + "grad_norm": 1.641477216572189, + "learning_rate": 1.358959837084392e-06, + "loss": 0.015198516845703124, + "step": 158265 + }, + { + "epoch": 1.368513890930472, + "grad_norm": 1.9981387554474561, + "learning_rate": 1.35878929097098e-06, + "loss": 0.17994155883789062, + "step": 158270 + }, + { + "epoch": 1.3685571244520152, + "grad_norm": 15.53572929896691, + "learning_rate": 1.358618752426688e-06, + "loss": 0.04352645874023438, + "step": 158275 + }, + { + "epoch": 1.3686003579735584, + "grad_norm": 1.0371390084486043, + "learning_rate": 1.3584482214523015e-06, + "loss": 0.12860527038574218, + "step": 158280 + }, + { + "epoch": 1.3686435914951016, + "grad_norm": 3.0428005659412714, + "learning_rate": 1.3582776980486067e-06, + "loss": 0.07480278015136718, + "step": 158285 + }, + { + "epoch": 1.3686868250166448, + "grad_norm": 12.13687185142276, + "learning_rate": 1.35810718221639e-06, + "loss": 0.08579978942871094, + "step": 158290 + }, + { + "epoch": 1.3687300585381883, + "grad_norm": 7.339934538274015, + "learning_rate": 1.3579366739564377e-06, + "loss": 0.25025405883789065, + "step": 158295 + }, + { + "epoch": 1.3687732920597315, + "grad_norm": 1.2758006138780003, + "learning_rate": 1.3577661732695356e-06, + "loss": 0.007590484619140625, + "step": 158300 + }, + { + "epoch": 1.3688165255812748, + "grad_norm": 43.58011137422001, + "learning_rate": 1.357595680156472e-06, + "loss": 0.17128868103027345, + "step": 158305 + }, + { + "epoch": 1.368859759102818, + "grad_norm": 6.632013137248088, + "learning_rate": 1.3574251946180302e-06, + "loss": 0.0316864013671875, + "step": 158310 + }, + { + "epoch": 1.3689029926243612, + "grad_norm": 28.014940173008874, + "learning_rate": 1.357254716655e-06, + "loss": 0.09543724060058593, + "step": 158315 + }, + { + "epoch": 1.3689462261459044, + "grad_norm": 0.3489026532744066, + "learning_rate": 1.3570842462681657e-06, + "loss": 0.0038349151611328123, + "step": 158320 + }, + { + "epoch": 1.3689894596674477, + "grad_norm": 0.43907695977594885, + "learning_rate": 1.3569137834583134e-06, + "loss": 0.13629226684570311, + "step": 158325 + }, + { + "epoch": 1.369032693188991, + "grad_norm": 0.7766781032698421, + "learning_rate": 1.356743328226229e-06, + "loss": 0.04629249572753906, + "step": 158330 + }, + { + "epoch": 1.3690759267105344, + "grad_norm": 1.2611330438027955, + "learning_rate": 1.3565728805727e-06, + "loss": 0.009910964965820312, + "step": 158335 + }, + { + "epoch": 1.3691191602320776, + "grad_norm": 30.075909696496804, + "learning_rate": 1.3564024404985117e-06, + "loss": 0.104901123046875, + "step": 158340 + }, + { + "epoch": 1.3691623937536208, + "grad_norm": 0.7992207285891357, + "learning_rate": 1.3562320080044502e-06, + "loss": 0.11518497467041015, + "step": 158345 + }, + { + "epoch": 1.369205627275164, + "grad_norm": 0.5194382647169642, + "learning_rate": 1.356061583091301e-06, + "loss": 0.010573959350585938, + "step": 158350 + }, + { + "epoch": 1.3692488607967073, + "grad_norm": 6.865522876742652, + "learning_rate": 1.3558911657598509e-06, + "loss": 0.020836639404296874, + "step": 158355 + }, + { + "epoch": 1.3692920943182507, + "grad_norm": 2.4268636254277403, + "learning_rate": 1.3557207560108843e-06, + "loss": 0.05883636474609375, + "step": 158360 + }, + { + "epoch": 1.369335327839794, + "grad_norm": 0.9174530027354197, + "learning_rate": 1.3555503538451894e-06, + "loss": 0.05976295471191406, + "step": 158365 + }, + { + "epoch": 1.3693785613613372, + "grad_norm": 1.531942477679472, + "learning_rate": 1.3553799592635499e-06, + "loss": 0.049219512939453126, + "step": 158370 + }, + { + "epoch": 1.3694217948828804, + "grad_norm": 2.7601304272823652, + "learning_rate": 1.3552095722667535e-06, + "loss": 0.23991851806640624, + "step": 158375 + }, + { + "epoch": 1.3694650284044236, + "grad_norm": 4.679315906970474, + "learning_rate": 1.3550391928555857e-06, + "loss": 0.0556950569152832, + "step": 158380 + }, + { + "epoch": 1.3695082619259669, + "grad_norm": 2.614309496796504, + "learning_rate": 1.3548688210308316e-06, + "loss": 0.035115814208984374, + "step": 158385 + }, + { + "epoch": 1.36955149544751, + "grad_norm": 4.782641950330502, + "learning_rate": 1.3546984567932769e-06, + "loss": 0.026554107666015625, + "step": 158390 + }, + { + "epoch": 1.3695947289690533, + "grad_norm": 3.116627169937544, + "learning_rate": 1.3545281001437067e-06, + "loss": 0.29320526123046875, + "step": 158395 + }, + { + "epoch": 1.3696379624905968, + "grad_norm": 12.357034522545653, + "learning_rate": 1.3543577510829087e-06, + "loss": 0.027099227905273436, + "step": 158400 + }, + { + "epoch": 1.36968119601214, + "grad_norm": 15.16231872263971, + "learning_rate": 1.354187409611667e-06, + "loss": 0.07026214599609375, + "step": 158405 + }, + { + "epoch": 1.3697244295336832, + "grad_norm": 2.405227904050991, + "learning_rate": 1.3540170757307678e-06, + "loss": 0.07901115417480468, + "step": 158410 + }, + { + "epoch": 1.3697676630552265, + "grad_norm": 6.322418561157022, + "learning_rate": 1.3538467494409962e-06, + "loss": 0.02590484619140625, + "step": 158415 + }, + { + "epoch": 1.3698108965767697, + "grad_norm": 10.098407462012563, + "learning_rate": 1.353676430743137e-06, + "loss": 0.09794197082519532, + "step": 158420 + }, + { + "epoch": 1.3698541300983131, + "grad_norm": 1.107248684700548, + "learning_rate": 1.3535061196379775e-06, + "loss": 0.08556671142578125, + "step": 158425 + }, + { + "epoch": 1.3698973636198564, + "grad_norm": 2.3938727643754514, + "learning_rate": 1.3533358161263012e-06, + "loss": 0.10198860168457032, + "step": 158430 + }, + { + "epoch": 1.3699405971413996, + "grad_norm": 21.397674030877592, + "learning_rate": 1.3531655202088962e-06, + "loss": 0.07205162048339844, + "step": 158435 + }, + { + "epoch": 1.3699838306629428, + "grad_norm": 40.692297461557544, + "learning_rate": 1.352995231886546e-06, + "loss": 0.06568927764892578, + "step": 158440 + }, + { + "epoch": 1.370027064184486, + "grad_norm": 4.91281872196945, + "learning_rate": 1.352824951160036e-06, + "loss": 0.200506591796875, + "step": 158445 + }, + { + "epoch": 1.3700702977060293, + "grad_norm": 0.5654992999647684, + "learning_rate": 1.3526546780301522e-06, + "loss": 0.020446205139160158, + "step": 158450 + }, + { + "epoch": 1.3701135312275725, + "grad_norm": 56.672429498899234, + "learning_rate": 1.3524844124976795e-06, + "loss": 0.08116168975830078, + "step": 158455 + }, + { + "epoch": 1.3701567647491157, + "grad_norm": 2.3600607944378247, + "learning_rate": 1.3523141545634017e-06, + "loss": 0.0254486083984375, + "step": 158460 + }, + { + "epoch": 1.3701999982706592, + "grad_norm": 60.321601212488815, + "learning_rate": 1.3521439042281064e-06, + "loss": 0.18553447723388672, + "step": 158465 + }, + { + "epoch": 1.3702432317922024, + "grad_norm": 53.57014453499433, + "learning_rate": 1.3519736614925782e-06, + "loss": 0.18397445678710939, + "step": 158470 + }, + { + "epoch": 1.3702864653137456, + "grad_norm": 2.009449001349762, + "learning_rate": 1.3518034263576017e-06, + "loss": 0.059153079986572266, + "step": 158475 + }, + { + "epoch": 1.3703296988352889, + "grad_norm": 6.943470623351222, + "learning_rate": 1.3516331988239609e-06, + "loss": 0.04116134643554688, + "step": 158480 + }, + { + "epoch": 1.3703729323568323, + "grad_norm": 1.1336012227670742, + "learning_rate": 1.3514629788924435e-06, + "loss": 0.07618179321289062, + "step": 158485 + }, + { + "epoch": 1.3704161658783756, + "grad_norm": 8.819380140477213, + "learning_rate": 1.3512927665638316e-06, + "loss": 0.07305335998535156, + "step": 158490 + }, + { + "epoch": 1.3704593993999188, + "grad_norm": 0.11504651203185559, + "learning_rate": 1.3511225618389136e-06, + "loss": 0.15417861938476562, + "step": 158495 + }, + { + "epoch": 1.370502632921462, + "grad_norm": 0.06736998763651043, + "learning_rate": 1.3509523647184723e-06, + "loss": 0.03147315979003906, + "step": 158500 + }, + { + "epoch": 1.3705458664430052, + "grad_norm": 7.375422946844913, + "learning_rate": 1.3507821752032929e-06, + "loss": 0.11700286865234374, + "step": 158505 + }, + { + "epoch": 1.3705890999645485, + "grad_norm": 0.45589256948985324, + "learning_rate": 1.3506119932941604e-06, + "loss": 0.11722221374511718, + "step": 158510 + }, + { + "epoch": 1.3706323334860917, + "grad_norm": 0.10110399510202336, + "learning_rate": 1.3504418189918596e-06, + "loss": 0.033869171142578126, + "step": 158515 + }, + { + "epoch": 1.370675567007635, + "grad_norm": 1.8126381761122838, + "learning_rate": 1.3502716522971755e-06, + "loss": 0.03681106567382812, + "step": 158520 + }, + { + "epoch": 1.3707188005291784, + "grad_norm": 0.1825459908531586, + "learning_rate": 1.3501014932108917e-06, + "loss": 0.014183425903320312, + "step": 158525 + }, + { + "epoch": 1.3707620340507216, + "grad_norm": 1.2752995016232234, + "learning_rate": 1.349931341733795e-06, + "loss": 0.0828826904296875, + "step": 158530 + }, + { + "epoch": 1.3708052675722648, + "grad_norm": 24.978422442318582, + "learning_rate": 1.3497611978666682e-06, + "loss": 0.13024444580078126, + "step": 158535 + }, + { + "epoch": 1.370848501093808, + "grad_norm": 0.22297159822247606, + "learning_rate": 1.3495910616102979e-06, + "loss": 0.13276138305664062, + "step": 158540 + }, + { + "epoch": 1.3708917346153513, + "grad_norm": 0.5843989919429242, + "learning_rate": 1.3494209329654677e-06, + "loss": 0.027513885498046876, + "step": 158545 + }, + { + "epoch": 1.3709349681368947, + "grad_norm": 0.4506622902069121, + "learning_rate": 1.3492508119329624e-06, + "loss": 0.021868133544921876, + "step": 158550 + }, + { + "epoch": 1.370978201658438, + "grad_norm": 5.162876617569043, + "learning_rate": 1.3490806985135656e-06, + "loss": 0.15288925170898438, + "step": 158555 + }, + { + "epoch": 1.3710214351799812, + "grad_norm": 0.13510846712786, + "learning_rate": 1.3489105927080636e-06, + "loss": 0.0591583251953125, + "step": 158560 + }, + { + "epoch": 1.3710646687015244, + "grad_norm": 0.3100971582550239, + "learning_rate": 1.34874049451724e-06, + "loss": 0.28934173583984374, + "step": 158565 + }, + { + "epoch": 1.3711079022230677, + "grad_norm": 6.715211234040585, + "learning_rate": 1.3485704039418799e-06, + "loss": 0.17873930931091309, + "step": 158570 + }, + { + "epoch": 1.371151135744611, + "grad_norm": 8.403828037838478, + "learning_rate": 1.3484003209827664e-06, + "loss": 0.05074920654296875, + "step": 158575 + }, + { + "epoch": 1.3711943692661541, + "grad_norm": 5.8834962682300915, + "learning_rate": 1.3482302456406851e-06, + "loss": 0.20370063781738282, + "step": 158580 + }, + { + "epoch": 1.3712376027876974, + "grad_norm": 4.335296865013212, + "learning_rate": 1.3480601779164188e-06, + "loss": 0.06322784423828125, + "step": 158585 + }, + { + "epoch": 1.3712808363092408, + "grad_norm": 15.142501938960242, + "learning_rate": 1.347890117810754e-06, + "loss": 0.03762054443359375, + "step": 158590 + }, + { + "epoch": 1.371324069830784, + "grad_norm": 3.0537112495534484, + "learning_rate": 1.3477200653244732e-06, + "loss": 0.12778472900390625, + "step": 158595 + }, + { + "epoch": 1.3713673033523273, + "grad_norm": 11.548337896483964, + "learning_rate": 1.3475500204583621e-06, + "loss": 0.28623046875, + "step": 158600 + }, + { + "epoch": 1.3714105368738705, + "grad_norm": 2.001101652340949, + "learning_rate": 1.3473799832132045e-06, + "loss": 0.11125965118408203, + "step": 158605 + }, + { + "epoch": 1.3714537703954137, + "grad_norm": 6.472302117330893, + "learning_rate": 1.3472099535897844e-06, + "loss": 0.2211151123046875, + "step": 158610 + }, + { + "epoch": 1.3714970039169572, + "grad_norm": 0.013507602712238023, + "learning_rate": 1.347039931588886e-06, + "loss": 0.025943756103515625, + "step": 158615 + }, + { + "epoch": 1.3715402374385004, + "grad_norm": 0.14400205728437127, + "learning_rate": 1.3468699172112922e-06, + "loss": 0.0151275634765625, + "step": 158620 + }, + { + "epoch": 1.3715834709600436, + "grad_norm": 0.2079295344419875, + "learning_rate": 1.3466999104577894e-06, + "loss": 0.048462295532226564, + "step": 158625 + }, + { + "epoch": 1.3716267044815869, + "grad_norm": 8.834515235877813, + "learning_rate": 1.3465299113291605e-06, + "loss": 0.04740333557128906, + "step": 158630 + }, + { + "epoch": 1.37166993800313, + "grad_norm": 0.25913380767732713, + "learning_rate": 1.3463599198261895e-06, + "loss": 0.17261962890625, + "step": 158635 + }, + { + "epoch": 1.3717131715246733, + "grad_norm": 0.40279148455760666, + "learning_rate": 1.3461899359496605e-06, + "loss": 0.3175617218017578, + "step": 158640 + }, + { + "epoch": 1.3717564050462165, + "grad_norm": 0.7049150715934821, + "learning_rate": 1.3460199597003565e-06, + "loss": 0.08502578735351562, + "step": 158645 + }, + { + "epoch": 1.3717996385677598, + "grad_norm": 1.788740017398703, + "learning_rate": 1.345849991079063e-06, + "loss": 0.04019355773925781, + "step": 158650 + }, + { + "epoch": 1.3718428720893032, + "grad_norm": 1.2555131427739277, + "learning_rate": 1.3456800300865624e-06, + "loss": 0.1040557861328125, + "step": 158655 + }, + { + "epoch": 1.3718861056108465, + "grad_norm": 0.8492320188239337, + "learning_rate": 1.3455100767236401e-06, + "loss": 0.2310558319091797, + "step": 158660 + }, + { + "epoch": 1.3719293391323897, + "grad_norm": 1.0144131550724593, + "learning_rate": 1.3453401309910793e-06, + "loss": 0.005317115783691406, + "step": 158665 + }, + { + "epoch": 1.371972572653933, + "grad_norm": 2.853272714692328, + "learning_rate": 1.3451701928896638e-06, + "loss": 0.07915496826171875, + "step": 158670 + }, + { + "epoch": 1.3720158061754761, + "grad_norm": 5.65331370720894, + "learning_rate": 1.345000262420177e-06, + "loss": 0.05120086669921875, + "step": 158675 + }, + { + "epoch": 1.3720590396970196, + "grad_norm": 16.831360244323868, + "learning_rate": 1.3448303395834014e-06, + "loss": 0.024574089050292968, + "step": 158680 + }, + { + "epoch": 1.3721022732185628, + "grad_norm": 0.22794612692485217, + "learning_rate": 1.3446604243801236e-06, + "loss": 0.02763824462890625, + "step": 158685 + }, + { + "epoch": 1.372145506740106, + "grad_norm": 6.43607374266512, + "learning_rate": 1.3444905168111254e-06, + "loss": 0.025228118896484374, + "step": 158690 + }, + { + "epoch": 1.3721887402616493, + "grad_norm": 2.8490877757856374, + "learning_rate": 1.3443206168771907e-06, + "loss": 0.19144096374511718, + "step": 158695 + }, + { + "epoch": 1.3722319737831925, + "grad_norm": 2.5768695950641725, + "learning_rate": 1.3441507245791028e-06, + "loss": 0.07293663024902344, + "step": 158700 + }, + { + "epoch": 1.3722752073047357, + "grad_norm": 9.830130050106611, + "learning_rate": 1.3439808399176446e-06, + "loss": 0.01617889404296875, + "step": 158705 + }, + { + "epoch": 1.372318440826279, + "grad_norm": 3.587346122742665, + "learning_rate": 1.3438109628936015e-06, + "loss": 0.04973907470703125, + "step": 158710 + }, + { + "epoch": 1.3723616743478222, + "grad_norm": 0.9579892032985998, + "learning_rate": 1.3436410935077546e-06, + "loss": 0.03708343505859375, + "step": 158715 + }, + { + "epoch": 1.3724049078693656, + "grad_norm": 9.697128975234934, + "learning_rate": 1.34347123176089e-06, + "loss": 0.026261520385742188, + "step": 158720 + }, + { + "epoch": 1.3724481413909089, + "grad_norm": 10.945906360627538, + "learning_rate": 1.3433013776537893e-06, + "loss": 0.08042774200439454, + "step": 158725 + }, + { + "epoch": 1.372491374912452, + "grad_norm": 0.20060728725305818, + "learning_rate": 1.3431315311872369e-06, + "loss": 0.06698417663574219, + "step": 158730 + }, + { + "epoch": 1.3725346084339953, + "grad_norm": 2.7419246040073557, + "learning_rate": 1.342961692362015e-06, + "loss": 0.09625320434570313, + "step": 158735 + }, + { + "epoch": 1.3725778419555388, + "grad_norm": 4.365901611589923, + "learning_rate": 1.3427918611789072e-06, + "loss": 0.094342041015625, + "step": 158740 + }, + { + "epoch": 1.372621075477082, + "grad_norm": 1.3236103197741722, + "learning_rate": 1.3426220376386964e-06, + "loss": 0.02409210205078125, + "step": 158745 + }, + { + "epoch": 1.3726643089986252, + "grad_norm": 0.1715633873894811, + "learning_rate": 1.342452221742167e-06, + "loss": 0.024899673461914063, + "step": 158750 + }, + { + "epoch": 1.3727075425201685, + "grad_norm": 10.987990553580984, + "learning_rate": 1.3422824134901014e-06, + "loss": 0.0282867431640625, + "step": 158755 + }, + { + "epoch": 1.3727507760417117, + "grad_norm": 0.27853387821944375, + "learning_rate": 1.342112612883282e-06, + "loss": 0.042516326904296874, + "step": 158760 + }, + { + "epoch": 1.372794009563255, + "grad_norm": 0.9360614485090797, + "learning_rate": 1.3419428199224936e-06, + "loss": 0.05490875244140625, + "step": 158765 + }, + { + "epoch": 1.3728372430847982, + "grad_norm": 11.440038685293581, + "learning_rate": 1.3417730346085187e-06, + "loss": 0.03519706726074219, + "step": 158770 + }, + { + "epoch": 1.3728804766063414, + "grad_norm": 0.47815877946563506, + "learning_rate": 1.3416032569421399e-06, + "loss": 0.03696746826171875, + "step": 158775 + }, + { + "epoch": 1.3729237101278848, + "grad_norm": 55.649676832250314, + "learning_rate": 1.341433486924139e-06, + "loss": 0.20856170654296874, + "step": 158780 + }, + { + "epoch": 1.372966943649428, + "grad_norm": 10.372238708875972, + "learning_rate": 1.3412637245553018e-06, + "loss": 0.061248397827148436, + "step": 158785 + }, + { + "epoch": 1.3730101771709713, + "grad_norm": 5.292358237652709, + "learning_rate": 1.3410939698364095e-06, + "loss": 0.022276687622070312, + "step": 158790 + }, + { + "epoch": 1.3730534106925145, + "grad_norm": 1.7860258414098882, + "learning_rate": 1.3409242227682455e-06, + "loss": 0.04850616455078125, + "step": 158795 + }, + { + "epoch": 1.3730966442140577, + "grad_norm": 0.7362590208899485, + "learning_rate": 1.3407544833515918e-06, + "loss": 0.0405792236328125, + "step": 158800 + }, + { + "epoch": 1.3731398777356012, + "grad_norm": 0.189210668570189, + "learning_rate": 1.3405847515872325e-06, + "loss": 0.060986328125, + "step": 158805 + }, + { + "epoch": 1.3731831112571444, + "grad_norm": 0.20779324598760812, + "learning_rate": 1.3404150274759483e-06, + "loss": 0.1712890625, + "step": 158810 + }, + { + "epoch": 1.3732263447786877, + "grad_norm": 9.615381749069051, + "learning_rate": 1.3402453110185242e-06, + "loss": 0.08601722717285157, + "step": 158815 + }, + { + "epoch": 1.3732695783002309, + "grad_norm": 0.20131668869474958, + "learning_rate": 1.3400756022157415e-06, + "loss": 0.03822784423828125, + "step": 158820 + }, + { + "epoch": 1.3733128118217741, + "grad_norm": 74.3731059362338, + "learning_rate": 1.339905901068384e-06, + "loss": 0.15722427368164063, + "step": 158825 + }, + { + "epoch": 1.3733560453433173, + "grad_norm": 7.452641662487066, + "learning_rate": 1.3397362075772341e-06, + "loss": 0.0572418212890625, + "step": 158830 + }, + { + "epoch": 1.3733992788648606, + "grad_norm": 0.35419249561381927, + "learning_rate": 1.3395665217430741e-06, + "loss": 0.08096237182617187, + "step": 158835 + }, + { + "epoch": 1.3734425123864038, + "grad_norm": 0.9839039313662745, + "learning_rate": 1.3393968435666853e-06, + "loss": 0.033667373657226565, + "step": 158840 + }, + { + "epoch": 1.3734857459079473, + "grad_norm": 0.42177459618269647, + "learning_rate": 1.3392271730488529e-06, + "loss": 0.06389274597167968, + "step": 158845 + }, + { + "epoch": 1.3735289794294905, + "grad_norm": 8.799557835653143, + "learning_rate": 1.339057510190358e-06, + "loss": 0.10272140502929687, + "step": 158850 + }, + { + "epoch": 1.3735722129510337, + "grad_norm": 0.297750621793198, + "learning_rate": 1.338887854991983e-06, + "loss": 0.04362640380859375, + "step": 158855 + }, + { + "epoch": 1.373615446472577, + "grad_norm": 52.48616107554594, + "learning_rate": 1.3387182074545103e-06, + "loss": 0.5010047912597656, + "step": 158860 + }, + { + "epoch": 1.3736586799941202, + "grad_norm": 2.3954952853970872, + "learning_rate": 1.3385485675787226e-06, + "loss": 0.03415184020996094, + "step": 158865 + }, + { + "epoch": 1.3737019135156636, + "grad_norm": 11.140567526840865, + "learning_rate": 1.3383789353654008e-06, + "loss": 0.10679130554199219, + "step": 158870 + }, + { + "epoch": 1.3737451470372068, + "grad_norm": 13.534121068776187, + "learning_rate": 1.3382093108153298e-06, + "loss": 0.0895904541015625, + "step": 158875 + }, + { + "epoch": 1.37378838055875, + "grad_norm": 3.6884392059116253, + "learning_rate": 1.3380396939292896e-06, + "loss": 0.16731338500976561, + "step": 158880 + }, + { + "epoch": 1.3738316140802933, + "grad_norm": 0.502232515550214, + "learning_rate": 1.3378700847080645e-06, + "loss": 0.012085723876953124, + "step": 158885 + }, + { + "epoch": 1.3738748476018365, + "grad_norm": 15.046546843517994, + "learning_rate": 1.3377004831524357e-06, + "loss": 0.031854248046875, + "step": 158890 + }, + { + "epoch": 1.3739180811233798, + "grad_norm": 0.6617512408848578, + "learning_rate": 1.3375308892631852e-06, + "loss": 0.05250244140625, + "step": 158895 + }, + { + "epoch": 1.373961314644923, + "grad_norm": 1.840626236919761, + "learning_rate": 1.3373613030410957e-06, + "loss": 0.11068344116210938, + "step": 158900 + }, + { + "epoch": 1.3740045481664662, + "grad_norm": 5.236030825491459, + "learning_rate": 1.3371917244869477e-06, + "loss": 0.03101348876953125, + "step": 158905 + }, + { + "epoch": 1.3740477816880097, + "grad_norm": 11.403538144925326, + "learning_rate": 1.3370221536015256e-06, + "loss": 0.0266265869140625, + "step": 158910 + }, + { + "epoch": 1.374091015209553, + "grad_norm": 0.7777275031017802, + "learning_rate": 1.3368525903856103e-06, + "loss": 0.02562408447265625, + "step": 158915 + }, + { + "epoch": 1.3741342487310961, + "grad_norm": 1.3948417975758682, + "learning_rate": 1.3366830348399843e-06, + "loss": 0.17923736572265625, + "step": 158920 + }, + { + "epoch": 1.3741774822526394, + "grad_norm": 4.509269079832437, + "learning_rate": 1.3365134869654285e-06, + "loss": 0.06729278564453126, + "step": 158925 + }, + { + "epoch": 1.3742207157741826, + "grad_norm": 6.114220955153033, + "learning_rate": 1.336343946762725e-06, + "loss": 0.038623809814453125, + "step": 158930 + }, + { + "epoch": 1.374263949295726, + "grad_norm": 1.1748248039499496, + "learning_rate": 1.336174414232656e-06, + "loss": 0.0488800048828125, + "step": 158935 + }, + { + "epoch": 1.3743071828172693, + "grad_norm": 9.042421112870983, + "learning_rate": 1.3360048893760045e-06, + "loss": 0.04809150695800781, + "step": 158940 + }, + { + "epoch": 1.3743504163388125, + "grad_norm": 4.782597364763274, + "learning_rate": 1.3358353721935517e-06, + "loss": 0.07999916076660156, + "step": 158945 + }, + { + "epoch": 1.3743936498603557, + "grad_norm": 0.09534940664427233, + "learning_rate": 1.335665862686079e-06, + "loss": 0.017885208129882812, + "step": 158950 + }, + { + "epoch": 1.374436883381899, + "grad_norm": 49.84563042991477, + "learning_rate": 1.3354963608543684e-06, + "loss": 0.18864669799804687, + "step": 158955 + }, + { + "epoch": 1.3744801169034422, + "grad_norm": 11.205933177937032, + "learning_rate": 1.3353268666992011e-06, + "loss": 0.040593719482421874, + "step": 158960 + }, + { + "epoch": 1.3745233504249854, + "grad_norm": 7.206630512822025, + "learning_rate": 1.3351573802213595e-06, + "loss": 0.03863525390625, + "step": 158965 + }, + { + "epoch": 1.3745665839465289, + "grad_norm": 1.9718807235834976, + "learning_rate": 1.3349879014216242e-06, + "loss": 0.011049652099609375, + "step": 158970 + }, + { + "epoch": 1.374609817468072, + "grad_norm": 5.927232288790647, + "learning_rate": 1.334818430300778e-06, + "loss": 0.09609527587890625, + "step": 158975 + }, + { + "epoch": 1.3746530509896153, + "grad_norm": 16.142588879930152, + "learning_rate": 1.3346489668596023e-06, + "loss": 0.08574771881103516, + "step": 158980 + }, + { + "epoch": 1.3746962845111586, + "grad_norm": 1.2576756781245895, + "learning_rate": 1.3344795110988775e-06, + "loss": 0.13336257934570311, + "step": 158985 + }, + { + "epoch": 1.3747395180327018, + "grad_norm": 0.9414962488048592, + "learning_rate": 1.3343100630193872e-06, + "loss": 0.13428726196289062, + "step": 158990 + }, + { + "epoch": 1.3747827515542452, + "grad_norm": 14.64356167355408, + "learning_rate": 1.3341406226219113e-06, + "loss": 0.1303802490234375, + "step": 158995 + }, + { + "epoch": 1.3748259850757885, + "grad_norm": 1.1670107948425081, + "learning_rate": 1.3339711899072308e-06, + "loss": 0.052976226806640624, + "step": 159000 + }, + { + "epoch": 1.3748692185973317, + "grad_norm": 1.7801515002286765, + "learning_rate": 1.3338017648761293e-06, + "loss": 0.025783538818359375, + "step": 159005 + }, + { + "epoch": 1.374912452118875, + "grad_norm": 10.585144688930503, + "learning_rate": 1.3336323475293864e-06, + "loss": 0.14262161254882813, + "step": 159010 + }, + { + "epoch": 1.3749556856404181, + "grad_norm": 7.969084403311556, + "learning_rate": 1.3334629378677841e-06, + "loss": 0.1010467529296875, + "step": 159015 + }, + { + "epoch": 1.3749989191619614, + "grad_norm": 0.9034184175681564, + "learning_rate": 1.3332935358921035e-06, + "loss": 0.3101973533630371, + "step": 159020 + }, + { + "epoch": 1.3750421526835046, + "grad_norm": 0.6093397024555881, + "learning_rate": 1.3331241416031256e-06, + "loss": 0.010878372192382812, + "step": 159025 + }, + { + "epoch": 1.3750853862050478, + "grad_norm": 13.371300786418043, + "learning_rate": 1.3329547550016308e-06, + "loss": 0.04014606475830078, + "step": 159030 + }, + { + "epoch": 1.3751286197265913, + "grad_norm": 5.871003433762854, + "learning_rate": 1.3327853760884028e-06, + "loss": 0.104388427734375, + "step": 159035 + }, + { + "epoch": 1.3751718532481345, + "grad_norm": 7.677716431937186, + "learning_rate": 1.3326160048642211e-06, + "loss": 0.03751716613769531, + "step": 159040 + }, + { + "epoch": 1.3752150867696777, + "grad_norm": 6.340339213654821, + "learning_rate": 1.3324466413298658e-06, + "loss": 0.029974365234375, + "step": 159045 + }, + { + "epoch": 1.375258320291221, + "grad_norm": 1.1733944620986916, + "learning_rate": 1.3322772854861207e-06, + "loss": 0.1832275390625, + "step": 159050 + }, + { + "epoch": 1.3753015538127642, + "grad_norm": 43.65312264207127, + "learning_rate": 1.332107937333765e-06, + "loss": 0.13847732543945312, + "step": 159055 + }, + { + "epoch": 1.3753447873343077, + "grad_norm": 52.26807303884481, + "learning_rate": 1.3319385968735804e-06, + "loss": 0.15625457763671874, + "step": 159060 + }, + { + "epoch": 1.3753880208558509, + "grad_norm": 0.24973085755518182, + "learning_rate": 1.3317692641063465e-06, + "loss": 0.030457305908203124, + "step": 159065 + }, + { + "epoch": 1.375431254377394, + "grad_norm": 0.8384353054015394, + "learning_rate": 1.3315999390328462e-06, + "loss": 0.00804595947265625, + "step": 159070 + }, + { + "epoch": 1.3754744878989373, + "grad_norm": 0.24111459621447812, + "learning_rate": 1.33143062165386e-06, + "loss": 0.09496965408325195, + "step": 159075 + }, + { + "epoch": 1.3755177214204806, + "grad_norm": 6.9470600401626, + "learning_rate": 1.3312613119701678e-06, + "loss": 0.04040260314941406, + "step": 159080 + }, + { + "epoch": 1.3755609549420238, + "grad_norm": 3.4812194174241373, + "learning_rate": 1.3310920099825514e-06, + "loss": 0.06007881164550781, + "step": 159085 + }, + { + "epoch": 1.375604188463567, + "grad_norm": 0.18428530389632447, + "learning_rate": 1.3309227156917908e-06, + "loss": 0.010599327087402344, + "step": 159090 + }, + { + "epoch": 1.3756474219851103, + "grad_norm": 0.15728894336699795, + "learning_rate": 1.3307534290986664e-06, + "loss": 0.1353456497192383, + "step": 159095 + }, + { + "epoch": 1.3756906555066537, + "grad_norm": 2.464696074065901, + "learning_rate": 1.3305841502039608e-06, + "loss": 0.024573898315429686, + "step": 159100 + }, + { + "epoch": 1.375733889028197, + "grad_norm": 0.06832377686173235, + "learning_rate": 1.3304148790084524e-06, + "loss": 0.016043853759765626, + "step": 159105 + }, + { + "epoch": 1.3757771225497402, + "grad_norm": 1.8135102706978672, + "learning_rate": 1.330245615512924e-06, + "loss": 0.06315765380859376, + "step": 159110 + }, + { + "epoch": 1.3758203560712834, + "grad_norm": 4.346023557707889, + "learning_rate": 1.3300763597181553e-06, + "loss": 0.030278396606445313, + "step": 159115 + }, + { + "epoch": 1.3758635895928266, + "grad_norm": 0.11720872567083175, + "learning_rate": 1.329907111624927e-06, + "loss": 0.0417724609375, + "step": 159120 + }, + { + "epoch": 1.37590682311437, + "grad_norm": 4.995936164401895, + "learning_rate": 1.3297378712340195e-06, + "loss": 0.02154541015625, + "step": 159125 + }, + { + "epoch": 1.3759500566359133, + "grad_norm": 0.16431508098462622, + "learning_rate": 1.3295686385462123e-06, + "loss": 0.010225296020507812, + "step": 159130 + }, + { + "epoch": 1.3759932901574565, + "grad_norm": 0.49692763889266234, + "learning_rate": 1.329399413562288e-06, + "loss": 0.07579421997070312, + "step": 159135 + }, + { + "epoch": 1.3760365236789998, + "grad_norm": 19.494995295217876, + "learning_rate": 1.3292301962830259e-06, + "loss": 0.18448333740234374, + "step": 159140 + }, + { + "epoch": 1.376079757200543, + "grad_norm": 5.727832239125712, + "learning_rate": 1.3290609867092065e-06, + "loss": 0.052064132690429685, + "step": 159145 + }, + { + "epoch": 1.3761229907220862, + "grad_norm": 5.7038081915242715, + "learning_rate": 1.3288917848416104e-06, + "loss": 0.034454345703125, + "step": 159150 + }, + { + "epoch": 1.3761662242436294, + "grad_norm": 13.887524895015567, + "learning_rate": 1.3287225906810165e-06, + "loss": 0.0631103515625, + "step": 159155 + }, + { + "epoch": 1.3762094577651727, + "grad_norm": 0.5702461533353815, + "learning_rate": 1.3285534042282062e-06, + "loss": 0.026395416259765624, + "step": 159160 + }, + { + "epoch": 1.3762526912867161, + "grad_norm": 6.212830683730001, + "learning_rate": 1.3283842254839613e-06, + "loss": 0.09412097930908203, + "step": 159165 + }, + { + "epoch": 1.3762959248082594, + "grad_norm": 0.06058683903604188, + "learning_rate": 1.3282150544490603e-06, + "loss": 0.007506370544433594, + "step": 159170 + }, + { + "epoch": 1.3763391583298026, + "grad_norm": 0.9966415642123229, + "learning_rate": 1.328045891124284e-06, + "loss": 0.02718982696533203, + "step": 159175 + }, + { + "epoch": 1.3763823918513458, + "grad_norm": 0.40395217505475217, + "learning_rate": 1.3278767355104121e-06, + "loss": 0.03544807434082031, + "step": 159180 + }, + { + "epoch": 1.3764256253728893, + "grad_norm": 0.898169758649437, + "learning_rate": 1.327707587608225e-06, + "loss": 0.020726776123046874, + "step": 159185 + }, + { + "epoch": 1.3764688588944325, + "grad_norm": 6.657036039562994, + "learning_rate": 1.3275384474185018e-06, + "loss": 0.07516288757324219, + "step": 159190 + }, + { + "epoch": 1.3765120924159757, + "grad_norm": 1.6702779305585007, + "learning_rate": 1.3273693149420246e-06, + "loss": 0.02306709289550781, + "step": 159195 + }, + { + "epoch": 1.376555325937519, + "grad_norm": 13.213793979337142, + "learning_rate": 1.327200190179572e-06, + "loss": 0.13285751342773439, + "step": 159200 + }, + { + "epoch": 1.3765985594590622, + "grad_norm": 2.371494072817074, + "learning_rate": 1.3270310731319247e-06, + "loss": 0.1159637451171875, + "step": 159205 + }, + { + "epoch": 1.3766417929806054, + "grad_norm": 3.804152606385273, + "learning_rate": 1.326861963799861e-06, + "loss": 0.017165756225585936, + "step": 159210 + }, + { + "epoch": 1.3766850265021486, + "grad_norm": 14.142669761409172, + "learning_rate": 1.3266928621841635e-06, + "loss": 0.073553466796875, + "step": 159215 + }, + { + "epoch": 1.3767282600236919, + "grad_norm": 6.877087880165153, + "learning_rate": 1.3265237682856101e-06, + "loss": 0.03160076141357422, + "step": 159220 + }, + { + "epoch": 1.3767714935452353, + "grad_norm": 2.618620737368354, + "learning_rate": 1.3263546821049803e-06, + "loss": 0.032049560546875, + "step": 159225 + }, + { + "epoch": 1.3768147270667785, + "grad_norm": 7.997962593873688, + "learning_rate": 1.326185603643056e-06, + "loss": 0.14639739990234374, + "step": 159230 + }, + { + "epoch": 1.3768579605883218, + "grad_norm": 1.9505776832853008, + "learning_rate": 1.3260165329006158e-06, + "loss": 0.045438194274902345, + "step": 159235 + }, + { + "epoch": 1.376901194109865, + "grad_norm": 1.2593165859950366, + "learning_rate": 1.3258474698784397e-06, + "loss": 0.02515544891357422, + "step": 159240 + }, + { + "epoch": 1.3769444276314082, + "grad_norm": 15.67909012245382, + "learning_rate": 1.3256784145773066e-06, + "loss": 0.20590057373046874, + "step": 159245 + }, + { + "epoch": 1.3769876611529517, + "grad_norm": 4.043423519071135, + "learning_rate": 1.3255093669979968e-06, + "loss": 0.1481304168701172, + "step": 159250 + }, + { + "epoch": 1.377030894674495, + "grad_norm": 14.40304749708575, + "learning_rate": 1.325340327141289e-06, + "loss": 0.10048370361328125, + "step": 159255 + }, + { + "epoch": 1.3770741281960381, + "grad_norm": 6.406796771404762, + "learning_rate": 1.3251712950079646e-06, + "loss": 0.0717193603515625, + "step": 159260 + }, + { + "epoch": 1.3771173617175814, + "grad_norm": 4.452538791017032, + "learning_rate": 1.3250022705988023e-06, + "loss": 0.03527374267578125, + "step": 159265 + }, + { + "epoch": 1.3771605952391246, + "grad_norm": 0.7652236124446372, + "learning_rate": 1.3248332539145803e-06, + "loss": 0.09379768371582031, + "step": 159270 + }, + { + "epoch": 1.3772038287606678, + "grad_norm": 3.1494329680681794, + "learning_rate": 1.3246642449560807e-06, + "loss": 0.0271759033203125, + "step": 159275 + }, + { + "epoch": 1.377247062282211, + "grad_norm": 16.56074238408745, + "learning_rate": 1.3244952437240812e-06, + "loss": 0.16802330017089845, + "step": 159280 + }, + { + "epoch": 1.3772902958037543, + "grad_norm": 14.551905050485813, + "learning_rate": 1.3243262502193607e-06, + "loss": 0.0351654052734375, + "step": 159285 + }, + { + "epoch": 1.3773335293252977, + "grad_norm": 4.541028547514014, + "learning_rate": 1.3241572644427004e-06, + "loss": 0.044959068298339844, + "step": 159290 + }, + { + "epoch": 1.377376762846841, + "grad_norm": 5.714146955256165, + "learning_rate": 1.323988286394879e-06, + "loss": 0.01061553955078125, + "step": 159295 + }, + { + "epoch": 1.3774199963683842, + "grad_norm": 0.11520046417591026, + "learning_rate": 1.3238193160766752e-06, + "loss": 0.18732471466064454, + "step": 159300 + }, + { + "epoch": 1.3774632298899274, + "grad_norm": 19.711135007960877, + "learning_rate": 1.3236503534888686e-06, + "loss": 0.09923973083496093, + "step": 159305 + }, + { + "epoch": 1.3775064634114706, + "grad_norm": 2.2268967703476332, + "learning_rate": 1.3234813986322387e-06, + "loss": 0.12952423095703125, + "step": 159310 + }, + { + "epoch": 1.377549696933014, + "grad_norm": 3.064169650422368, + "learning_rate": 1.3233124515075644e-06, + "loss": 0.04690284729003906, + "step": 159315 + }, + { + "epoch": 1.3775929304545573, + "grad_norm": 0.4503210276028591, + "learning_rate": 1.3231435121156238e-06, + "loss": 0.23010940551757814, + "step": 159320 + }, + { + "epoch": 1.3776361639761006, + "grad_norm": 3.854745232768011, + "learning_rate": 1.3229745804571983e-06, + "loss": 0.074310302734375, + "step": 159325 + }, + { + "epoch": 1.3776793974976438, + "grad_norm": 56.081944722777976, + "learning_rate": 1.3228056565330646e-06, + "loss": 0.162139892578125, + "step": 159330 + }, + { + "epoch": 1.377722631019187, + "grad_norm": 9.299652562498514, + "learning_rate": 1.3226367403440041e-06, + "loss": 0.039983367919921874, + "step": 159335 + }, + { + "epoch": 1.3777658645407302, + "grad_norm": 18.67941965130474, + "learning_rate": 1.3224678318907948e-06, + "loss": 0.27351722717285154, + "step": 159340 + }, + { + "epoch": 1.3778090980622735, + "grad_norm": 2.9268130928672735, + "learning_rate": 1.3222989311742157e-06, + "loss": 0.080145263671875, + "step": 159345 + }, + { + "epoch": 1.3778523315838167, + "grad_norm": 1.036602135707911, + "learning_rate": 1.3221300381950447e-06, + "loss": 0.07437744140625, + "step": 159350 + }, + { + "epoch": 1.3778955651053602, + "grad_norm": 1.3926654154201377, + "learning_rate": 1.3219611529540627e-06, + "loss": 0.028302764892578124, + "step": 159355 + }, + { + "epoch": 1.3779387986269034, + "grad_norm": 20.73632715754977, + "learning_rate": 1.3217922754520473e-06, + "loss": 0.05968780517578125, + "step": 159360 + }, + { + "epoch": 1.3779820321484466, + "grad_norm": 4.701199985387431, + "learning_rate": 1.3216234056897777e-06, + "loss": 0.07747459411621094, + "step": 159365 + }, + { + "epoch": 1.3780252656699898, + "grad_norm": 0.3595110424213008, + "learning_rate": 1.3214545436680328e-06, + "loss": 0.06030731201171875, + "step": 159370 + }, + { + "epoch": 1.378068499191533, + "grad_norm": 1.2437221470702022, + "learning_rate": 1.3212856893875913e-06, + "loss": 0.06543769836425781, + "step": 159375 + }, + { + "epoch": 1.3781117327130765, + "grad_norm": 0.42550116247651976, + "learning_rate": 1.3211168428492308e-06, + "loss": 0.013162994384765625, + "step": 159380 + }, + { + "epoch": 1.3781549662346197, + "grad_norm": 3.982355474070888, + "learning_rate": 1.3209480040537309e-06, + "loss": 0.057661819458007815, + "step": 159385 + }, + { + "epoch": 1.378198199756163, + "grad_norm": 9.631198132894012, + "learning_rate": 1.3207791730018716e-06, + "loss": 0.013869094848632812, + "step": 159390 + }, + { + "epoch": 1.3782414332777062, + "grad_norm": 5.24438420156877, + "learning_rate": 1.3206103496944307e-06, + "loss": 0.04199676513671875, + "step": 159395 + }, + { + "epoch": 1.3782846667992494, + "grad_norm": 2.048400327220306, + "learning_rate": 1.320441534132186e-06, + "loss": 0.033967208862304685, + "step": 159400 + }, + { + "epoch": 1.3783279003207927, + "grad_norm": 7.413139093349057, + "learning_rate": 1.3202727263159167e-06, + "loss": 0.02301025390625, + "step": 159405 + }, + { + "epoch": 1.378371133842336, + "grad_norm": 1.4477015955971138, + "learning_rate": 1.3201039262464012e-06, + "loss": 0.15075130462646485, + "step": 159410 + }, + { + "epoch": 1.3784143673638791, + "grad_norm": 45.37072155789872, + "learning_rate": 1.3199351339244167e-06, + "loss": 0.21898651123046875, + "step": 159415 + }, + { + "epoch": 1.3784576008854226, + "grad_norm": 0.890772026997948, + "learning_rate": 1.3197663493507441e-06, + "loss": 0.028192138671875, + "step": 159420 + }, + { + "epoch": 1.3785008344069658, + "grad_norm": 6.185328775876612, + "learning_rate": 1.3195975725261607e-06, + "loss": 0.04618644714355469, + "step": 159425 + }, + { + "epoch": 1.378544067928509, + "grad_norm": 1.53538574345127, + "learning_rate": 1.3194288034514448e-06, + "loss": 0.169293212890625, + "step": 159430 + }, + { + "epoch": 1.3785873014500523, + "grad_norm": 1.5755143574661252, + "learning_rate": 1.3192600421273735e-06, + "loss": 0.181402587890625, + "step": 159435 + }, + { + "epoch": 1.3786305349715957, + "grad_norm": 12.688438668438536, + "learning_rate": 1.3190912885547278e-06, + "loss": 0.131475830078125, + "step": 159440 + }, + { + "epoch": 1.378673768493139, + "grad_norm": 16.165723589885072, + "learning_rate": 1.3189225427342833e-06, + "loss": 0.07613525390625, + "step": 159445 + }, + { + "epoch": 1.3787170020146822, + "grad_norm": 0.050056215960009455, + "learning_rate": 1.3187538046668207e-06, + "loss": 0.007546043395996094, + "step": 159450 + }, + { + "epoch": 1.3787602355362254, + "grad_norm": 19.82843155740657, + "learning_rate": 1.3185850743531166e-06, + "loss": 0.0894805908203125, + "step": 159455 + }, + { + "epoch": 1.3788034690577686, + "grad_norm": 3.346677106790348, + "learning_rate": 1.3184163517939499e-06, + "loss": 0.05326900482177734, + "step": 159460 + }, + { + "epoch": 1.3788467025793119, + "grad_norm": 0.3145415362241521, + "learning_rate": 1.3182476369900982e-06, + "loss": 0.05095863342285156, + "step": 159465 + }, + { + "epoch": 1.378889936100855, + "grad_norm": 0.7201165711210366, + "learning_rate": 1.3180789299423399e-06, + "loss": 0.015885162353515624, + "step": 159470 + }, + { + "epoch": 1.3789331696223983, + "grad_norm": 7.120089186561511, + "learning_rate": 1.3179102306514532e-06, + "loss": 0.06736831665039063, + "step": 159475 + }, + { + "epoch": 1.3789764031439418, + "grad_norm": 0.4803298351970695, + "learning_rate": 1.3177415391182144e-06, + "loss": 0.01479930877685547, + "step": 159480 + }, + { + "epoch": 1.379019636665485, + "grad_norm": 3.099115171012203, + "learning_rate": 1.317572855343404e-06, + "loss": 0.27622184753417967, + "step": 159485 + }, + { + "epoch": 1.3790628701870282, + "grad_norm": 12.783443077341817, + "learning_rate": 1.3174041793277993e-06, + "loss": 0.1453948974609375, + "step": 159490 + }, + { + "epoch": 1.3791061037085715, + "grad_norm": 0.3560117743884063, + "learning_rate": 1.3172355110721766e-06, + "loss": 0.14096603393554688, + "step": 159495 + }, + { + "epoch": 1.3791493372301147, + "grad_norm": 7.842778566817809, + "learning_rate": 1.317066850577316e-06, + "loss": 0.0432647705078125, + "step": 159500 + }, + { + "epoch": 1.3791925707516581, + "grad_norm": 5.428431628556406, + "learning_rate": 1.3168981978439948e-06, + "loss": 0.048066329956054685, + "step": 159505 + }, + { + "epoch": 1.3792358042732014, + "grad_norm": 3.7230304080896426, + "learning_rate": 1.3167295528729887e-06, + "loss": 0.058446311950683595, + "step": 159510 + }, + { + "epoch": 1.3792790377947446, + "grad_norm": 10.07977259212207, + "learning_rate": 1.3165609156650789e-06, + "loss": 0.0676919937133789, + "step": 159515 + }, + { + "epoch": 1.3793222713162878, + "grad_norm": 3.4098255864416616, + "learning_rate": 1.316392286221041e-06, + "loss": 0.03165931701660156, + "step": 159520 + }, + { + "epoch": 1.379365504837831, + "grad_norm": 4.006823729700269, + "learning_rate": 1.3162236645416533e-06, + "loss": 0.035823822021484375, + "step": 159525 + }, + { + "epoch": 1.3794087383593743, + "grad_norm": 4.212001779033527, + "learning_rate": 1.3160550506276931e-06, + "loss": 0.05942459106445312, + "step": 159530 + }, + { + "epoch": 1.3794519718809175, + "grad_norm": 6.809382549748102, + "learning_rate": 1.3158864444799383e-06, + "loss": 0.044001007080078126, + "step": 159535 + }, + { + "epoch": 1.3794952054024607, + "grad_norm": 1.9505638069931301, + "learning_rate": 1.3157178460991652e-06, + "loss": 0.046779632568359375, + "step": 159540 + }, + { + "epoch": 1.3795384389240042, + "grad_norm": 3.8259800355981195, + "learning_rate": 1.3155492554861535e-06, + "loss": 0.0403656005859375, + "step": 159545 + }, + { + "epoch": 1.3795816724455474, + "grad_norm": 2.299475813908187, + "learning_rate": 1.3153806726416803e-06, + "loss": 0.022064971923828124, + "step": 159550 + }, + { + "epoch": 1.3796249059670906, + "grad_norm": 15.836604182030094, + "learning_rate": 1.315212097566521e-06, + "loss": 0.052191162109375, + "step": 159555 + }, + { + "epoch": 1.3796681394886339, + "grad_norm": 19.25944506945715, + "learning_rate": 1.3150435302614562e-06, + "loss": 0.09075813293457032, + "step": 159560 + }, + { + "epoch": 1.379711373010177, + "grad_norm": 12.763581907328694, + "learning_rate": 1.3148749707272614e-06, + "loss": 0.09380950927734374, + "step": 159565 + }, + { + "epoch": 1.3797546065317206, + "grad_norm": 14.890694326415545, + "learning_rate": 1.3147064189647143e-06, + "loss": 0.15849227905273439, + "step": 159570 + }, + { + "epoch": 1.3797978400532638, + "grad_norm": 1.6481386239275255, + "learning_rate": 1.3145378749745915e-06, + "loss": 0.060783004760742186, + "step": 159575 + }, + { + "epoch": 1.379841073574807, + "grad_norm": 2.4104980000467644, + "learning_rate": 1.314369338757672e-06, + "loss": 0.11544418334960938, + "step": 159580 + }, + { + "epoch": 1.3798843070963502, + "grad_norm": 7.997426764155116, + "learning_rate": 1.314200810314732e-06, + "loss": 0.040020751953125, + "step": 159585 + }, + { + "epoch": 1.3799275406178935, + "grad_norm": 12.3055666960364, + "learning_rate": 1.314032289646549e-06, + "loss": 0.07288131713867188, + "step": 159590 + }, + { + "epoch": 1.3799707741394367, + "grad_norm": 7.039921324261148, + "learning_rate": 1.3138637767539003e-06, + "loss": 0.035977745056152345, + "step": 159595 + }, + { + "epoch": 1.38001400766098, + "grad_norm": 1.7421904956034573, + "learning_rate": 1.3136952716375629e-06, + "loss": 0.07640590667724609, + "step": 159600 + }, + { + "epoch": 1.3800572411825232, + "grad_norm": 7.251611319634603, + "learning_rate": 1.3135267742983127e-06, + "loss": 0.03808441162109375, + "step": 159605 + }, + { + "epoch": 1.3801004747040666, + "grad_norm": 0.47717606257611106, + "learning_rate": 1.3133582847369278e-06, + "loss": 0.1676311492919922, + "step": 159610 + }, + { + "epoch": 1.3801437082256098, + "grad_norm": 10.970608582232487, + "learning_rate": 1.313189802954187e-06, + "loss": 0.09243850708007813, + "step": 159615 + }, + { + "epoch": 1.380186941747153, + "grad_norm": 9.404541816409873, + "learning_rate": 1.3130213289508655e-06, + "loss": 0.067041015625, + "step": 159620 + }, + { + "epoch": 1.3802301752686963, + "grad_norm": 28.2259140761786, + "learning_rate": 1.3128528627277405e-06, + "loss": 0.09124298095703125, + "step": 159625 + }, + { + "epoch": 1.3802734087902395, + "grad_norm": 15.559163942574507, + "learning_rate": 1.3126844042855891e-06, + "loss": 0.0506439208984375, + "step": 159630 + }, + { + "epoch": 1.380316642311783, + "grad_norm": 1.1841984206169682, + "learning_rate": 1.3125159536251869e-06, + "loss": 0.014294910430908202, + "step": 159635 + }, + { + "epoch": 1.3803598758333262, + "grad_norm": 0.5258095470538824, + "learning_rate": 1.3123475107473131e-06, + "loss": 0.0823211669921875, + "step": 159640 + }, + { + "epoch": 1.3804031093548694, + "grad_norm": 29.596123323608573, + "learning_rate": 1.3121790756527438e-06, + "loss": 0.06581954956054688, + "step": 159645 + }, + { + "epoch": 1.3804463428764127, + "grad_norm": 0.2018299934433699, + "learning_rate": 1.312010648342255e-06, + "loss": 0.05464324951171875, + "step": 159650 + }, + { + "epoch": 1.3804895763979559, + "grad_norm": 4.52852363346308, + "learning_rate": 1.3118422288166242e-06, + "loss": 0.02959785461425781, + "step": 159655 + }, + { + "epoch": 1.3805328099194991, + "grad_norm": 0.24532196978179138, + "learning_rate": 1.3116738170766271e-06, + "loss": 0.0096923828125, + "step": 159660 + }, + { + "epoch": 1.3805760434410423, + "grad_norm": 0.34075933959941046, + "learning_rate": 1.3115054131230416e-06, + "loss": 0.08335762023925782, + "step": 159665 + }, + { + "epoch": 1.3806192769625856, + "grad_norm": 0.11329781940059201, + "learning_rate": 1.3113370169566435e-06, + "loss": 0.028769683837890626, + "step": 159670 + }, + { + "epoch": 1.380662510484129, + "grad_norm": 0.06844483326432416, + "learning_rate": 1.3111686285782105e-06, + "loss": 0.1636871337890625, + "step": 159675 + }, + { + "epoch": 1.3807057440056723, + "grad_norm": 8.356842435629947, + "learning_rate": 1.3110002479885185e-06, + "loss": 0.046948814392089845, + "step": 159680 + }, + { + "epoch": 1.3807489775272155, + "grad_norm": 8.510470897289292, + "learning_rate": 1.3108318751883443e-06, + "loss": 0.22559471130371095, + "step": 159685 + }, + { + "epoch": 1.3807922110487587, + "grad_norm": 6.08904565464845, + "learning_rate": 1.3106635101784645e-06, + "loss": 0.0269287109375, + "step": 159690 + }, + { + "epoch": 1.3808354445703022, + "grad_norm": 0.8069758832532916, + "learning_rate": 1.310495152959655e-06, + "loss": 0.06162128448486328, + "step": 159695 + }, + { + "epoch": 1.3808786780918454, + "grad_norm": 0.5953716543823903, + "learning_rate": 1.3103268035326917e-06, + "loss": 0.034704971313476565, + "step": 159700 + }, + { + "epoch": 1.3809219116133886, + "grad_norm": 2.439919936518466, + "learning_rate": 1.3101584618983527e-06, + "loss": 0.05953712463378906, + "step": 159705 + }, + { + "epoch": 1.3809651451349318, + "grad_norm": 4.300488539007833, + "learning_rate": 1.3099901280574135e-06, + "loss": 0.05989151000976563, + "step": 159710 + }, + { + "epoch": 1.381008378656475, + "grad_norm": 8.437143028017273, + "learning_rate": 1.3098218020106508e-06, + "loss": 0.06369476318359375, + "step": 159715 + }, + { + "epoch": 1.3810516121780183, + "grad_norm": 1.0818242321911509, + "learning_rate": 1.3096534837588395e-06, + "loss": 0.15223464965820313, + "step": 159720 + }, + { + "epoch": 1.3810948456995615, + "grad_norm": 1.173456621564503, + "learning_rate": 1.3094851733027578e-06, + "loss": 0.0486541748046875, + "step": 159725 + }, + { + "epoch": 1.3811380792211048, + "grad_norm": 2.8164381730031063, + "learning_rate": 1.3093168706431802e-06, + "loss": 0.09335403442382813, + "step": 159730 + }, + { + "epoch": 1.3811813127426482, + "grad_norm": 0.5483048378840176, + "learning_rate": 1.3091485757808848e-06, + "loss": 0.025816726684570312, + "step": 159735 + }, + { + "epoch": 1.3812245462641914, + "grad_norm": 4.662161313132062, + "learning_rate": 1.3089802887166468e-06, + "loss": 0.09678306579589843, + "step": 159740 + }, + { + "epoch": 1.3812677797857347, + "grad_norm": 1.1938053816668446, + "learning_rate": 1.3088120094512423e-06, + "loss": 0.0221099853515625, + "step": 159745 + }, + { + "epoch": 1.381311013307278, + "grad_norm": 0.7371492507250682, + "learning_rate": 1.3086437379854472e-06, + "loss": 0.07946548461914063, + "step": 159750 + }, + { + "epoch": 1.3813542468288211, + "grad_norm": 0.144553268124329, + "learning_rate": 1.3084754743200383e-06, + "loss": 0.08984298706054687, + "step": 159755 + }, + { + "epoch": 1.3813974803503646, + "grad_norm": 0.2876331358971767, + "learning_rate": 1.3083072184557905e-06, + "loss": 0.13507461547851562, + "step": 159760 + }, + { + "epoch": 1.3814407138719078, + "grad_norm": 22.680597200117084, + "learning_rate": 1.3081389703934793e-06, + "loss": 0.14390640258789061, + "step": 159765 + }, + { + "epoch": 1.381483947393451, + "grad_norm": 2.8673969857357164, + "learning_rate": 1.307970730133883e-06, + "loss": 0.0268341064453125, + "step": 159770 + }, + { + "epoch": 1.3815271809149943, + "grad_norm": 2.2554740758363474, + "learning_rate": 1.307802497677776e-06, + "loss": 0.009313583374023438, + "step": 159775 + }, + { + "epoch": 1.3815704144365375, + "grad_norm": 1.7332736515183875, + "learning_rate": 1.3076342730259333e-06, + "loss": 0.165643310546875, + "step": 159780 + }, + { + "epoch": 1.3816136479580807, + "grad_norm": 0.23483198617148718, + "learning_rate": 1.307466056179133e-06, + "loss": 0.01107025146484375, + "step": 159785 + }, + { + "epoch": 1.381656881479624, + "grad_norm": 11.016372892510335, + "learning_rate": 1.3072978471381495e-06, + "loss": 0.11655502319335938, + "step": 159790 + }, + { + "epoch": 1.3817001150011672, + "grad_norm": 9.773502115446723, + "learning_rate": 1.3071296459037575e-06, + "loss": 0.0565277099609375, + "step": 159795 + }, + { + "epoch": 1.3817433485227106, + "grad_norm": 0.4425589621750117, + "learning_rate": 1.3069614524767356e-06, + "loss": 0.009210586547851562, + "step": 159800 + }, + { + "epoch": 1.3817865820442539, + "grad_norm": 13.286647632355862, + "learning_rate": 1.3067932668578575e-06, + "loss": 0.099578857421875, + "step": 159805 + }, + { + "epoch": 1.381829815565797, + "grad_norm": 3.432540893526164, + "learning_rate": 1.3066250890478993e-06, + "loss": 0.021803665161132812, + "step": 159810 + }, + { + "epoch": 1.3818730490873403, + "grad_norm": 69.35514354007644, + "learning_rate": 1.3064569190476365e-06, + "loss": 0.2120513916015625, + "step": 159815 + }, + { + "epoch": 1.3819162826088836, + "grad_norm": 0.16946767701264956, + "learning_rate": 1.306288756857845e-06, + "loss": 0.11754684448242188, + "step": 159820 + }, + { + "epoch": 1.381959516130427, + "grad_norm": 10.985134865347058, + "learning_rate": 1.3061206024792987e-06, + "loss": 0.04627532958984375, + "step": 159825 + }, + { + "epoch": 1.3820027496519702, + "grad_norm": 8.284725426495681, + "learning_rate": 1.305952455912776e-06, + "loss": 0.04828643798828125, + "step": 159830 + }, + { + "epoch": 1.3820459831735135, + "grad_norm": 0.6307204604474492, + "learning_rate": 1.3057843171590495e-06, + "loss": 0.14177932739257812, + "step": 159835 + }, + { + "epoch": 1.3820892166950567, + "grad_norm": 0.7869906064208414, + "learning_rate": 1.305616186218897e-06, + "loss": 0.14135208129882812, + "step": 159840 + }, + { + "epoch": 1.3821324502166, + "grad_norm": 2.0297595417376217, + "learning_rate": 1.3054480630930934e-06, + "loss": 0.017196273803710936, + "step": 159845 + }, + { + "epoch": 1.3821756837381431, + "grad_norm": 73.31430257563711, + "learning_rate": 1.305279947782413e-06, + "loss": 0.2669075012207031, + "step": 159850 + }, + { + "epoch": 1.3822189172596864, + "grad_norm": 0.714077442834735, + "learning_rate": 1.3051118402876322e-06, + "loss": 0.24109916687011718, + "step": 159855 + }, + { + "epoch": 1.3822621507812296, + "grad_norm": 1.6747610407786298, + "learning_rate": 1.3049437406095245e-06, + "loss": 0.07622318267822266, + "step": 159860 + }, + { + "epoch": 1.382305384302773, + "grad_norm": 1.0597776280247264, + "learning_rate": 1.3047756487488677e-06, + "loss": 0.25160064697265627, + "step": 159865 + }, + { + "epoch": 1.3823486178243163, + "grad_norm": 0.5458807353156306, + "learning_rate": 1.3046075647064357e-06, + "loss": 0.04935741424560547, + "step": 159870 + }, + { + "epoch": 1.3823918513458595, + "grad_norm": 2.739623001197913, + "learning_rate": 1.3044394884830039e-06, + "loss": 0.033827590942382815, + "step": 159875 + }, + { + "epoch": 1.3824350848674027, + "grad_norm": 2.129823132977197, + "learning_rate": 1.3042714200793471e-06, + "loss": 0.06484794616699219, + "step": 159880 + }, + { + "epoch": 1.382478318388946, + "grad_norm": 2.145294671777527, + "learning_rate": 1.3041033594962401e-06, + "loss": 0.0422210693359375, + "step": 159885 + }, + { + "epoch": 1.3825215519104894, + "grad_norm": 1.9222353320600352, + "learning_rate": 1.3039353067344594e-06, + "loss": 0.01450958251953125, + "step": 159890 + }, + { + "epoch": 1.3825647854320327, + "grad_norm": 7.074921037570957, + "learning_rate": 1.3037672617947778e-06, + "loss": 0.056926727294921875, + "step": 159895 + }, + { + "epoch": 1.3826080189535759, + "grad_norm": 15.185936882081608, + "learning_rate": 1.303599224677973e-06, + "loss": 0.024480819702148438, + "step": 159900 + }, + { + "epoch": 1.382651252475119, + "grad_norm": 4.496142890194366, + "learning_rate": 1.303431195384819e-06, + "loss": 0.015772056579589844, + "step": 159905 + }, + { + "epoch": 1.3826944859966623, + "grad_norm": 14.793088773061939, + "learning_rate": 1.30326317391609e-06, + "loss": 0.18913497924804687, + "step": 159910 + }, + { + "epoch": 1.3827377195182056, + "grad_norm": 4.966409823436136, + "learning_rate": 1.3030951602725615e-06, + "loss": 0.049514389038085936, + "step": 159915 + }, + { + "epoch": 1.3827809530397488, + "grad_norm": 0.42586344846848884, + "learning_rate": 1.3029271544550084e-06, + "loss": 0.020489883422851563, + "step": 159920 + }, + { + "epoch": 1.3828241865612922, + "grad_norm": 0.6251355676401887, + "learning_rate": 1.3027591564642036e-06, + "loss": 0.036151885986328125, + "step": 159925 + }, + { + "epoch": 1.3828674200828355, + "grad_norm": 0.03386357889005115, + "learning_rate": 1.3025911663009251e-06, + "loss": 0.008979988098144532, + "step": 159930 + }, + { + "epoch": 1.3829106536043787, + "grad_norm": 0.328971338030499, + "learning_rate": 1.3024231839659461e-06, + "loss": 0.025787353515625, + "step": 159935 + }, + { + "epoch": 1.382953887125922, + "grad_norm": 4.009244344327893, + "learning_rate": 1.3022552094600414e-06, + "loss": 0.02331695556640625, + "step": 159940 + }, + { + "epoch": 1.3829971206474652, + "grad_norm": 1.8111688717362824, + "learning_rate": 1.3020872427839846e-06, + "loss": 0.029900169372558592, + "step": 159945 + }, + { + "epoch": 1.3830403541690086, + "grad_norm": 43.686276261647016, + "learning_rate": 1.3019192839385525e-06, + "loss": 0.5469741821289062, + "step": 159950 + }, + { + "epoch": 1.3830835876905518, + "grad_norm": 2.7505552241910602, + "learning_rate": 1.3017513329245176e-06, + "loss": 0.026381301879882812, + "step": 159955 + }, + { + "epoch": 1.383126821212095, + "grad_norm": 4.08467450216329, + "learning_rate": 1.3015833897426564e-06, + "loss": 0.21733455657958983, + "step": 159960 + }, + { + "epoch": 1.3831700547336383, + "grad_norm": 4.512904796841489, + "learning_rate": 1.3014154543937427e-06, + "loss": 0.22695083618164064, + "step": 159965 + }, + { + "epoch": 1.3832132882551815, + "grad_norm": 0.10203910075324567, + "learning_rate": 1.3012475268785506e-06, + "loss": 0.00409698486328125, + "step": 159970 + }, + { + "epoch": 1.3832565217767248, + "grad_norm": 0.5413809388555905, + "learning_rate": 1.301079607197855e-06, + "loss": 0.05012931823730469, + "step": 159975 + }, + { + "epoch": 1.383299755298268, + "grad_norm": 1.1485615307731951, + "learning_rate": 1.3009116953524302e-06, + "loss": 0.003536224365234375, + "step": 159980 + }, + { + "epoch": 1.3833429888198112, + "grad_norm": 32.530275704290325, + "learning_rate": 1.300743791343049e-06, + "loss": 0.2203968048095703, + "step": 159985 + }, + { + "epoch": 1.3833862223413547, + "grad_norm": 0.6171682909505954, + "learning_rate": 1.3005758951704889e-06, + "loss": 0.019170379638671874, + "step": 159990 + }, + { + "epoch": 1.383429455862898, + "grad_norm": 6.658972955276942, + "learning_rate": 1.3004080068355223e-06, + "loss": 0.1140289306640625, + "step": 159995 + }, + { + "epoch": 1.3834726893844411, + "grad_norm": 0.629194708620014, + "learning_rate": 1.300240126338924e-06, + "loss": 0.1167572021484375, + "step": 160000 + }, + { + "epoch": 1.3834726893844411, + "eval_loss": 0.1266646385192871, + "eval_margin": 0.16131319105625153, + "eval_mean_neg": -0.00310450978577137, + "eval_mean_pos": 0.7200332880020142, + "eval_runtime": 31.1721, + "eval_samples_per_second": 7.41, + "eval_steps_per_second": 3.721, + "step": 160000 + } + ], + "logging_steps": 5, + "max_steps": 231302, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 40000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 38457055641600.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}