{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7712, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012967750824667904, "grad_norm": 1.99446702003479, "learning_rate": 0.0, "loss": 29.972253799438477, "step": 1 }, { "epoch": 0.00025935501649335807, "grad_norm": 1.5365673303604126, "learning_rate": 5e-06, "loss": 28.50031089782715, "step": 2 }, { "epoch": 0.0003890325247400371, "grad_norm": 3.7682950496673584, "learning_rate": 1e-05, "loss": 30.23283576965332, "step": 3 }, { "epoch": 0.0005187100329867161, "grad_norm": 1.192661166191101, "learning_rate": 1.5e-05, "loss": 32.73917770385742, "step": 4 }, { "epoch": 0.0006483875412333952, "grad_norm": 1.8167951107025146, "learning_rate": 2e-05, "loss": 27.364229202270508, "step": 5 }, { "epoch": 0.0007780650494800742, "grad_norm": 1.5789637565612793, "learning_rate": 2.5e-05, "loss": 25.982967376708984, "step": 6 }, { "epoch": 0.0009077425577267532, "grad_norm": 1.1444451808929443, "learning_rate": 3e-05, "loss": 22.610790252685547, "step": 7 }, { "epoch": 0.0010374200659734323, "grad_norm": 1.0524781942367554, "learning_rate": 3.5e-05, "loss": 25.685462951660156, "step": 8 }, { "epoch": 0.0011670975742201113, "grad_norm": 0.842551589012146, "learning_rate": 4e-05, "loss": 23.15970802307129, "step": 9 }, { "epoch": 0.0012967750824667904, "grad_norm": 1.1337637901306152, "learning_rate": 4.5e-05, "loss": 24.807453155517578, "step": 10 }, { "epoch": 0.0014264525907134694, "grad_norm": 0.9530572295188904, "learning_rate": 5e-05, "loss": 21.88750457763672, "step": 11 }, { "epoch": 0.0015561300989601484, "grad_norm": 0.7088567614555359, "learning_rate": 5.500000000000001e-05, "loss": 21.77642059326172, "step": 12 }, { "epoch": 0.0016858076072068275, "grad_norm": 0.7426387071609497, "learning_rate": 6e-05, "loss": 21.867076873779297, "step": 13 }, { "epoch": 0.0018154851154535065, "grad_norm": 0.7576494216918945, "learning_rate": 6.500000000000001e-05, "loss": 22.541257858276367, "step": 14 }, { "epoch": 0.0019451626237001855, "grad_norm": 0.8299188613891602, "learning_rate": 7e-05, "loss": 24.21377944946289, "step": 15 }, { "epoch": 0.0020748401319468646, "grad_norm": 0.8835577368736267, "learning_rate": 7.500000000000001e-05, "loss": 23.714468002319336, "step": 16 }, { "epoch": 0.0022045176401935436, "grad_norm": 0.825509250164032, "learning_rate": 8e-05, "loss": 19.401344299316406, "step": 17 }, { "epoch": 0.0023341951484402226, "grad_norm": 1.2908402681350708, "learning_rate": 8.5e-05, "loss": 22.684490203857422, "step": 18 }, { "epoch": 0.0024638726566869017, "grad_norm": 1.2061680555343628, "learning_rate": 9e-05, "loss": 19.569255828857422, "step": 19 }, { "epoch": 0.0025935501649335807, "grad_norm": 0.8769367337226868, "learning_rate": 9.5e-05, "loss": 20.547962188720703, "step": 20 }, { "epoch": 0.0027232276731802597, "grad_norm": 1.033817172050476, "learning_rate": 0.0001, "loss": 21.69057846069336, "step": 21 }, { "epoch": 0.0028529051814269388, "grad_norm": 0.7497889995574951, "learning_rate": 9.999999582975858e-05, "loss": 18.646129608154297, "step": 22 }, { "epoch": 0.002982582689673618, "grad_norm": 0.7991786003112793, "learning_rate": 9.999998331903505e-05, "loss": 18.2806396484375, "step": 23 }, { "epoch": 0.003112260197920297, "grad_norm": 0.684329628944397, "learning_rate": 9.999996246783146e-05, "loss": 15.906598091125488, "step": 24 }, { "epoch": 0.003241937706166976, "grad_norm": 0.7937395572662354, "learning_rate": 9.99999332761513e-05, "loss": 17.692142486572266, "step": 25 }, { "epoch": 0.003371615214413655, "grad_norm": 0.7443481683731079, "learning_rate": 9.999989574399944e-05, "loss": 20.26650047302246, "step": 26 }, { "epoch": 0.003501292722660334, "grad_norm": 0.7506320476531982, "learning_rate": 9.999984987138213e-05, "loss": 20.076740264892578, "step": 27 }, { "epoch": 0.003630970230907013, "grad_norm": 1.0534642934799194, "learning_rate": 9.999979565830706e-05, "loss": 18.726058959960938, "step": 28 }, { "epoch": 0.003760647739153692, "grad_norm": 0.874366044998169, "learning_rate": 9.999973310478323e-05, "loss": 18.916032791137695, "step": 29 }, { "epoch": 0.003890325247400371, "grad_norm": 0.980228841304779, "learning_rate": 9.999966221082111e-05, "loss": 21.073606491088867, "step": 30 }, { "epoch": 0.00402000275564705, "grad_norm": 0.8392835259437561, "learning_rate": 9.99995829764325e-05, "loss": 18.34318733215332, "step": 31 }, { "epoch": 0.004149680263893729, "grad_norm": 0.7055050134658813, "learning_rate": 9.999949540163062e-05, "loss": 17.08624267578125, "step": 32 }, { "epoch": 0.004279357772140408, "grad_norm": 0.8285980224609375, "learning_rate": 9.999939948643008e-05, "loss": 17.958904266357422, "step": 33 }, { "epoch": 0.004409035280387087, "grad_norm": 0.6524158716201782, "learning_rate": 9.999929523084689e-05, "loss": 17.278413772583008, "step": 34 }, { "epoch": 0.004538712788633766, "grad_norm": 0.8498830199241638, "learning_rate": 9.999918263489843e-05, "loss": 18.814016342163086, "step": 35 }, { "epoch": 0.004668390296880445, "grad_norm": 0.8304724097251892, "learning_rate": 9.999906169860349e-05, "loss": 15.172435760498047, "step": 36 }, { "epoch": 0.004798067805127124, "grad_norm": 1.0757167339324951, "learning_rate": 9.999893242198224e-05, "loss": 24.622419357299805, "step": 37 }, { "epoch": 0.004927745313373803, "grad_norm": 0.8124226927757263, "learning_rate": 9.999879480505626e-05, "loss": 16.60487937927246, "step": 38 }, { "epoch": 0.005057422821620482, "grad_norm": 0.9555235505104065, "learning_rate": 9.999864884784847e-05, "loss": 20.352645874023438, "step": 39 }, { "epoch": 0.005187100329867161, "grad_norm": 0.8045514822006226, "learning_rate": 9.999849455038323e-05, "loss": 16.24432373046875, "step": 40 }, { "epoch": 0.0053167778381138404, "grad_norm": 0.7953528761863708, "learning_rate": 9.999833191268629e-05, "loss": 20.807884216308594, "step": 41 }, { "epoch": 0.0054464553463605195, "grad_norm": 0.9222216010093689, "learning_rate": 9.999816093478477e-05, "loss": 20.426984786987305, "step": 42 }, { "epoch": 0.0055761328546071985, "grad_norm": 1.3482370376586914, "learning_rate": 9.999798161670721e-05, "loss": 20.99248504638672, "step": 43 }, { "epoch": 0.0057058103628538775, "grad_norm": 0.8713453412055969, "learning_rate": 9.999779395848352e-05, "loss": 17.972089767456055, "step": 44 }, { "epoch": 0.005835487871100557, "grad_norm": 0.9604137539863586, "learning_rate": 9.999759796014497e-05, "loss": 13.864036560058594, "step": 45 }, { "epoch": 0.005965165379347236, "grad_norm": 0.932900071144104, "learning_rate": 9.999739362172428e-05, "loss": 17.096282958984375, "step": 46 }, { "epoch": 0.006094842887593915, "grad_norm": 0.6013771295547485, "learning_rate": 9.999718094325553e-05, "loss": 16.2541446685791, "step": 47 }, { "epoch": 0.006224520395840594, "grad_norm": 0.8627391457557678, "learning_rate": 9.99969599247742e-05, "loss": 19.470233917236328, "step": 48 }, { "epoch": 0.006354197904087273, "grad_norm": 1.0893890857696533, "learning_rate": 9.999673056631715e-05, "loss": 14.94808578491211, "step": 49 }, { "epoch": 0.006483875412333952, "grad_norm": 1.2360256910324097, "learning_rate": 9.999649286792266e-05, "loss": 21.195995330810547, "step": 50 }, { "epoch": 0.006613552920580631, "grad_norm": 0.8026022911071777, "learning_rate": 9.999624682963034e-05, "loss": 14.900138854980469, "step": 51 }, { "epoch": 0.00674323042882731, "grad_norm": 0.6285673379898071, "learning_rate": 9.999599245148127e-05, "loss": 14.523721694946289, "step": 52 }, { "epoch": 0.006872907937073989, "grad_norm": 0.8919270634651184, "learning_rate": 9.999572973351787e-05, "loss": 17.731807708740234, "step": 53 }, { "epoch": 0.007002585445320668, "grad_norm": 0.8102715015411377, "learning_rate": 9.999545867578397e-05, "loss": 15.209760665893555, "step": 54 }, { "epoch": 0.007132262953567347, "grad_norm": 0.7593254446983337, "learning_rate": 9.999517927832476e-05, "loss": 17.717327117919922, "step": 55 }, { "epoch": 0.007261940461814026, "grad_norm": 0.70952308177948, "learning_rate": 9.999489154118688e-05, "loss": 14.131778717041016, "step": 56 }, { "epoch": 0.007391617970060705, "grad_norm": 0.7633631229400635, "learning_rate": 9.999459546441831e-05, "loss": 18.560646057128906, "step": 57 }, { "epoch": 0.007521295478307384, "grad_norm": 0.8972079753875732, "learning_rate": 9.999429104806844e-05, "loss": 20.428647994995117, "step": 58 }, { "epoch": 0.007650972986554063, "grad_norm": 0.6677055358886719, "learning_rate": 9.999397829218805e-05, "loss": 17.1019344329834, "step": 59 }, { "epoch": 0.007780650494800742, "grad_norm": 1.2617015838623047, "learning_rate": 9.999365719682932e-05, "loss": 16.433256149291992, "step": 60 }, { "epoch": 0.007910328003047421, "grad_norm": 0.6165164113044739, "learning_rate": 9.999332776204578e-05, "loss": 13.483903884887695, "step": 61 }, { "epoch": 0.0080400055112941, "grad_norm": 0.8209776282310486, "learning_rate": 9.999298998789242e-05, "loss": 15.431573867797852, "step": 62 }, { "epoch": 0.00816968301954078, "grad_norm": 0.7284005284309387, "learning_rate": 9.999264387442558e-05, "loss": 14.9796781539917, "step": 63 }, { "epoch": 0.008299360527787458, "grad_norm": 0.8546894788742065, "learning_rate": 9.999228942170295e-05, "loss": 16.415163040161133, "step": 64 }, { "epoch": 0.008429038036034137, "grad_norm": 0.6746416687965393, "learning_rate": 9.999192662978371e-05, "loss": 17.581092834472656, "step": 65 }, { "epoch": 0.008558715544280816, "grad_norm": 1.030363917350769, "learning_rate": 9.999155549872837e-05, "loss": 16.821130752563477, "step": 66 }, { "epoch": 0.008688393052527495, "grad_norm": 0.7666603922843933, "learning_rate": 9.999117602859883e-05, "loss": 14.857105255126953, "step": 67 }, { "epoch": 0.008818070560774174, "grad_norm": 0.6857046484947205, "learning_rate": 9.999078821945835e-05, "loss": 14.857301712036133, "step": 68 }, { "epoch": 0.008947748069020853, "grad_norm": 0.7946093082427979, "learning_rate": 9.999039207137169e-05, "loss": 14.327031135559082, "step": 69 }, { "epoch": 0.009077425577267532, "grad_norm": 0.7441522479057312, "learning_rate": 9.998998758440487e-05, "loss": 17.4942569732666, "step": 70 }, { "epoch": 0.009207103085514211, "grad_norm": 0.7171614766120911, "learning_rate": 9.99895747586254e-05, "loss": 13.931483268737793, "step": 71 }, { "epoch": 0.00933678059376089, "grad_norm": 1.0579191446304321, "learning_rate": 9.998915359410215e-05, "loss": 21.5108585357666, "step": 72 }, { "epoch": 0.00946645810200757, "grad_norm": 0.6834903359413147, "learning_rate": 9.998872409090534e-05, "loss": 15.178611755371094, "step": 73 }, { "epoch": 0.009596135610254249, "grad_norm": 0.7107231616973877, "learning_rate": 9.998828624910663e-05, "loss": 13.373851776123047, "step": 74 }, { "epoch": 0.009725813118500928, "grad_norm": 0.7681564092636108, "learning_rate": 9.998784006877907e-05, "loss": 12.136377334594727, "step": 75 }, { "epoch": 0.009855490626747607, "grad_norm": 0.944520115852356, "learning_rate": 9.998738554999707e-05, "loss": 16.70394515991211, "step": 76 }, { "epoch": 0.009985168134994286, "grad_norm": 0.6348839998245239, "learning_rate": 9.998692269283644e-05, "loss": 11.289188385009766, "step": 77 }, { "epoch": 0.010114845643240965, "grad_norm": 0.9611713886260986, "learning_rate": 9.998645149737443e-05, "loss": 18.007591247558594, "step": 78 }, { "epoch": 0.010244523151487644, "grad_norm": 0.7476375102996826, "learning_rate": 9.99859719636896e-05, "loss": 17.458255767822266, "step": 79 }, { "epoch": 0.010374200659734323, "grad_norm": 0.8788313865661621, "learning_rate": 9.998548409186195e-05, "loss": 17.938974380493164, "step": 80 }, { "epoch": 0.010503878167981002, "grad_norm": 0.6228886842727661, "learning_rate": 9.998498788197286e-05, "loss": 13.284579277038574, "step": 81 }, { "epoch": 0.010633555676227681, "grad_norm": 0.8922936916351318, "learning_rate": 9.998448333410513e-05, "loss": 17.84210777282715, "step": 82 }, { "epoch": 0.01076323318447436, "grad_norm": 0.731972873210907, "learning_rate": 9.99839704483429e-05, "loss": 16.253339767456055, "step": 83 }, { "epoch": 0.010892910692721039, "grad_norm": 0.9008980393409729, "learning_rate": 9.99834492247717e-05, "loss": 16.514802932739258, "step": 84 }, { "epoch": 0.011022588200967718, "grad_norm": 0.789330244064331, "learning_rate": 9.99829196634785e-05, "loss": 15.773104667663574, "step": 85 }, { "epoch": 0.011152265709214397, "grad_norm": 0.6390179395675659, "learning_rate": 9.998238176455166e-05, "loss": 17.54451560974121, "step": 86 }, { "epoch": 0.011281943217461076, "grad_norm": 0.6974011063575745, "learning_rate": 9.998183552808088e-05, "loss": 12.258065223693848, "step": 87 }, { "epoch": 0.011411620725707755, "grad_norm": 0.7774472832679749, "learning_rate": 9.998128095415727e-05, "loss": 13.619235038757324, "step": 88 }, { "epoch": 0.011541298233954434, "grad_norm": 0.8641868233680725, "learning_rate": 9.998071804287336e-05, "loss": 16.118249893188477, "step": 89 }, { "epoch": 0.011670975742201113, "grad_norm": 0.7703794836997986, "learning_rate": 9.998014679432302e-05, "loss": 16.55685806274414, "step": 90 }, { "epoch": 0.011800653250447792, "grad_norm": 0.6656123399734497, "learning_rate": 9.997956720860158e-05, "loss": 13.488643646240234, "step": 91 }, { "epoch": 0.011930330758694471, "grad_norm": 0.8589026927947998, "learning_rate": 9.997897928580568e-05, "loss": 13.201653480529785, "step": 92 }, { "epoch": 0.01206000826694115, "grad_norm": 0.8647993206977844, "learning_rate": 9.997838302603343e-05, "loss": 16.647052764892578, "step": 93 }, { "epoch": 0.01218968577518783, "grad_norm": 0.8215928077697754, "learning_rate": 9.997777842938426e-05, "loss": 16.17182731628418, "step": 94 }, { "epoch": 0.012319363283434508, "grad_norm": 0.9821729063987732, "learning_rate": 9.997716549595904e-05, "loss": 15.517809867858887, "step": 95 }, { "epoch": 0.012449040791681187, "grad_norm": 0.8612799644470215, "learning_rate": 9.997654422586001e-05, "loss": 17.174152374267578, "step": 96 }, { "epoch": 0.012578718299927866, "grad_norm": 0.9174747467041016, "learning_rate": 9.997591461919079e-05, "loss": 16.287784576416016, "step": 97 }, { "epoch": 0.012708395808174545, "grad_norm": 1.013405442237854, "learning_rate": 9.997527667605642e-05, "loss": 16.26395034790039, "step": 98 }, { "epoch": 0.012838073316421224, "grad_norm": 0.7249537706375122, "learning_rate": 9.997463039656331e-05, "loss": 16.138471603393555, "step": 99 }, { "epoch": 0.012967750824667904, "grad_norm": 0.8283922076225281, "learning_rate": 9.997397578081926e-05, "loss": 14.60390853881836, "step": 100 }, { "epoch": 0.013097428332914583, "grad_norm": 0.8634133338928223, "learning_rate": 9.997331282893348e-05, "loss": 16.648088455200195, "step": 101 }, { "epoch": 0.013227105841161262, "grad_norm": 1.0726207494735718, "learning_rate": 9.997264154101656e-05, "loss": 19.015094757080078, "step": 102 }, { "epoch": 0.01335678334940794, "grad_norm": 0.9459465146064758, "learning_rate": 9.997196191718045e-05, "loss": 16.535240173339844, "step": 103 }, { "epoch": 0.01348646085765462, "grad_norm": 0.8438609838485718, "learning_rate": 9.997127395753853e-05, "loss": 18.138343811035156, "step": 104 }, { "epoch": 0.013616138365901299, "grad_norm": 0.9081044793128967, "learning_rate": 9.997057766220559e-05, "loss": 13.429120063781738, "step": 105 }, { "epoch": 0.013745815874147978, "grad_norm": 0.6254591941833496, "learning_rate": 9.996987303129773e-05, "loss": 12.02120304107666, "step": 106 }, { "epoch": 0.013875493382394657, "grad_norm": 0.7332279682159424, "learning_rate": 9.996916006493252e-05, "loss": 14.581884384155273, "step": 107 }, { "epoch": 0.014005170890641336, "grad_norm": 0.7699545621871948, "learning_rate": 9.996843876322888e-05, "loss": 19.12632942199707, "step": 108 }, { "epoch": 0.014134848398888015, "grad_norm": 0.7483221888542175, "learning_rate": 9.996770912630713e-05, "loss": 16.221357345581055, "step": 109 }, { "epoch": 0.014264525907134694, "grad_norm": 0.8801233172416687, "learning_rate": 9.996697115428898e-05, "loss": 20.58415985107422, "step": 110 }, { "epoch": 0.014394203415381373, "grad_norm": 0.6339449286460876, "learning_rate": 9.996622484729753e-05, "loss": 13.710550308227539, "step": 111 }, { "epoch": 0.014523880923628052, "grad_norm": 0.75882488489151, "learning_rate": 9.996547020545728e-05, "loss": 14.74293041229248, "step": 112 }, { "epoch": 0.014653558431874731, "grad_norm": 0.5900855660438538, "learning_rate": 9.996470722889411e-05, "loss": 14.006826400756836, "step": 113 }, { "epoch": 0.01478323594012141, "grad_norm": 0.6844921112060547, "learning_rate": 9.996393591773526e-05, "loss": 13.710423469543457, "step": 114 }, { "epoch": 0.014912913448368089, "grad_norm": 0.6423506736755371, "learning_rate": 9.996315627210943e-05, "loss": 12.285567283630371, "step": 115 }, { "epoch": 0.015042590956614768, "grad_norm": 0.6166879534721375, "learning_rate": 9.996236829214667e-05, "loss": 14.989514350891113, "step": 116 }, { "epoch": 0.015172268464861447, "grad_norm": 0.5335398316383362, "learning_rate": 9.996157197797842e-05, "loss": 12.199869155883789, "step": 117 }, { "epoch": 0.015301945973108126, "grad_norm": 0.8087159395217896, "learning_rate": 9.99607673297375e-05, "loss": 14.441145896911621, "step": 118 }, { "epoch": 0.015431623481354805, "grad_norm": 0.8633890748023987, "learning_rate": 9.995995434755814e-05, "loss": 16.93353271484375, "step": 119 }, { "epoch": 0.015561300989601484, "grad_norm": 0.7567999958992004, "learning_rate": 9.995913303157596e-05, "loss": 15.338491439819336, "step": 120 }, { "epoch": 0.015690978497848163, "grad_norm": 0.5853908061981201, "learning_rate": 9.995830338192794e-05, "loss": 14.168076515197754, "step": 121 }, { "epoch": 0.015820656006094842, "grad_norm": 0.9012466669082642, "learning_rate": 9.99574653987525e-05, "loss": 15.134222030639648, "step": 122 }, { "epoch": 0.01595033351434152, "grad_norm": 0.8024187684059143, "learning_rate": 9.99566190821894e-05, "loss": 17.508609771728516, "step": 123 }, { "epoch": 0.0160800110225882, "grad_norm": 0.7465009093284607, "learning_rate": 9.995576443237985e-05, "loss": 13.961990356445312, "step": 124 }, { "epoch": 0.01620968853083488, "grad_norm": 0.8868094682693481, "learning_rate": 9.995490144946638e-05, "loss": 14.866785049438477, "step": 125 }, { "epoch": 0.01633936603908156, "grad_norm": 0.7175377011299133, "learning_rate": 9.995403013359297e-05, "loss": 14.846168518066406, "step": 126 }, { "epoch": 0.016469043547328237, "grad_norm": 1.1290738582611084, "learning_rate": 9.995315048490493e-05, "loss": 15.916056632995605, "step": 127 }, { "epoch": 0.016598721055574917, "grad_norm": 0.741549551486969, "learning_rate": 9.995226250354902e-05, "loss": 12.156702995300293, "step": 128 }, { "epoch": 0.016728398563821596, "grad_norm": 0.6835430264472961, "learning_rate": 9.995136618967336e-05, "loss": 15.739537239074707, "step": 129 }, { "epoch": 0.016858076072068275, "grad_norm": 0.6662110090255737, "learning_rate": 9.995046154342745e-05, "loss": 12.12717056274414, "step": 130 }, { "epoch": 0.016987753580314954, "grad_norm": 0.7359044551849365, "learning_rate": 9.994954856496221e-05, "loss": 9.210710525512695, "step": 131 }, { "epoch": 0.017117431088561633, "grad_norm": 0.7593847513198853, "learning_rate": 9.994862725442994e-05, "loss": 16.78412437438965, "step": 132 }, { "epoch": 0.01724710859680831, "grad_norm": 0.6865171790122986, "learning_rate": 9.99476976119843e-05, "loss": 14.700671195983887, "step": 133 }, { "epoch": 0.01737678610505499, "grad_norm": 0.6353207230567932, "learning_rate": 9.994675963778037e-05, "loss": 13.743029594421387, "step": 134 }, { "epoch": 0.01750646361330167, "grad_norm": 0.6035429835319519, "learning_rate": 9.994581333197464e-05, "loss": 15.924673080444336, "step": 135 }, { "epoch": 0.01763614112154835, "grad_norm": 0.5980501770973206, "learning_rate": 9.994485869472492e-05, "loss": 13.908863067626953, "step": 136 }, { "epoch": 0.017765818629795028, "grad_norm": 0.7518458962440491, "learning_rate": 9.994389572619049e-05, "loss": 14.311891555786133, "step": 137 }, { "epoch": 0.017895496138041707, "grad_norm": 0.6964307427406311, "learning_rate": 9.994292442653195e-05, "loss": 12.289799690246582, "step": 138 }, { "epoch": 0.018025173646288386, "grad_norm": 0.5756608247756958, "learning_rate": 9.994194479591134e-05, "loss": 11.736403465270996, "step": 139 }, { "epoch": 0.018154851154535065, "grad_norm": 0.5409243106842041, "learning_rate": 9.994095683449209e-05, "loss": 13.429733276367188, "step": 140 }, { "epoch": 0.018284528662781744, "grad_norm": 0.5477737784385681, "learning_rate": 9.993996054243897e-05, "loss": 13.076322555541992, "step": 141 }, { "epoch": 0.018414206171028423, "grad_norm": 0.8242630958557129, "learning_rate": 9.993895591991818e-05, "loss": 17.702531814575195, "step": 142 }, { "epoch": 0.018543883679275102, "grad_norm": 0.6053379774093628, "learning_rate": 9.993794296709731e-05, "loss": 13.410865783691406, "step": 143 }, { "epoch": 0.01867356118752178, "grad_norm": 0.9162344336509705, "learning_rate": 9.993692168414532e-05, "loss": 13.961666107177734, "step": 144 }, { "epoch": 0.01880323869576846, "grad_norm": 0.8027300834655762, "learning_rate": 9.993589207123257e-05, "loss": 14.480069160461426, "step": 145 }, { "epoch": 0.01893291620401514, "grad_norm": 0.8382586240768433, "learning_rate": 9.993485412853081e-05, "loss": 17.05339813232422, "step": 146 }, { "epoch": 0.019062593712261818, "grad_norm": 0.6254875659942627, "learning_rate": 9.993380785621319e-05, "loss": 12.464069366455078, "step": 147 }, { "epoch": 0.019192271220508497, "grad_norm": 0.6930795907974243, "learning_rate": 9.993275325445423e-05, "loss": 16.203718185424805, "step": 148 }, { "epoch": 0.019321948728755176, "grad_norm": 0.6520224809646606, "learning_rate": 9.993169032342985e-05, "loss": 12.879267692565918, "step": 149 }, { "epoch": 0.019451626237001855, "grad_norm": 0.7034899592399597, "learning_rate": 9.993061906331736e-05, "loss": 16.240419387817383, "step": 150 }, { "epoch": 0.019581303745248534, "grad_norm": 0.776711106300354, "learning_rate": 9.992953947429545e-05, "loss": 18.739643096923828, "step": 151 }, { "epoch": 0.019710981253495213, "grad_norm": 0.6514577865600586, "learning_rate": 9.992845155654419e-05, "loss": 16.200809478759766, "step": 152 }, { "epoch": 0.019840658761741892, "grad_norm": 0.5990867018699646, "learning_rate": 9.992735531024509e-05, "loss": 17.119558334350586, "step": 153 }, { "epoch": 0.01997033626998857, "grad_norm": 0.9283354878425598, "learning_rate": 9.9926250735581e-05, "loss": 15.968012809753418, "step": 154 }, { "epoch": 0.02010001377823525, "grad_norm": 0.6891978979110718, "learning_rate": 9.992513783273615e-05, "loss": 14.653592109680176, "step": 155 }, { "epoch": 0.02022969128648193, "grad_norm": 0.6850877404212952, "learning_rate": 9.992401660189622e-05, "loss": 17.322980880737305, "step": 156 }, { "epoch": 0.02035936879472861, "grad_norm": 0.6504268646240234, "learning_rate": 9.992288704324823e-05, "loss": 16.744346618652344, "step": 157 }, { "epoch": 0.020489046302975288, "grad_norm": 0.6781274080276489, "learning_rate": 9.992174915698059e-05, "loss": 16.69521141052246, "step": 158 }, { "epoch": 0.020618723811221967, "grad_norm": 0.736351490020752, "learning_rate": 9.992060294328312e-05, "loss": 17.017196655273438, "step": 159 }, { "epoch": 0.020748401319468646, "grad_norm": 0.703440248966217, "learning_rate": 9.991944840234702e-05, "loss": 15.295707702636719, "step": 160 }, { "epoch": 0.020878078827715325, "grad_norm": 0.7144283652305603, "learning_rate": 9.991828553436486e-05, "loss": 16.378511428833008, "step": 161 }, { "epoch": 0.021007756335962004, "grad_norm": 0.8070990443229675, "learning_rate": 9.991711433953065e-05, "loss": 13.861655235290527, "step": 162 }, { "epoch": 0.021137433844208683, "grad_norm": 0.5517581105232239, "learning_rate": 9.991593481803974e-05, "loss": 13.02780818939209, "step": 163 }, { "epoch": 0.021267111352455362, "grad_norm": 0.858184278011322, "learning_rate": 9.991474697008888e-05, "loss": 19.2922306060791, "step": 164 }, { "epoch": 0.02139678886070204, "grad_norm": 0.7001565098762512, "learning_rate": 9.991355079587624e-05, "loss": 17.390228271484375, "step": 165 }, { "epoch": 0.02152646636894872, "grad_norm": 0.5164411067962646, "learning_rate": 9.991234629560131e-05, "loss": 12.91584300994873, "step": 166 }, { "epoch": 0.0216561438771954, "grad_norm": 0.8562296032905579, "learning_rate": 9.991113346946504e-05, "loss": 14.465737342834473, "step": 167 }, { "epoch": 0.021785821385442078, "grad_norm": 0.557271420955658, "learning_rate": 9.990991231766976e-05, "loss": 12.372171401977539, "step": 168 }, { "epoch": 0.021915498893688757, "grad_norm": 0.8666216731071472, "learning_rate": 9.990868284041912e-05, "loss": 13.087031364440918, "step": 169 }, { "epoch": 0.022045176401935436, "grad_norm": 0.8201056718826294, "learning_rate": 9.990744503791825e-05, "loss": 15.958559036254883, "step": 170 }, { "epoch": 0.022174853910182115, "grad_norm": 0.7201080918312073, "learning_rate": 9.990619891037361e-05, "loss": 19.10097885131836, "step": 171 }, { "epoch": 0.022304531418428794, "grad_norm": 0.6159600019454956, "learning_rate": 9.990494445799305e-05, "loss": 14.281743049621582, "step": 172 }, { "epoch": 0.022434208926675473, "grad_norm": 0.6387598514556885, "learning_rate": 9.990368168098588e-05, "loss": 15.9447603225708, "step": 173 }, { "epoch": 0.022563886434922152, "grad_norm": 0.889892041683197, "learning_rate": 9.990241057956266e-05, "loss": 14.47803020477295, "step": 174 }, { "epoch": 0.02269356394316883, "grad_norm": 0.6799455285072327, "learning_rate": 9.990113115393551e-05, "loss": 14.713265419006348, "step": 175 }, { "epoch": 0.02282324145141551, "grad_norm": 0.6960963606834412, "learning_rate": 9.98998434043178e-05, "loss": 13.320306777954102, "step": 176 }, { "epoch": 0.02295291895966219, "grad_norm": 0.635917603969574, "learning_rate": 9.989854733092434e-05, "loss": 14.00332260131836, "step": 177 }, { "epoch": 0.023082596467908868, "grad_norm": 0.5850063562393188, "learning_rate": 9.989724293397134e-05, "loss": 11.777130126953125, "step": 178 }, { "epoch": 0.023212273976155547, "grad_norm": 0.8830193281173706, "learning_rate": 9.98959302136764e-05, "loss": 15.535618782043457, "step": 179 }, { "epoch": 0.023341951484402226, "grad_norm": 0.7027676105499268, "learning_rate": 9.989460917025846e-05, "loss": 14.908431053161621, "step": 180 }, { "epoch": 0.023471628992648905, "grad_norm": 0.692041277885437, "learning_rate": 9.98932798039379e-05, "loss": 13.611295700073242, "step": 181 }, { "epoch": 0.023601306500895584, "grad_norm": 0.6282123327255249, "learning_rate": 9.989194211493646e-05, "loss": 12.556886672973633, "step": 182 }, { "epoch": 0.023730984009142263, "grad_norm": 0.587705135345459, "learning_rate": 9.989059610347731e-05, "loss": 13.552556991577148, "step": 183 }, { "epoch": 0.023860661517388942, "grad_norm": 0.7592556476593018, "learning_rate": 9.988924176978496e-05, "loss": 18.15322494506836, "step": 184 }, { "epoch": 0.02399033902563562, "grad_norm": 0.8225769996643066, "learning_rate": 9.988787911408531e-05, "loss": 12.72614574432373, "step": 185 }, { "epoch": 0.0241200165338823, "grad_norm": 0.6613843441009521, "learning_rate": 9.98865081366057e-05, "loss": 16.42293357849121, "step": 186 }, { "epoch": 0.02424969404212898, "grad_norm": 0.6214976906776428, "learning_rate": 9.988512883757477e-05, "loss": 14.086792945861816, "step": 187 }, { "epoch": 0.02437937155037566, "grad_norm": 0.6281592845916748, "learning_rate": 9.988374121722264e-05, "loss": 10.515913963317871, "step": 188 }, { "epoch": 0.024509049058622338, "grad_norm": 0.949230432510376, "learning_rate": 9.988234527578079e-05, "loss": 18.254520416259766, "step": 189 }, { "epoch": 0.024638726566869017, "grad_norm": 0.5513569712638855, "learning_rate": 9.988094101348205e-05, "loss": 11.945792198181152, "step": 190 }, { "epoch": 0.024768404075115696, "grad_norm": 0.9063296318054199, "learning_rate": 9.987952843056066e-05, "loss": 18.44182586669922, "step": 191 }, { "epoch": 0.024898081583362375, "grad_norm": 0.7568091750144958, "learning_rate": 9.987810752725225e-05, "loss": 17.2150821685791, "step": 192 }, { "epoch": 0.025027759091609054, "grad_norm": 0.6218354105949402, "learning_rate": 9.987667830379387e-05, "loss": 9.80892276763916, "step": 193 }, { "epoch": 0.025157436599855733, "grad_norm": 0.5903156995773315, "learning_rate": 9.98752407604239e-05, "loss": 13.252067565917969, "step": 194 }, { "epoch": 0.025287114108102412, "grad_norm": 0.8450694680213928, "learning_rate": 9.987379489738215e-05, "loss": 19.846410751342773, "step": 195 }, { "epoch": 0.02541679161634909, "grad_norm": 0.7465935349464417, "learning_rate": 9.987234071490983e-05, "loss": 15.39233112335205, "step": 196 }, { "epoch": 0.02554646912459577, "grad_norm": 0.6320073008537292, "learning_rate": 9.987087821324945e-05, "loss": 13.292259216308594, "step": 197 }, { "epoch": 0.02567614663284245, "grad_norm": 0.6522201895713806, "learning_rate": 9.986940739264503e-05, "loss": 12.319164276123047, "step": 198 }, { "epoch": 0.025805824141089128, "grad_norm": 0.4866675138473511, "learning_rate": 9.986792825334185e-05, "loss": 11.439050674438477, "step": 199 }, { "epoch": 0.025935501649335807, "grad_norm": 0.8772282600402832, "learning_rate": 9.986644079558671e-05, "loss": 18.009950637817383, "step": 200 }, { "epoch": 0.026065179157582486, "grad_norm": 0.5503013730049133, "learning_rate": 9.986494501962771e-05, "loss": 12.380666732788086, "step": 201 }, { "epoch": 0.026194856665829165, "grad_norm": 0.7024089097976685, "learning_rate": 9.986344092571436e-05, "loss": 14.847145080566406, "step": 202 }, { "epoch": 0.026324534174075844, "grad_norm": 0.8300754427909851, "learning_rate": 9.986192851409754e-05, "loss": 12.644577026367188, "step": 203 }, { "epoch": 0.026454211682322523, "grad_norm": 0.6045570373535156, "learning_rate": 9.986040778502956e-05, "loss": 13.23320198059082, "step": 204 }, { "epoch": 0.026583889190569202, "grad_norm": 0.6263315677642822, "learning_rate": 9.985887873876408e-05, "loss": 15.084025382995605, "step": 205 }, { "epoch": 0.02671356669881588, "grad_norm": 0.9264289736747742, "learning_rate": 9.985734137555615e-05, "loss": 15.554827690124512, "step": 206 }, { "epoch": 0.02684324420706256, "grad_norm": 0.7351172566413879, "learning_rate": 9.985579569566223e-05, "loss": 14.95765209197998, "step": 207 }, { "epoch": 0.02697292171530924, "grad_norm": 0.6139023900032043, "learning_rate": 9.985424169934016e-05, "loss": 14.019194602966309, "step": 208 }, { "epoch": 0.02710259922355592, "grad_norm": 0.8368632793426514, "learning_rate": 9.985267938684915e-05, "loss": 12.867145538330078, "step": 209 }, { "epoch": 0.027232276731802597, "grad_norm": 0.49342620372772217, "learning_rate": 9.985110875844981e-05, "loss": 15.454392433166504, "step": 210 }, { "epoch": 0.027361954240049276, "grad_norm": 0.6508436799049377, "learning_rate": 9.984952981440414e-05, "loss": 11.860733032226562, "step": 211 }, { "epoch": 0.027491631748295955, "grad_norm": 0.5429646968841553, "learning_rate": 9.984794255497552e-05, "loss": 15.797540664672852, "step": 212 }, { "epoch": 0.027621309256542634, "grad_norm": 0.5954781770706177, "learning_rate": 9.984634698042874e-05, "loss": 16.707677841186523, "step": 213 }, { "epoch": 0.027750986764789314, "grad_norm": 0.5375392436981201, "learning_rate": 9.984474309102991e-05, "loss": 17.184226989746094, "step": 214 }, { "epoch": 0.027880664273035993, "grad_norm": 0.6772399544715881, "learning_rate": 9.984313088704662e-05, "loss": 18.755735397338867, "step": 215 }, { "epoch": 0.02801034178128267, "grad_norm": 0.709560215473175, "learning_rate": 9.984151036874779e-05, "loss": 15.367445945739746, "step": 216 }, { "epoch": 0.02814001928952935, "grad_norm": 0.6286648511886597, "learning_rate": 9.983988153640372e-05, "loss": 11.992483139038086, "step": 217 }, { "epoch": 0.02826969679777603, "grad_norm": 0.6555359363555908, "learning_rate": 9.983824439028613e-05, "loss": 13.799430847167969, "step": 218 }, { "epoch": 0.02839937430602271, "grad_norm": 0.5754333138465881, "learning_rate": 9.983659893066812e-05, "loss": 16.81977081298828, "step": 219 }, { "epoch": 0.028529051814269388, "grad_norm": 0.5951015949249268, "learning_rate": 9.983494515782415e-05, "loss": 17.695621490478516, "step": 220 }, { "epoch": 0.028658729322516067, "grad_norm": 0.6369666457176208, "learning_rate": 9.98332830720301e-05, "loss": 13.119431495666504, "step": 221 }, { "epoch": 0.028788406830762746, "grad_norm": 0.5906679034233093, "learning_rate": 9.98316126735632e-05, "loss": 12.356934547424316, "step": 222 }, { "epoch": 0.028918084339009425, "grad_norm": 0.6189340949058533, "learning_rate": 9.982993396270211e-05, "loss": 13.00375747680664, "step": 223 }, { "epoch": 0.029047761847256104, "grad_norm": 0.6888294816017151, "learning_rate": 9.982824693972685e-05, "loss": 17.2258358001709, "step": 224 }, { "epoch": 0.029177439355502783, "grad_norm": 0.8594734072685242, "learning_rate": 9.982655160491882e-05, "loss": 15.10021686553955, "step": 225 }, { "epoch": 0.029307116863749462, "grad_norm": 1.0227826833724976, "learning_rate": 9.982484795856087e-05, "loss": 16.50509262084961, "step": 226 }, { "epoch": 0.02943679437199614, "grad_norm": 0.9017974138259888, "learning_rate": 9.98231360009371e-05, "loss": 19.29532241821289, "step": 227 }, { "epoch": 0.02956647188024282, "grad_norm": 0.6121225953102112, "learning_rate": 9.982141573233314e-05, "loss": 17.712467193603516, "step": 228 }, { "epoch": 0.0296961493884895, "grad_norm": 0.644993782043457, "learning_rate": 9.981968715303593e-05, "loss": 12.583715438842773, "step": 229 }, { "epoch": 0.029825826896736178, "grad_norm": 0.7130120992660522, "learning_rate": 9.981795026333382e-05, "loss": 13.5723876953125, "step": 230 }, { "epoch": 0.029955504404982857, "grad_norm": 0.6966693997383118, "learning_rate": 9.981620506351654e-05, "loss": 18.789146423339844, "step": 231 }, { "epoch": 0.030085181913229536, "grad_norm": 0.554803192615509, "learning_rate": 9.981445155387519e-05, "loss": 10.595657348632812, "step": 232 }, { "epoch": 0.030214859421476215, "grad_norm": 0.6361915469169617, "learning_rate": 9.981268973470228e-05, "loss": 13.844128608703613, "step": 233 }, { "epoch": 0.030344536929722894, "grad_norm": 1.0067015886306763, "learning_rate": 9.981091960629172e-05, "loss": 12.309060096740723, "step": 234 }, { "epoch": 0.030474214437969573, "grad_norm": 0.6028741598129272, "learning_rate": 9.980914116893875e-05, "loss": 10.623408317565918, "step": 235 }, { "epoch": 0.030603891946216252, "grad_norm": 0.6109175682067871, "learning_rate": 9.980735442294007e-05, "loss": 14.01678466796875, "step": 236 }, { "epoch": 0.03073356945446293, "grad_norm": 0.6071600914001465, "learning_rate": 9.980555936859368e-05, "loss": 10.67809772491455, "step": 237 }, { "epoch": 0.03086324696270961, "grad_norm": 1.0849055051803589, "learning_rate": 9.980375600619906e-05, "loss": 15.63008975982666, "step": 238 }, { "epoch": 0.03099292447095629, "grad_norm": 0.6163658499717712, "learning_rate": 9.980194433605699e-05, "loss": 12.877599716186523, "step": 239 }, { "epoch": 0.03112260197920297, "grad_norm": 0.5170084238052368, "learning_rate": 9.980012435846969e-05, "loss": 10.709845542907715, "step": 240 }, { "epoch": 0.03125227948744965, "grad_norm": 0.8438666462898254, "learning_rate": 9.979829607374076e-05, "loss": 15.863036155700684, "step": 241 }, { "epoch": 0.031381956995696327, "grad_norm": 0.6056291460990906, "learning_rate": 9.979645948217515e-05, "loss": 11.822467803955078, "step": 242 }, { "epoch": 0.03151163450394301, "grad_norm": 0.8565738797187805, "learning_rate": 9.979461458407924e-05, "loss": 16.96407127380371, "step": 243 }, { "epoch": 0.031641312012189685, "grad_norm": 0.6105753183364868, "learning_rate": 9.979276137976076e-05, "loss": 14.04648208618164, "step": 244 }, { "epoch": 0.03177098952043637, "grad_norm": 0.6340601444244385, "learning_rate": 9.979089986952887e-05, "loss": 12.374695777893066, "step": 245 }, { "epoch": 0.03190066702868304, "grad_norm": 0.6162251830101013, "learning_rate": 9.978903005369407e-05, "loss": 13.083205223083496, "step": 246 }, { "epoch": 0.032030344536929725, "grad_norm": 0.7624620795249939, "learning_rate": 9.978715193256826e-05, "loss": 14.15430736541748, "step": 247 }, { "epoch": 0.0321600220451764, "grad_norm": 0.5401633381843567, "learning_rate": 9.978526550646475e-05, "loss": 13.153575897216797, "step": 248 }, { "epoch": 0.03228969955342308, "grad_norm": 0.5691508054733276, "learning_rate": 9.978337077569819e-05, "loss": 14.065956115722656, "step": 249 }, { "epoch": 0.03241937706166976, "grad_norm": 0.6108981966972351, "learning_rate": 9.978146774058465e-05, "loss": 15.765032768249512, "step": 250 }, { "epoch": 0.03254905456991644, "grad_norm": 0.6369106769561768, "learning_rate": 9.977955640144157e-05, "loss": 16.559831619262695, "step": 251 }, { "epoch": 0.03267873207816312, "grad_norm": 0.580872654914856, "learning_rate": 9.977763675858778e-05, "loss": 11.353405952453613, "step": 252 }, { "epoch": 0.0328084095864098, "grad_norm": 0.4582988917827606, "learning_rate": 9.977570881234351e-05, "loss": 8.03659439086914, "step": 253 }, { "epoch": 0.032938087094656475, "grad_norm": 0.7888506650924683, "learning_rate": 9.977377256303035e-05, "loss": 16.711767196655273, "step": 254 }, { "epoch": 0.03306776460290316, "grad_norm": 0.4202457368373871, "learning_rate": 9.977182801097127e-05, "loss": 11.6011962890625, "step": 255 }, { "epoch": 0.03319744211114983, "grad_norm": 0.6061436533927917, "learning_rate": 9.976987515649065e-05, "loss": 15.674724578857422, "step": 256 }, { "epoch": 0.033327119619396516, "grad_norm": 0.6076532006263733, "learning_rate": 9.976791399991426e-05, "loss": 14.476892471313477, "step": 257 }, { "epoch": 0.03345679712764319, "grad_norm": 0.5184932351112366, "learning_rate": 9.976594454156922e-05, "loss": 12.383511543273926, "step": 258 }, { "epoch": 0.033586474635889874, "grad_norm": 0.6873825192451477, "learning_rate": 9.976396678178406e-05, "loss": 15.305620193481445, "step": 259 }, { "epoch": 0.03371615214413655, "grad_norm": 0.7306692004203796, "learning_rate": 9.976198072088869e-05, "loss": 15.25389575958252, "step": 260 }, { "epoch": 0.03384582965238323, "grad_norm": 0.7438482046127319, "learning_rate": 9.975998635921442e-05, "loss": 13.380558013916016, "step": 261 }, { "epoch": 0.03397550716062991, "grad_norm": 0.709275484085083, "learning_rate": 9.97579836970939e-05, "loss": 17.871097564697266, "step": 262 }, { "epoch": 0.03410518466887659, "grad_norm": 0.6084342002868652, "learning_rate": 9.975597273486122e-05, "loss": 11.132612228393555, "step": 263 }, { "epoch": 0.034234862177123265, "grad_norm": 0.596790075302124, "learning_rate": 9.97539534728518e-05, "loss": 12.765750885009766, "step": 264 }, { "epoch": 0.03436453968536995, "grad_norm": 0.6415562033653259, "learning_rate": 9.97519259114025e-05, "loss": 18.778072357177734, "step": 265 }, { "epoch": 0.03449421719361662, "grad_norm": 0.6328409314155579, "learning_rate": 9.974989005085153e-05, "loss": 13.27912712097168, "step": 266 }, { "epoch": 0.034623894701863306, "grad_norm": 0.5752634406089783, "learning_rate": 9.974784589153847e-05, "loss": 13.256582260131836, "step": 267 }, { "epoch": 0.03475357221010998, "grad_norm": 0.7719902992248535, "learning_rate": 9.974579343380434e-05, "loss": 13.257369995117188, "step": 268 }, { "epoch": 0.034883249718356664, "grad_norm": 0.645984947681427, "learning_rate": 9.974373267799148e-05, "loss": 13.220763206481934, "step": 269 }, { "epoch": 0.03501292722660334, "grad_norm": 0.5822069048881531, "learning_rate": 9.974166362444367e-05, "loss": 14.604852676391602, "step": 270 }, { "epoch": 0.03514260473485002, "grad_norm": 0.5535624623298645, "learning_rate": 9.973958627350601e-05, "loss": 12.605985641479492, "step": 271 }, { "epoch": 0.0352722822430967, "grad_norm": 0.5603841543197632, "learning_rate": 9.973750062552505e-05, "loss": 11.126901626586914, "step": 272 }, { "epoch": 0.03540195975134338, "grad_norm": 0.5524628758430481, "learning_rate": 9.973540668084871e-05, "loss": 17.079275131225586, "step": 273 }, { "epoch": 0.035531637259590056, "grad_norm": 0.8491634130477905, "learning_rate": 9.973330443982624e-05, "loss": 12.695565223693848, "step": 274 }, { "epoch": 0.03566131476783674, "grad_norm": 0.6707756519317627, "learning_rate": 9.973119390280836e-05, "loss": 14.320554733276367, "step": 275 }, { "epoch": 0.035790992276083414, "grad_norm": 0.6706116199493408, "learning_rate": 9.97290750701471e-05, "loss": 15.987317085266113, "step": 276 }, { "epoch": 0.035920669784330096, "grad_norm": 0.7506695985794067, "learning_rate": 9.97269479421959e-05, "loss": 16.301807403564453, "step": 277 }, { "epoch": 0.03605034729257677, "grad_norm": 0.7455649971961975, "learning_rate": 9.972481251930959e-05, "loss": 14.09969425201416, "step": 278 }, { "epoch": 0.036180024800823454, "grad_norm": 0.8268964290618896, "learning_rate": 9.972266880184438e-05, "loss": 11.93542766571045, "step": 279 }, { "epoch": 0.03630970230907013, "grad_norm": 0.655888020992279, "learning_rate": 9.972051679015787e-05, "loss": 12.74757194519043, "step": 280 }, { "epoch": 0.03643937981731681, "grad_norm": 0.5842821002006531, "learning_rate": 9.971835648460903e-05, "loss": 15.124288558959961, "step": 281 }, { "epoch": 0.03656905732556349, "grad_norm": 0.6860345005989075, "learning_rate": 9.97161878855582e-05, "loss": 16.117891311645508, "step": 282 }, { "epoch": 0.03669873483381017, "grad_norm": 0.6918210387229919, "learning_rate": 9.971401099336717e-05, "loss": 16.063486099243164, "step": 283 }, { "epoch": 0.036828412342056846, "grad_norm": 0.7027039527893066, "learning_rate": 9.971182580839903e-05, "loss": 15.15269947052002, "step": 284 }, { "epoch": 0.03695808985030353, "grad_norm": 0.5626243352890015, "learning_rate": 9.97096323310183e-05, "loss": 11.72461986541748, "step": 285 }, { "epoch": 0.037087767358550204, "grad_norm": 0.627890408039093, "learning_rate": 9.970743056159087e-05, "loss": 13.477994918823242, "step": 286 }, { "epoch": 0.03721744486679689, "grad_norm": 0.5579079985618591, "learning_rate": 9.970522050048404e-05, "loss": 14.51529312133789, "step": 287 }, { "epoch": 0.03734712237504356, "grad_norm": 0.7468105554580688, "learning_rate": 9.970300214806642e-05, "loss": 16.157501220703125, "step": 288 }, { "epoch": 0.037476799883290245, "grad_norm": 0.7659796476364136, "learning_rate": 9.970077550470808e-05, "loss": 15.460332870483398, "step": 289 }, { "epoch": 0.03760647739153692, "grad_norm": 0.6598422527313232, "learning_rate": 9.969854057078047e-05, "loss": 13.49856185913086, "step": 290 }, { "epoch": 0.0377361548997836, "grad_norm": 0.5955674052238464, "learning_rate": 9.969629734665636e-05, "loss": 15.01758861541748, "step": 291 }, { "epoch": 0.03786583240803028, "grad_norm": 0.7191686630249023, "learning_rate": 9.969404583270995e-05, "loss": 17.253700256347656, "step": 292 }, { "epoch": 0.03799550991627696, "grad_norm": 0.7814321517944336, "learning_rate": 9.969178602931685e-05, "loss": 13.234031677246094, "step": 293 }, { "epoch": 0.038125187424523636, "grad_norm": 0.7697437405586243, "learning_rate": 9.968951793685397e-05, "loss": 17.670461654663086, "step": 294 }, { "epoch": 0.03825486493277032, "grad_norm": 0.5434023141860962, "learning_rate": 9.968724155569966e-05, "loss": 12.769017219543457, "step": 295 }, { "epoch": 0.038384542441016994, "grad_norm": 0.7477104663848877, "learning_rate": 9.968495688623368e-05, "loss": 13.336082458496094, "step": 296 }, { "epoch": 0.03851421994926368, "grad_norm": 0.8359196186065674, "learning_rate": 9.968266392883708e-05, "loss": 19.620784759521484, "step": 297 }, { "epoch": 0.03864389745751035, "grad_norm": 0.6103721857070923, "learning_rate": 9.968036268389237e-05, "loss": 15.827261924743652, "step": 298 }, { "epoch": 0.038773574965757035, "grad_norm": 0.8016976714134216, "learning_rate": 9.967805315178342e-05, "loss": 15.192021369934082, "step": 299 }, { "epoch": 0.03890325247400371, "grad_norm": 0.4979427754878998, "learning_rate": 9.967573533289551e-05, "loss": 11.728495597839355, "step": 300 }, { "epoch": 0.03903292998225039, "grad_norm": 0.48569726943969727, "learning_rate": 9.967340922761523e-05, "loss": 10.249739646911621, "step": 301 }, { "epoch": 0.03916260749049707, "grad_norm": 0.6723886132240295, "learning_rate": 9.967107483633063e-05, "loss": 15.96652889251709, "step": 302 }, { "epoch": 0.03929228499874375, "grad_norm": 0.46979400515556335, "learning_rate": 9.966873215943107e-05, "loss": 10.44056510925293, "step": 303 }, { "epoch": 0.03942196250699043, "grad_norm": 0.5139780640602112, "learning_rate": 9.966638119730738e-05, "loss": 12.431100845336914, "step": 304 }, { "epoch": 0.03955164001523711, "grad_norm": 0.6063764095306396, "learning_rate": 9.966402195035169e-05, "loss": 16.562206268310547, "step": 305 }, { "epoch": 0.039681317523483785, "grad_norm": 0.69105464220047, "learning_rate": 9.966165441895756e-05, "loss": 12.935013771057129, "step": 306 }, { "epoch": 0.03981099503173047, "grad_norm": 0.8405634164810181, "learning_rate": 9.965927860351991e-05, "loss": 20.48694610595703, "step": 307 }, { "epoch": 0.03994067253997714, "grad_norm": 0.4778294861316681, "learning_rate": 9.965689450443505e-05, "loss": 10.534642219543457, "step": 308 }, { "epoch": 0.040070350048223825, "grad_norm": 0.5237623453140259, "learning_rate": 9.965450212210067e-05, "loss": 12.770011901855469, "step": 309 }, { "epoch": 0.0402000275564705, "grad_norm": 0.6840708255767822, "learning_rate": 9.965210145691584e-05, "loss": 13.334748268127441, "step": 310 }, { "epoch": 0.04032970506471718, "grad_norm": 0.8804484009742737, "learning_rate": 9.964969250928102e-05, "loss": 19.634563446044922, "step": 311 }, { "epoch": 0.04045938257296386, "grad_norm": 0.5332384705543518, "learning_rate": 9.964727527959806e-05, "loss": 10.712540626525879, "step": 312 }, { "epoch": 0.04058906008121054, "grad_norm": 0.733146607875824, "learning_rate": 9.964484976827014e-05, "loss": 13.784358024597168, "step": 313 }, { "epoch": 0.04071873758945722, "grad_norm": 0.6280352473258972, "learning_rate": 9.96424159757019e-05, "loss": 14.108776092529297, "step": 314 }, { "epoch": 0.0408484150977039, "grad_norm": 0.5731650590896606, "learning_rate": 9.963997390229929e-05, "loss": 13.205307006835938, "step": 315 }, { "epoch": 0.040978092605950575, "grad_norm": 0.6232280731201172, "learning_rate": 9.963752354846967e-05, "loss": 14.684749603271484, "step": 316 }, { "epoch": 0.04110777011419726, "grad_norm": 0.5875867605209351, "learning_rate": 9.96350649146218e-05, "loss": 12.00442886352539, "step": 317 }, { "epoch": 0.04123744762244393, "grad_norm": 0.7717530131340027, "learning_rate": 9.963259800116581e-05, "loss": 17.170236587524414, "step": 318 }, { "epoch": 0.041367125130690616, "grad_norm": 0.7596086859703064, "learning_rate": 9.963012280851318e-05, "loss": 10.180490493774414, "step": 319 }, { "epoch": 0.04149680263893729, "grad_norm": 0.4954187273979187, "learning_rate": 9.962763933707681e-05, "loss": 10.833785057067871, "step": 320 }, { "epoch": 0.041626480147183974, "grad_norm": 0.9698978066444397, "learning_rate": 9.962514758727097e-05, "loss": 17.894800186157227, "step": 321 }, { "epoch": 0.04175615765543065, "grad_norm": 0.5331231355667114, "learning_rate": 9.962264755951132e-05, "loss": 12.891242980957031, "step": 322 }, { "epoch": 0.04188583516367733, "grad_norm": 0.752059280872345, "learning_rate": 9.962013925421485e-05, "loss": 16.344385147094727, "step": 323 }, { "epoch": 0.04201551267192401, "grad_norm": 0.6164954900741577, "learning_rate": 9.961762267179999e-05, "loss": 16.689481735229492, "step": 324 }, { "epoch": 0.04214519018017069, "grad_norm": 0.8831734657287598, "learning_rate": 9.961509781268655e-05, "loss": 17.731529235839844, "step": 325 }, { "epoch": 0.042274867688417365, "grad_norm": 0.8171341419219971, "learning_rate": 9.961256467729567e-05, "loss": 14.661459922790527, "step": 326 }, { "epoch": 0.04240454519666405, "grad_norm": 0.6391863226890564, "learning_rate": 9.961002326604994e-05, "loss": 13.551554679870605, "step": 327 }, { "epoch": 0.042534222704910724, "grad_norm": 0.6536072492599487, "learning_rate": 9.960747357937324e-05, "loss": 12.37224292755127, "step": 328 }, { "epoch": 0.042663900213157406, "grad_norm": 0.489868700504303, "learning_rate": 9.960491561769091e-05, "loss": 12.749567031860352, "step": 329 }, { "epoch": 0.04279357772140408, "grad_norm": 0.6846359372138977, "learning_rate": 9.960234938142964e-05, "loss": 13.946901321411133, "step": 330 }, { "epoch": 0.042923255229650764, "grad_norm": 0.4727092683315277, "learning_rate": 9.959977487101752e-05, "loss": 12.599920272827148, "step": 331 }, { "epoch": 0.04305293273789744, "grad_norm": 0.758101761341095, "learning_rate": 9.959719208688399e-05, "loss": 16.327640533447266, "step": 332 }, { "epoch": 0.04318261024614412, "grad_norm": 0.5946663022041321, "learning_rate": 9.959460102945985e-05, "loss": 12.552489280700684, "step": 333 }, { "epoch": 0.0433122877543908, "grad_norm": 0.6418777704238892, "learning_rate": 9.959200169917737e-05, "loss": 12.237491607666016, "step": 334 }, { "epoch": 0.04344196526263748, "grad_norm": 0.6106101274490356, "learning_rate": 9.958939409647012e-05, "loss": 15.568214416503906, "step": 335 }, { "epoch": 0.043571642770884156, "grad_norm": 0.6916753649711609, "learning_rate": 9.958677822177307e-05, "loss": 11.350931167602539, "step": 336 }, { "epoch": 0.04370132027913084, "grad_norm": 0.8590516448020935, "learning_rate": 9.958415407552258e-05, "loss": 18.31802749633789, "step": 337 }, { "epoch": 0.043830997787377514, "grad_norm": 1.1363708972930908, "learning_rate": 9.958152165815636e-05, "loss": 21.493797302246094, "step": 338 }, { "epoch": 0.043960675295624196, "grad_norm": 0.7159785032272339, "learning_rate": 9.957888097011355e-05, "loss": 16.5307674407959, "step": 339 }, { "epoch": 0.04409035280387087, "grad_norm": 0.5552977919578552, "learning_rate": 9.957623201183464e-05, "loss": 14.916229248046875, "step": 340 }, { "epoch": 0.044220030312117554, "grad_norm": 0.5937854647636414, "learning_rate": 9.957357478376148e-05, "loss": 14.432008743286133, "step": 341 }, { "epoch": 0.04434970782036423, "grad_norm": 0.7719844579696655, "learning_rate": 9.957090928633734e-05, "loss": 22.36751937866211, "step": 342 }, { "epoch": 0.04447938532861091, "grad_norm": 0.5446810126304626, "learning_rate": 9.956823552000685e-05, "loss": 11.68948745727539, "step": 343 }, { "epoch": 0.04460906283685759, "grad_norm": 0.6660674810409546, "learning_rate": 9.9565553485216e-05, "loss": 11.763224601745605, "step": 344 }, { "epoch": 0.04473874034510427, "grad_norm": 0.6678521037101746, "learning_rate": 9.956286318241222e-05, "loss": 13.123283386230469, "step": 345 }, { "epoch": 0.044868417853350946, "grad_norm": 0.5554252862930298, "learning_rate": 9.956016461204425e-05, "loss": 12.586359977722168, "step": 346 }, { "epoch": 0.04499809536159763, "grad_norm": 0.6690151691436768, "learning_rate": 9.955745777456223e-05, "loss": 11.13213062286377, "step": 347 }, { "epoch": 0.045127772869844304, "grad_norm": 0.766044020652771, "learning_rate": 9.95547426704177e-05, "loss": 15.389257431030273, "step": 348 }, { "epoch": 0.04525745037809099, "grad_norm": 0.5058057308197021, "learning_rate": 9.955201930006357e-05, "loss": 14.112174987792969, "step": 349 }, { "epoch": 0.04538712788633766, "grad_norm": 0.6603040099143982, "learning_rate": 9.954928766395413e-05, "loss": 17.106782913208008, "step": 350 }, { "epoch": 0.045516805394584345, "grad_norm": 0.729411244392395, "learning_rate": 9.954654776254501e-05, "loss": 15.764400482177734, "step": 351 }, { "epoch": 0.04564648290283102, "grad_norm": 0.6432146430015564, "learning_rate": 9.954379959629328e-05, "loss": 17.778356552124023, "step": 352 }, { "epoch": 0.0457761604110777, "grad_norm": 0.6290650367736816, "learning_rate": 9.954104316565736e-05, "loss": 15.677207946777344, "step": 353 }, { "epoch": 0.04590583791932438, "grad_norm": 0.6776597499847412, "learning_rate": 9.953827847109703e-05, "loss": 15.481393814086914, "step": 354 }, { "epoch": 0.04603551542757106, "grad_norm": 0.4254447817802429, "learning_rate": 9.953550551307349e-05, "loss": 14.770302772521973, "step": 355 }, { "epoch": 0.046165192935817737, "grad_norm": 0.5614014863967896, "learning_rate": 9.953272429204929e-05, "loss": 11.481058120727539, "step": 356 }, { "epoch": 0.04629487044406442, "grad_norm": 0.6092226505279541, "learning_rate": 9.952993480848836e-05, "loss": 16.70001792907715, "step": 357 }, { "epoch": 0.046424547952311095, "grad_norm": 0.6560048460960388, "learning_rate": 9.952713706285603e-05, "loss": 10.802106857299805, "step": 358 }, { "epoch": 0.04655422546055778, "grad_norm": 0.6190686821937561, "learning_rate": 9.952433105561896e-05, "loss": 12.083375930786133, "step": 359 }, { "epoch": 0.04668390296880445, "grad_norm": 0.6091585755348206, "learning_rate": 9.952151678724522e-05, "loss": 10.759639739990234, "step": 360 }, { "epoch": 0.046813580477051135, "grad_norm": 0.7702950239181519, "learning_rate": 9.95186942582043e-05, "loss": 17.8530330657959, "step": 361 }, { "epoch": 0.04694325798529781, "grad_norm": 0.6855915784835815, "learning_rate": 9.951586346896698e-05, "loss": 16.494426727294922, "step": 362 }, { "epoch": 0.04707293549354449, "grad_norm": 0.660856306552887, "learning_rate": 9.95130244200055e-05, "loss": 16.61265754699707, "step": 363 }, { "epoch": 0.04720261300179117, "grad_norm": 0.6728490591049194, "learning_rate": 9.95101771117934e-05, "loss": 14.656720161437988, "step": 364 }, { "epoch": 0.04733229051003785, "grad_norm": 0.6917411088943481, "learning_rate": 9.950732154480567e-05, "loss": 14.891343116760254, "step": 365 }, { "epoch": 0.04746196801828453, "grad_norm": 0.6411337852478027, "learning_rate": 9.950445771951863e-05, "loss": 17.553958892822266, "step": 366 }, { "epoch": 0.04759164552653121, "grad_norm": 0.6056779623031616, "learning_rate": 9.950158563641e-05, "loss": 13.984214782714844, "step": 367 }, { "epoch": 0.047721323034777885, "grad_norm": 0.6478114724159241, "learning_rate": 9.949870529595887e-05, "loss": 12.999788284301758, "step": 368 }, { "epoch": 0.04785100054302457, "grad_norm": 0.6322752237319946, "learning_rate": 9.949581669864572e-05, "loss": 13.293198585510254, "step": 369 }, { "epoch": 0.04798067805127124, "grad_norm": 0.6804073452949524, "learning_rate": 9.949291984495237e-05, "loss": 13.394220352172852, "step": 370 }, { "epoch": 0.048110355559517926, "grad_norm": 0.5037692189216614, "learning_rate": 9.949001473536206e-05, "loss": 11.006689071655273, "step": 371 }, { "epoch": 0.0482400330677646, "grad_norm": 0.5307057499885559, "learning_rate": 9.948710137035939e-05, "loss": 15.512666702270508, "step": 372 }, { "epoch": 0.048369710576011284, "grad_norm": 0.5312576293945312, "learning_rate": 9.948417975043035e-05, "loss": 12.736532211303711, "step": 373 }, { "epoch": 0.04849938808425796, "grad_norm": 0.43625155091285706, "learning_rate": 9.948124987606227e-05, "loss": 11.72647762298584, "step": 374 }, { "epoch": 0.04862906559250464, "grad_norm": 0.7103204727172852, "learning_rate": 9.94783117477439e-05, "loss": 17.24320411682129, "step": 375 }, { "epoch": 0.04875874310075132, "grad_norm": 0.5165747404098511, "learning_rate": 9.947536536596531e-05, "loss": 15.038126945495605, "step": 376 }, { "epoch": 0.048888420608998, "grad_norm": 0.61861252784729, "learning_rate": 9.947241073121805e-05, "loss": 12.85089111328125, "step": 377 }, { "epoch": 0.049018098117244675, "grad_norm": 0.5793192982673645, "learning_rate": 9.946944784399493e-05, "loss": 15.12120532989502, "step": 378 }, { "epoch": 0.04914777562549136, "grad_norm": 0.8364868760108948, "learning_rate": 9.946647670479021e-05, "loss": 15.899293899536133, "step": 379 }, { "epoch": 0.04927745313373803, "grad_norm": 0.6467633843421936, "learning_rate": 9.94634973140995e-05, "loss": 14.12485122680664, "step": 380 }, { "epoch": 0.049407130641984716, "grad_norm": 0.7113352417945862, "learning_rate": 9.946050967241977e-05, "loss": 16.345117568969727, "step": 381 }, { "epoch": 0.04953680815023139, "grad_norm": 0.5768975019454956, "learning_rate": 9.945751378024943e-05, "loss": 13.630057334899902, "step": 382 }, { "epoch": 0.049666485658478074, "grad_norm": 0.5584546327590942, "learning_rate": 9.94545096380882e-05, "loss": 13.479740142822266, "step": 383 }, { "epoch": 0.04979616316672475, "grad_norm": 0.6693471074104309, "learning_rate": 9.94514972464372e-05, "loss": 15.100529670715332, "step": 384 }, { "epoch": 0.04992584067497143, "grad_norm": 0.7146291136741638, "learning_rate": 9.944847660579892e-05, "loss": 12.281229019165039, "step": 385 }, { "epoch": 0.05005551818321811, "grad_norm": 0.5753458738327026, "learning_rate": 9.944544771667726e-05, "loss": 14.10885238647461, "step": 386 }, { "epoch": 0.05018519569146479, "grad_norm": 0.8231804370880127, "learning_rate": 9.944241057957742e-05, "loss": 13.471972465515137, "step": 387 }, { "epoch": 0.050314873199711466, "grad_norm": 0.6449377536773682, "learning_rate": 9.943936519500608e-05, "loss": 14.006996154785156, "step": 388 }, { "epoch": 0.05044455070795815, "grad_norm": 0.7208792567253113, "learning_rate": 9.943631156347119e-05, "loss": 13.17851734161377, "step": 389 }, { "epoch": 0.050574228216204824, "grad_norm": 0.8027352094650269, "learning_rate": 9.943324968548215e-05, "loss": 15.776914596557617, "step": 390 }, { "epoch": 0.050703905724451506, "grad_norm": 0.5542784333229065, "learning_rate": 9.943017956154971e-05, "loss": 12.051703453063965, "step": 391 }, { "epoch": 0.05083358323269818, "grad_norm": 0.6847180724143982, "learning_rate": 9.942710119218599e-05, "loss": 14.546102523803711, "step": 392 }, { "epoch": 0.050963260740944864, "grad_norm": 0.5026363134384155, "learning_rate": 9.942401457790449e-05, "loss": 11.30323314666748, "step": 393 }, { "epoch": 0.05109293824919154, "grad_norm": 0.6277825236320496, "learning_rate": 9.94209197192201e-05, "loss": 15.111623764038086, "step": 394 }, { "epoch": 0.05122261575743822, "grad_norm": 0.9335335493087769, "learning_rate": 9.941781661664907e-05, "loss": 14.1354341506958, "step": 395 }, { "epoch": 0.0513522932656849, "grad_norm": 0.7578979134559631, "learning_rate": 9.941470527070902e-05, "loss": 12.983556747436523, "step": 396 }, { "epoch": 0.05148197077393158, "grad_norm": 0.8057076930999756, "learning_rate": 9.941158568191895e-05, "loss": 15.43847942352295, "step": 397 }, { "epoch": 0.051611648282178256, "grad_norm": 0.7702135443687439, "learning_rate": 9.940845785079925e-05, "loss": 12.33649730682373, "step": 398 }, { "epoch": 0.05174132579042494, "grad_norm": 0.914004385471344, "learning_rate": 9.940532177787166e-05, "loss": 13.972308158874512, "step": 399 }, { "epoch": 0.051871003298671614, "grad_norm": 0.6502673029899597, "learning_rate": 9.94021774636593e-05, "loss": 13.177061080932617, "step": 400 }, { "epoch": 0.0520006808069183, "grad_norm": 0.7090312242507935, "learning_rate": 9.939902490868669e-05, "loss": 15.10678768157959, "step": 401 }, { "epoch": 0.05213035831516497, "grad_norm": 0.7748519778251648, "learning_rate": 9.93958641134797e-05, "loss": 15.99815845489502, "step": 402 }, { "epoch": 0.052260035823411655, "grad_norm": 0.5139735341072083, "learning_rate": 9.939269507856559e-05, "loss": 12.566969871520996, "step": 403 }, { "epoch": 0.05238971333165833, "grad_norm": 0.4394737184047699, "learning_rate": 9.938951780447297e-05, "loss": 11.970096588134766, "step": 404 }, { "epoch": 0.05251939083990501, "grad_norm": 0.7074579000473022, "learning_rate": 9.938633229173184e-05, "loss": 12.299063682556152, "step": 405 }, { "epoch": 0.05264906834815169, "grad_norm": 0.6685451865196228, "learning_rate": 9.93831385408736e-05, "loss": 12.406294822692871, "step": 406 }, { "epoch": 0.05277874585639837, "grad_norm": 0.5292627811431885, "learning_rate": 9.937993655243096e-05, "loss": 12.985032081604004, "step": 407 }, { "epoch": 0.052908423364645046, "grad_norm": 0.4986940920352936, "learning_rate": 9.937672632693808e-05, "loss": 9.92176342010498, "step": 408 }, { "epoch": 0.05303810087289173, "grad_norm": 0.5049495697021484, "learning_rate": 9.937350786493043e-05, "loss": 10.79319953918457, "step": 409 }, { "epoch": 0.053167778381138404, "grad_norm": 0.5089507102966309, "learning_rate": 9.937028116694489e-05, "loss": 13.9389009475708, "step": 410 }, { "epoch": 0.05329745588938509, "grad_norm": 0.7168765664100647, "learning_rate": 9.936704623351971e-05, "loss": 14.426945686340332, "step": 411 }, { "epoch": 0.05342713339763176, "grad_norm": 0.6856073141098022, "learning_rate": 9.936380306519451e-05, "loss": 14.270302772521973, "step": 412 }, { "epoch": 0.053556810905878445, "grad_norm": 0.6406601071357727, "learning_rate": 9.936055166251025e-05, "loss": 15.319515228271484, "step": 413 }, { "epoch": 0.05368648841412512, "grad_norm": 0.706902801990509, "learning_rate": 9.935729202600934e-05, "loss": 15.86837100982666, "step": 414 }, { "epoch": 0.0538161659223718, "grad_norm": 0.6404340267181396, "learning_rate": 9.935402415623548e-05, "loss": 14.003229141235352, "step": 415 }, { "epoch": 0.05394584343061848, "grad_norm": 0.6085966229438782, "learning_rate": 9.935074805373381e-05, "loss": 12.291999816894531, "step": 416 }, { "epoch": 0.05407552093886516, "grad_norm": 0.6862949728965759, "learning_rate": 9.93474637190508e-05, "loss": 11.860067367553711, "step": 417 }, { "epoch": 0.05420519844711184, "grad_norm": 0.7975915670394897, "learning_rate": 9.934417115273432e-05, "loss": 15.978974342346191, "step": 418 }, { "epoch": 0.05433487595535852, "grad_norm": 0.7535678744316101, "learning_rate": 9.934087035533359e-05, "loss": 17.26249122619629, "step": 419 }, { "epoch": 0.054464553463605195, "grad_norm": 0.6328299641609192, "learning_rate": 9.933756132739924e-05, "loss": 17.02556610107422, "step": 420 }, { "epoch": 0.05459423097185188, "grad_norm": 0.7655121088027954, "learning_rate": 9.933424406948322e-05, "loss": 13.057223320007324, "step": 421 }, { "epoch": 0.05472390848009855, "grad_norm": 0.6120569705963135, "learning_rate": 9.933091858213887e-05, "loss": 15.034424781799316, "step": 422 }, { "epoch": 0.054853585988345235, "grad_norm": 0.5594831109046936, "learning_rate": 9.932758486592096e-05, "loss": 12.881865501403809, "step": 423 }, { "epoch": 0.05498326349659191, "grad_norm": 0.5711956024169922, "learning_rate": 9.932424292138556e-05, "loss": 10.127326011657715, "step": 424 }, { "epoch": 0.05511294100483859, "grad_norm": 0.5657038688659668, "learning_rate": 9.932089274909011e-05, "loss": 12.041322708129883, "step": 425 }, { "epoch": 0.05524261851308527, "grad_norm": 0.6070735454559326, "learning_rate": 9.93175343495935e-05, "loss": 12.343717575073242, "step": 426 }, { "epoch": 0.05537229602133195, "grad_norm": 0.6004707217216492, "learning_rate": 9.931416772345592e-05, "loss": 15.595497131347656, "step": 427 }, { "epoch": 0.05550197352957863, "grad_norm": 0.5989517569541931, "learning_rate": 9.931079287123897e-05, "loss": 11.341623306274414, "step": 428 }, { "epoch": 0.05563165103782531, "grad_norm": 0.6429723501205444, "learning_rate": 9.930740979350557e-05, "loss": 14.133995056152344, "step": 429 }, { "epoch": 0.055761328546071985, "grad_norm": 0.48299479484558105, "learning_rate": 9.930401849082009e-05, "loss": 9.469074249267578, "step": 430 }, { "epoch": 0.05589100605431867, "grad_norm": 0.5058231353759766, "learning_rate": 9.930061896374823e-05, "loss": 10.656564712524414, "step": 431 }, { "epoch": 0.05602068356256534, "grad_norm": 0.5808942317962646, "learning_rate": 9.929721121285703e-05, "loss": 14.640192985534668, "step": 432 }, { "epoch": 0.056150361070812026, "grad_norm": 0.6277649998664856, "learning_rate": 9.929379523871497e-05, "loss": 15.606886863708496, "step": 433 }, { "epoch": 0.0562800385790587, "grad_norm": 0.8133577108383179, "learning_rate": 9.929037104189185e-05, "loss": 14.565540313720703, "step": 434 }, { "epoch": 0.056409716087305384, "grad_norm": 0.5143656134605408, "learning_rate": 9.928693862295887e-05, "loss": 13.448037147521973, "step": 435 }, { "epoch": 0.05653939359555206, "grad_norm": 0.6093129515647888, "learning_rate": 9.928349798248858e-05, "loss": 15.3789701461792, "step": 436 }, { "epoch": 0.05666907110379874, "grad_norm": 0.8139561414718628, "learning_rate": 9.928004912105493e-05, "loss": 14.456807136535645, "step": 437 }, { "epoch": 0.05679874861204542, "grad_norm": 0.6939098238945007, "learning_rate": 9.927659203923318e-05, "loss": 13.197617530822754, "step": 438 }, { "epoch": 0.0569284261202921, "grad_norm": 0.7152105569839478, "learning_rate": 9.927312673760008e-05, "loss": 11.897920608520508, "step": 439 }, { "epoch": 0.057058103628538775, "grad_norm": 0.5791128277778625, "learning_rate": 9.92696532167336e-05, "loss": 14.30905532836914, "step": 440 }, { "epoch": 0.05718778113678546, "grad_norm": 0.5938891172409058, "learning_rate": 9.926617147721319e-05, "loss": 12.249624252319336, "step": 441 }, { "epoch": 0.057317458645032134, "grad_norm": 0.7121455073356628, "learning_rate": 9.926268151961964e-05, "loss": 12.549040794372559, "step": 442 }, { "epoch": 0.057447136153278816, "grad_norm": 0.9112157821655273, "learning_rate": 9.92591833445351e-05, "loss": 11.884353637695312, "step": 443 }, { "epoch": 0.05757681366152549, "grad_norm": 0.559302568435669, "learning_rate": 9.925567695254312e-05, "loss": 13.737269401550293, "step": 444 }, { "epoch": 0.057706491169772174, "grad_norm": 0.5780378580093384, "learning_rate": 9.925216234422856e-05, "loss": 15.028351783752441, "step": 445 }, { "epoch": 0.05783616867801885, "grad_norm": 0.8141120076179504, "learning_rate": 9.924863952017774e-05, "loss": 10.9053955078125, "step": 446 }, { "epoch": 0.05796584618626553, "grad_norm": 0.5402355194091797, "learning_rate": 9.924510848097826e-05, "loss": 12.67887020111084, "step": 447 }, { "epoch": 0.05809552369451221, "grad_norm": 0.6612342596054077, "learning_rate": 9.924156922721915e-05, "loss": 14.005718231201172, "step": 448 }, { "epoch": 0.05822520120275889, "grad_norm": 0.557390570640564, "learning_rate": 9.923802175949078e-05, "loss": 11.751516342163086, "step": 449 }, { "epoch": 0.058354878711005566, "grad_norm": 0.49933889508247375, "learning_rate": 9.923446607838492e-05, "loss": 9.447665214538574, "step": 450 }, { "epoch": 0.05848455621925225, "grad_norm": 0.7730929255485535, "learning_rate": 9.923090218449467e-05, "loss": 16.329744338989258, "step": 451 }, { "epoch": 0.058614233727498924, "grad_norm": 0.7180250883102417, "learning_rate": 9.922733007841454e-05, "loss": 10.676664352416992, "step": 452 }, { "epoch": 0.058743911235745606, "grad_norm": 0.5502244830131531, "learning_rate": 9.922374976074039e-05, "loss": 13.900432586669922, "step": 453 }, { "epoch": 0.05887358874399228, "grad_norm": 0.5622300505638123, "learning_rate": 9.922016123206946e-05, "loss": 13.42091178894043, "step": 454 }, { "epoch": 0.059003266252238964, "grad_norm": 0.5987425446510315, "learning_rate": 9.921656449300032e-05, "loss": 15.17783260345459, "step": 455 }, { "epoch": 0.05913294376048564, "grad_norm": 0.9199643135070801, "learning_rate": 9.921295954413296e-05, "loss": 12.883594512939453, "step": 456 }, { "epoch": 0.05926262126873232, "grad_norm": 0.6505893468856812, "learning_rate": 9.920934638606873e-05, "loss": 15.929274559020996, "step": 457 }, { "epoch": 0.059392298776979, "grad_norm": 0.6347104907035828, "learning_rate": 9.920572501941032e-05, "loss": 16.84392547607422, "step": 458 }, { "epoch": 0.05952197628522568, "grad_norm": 0.6528505682945251, "learning_rate": 9.920209544476182e-05, "loss": 14.314495086669922, "step": 459 }, { "epoch": 0.059651653793472356, "grad_norm": 0.7148313522338867, "learning_rate": 9.919845766272869e-05, "loss": 15.362360000610352, "step": 460 }, { "epoch": 0.05978133130171904, "grad_norm": 0.5695946216583252, "learning_rate": 9.919481167391772e-05, "loss": 11.823159217834473, "step": 461 }, { "epoch": 0.059911008809965714, "grad_norm": 0.6362658143043518, "learning_rate": 9.919115747893711e-05, "loss": 12.827754020690918, "step": 462 }, { "epoch": 0.0600406863182124, "grad_norm": 0.5802624821662903, "learning_rate": 9.918749507839642e-05, "loss": 11.530866622924805, "step": 463 }, { "epoch": 0.06017036382645907, "grad_norm": 0.6441169381141663, "learning_rate": 9.918382447290657e-05, "loss": 15.490558624267578, "step": 464 }, { "epoch": 0.060300041334705755, "grad_norm": 0.46875062584877014, "learning_rate": 9.918014566307985e-05, "loss": 9.096238136291504, "step": 465 }, { "epoch": 0.06042971884295243, "grad_norm": 0.46292367577552795, "learning_rate": 9.917645864952991e-05, "loss": 10.186484336853027, "step": 466 }, { "epoch": 0.06055939635119911, "grad_norm": 0.6733406186103821, "learning_rate": 9.91727634328718e-05, "loss": 14.507068634033203, "step": 467 }, { "epoch": 0.06068907385944579, "grad_norm": 0.7179827690124512, "learning_rate": 9.916906001372191e-05, "loss": 13.134424209594727, "step": 468 }, { "epoch": 0.06081875136769247, "grad_norm": 0.5807068347930908, "learning_rate": 9.916534839269802e-05, "loss": 12.012392044067383, "step": 469 }, { "epoch": 0.06094842887593915, "grad_norm": 0.7670884728431702, "learning_rate": 9.916162857041923e-05, "loss": 15.578720092773438, "step": 470 }, { "epoch": 0.06107810638418583, "grad_norm": 0.6145723462104797, "learning_rate": 9.915790054750607e-05, "loss": 11.97812557220459, "step": 471 }, { "epoch": 0.061207783892432505, "grad_norm": 0.5823312997817993, "learning_rate": 9.91541643245804e-05, "loss": 14.764711380004883, "step": 472 }, { "epoch": 0.06133746140067919, "grad_norm": 0.6985733509063721, "learning_rate": 9.915041990226546e-05, "loss": 14.544801712036133, "step": 473 }, { "epoch": 0.06146713890892586, "grad_norm": 0.5867170691490173, "learning_rate": 9.914666728118586e-05, "loss": 11.870173454284668, "step": 474 }, { "epoch": 0.061596816417172545, "grad_norm": 0.6208317279815674, "learning_rate": 9.914290646196756e-05, "loss": 17.001483917236328, "step": 475 }, { "epoch": 0.06172649392541922, "grad_norm": 0.6422634720802307, "learning_rate": 9.913913744523792e-05, "loss": 13.620519638061523, "step": 476 }, { "epoch": 0.0618561714336659, "grad_norm": 0.6846035718917847, "learning_rate": 9.913536023162564e-05, "loss": 12.971317291259766, "step": 477 }, { "epoch": 0.06198584894191258, "grad_norm": 0.5202223658561707, "learning_rate": 9.913157482176078e-05, "loss": 12.273370742797852, "step": 478 }, { "epoch": 0.06211552645015926, "grad_norm": 0.6067312359809875, "learning_rate": 9.912778121627482e-05, "loss": 18.57939338684082, "step": 479 }, { "epoch": 0.06224520395840594, "grad_norm": 0.653340756893158, "learning_rate": 9.912397941580053e-05, "loss": 13.13972282409668, "step": 480 }, { "epoch": 0.06237488146665262, "grad_norm": 0.6497677564620972, "learning_rate": 9.912016942097211e-05, "loss": 12.415700912475586, "step": 481 }, { "epoch": 0.0625045589748993, "grad_norm": 0.7264840602874756, "learning_rate": 9.911635123242509e-05, "loss": 11.780216217041016, "step": 482 }, { "epoch": 0.06263423648314598, "grad_norm": 0.431162565946579, "learning_rate": 9.91125248507964e-05, "loss": 10.906045913696289, "step": 483 }, { "epoch": 0.06276391399139265, "grad_norm": 0.6572893261909485, "learning_rate": 9.91086902767243e-05, "loss": 12.006500244140625, "step": 484 }, { "epoch": 0.06289359149963933, "grad_norm": 0.6836082935333252, "learning_rate": 9.910484751084845e-05, "loss": 13.710506439208984, "step": 485 }, { "epoch": 0.06302326900788602, "grad_norm": 0.6924465298652649, "learning_rate": 9.910099655380985e-05, "loss": 15.000121116638184, "step": 486 }, { "epoch": 0.0631529465161327, "grad_norm": 0.8263676762580872, "learning_rate": 9.909713740625086e-05, "loss": 11.951507568359375, "step": 487 }, { "epoch": 0.06328262402437937, "grad_norm": 0.5356961488723755, "learning_rate": 9.909327006881526e-05, "loss": 11.891735076904297, "step": 488 }, { "epoch": 0.06341230153262604, "grad_norm": 0.6826813817024231, "learning_rate": 9.908939454214814e-05, "loss": 13.203059196472168, "step": 489 }, { "epoch": 0.06354197904087273, "grad_norm": 0.7763580083847046, "learning_rate": 9.908551082689596e-05, "loss": 12.84561824798584, "step": 490 }, { "epoch": 0.06367165654911941, "grad_norm": 0.7962359189987183, "learning_rate": 9.908161892370659e-05, "loss": 16.887781143188477, "step": 491 }, { "epoch": 0.06380133405736609, "grad_norm": 0.6427197456359863, "learning_rate": 9.907771883322923e-05, "loss": 13.19885540008545, "step": 492 }, { "epoch": 0.06393101156561276, "grad_norm": 0.6712356805801392, "learning_rate": 9.907381055611442e-05, "loss": 13.695987701416016, "step": 493 }, { "epoch": 0.06406068907385945, "grad_norm": 0.5005567669868469, "learning_rate": 9.906989409301415e-05, "loss": 12.44936466217041, "step": 494 }, { "epoch": 0.06419036658210613, "grad_norm": 0.6338671445846558, "learning_rate": 9.906596944458169e-05, "loss": 12.546296119689941, "step": 495 }, { "epoch": 0.0643200440903528, "grad_norm": 0.5689544677734375, "learning_rate": 9.906203661147172e-05, "loss": 15.71023941040039, "step": 496 }, { "epoch": 0.06444972159859948, "grad_norm": 0.6799784302711487, "learning_rate": 9.905809559434028e-05, "loss": 15.427892684936523, "step": 497 }, { "epoch": 0.06457939910684617, "grad_norm": 0.4668198227882385, "learning_rate": 9.905414639384474e-05, "loss": 10.679040908813477, "step": 498 }, { "epoch": 0.06470907661509284, "grad_norm": 0.6549769639968872, "learning_rate": 9.90501890106439e-05, "loss": 16.671762466430664, "step": 499 }, { "epoch": 0.06483875412333952, "grad_norm": 0.48568692803382874, "learning_rate": 9.904622344539787e-05, "loss": 9.531511306762695, "step": 500 }, { "epoch": 0.0649684316315862, "grad_norm": 0.6793501377105713, "learning_rate": 9.904224969876816e-05, "loss": 15.234033584594727, "step": 501 }, { "epoch": 0.06509810913983288, "grad_norm": 0.6938835382461548, "learning_rate": 9.903826777141761e-05, "loss": 13.396774291992188, "step": 502 }, { "epoch": 0.06522778664807956, "grad_norm": 0.6408324837684631, "learning_rate": 9.903427766401046e-05, "loss": 13.669108390808105, "step": 503 }, { "epoch": 0.06535746415632623, "grad_norm": 0.6428709626197815, "learning_rate": 9.90302793772123e-05, "loss": 10.988391876220703, "step": 504 }, { "epoch": 0.06548714166457291, "grad_norm": 0.6849573850631714, "learning_rate": 9.902627291169007e-05, "loss": 15.762940406799316, "step": 505 }, { "epoch": 0.0656168191728196, "grad_norm": 0.7071104645729065, "learning_rate": 9.902225826811208e-05, "loss": 14.860156059265137, "step": 506 }, { "epoch": 0.06574649668106627, "grad_norm": 0.6631439924240112, "learning_rate": 9.901823544714806e-05, "loss": 16.81210708618164, "step": 507 }, { "epoch": 0.06587617418931295, "grad_norm": 0.5157784223556519, "learning_rate": 9.901420444946899e-05, "loss": 9.801735877990723, "step": 508 }, { "epoch": 0.06600585169755963, "grad_norm": 0.5889451503753662, "learning_rate": 9.901016527574731e-05, "loss": 14.321083068847656, "step": 509 }, { "epoch": 0.06613552920580631, "grad_norm": 0.7152422666549683, "learning_rate": 9.900611792665681e-05, "loss": 13.91443157196045, "step": 510 }, { "epoch": 0.06626520671405299, "grad_norm": 0.599090576171875, "learning_rate": 9.90020624028726e-05, "loss": 13.960058212280273, "step": 511 }, { "epoch": 0.06639488422229967, "grad_norm": 0.7810689210891724, "learning_rate": 9.899799870507119e-05, "loss": 15.756057739257812, "step": 512 }, { "epoch": 0.06652456173054634, "grad_norm": 0.6220107078552246, "learning_rate": 9.899392683393042e-05, "loss": 15.647005081176758, "step": 513 }, { "epoch": 0.06665423923879303, "grad_norm": 0.5762065052986145, "learning_rate": 9.898984679012958e-05, "loss": 14.562575340270996, "step": 514 }, { "epoch": 0.0667839167470397, "grad_norm": 0.6083389520645142, "learning_rate": 9.89857585743492e-05, "loss": 11.12233829498291, "step": 515 }, { "epoch": 0.06691359425528638, "grad_norm": 0.6254123449325562, "learning_rate": 9.898166218727125e-05, "loss": 11.250137329101562, "step": 516 }, { "epoch": 0.06704327176353306, "grad_norm": 0.6324824690818787, "learning_rate": 9.897755762957908e-05, "loss": 18.574939727783203, "step": 517 }, { "epoch": 0.06717294927177975, "grad_norm": 0.6829573512077332, "learning_rate": 9.897344490195732e-05, "loss": 12.709811210632324, "step": 518 }, { "epoch": 0.06730262678002642, "grad_norm": 0.5810401439666748, "learning_rate": 9.896932400509204e-05, "loss": 12.774682998657227, "step": 519 }, { "epoch": 0.0674323042882731, "grad_norm": 0.7150804996490479, "learning_rate": 9.896519493967062e-05, "loss": 17.763957977294922, "step": 520 }, { "epoch": 0.06756198179651977, "grad_norm": 0.5788652896881104, "learning_rate": 9.896105770638189e-05, "loss": 14.816871643066406, "step": 521 }, { "epoch": 0.06769165930476646, "grad_norm": 0.6710848212242126, "learning_rate": 9.89569123059159e-05, "loss": 12.36782455444336, "step": 522 }, { "epoch": 0.06782133681301314, "grad_norm": 0.6142889261245728, "learning_rate": 9.895275873896422e-05, "loss": 14.297414779663086, "step": 523 }, { "epoch": 0.06795101432125981, "grad_norm": 0.6299270391464233, "learning_rate": 9.894859700621964e-05, "loss": 13.945069313049316, "step": 524 }, { "epoch": 0.06808069182950649, "grad_norm": 0.759856641292572, "learning_rate": 9.89444271083764e-05, "loss": 16.587963104248047, "step": 525 }, { "epoch": 0.06821036933775318, "grad_norm": 0.6918278336524963, "learning_rate": 9.89402490461301e-05, "loss": 18.86844253540039, "step": 526 }, { "epoch": 0.06834004684599986, "grad_norm": 0.6984198093414307, "learning_rate": 9.893606282017764e-05, "loss": 14.510298728942871, "step": 527 }, { "epoch": 0.06846972435424653, "grad_norm": 0.46475544571876526, "learning_rate": 9.893186843121738e-05, "loss": 13.08603572845459, "step": 528 }, { "epoch": 0.0685994018624932, "grad_norm": 0.56032395362854, "learning_rate": 9.892766587994893e-05, "loss": 13.375372886657715, "step": 529 }, { "epoch": 0.0687290793707399, "grad_norm": 0.6246709823608398, "learning_rate": 9.892345516707336e-05, "loss": 13.642683029174805, "step": 530 }, { "epoch": 0.06885875687898657, "grad_norm": 0.534841775894165, "learning_rate": 9.8919236293293e-05, "loss": 10.133692741394043, "step": 531 }, { "epoch": 0.06898843438723325, "grad_norm": 0.6059836149215698, "learning_rate": 9.891500925931166e-05, "loss": 12.646660804748535, "step": 532 }, { "epoch": 0.06911811189547992, "grad_norm": 0.6873205304145813, "learning_rate": 9.891077406583442e-05, "loss": 11.754528999328613, "step": 533 }, { "epoch": 0.06924778940372661, "grad_norm": 0.5909413695335388, "learning_rate": 9.890653071356776e-05, "loss": 15.211105346679688, "step": 534 }, { "epoch": 0.06937746691197329, "grad_norm": 0.665557861328125, "learning_rate": 9.89022792032195e-05, "loss": 13.393436431884766, "step": 535 }, { "epoch": 0.06950714442021996, "grad_norm": 0.9922134280204773, "learning_rate": 9.889801953549884e-05, "loss": 23.05068016052246, "step": 536 }, { "epoch": 0.06963682192846664, "grad_norm": 0.640308678150177, "learning_rate": 9.889375171111634e-05, "loss": 17.88047218322754, "step": 537 }, { "epoch": 0.06976649943671333, "grad_norm": 0.754206657409668, "learning_rate": 9.88894757307839e-05, "loss": 14.797497749328613, "step": 538 }, { "epoch": 0.06989617694496, "grad_norm": 0.5122467875480652, "learning_rate": 9.88851915952148e-05, "loss": 10.591959953308105, "step": 539 }, { "epoch": 0.07002585445320668, "grad_norm": 0.6685355305671692, "learning_rate": 9.888089930512371e-05, "loss": 11.84328556060791, "step": 540 }, { "epoch": 0.07015553196145335, "grad_norm": 0.5518898367881775, "learning_rate": 9.887659886122658e-05, "loss": 14.611933708190918, "step": 541 }, { "epoch": 0.07028520946970004, "grad_norm": 0.6535414457321167, "learning_rate": 9.887229026424077e-05, "loss": 12.34908676147461, "step": 542 }, { "epoch": 0.07041488697794672, "grad_norm": 0.5531066060066223, "learning_rate": 9.8867973514885e-05, "loss": 12.216665267944336, "step": 543 }, { "epoch": 0.0705445644861934, "grad_norm": 0.5862695574760437, "learning_rate": 9.886364861387937e-05, "loss": 12.081276893615723, "step": 544 }, { "epoch": 0.07067424199444007, "grad_norm": 0.6536324620246887, "learning_rate": 9.885931556194528e-05, "loss": 11.293411254882812, "step": 545 }, { "epoch": 0.07080391950268676, "grad_norm": 0.696247398853302, "learning_rate": 9.885497435980555e-05, "loss": 14.6002779006958, "step": 546 }, { "epoch": 0.07093359701093344, "grad_norm": 0.7941091060638428, "learning_rate": 9.885062500818432e-05, "loss": 16.916259765625, "step": 547 }, { "epoch": 0.07106327451918011, "grad_norm": 0.7399619817733765, "learning_rate": 9.88462675078071e-05, "loss": 13.543157577514648, "step": 548 }, { "epoch": 0.07119295202742679, "grad_norm": 0.6055309772491455, "learning_rate": 9.88419018594008e-05, "loss": 12.213431358337402, "step": 549 }, { "epoch": 0.07132262953567348, "grad_norm": 0.6347383260726929, "learning_rate": 9.88375280636936e-05, "loss": 11.482978820800781, "step": 550 }, { "epoch": 0.07145230704392015, "grad_norm": 0.6051961183547974, "learning_rate": 9.883314612141513e-05, "loss": 12.705227851867676, "step": 551 }, { "epoch": 0.07158198455216683, "grad_norm": 0.8959698677062988, "learning_rate": 9.882875603329633e-05, "loss": 14.981653213500977, "step": 552 }, { "epoch": 0.0717116620604135, "grad_norm": 0.5396490097045898, "learning_rate": 9.88243578000695e-05, "loss": 13.317931175231934, "step": 553 }, { "epoch": 0.07184133956866019, "grad_norm": 0.7771835327148438, "learning_rate": 9.88199514224683e-05, "loss": 14.463287353515625, "step": 554 }, { "epoch": 0.07197101707690687, "grad_norm": 0.616375207901001, "learning_rate": 9.881553690122778e-05, "loss": 13.62911319732666, "step": 555 }, { "epoch": 0.07210069458515354, "grad_norm": 0.5589193105697632, "learning_rate": 9.881111423708432e-05, "loss": 10.986077308654785, "step": 556 }, { "epoch": 0.07223037209340022, "grad_norm": 0.5000418424606323, "learning_rate": 9.880668343077567e-05, "loss": 13.954507827758789, "step": 557 }, { "epoch": 0.07236004960164691, "grad_norm": 0.8767417073249817, "learning_rate": 9.880224448304089e-05, "loss": 13.887316703796387, "step": 558 }, { "epoch": 0.07248972710989358, "grad_norm": 0.5933501720428467, "learning_rate": 9.879779739462048e-05, "loss": 15.939184188842773, "step": 559 }, { "epoch": 0.07261940461814026, "grad_norm": 0.5326430201530457, "learning_rate": 9.879334216625624e-05, "loss": 11.078551292419434, "step": 560 }, { "epoch": 0.07274908212638694, "grad_norm": 0.6132035851478577, "learning_rate": 9.878887879869136e-05, "loss": 11.729357719421387, "step": 561 }, { "epoch": 0.07287875963463362, "grad_norm": 0.7773407101631165, "learning_rate": 9.878440729267036e-05, "loss": 16.281530380249023, "step": 562 }, { "epoch": 0.0730084371428803, "grad_norm": 0.6384958624839783, "learning_rate": 9.877992764893913e-05, "loss": 11.574790954589844, "step": 563 }, { "epoch": 0.07313811465112698, "grad_norm": 0.5965842604637146, "learning_rate": 9.877543986824493e-05, "loss": 16.55224609375, "step": 564 }, { "epoch": 0.07326779215937365, "grad_norm": 0.5894248485565186, "learning_rate": 9.877094395133634e-05, "loss": 13.158276557922363, "step": 565 }, { "epoch": 0.07339746966762034, "grad_norm": 0.8346449732780457, "learning_rate": 9.876643989896335e-05, "loss": 12.455999374389648, "step": 566 }, { "epoch": 0.07352714717586702, "grad_norm": 0.6097031235694885, "learning_rate": 9.876192771187726e-05, "loss": 15.221434593200684, "step": 567 }, { "epoch": 0.07365682468411369, "grad_norm": 0.5360113978385925, "learning_rate": 9.875740739083078e-05, "loss": 9.663721084594727, "step": 568 }, { "epoch": 0.07378650219236037, "grad_norm": 0.6163883805274963, "learning_rate": 9.87528789365779e-05, "loss": 16.357166290283203, "step": 569 }, { "epoch": 0.07391617970060706, "grad_norm": 0.5604690313339233, "learning_rate": 9.874834234987402e-05, "loss": 11.518315315246582, "step": 570 }, { "epoch": 0.07404585720885373, "grad_norm": 0.9600113034248352, "learning_rate": 9.874379763147592e-05, "loss": 14.433492660522461, "step": 571 }, { "epoch": 0.07417553471710041, "grad_norm": 0.6150817275047302, "learning_rate": 9.873924478214165e-05, "loss": 13.450672149658203, "step": 572 }, { "epoch": 0.07430521222534708, "grad_norm": 0.8268720507621765, "learning_rate": 9.873468380263071e-05, "loss": 12.715935707092285, "step": 573 }, { "epoch": 0.07443488973359377, "grad_norm": 0.5547956824302673, "learning_rate": 9.87301146937039e-05, "loss": 11.150325775146484, "step": 574 }, { "epoch": 0.07456456724184045, "grad_norm": 0.6162202954292297, "learning_rate": 9.87255374561234e-05, "loss": 14.799978256225586, "step": 575 }, { "epoch": 0.07469424475008712, "grad_norm": 0.8294524550437927, "learning_rate": 9.872095209065273e-05, "loss": 17.679922103881836, "step": 576 }, { "epoch": 0.0748239222583338, "grad_norm": 0.7112117409706116, "learning_rate": 9.871635859805677e-05, "loss": 14.529324531555176, "step": 577 }, { "epoch": 0.07495359976658049, "grad_norm": 0.6975422501564026, "learning_rate": 9.871175697910175e-05, "loss": 11.266965866088867, "step": 578 }, { "epoch": 0.07508327727482716, "grad_norm": 0.782598078250885, "learning_rate": 9.870714723455529e-05, "loss": 16.341262817382812, "step": 579 }, { "epoch": 0.07521295478307384, "grad_norm": 0.7552753686904907, "learning_rate": 9.870252936518633e-05, "loss": 15.464098930358887, "step": 580 }, { "epoch": 0.07534263229132052, "grad_norm": 0.45039689540863037, "learning_rate": 9.869790337176518e-05, "loss": 9.807880401611328, "step": 581 }, { "epoch": 0.0754723097995672, "grad_norm": 0.5632513165473938, "learning_rate": 9.869326925506347e-05, "loss": 12.773099899291992, "step": 582 }, { "epoch": 0.07560198730781388, "grad_norm": 0.7758616805076599, "learning_rate": 9.868862701585426e-05, "loss": 21.435258865356445, "step": 583 }, { "epoch": 0.07573166481606056, "grad_norm": 0.6984838247299194, "learning_rate": 9.86839766549119e-05, "loss": 13.258147239685059, "step": 584 }, { "epoch": 0.07586134232430723, "grad_norm": 0.6130015254020691, "learning_rate": 9.86793181730121e-05, "loss": 15.499639511108398, "step": 585 }, { "epoch": 0.07599101983255392, "grad_norm": 0.7277253270149231, "learning_rate": 9.867465157093196e-05, "loss": 17.617326736450195, "step": 586 }, { "epoch": 0.0761206973408006, "grad_norm": 0.7246189117431641, "learning_rate": 9.86699768494499e-05, "loss": 18.111467361450195, "step": 587 }, { "epoch": 0.07625037484904727, "grad_norm": 0.7233960032463074, "learning_rate": 9.866529400934572e-05, "loss": 10.696638107299805, "step": 588 }, { "epoch": 0.07638005235729395, "grad_norm": 0.5004526972770691, "learning_rate": 9.866060305140057e-05, "loss": 11.290072441101074, "step": 589 }, { "epoch": 0.07650972986554064, "grad_norm": 0.5393227934837341, "learning_rate": 9.865590397639692e-05, "loss": 8.91596794128418, "step": 590 }, { "epoch": 0.07663940737378731, "grad_norm": 0.523263156414032, "learning_rate": 9.865119678511865e-05, "loss": 10.642009735107422, "step": 591 }, { "epoch": 0.07676908488203399, "grad_norm": 0.4371395707130432, "learning_rate": 9.864648147835097e-05, "loss": 10.495644569396973, "step": 592 }, { "epoch": 0.07689876239028066, "grad_norm": 0.7404257655143738, "learning_rate": 9.864175805688039e-05, "loss": 13.333492279052734, "step": 593 }, { "epoch": 0.07702843989852735, "grad_norm": 0.720705509185791, "learning_rate": 9.863702652149486e-05, "loss": 14.934284210205078, "step": 594 }, { "epoch": 0.07715811740677403, "grad_norm": 0.48237499594688416, "learning_rate": 9.863228687298365e-05, "loss": 12.153090476989746, "step": 595 }, { "epoch": 0.0772877949150207, "grad_norm": 0.7494909763336182, "learning_rate": 9.862753911213735e-05, "loss": 16.006162643432617, "step": 596 }, { "epoch": 0.07741747242326738, "grad_norm": 0.4764847755432129, "learning_rate": 9.862278323974797e-05, "loss": 10.286541938781738, "step": 597 }, { "epoch": 0.07754714993151407, "grad_norm": 0.79954993724823, "learning_rate": 9.861801925660882e-05, "loss": 10.936358451843262, "step": 598 }, { "epoch": 0.07767682743976075, "grad_norm": 0.5892449021339417, "learning_rate": 9.861324716351456e-05, "loss": 13.159932136535645, "step": 599 }, { "epoch": 0.07780650494800742, "grad_norm": 0.8094329833984375, "learning_rate": 9.860846696126125e-05, "loss": 14.403253555297852, "step": 600 }, { "epoch": 0.0779361824562541, "grad_norm": 0.8988799452781677, "learning_rate": 9.860367865064625e-05, "loss": 15.195571899414062, "step": 601 }, { "epoch": 0.07806585996450079, "grad_norm": 0.5742378234863281, "learning_rate": 9.859888223246832e-05, "loss": 13.783763885498047, "step": 602 }, { "epoch": 0.07819553747274746, "grad_norm": 0.7105887532234192, "learning_rate": 9.85940777075275e-05, "loss": 15.686917304992676, "step": 603 }, { "epoch": 0.07832521498099414, "grad_norm": 0.6023337841033936, "learning_rate": 9.858926507662531e-05, "loss": 11.48656940460205, "step": 604 }, { "epoch": 0.07845489248924081, "grad_norm": 0.5727295875549316, "learning_rate": 9.858444434056447e-05, "loss": 12.087520599365234, "step": 605 }, { "epoch": 0.0785845699974875, "grad_norm": 0.5954825282096863, "learning_rate": 9.857961550014917e-05, "loss": 11.306135177612305, "step": 606 }, { "epoch": 0.07871424750573418, "grad_norm": 0.4633122682571411, "learning_rate": 9.857477855618489e-05, "loss": 10.179925918579102, "step": 607 }, { "epoch": 0.07884392501398085, "grad_norm": 0.583802342414856, "learning_rate": 9.856993350947847e-05, "loss": 13.534601211547852, "step": 608 }, { "epoch": 0.07897360252222753, "grad_norm": 0.7376986145973206, "learning_rate": 9.856508036083813e-05, "loss": 15.467509269714355, "step": 609 }, { "epoch": 0.07910328003047422, "grad_norm": 0.6326844096183777, "learning_rate": 9.856021911107342e-05, "loss": 12.618134498596191, "step": 610 }, { "epoch": 0.0792329575387209, "grad_norm": 0.579444408416748, "learning_rate": 9.855534976099523e-05, "loss": 13.486949920654297, "step": 611 }, { "epoch": 0.07936263504696757, "grad_norm": 0.7967085838317871, "learning_rate": 9.855047231141583e-05, "loss": 16.50241470336914, "step": 612 }, { "epoch": 0.07949231255521425, "grad_norm": 0.620961606502533, "learning_rate": 9.854558676314881e-05, "loss": 12.297102928161621, "step": 613 }, { "epoch": 0.07962199006346093, "grad_norm": 0.7281185984611511, "learning_rate": 9.854069311700913e-05, "loss": 12.555364608764648, "step": 614 }, { "epoch": 0.07975166757170761, "grad_norm": 0.7536788582801819, "learning_rate": 9.853579137381311e-05, "loss": 18.14286994934082, "step": 615 }, { "epoch": 0.07988134507995429, "grad_norm": 0.6200419068336487, "learning_rate": 9.853088153437839e-05, "loss": 11.849344253540039, "step": 616 }, { "epoch": 0.08001102258820096, "grad_norm": 0.7917067408561707, "learning_rate": 9.852596359952401e-05, "loss": 15.495223045349121, "step": 617 }, { "epoch": 0.08014070009644765, "grad_norm": 0.5903688073158264, "learning_rate": 9.852103757007029e-05, "loss": 13.561473846435547, "step": 618 }, { "epoch": 0.08027037760469433, "grad_norm": 0.5259999632835388, "learning_rate": 9.851610344683898e-05, "loss": 10.539424896240234, "step": 619 }, { "epoch": 0.080400055112941, "grad_norm": 0.5729999542236328, "learning_rate": 9.851116123065309e-05, "loss": 13.328714370727539, "step": 620 }, { "epoch": 0.08052973262118768, "grad_norm": 0.7550066709518433, "learning_rate": 9.850621092233705e-05, "loss": 12.135092735290527, "step": 621 }, { "epoch": 0.08065941012943437, "grad_norm": 0.6380069851875305, "learning_rate": 9.850125252271664e-05, "loss": 16.153419494628906, "step": 622 }, { "epoch": 0.08078908763768104, "grad_norm": 0.6862779259681702, "learning_rate": 9.849628603261895e-05, "loss": 13.411323547363281, "step": 623 }, { "epoch": 0.08091876514592772, "grad_norm": 0.7606167197227478, "learning_rate": 9.849131145287245e-05, "loss": 14.578572273254395, "step": 624 }, { "epoch": 0.0810484426541744, "grad_norm": 0.6104399561882019, "learning_rate": 9.848632878430693e-05, "loss": 16.06842803955078, "step": 625 }, { "epoch": 0.08117812016242108, "grad_norm": 0.6365898847579956, "learning_rate": 9.848133802775356e-05, "loss": 9.752567291259766, "step": 626 }, { "epoch": 0.08130779767066776, "grad_norm": 0.6589376926422119, "learning_rate": 9.847633918404485e-05, "loss": 12.219035148620605, "step": 627 }, { "epoch": 0.08143747517891443, "grad_norm": 0.7469303011894226, "learning_rate": 9.847133225401464e-05, "loss": 13.111729621887207, "step": 628 }, { "epoch": 0.08156715268716111, "grad_norm": 0.7411143779754639, "learning_rate": 9.846631723849814e-05, "loss": 14.80292797088623, "step": 629 }, { "epoch": 0.0816968301954078, "grad_norm": 0.5708732604980469, "learning_rate": 9.846129413833191e-05, "loss": 12.908976554870605, "step": 630 }, { "epoch": 0.08182650770365447, "grad_norm": 0.4901103973388672, "learning_rate": 9.845626295435386e-05, "loss": 9.143135070800781, "step": 631 }, { "epoch": 0.08195618521190115, "grad_norm": 0.5056179165840149, "learning_rate": 9.84512236874032e-05, "loss": 9.402608871459961, "step": 632 }, { "epoch": 0.08208586272014783, "grad_norm": 0.6659551858901978, "learning_rate": 9.844617633832059e-05, "loss": 12.996792793273926, "step": 633 }, { "epoch": 0.08221554022839452, "grad_norm": 0.6621403098106384, "learning_rate": 9.844112090794792e-05, "loss": 17.078662872314453, "step": 634 }, { "epoch": 0.08234521773664119, "grad_norm": 0.6379255056381226, "learning_rate": 9.843605739712852e-05, "loss": 11.9566068649292, "step": 635 }, { "epoch": 0.08247489524488787, "grad_norm": 0.8916085362434387, "learning_rate": 9.843098580670702e-05, "loss": 16.246143341064453, "step": 636 }, { "epoch": 0.08260457275313454, "grad_norm": 0.5975480079650879, "learning_rate": 9.84259061375294e-05, "loss": 9.997272491455078, "step": 637 }, { "epoch": 0.08273425026138123, "grad_norm": 0.4550190567970276, "learning_rate": 9.842081839044302e-05, "loss": 10.171467781066895, "step": 638 }, { "epoch": 0.08286392776962791, "grad_norm": 0.9569541811943054, "learning_rate": 9.841572256629655e-05, "loss": 14.84948444366455, "step": 639 }, { "epoch": 0.08299360527787458, "grad_norm": 0.5297786593437195, "learning_rate": 9.841061866594004e-05, "loss": 10.274797439575195, "step": 640 }, { "epoch": 0.08312328278612126, "grad_norm": 0.5936999917030334, "learning_rate": 9.840550669022483e-05, "loss": 13.969682693481445, "step": 641 }, { "epoch": 0.08325296029436795, "grad_norm": 1.4392775297164917, "learning_rate": 9.840038664000368e-05, "loss": 14.241463661193848, "step": 642 }, { "epoch": 0.08338263780261462, "grad_norm": 0.71551913022995, "learning_rate": 9.839525851613067e-05, "loss": 17.170692443847656, "step": 643 }, { "epoch": 0.0835123153108613, "grad_norm": 0.5722990036010742, "learning_rate": 9.83901223194612e-05, "loss": 13.577824592590332, "step": 644 }, { "epoch": 0.08364199281910797, "grad_norm": 0.5022734999656677, "learning_rate": 9.838497805085204e-05, "loss": 9.400544166564941, "step": 645 }, { "epoch": 0.08377167032735466, "grad_norm": 0.5293589234352112, "learning_rate": 9.837982571116131e-05, "loss": 11.793157577514648, "step": 646 }, { "epoch": 0.08390134783560134, "grad_norm": 0.9389479160308838, "learning_rate": 9.837466530124848e-05, "loss": 14.142337799072266, "step": 647 }, { "epoch": 0.08403102534384801, "grad_norm": 0.6338376998901367, "learning_rate": 9.836949682197433e-05, "loss": 10.705351829528809, "step": 648 }, { "epoch": 0.08416070285209469, "grad_norm": 0.7073705196380615, "learning_rate": 9.836432027420104e-05, "loss": 12.44075870513916, "step": 649 }, { "epoch": 0.08429038036034138, "grad_norm": 0.5368925333023071, "learning_rate": 9.835913565879207e-05, "loss": 12.549359321594238, "step": 650 }, { "epoch": 0.08442005786858806, "grad_norm": 0.7703208923339844, "learning_rate": 9.835394297661231e-05, "loss": 14.887734413146973, "step": 651 }, { "epoch": 0.08454973537683473, "grad_norm": 0.686187207698822, "learning_rate": 9.834874222852792e-05, "loss": 15.109253883361816, "step": 652 }, { "epoch": 0.0846794128850814, "grad_norm": 0.5934284329414368, "learning_rate": 9.834353341540645e-05, "loss": 15.097224235534668, "step": 653 }, { "epoch": 0.0848090903933281, "grad_norm": 0.7889694571495056, "learning_rate": 9.833831653811678e-05, "loss": 15.650040626525879, "step": 654 }, { "epoch": 0.08493876790157477, "grad_norm": 0.9885437488555908, "learning_rate": 9.833309159752913e-05, "loss": 17.847633361816406, "step": 655 }, { "epoch": 0.08506844540982145, "grad_norm": 0.6640464067459106, "learning_rate": 9.832785859451506e-05, "loss": 13.038588523864746, "step": 656 }, { "epoch": 0.08519812291806812, "grad_norm": 0.5936490297317505, "learning_rate": 9.832261752994749e-05, "loss": 14.664173126220703, "step": 657 }, { "epoch": 0.08532780042631481, "grad_norm": 0.6145002841949463, "learning_rate": 9.83173684047007e-05, "loss": 13.298013687133789, "step": 658 }, { "epoch": 0.08545747793456149, "grad_norm": 0.7183120250701904, "learning_rate": 9.831211121965027e-05, "loss": 18.142580032348633, "step": 659 }, { "epoch": 0.08558715544280816, "grad_norm": 0.6145687699317932, "learning_rate": 9.830684597567316e-05, "loss": 13.345375061035156, "step": 660 }, { "epoch": 0.08571683295105484, "grad_norm": 0.6271507143974304, "learning_rate": 9.830157267364766e-05, "loss": 15.125938415527344, "step": 661 }, { "epoch": 0.08584651045930153, "grad_norm": 0.5531763434410095, "learning_rate": 9.829629131445342e-05, "loss": 11.281048774719238, "step": 662 }, { "epoch": 0.0859761879675482, "grad_norm": 0.6785033345222473, "learning_rate": 9.82910018989714e-05, "loss": 14.986109733581543, "step": 663 }, { "epoch": 0.08610586547579488, "grad_norm": 0.8494035005569458, "learning_rate": 9.828570442808396e-05, "loss": 14.369709014892578, "step": 664 }, { "epoch": 0.08623554298404155, "grad_norm": 0.988063395023346, "learning_rate": 9.828039890267473e-05, "loss": 16.732866287231445, "step": 665 }, { "epoch": 0.08636522049228824, "grad_norm": 0.6194493174552917, "learning_rate": 9.827508532362874e-05, "loss": 13.041450500488281, "step": 666 }, { "epoch": 0.08649489800053492, "grad_norm": 0.507203996181488, "learning_rate": 9.826976369183235e-05, "loss": 10.099478721618652, "step": 667 }, { "epoch": 0.0866245755087816, "grad_norm": 0.5470262765884399, "learning_rate": 9.826443400817325e-05, "loss": 11.428974151611328, "step": 668 }, { "epoch": 0.08675425301702827, "grad_norm": 0.5824233889579773, "learning_rate": 9.825909627354051e-05, "loss": 14.343768119812012, "step": 669 }, { "epoch": 0.08688393052527496, "grad_norm": 0.6164177060127258, "learning_rate": 9.825375048882448e-05, "loss": 13.857247352600098, "step": 670 }, { "epoch": 0.08701360803352164, "grad_norm": 0.7552283406257629, "learning_rate": 9.824839665491689e-05, "loss": 14.561564445495605, "step": 671 }, { "epoch": 0.08714328554176831, "grad_norm": 0.6074038147926331, "learning_rate": 9.824303477271083e-05, "loss": 12.271245956420898, "step": 672 }, { "epoch": 0.08727296305001499, "grad_norm": 0.5104284882545471, "learning_rate": 9.823766484310073e-05, "loss": 14.564360618591309, "step": 673 }, { "epoch": 0.08740264055826168, "grad_norm": 0.6121594905853271, "learning_rate": 9.823228686698231e-05, "loss": 13.289522171020508, "step": 674 }, { "epoch": 0.08753231806650835, "grad_norm": 0.5805521011352539, "learning_rate": 9.822690084525267e-05, "loss": 11.693001747131348, "step": 675 }, { "epoch": 0.08766199557475503, "grad_norm": 0.6430831551551819, "learning_rate": 9.822150677881028e-05, "loss": 10.21288013458252, "step": 676 }, { "epoch": 0.0877916730830017, "grad_norm": 0.7023777365684509, "learning_rate": 9.821610466855489e-05, "loss": 11.107335090637207, "step": 677 }, { "epoch": 0.08792135059124839, "grad_norm": 0.739605724811554, "learning_rate": 9.821069451538766e-05, "loss": 12.914473533630371, "step": 678 }, { "epoch": 0.08805102809949507, "grad_norm": 0.547779381275177, "learning_rate": 9.820527632021101e-05, "loss": 9.576738357543945, "step": 679 }, { "epoch": 0.08818070560774174, "grad_norm": 0.5928511023521423, "learning_rate": 9.81998500839288e-05, "loss": 10.92983627319336, "step": 680 }, { "epoch": 0.08831038311598842, "grad_norm": 0.6693719029426575, "learning_rate": 9.819441580744612e-05, "loss": 15.584964752197266, "step": 681 }, { "epoch": 0.08844006062423511, "grad_norm": 0.529341995716095, "learning_rate": 9.818897349166952e-05, "loss": 12.4015531539917, "step": 682 }, { "epoch": 0.08856973813248178, "grad_norm": 0.5102854371070862, "learning_rate": 9.818352313750679e-05, "loss": 8.510113716125488, "step": 683 }, { "epoch": 0.08869941564072846, "grad_norm": 0.5775505900382996, "learning_rate": 9.817806474586711e-05, "loss": 14.595126152038574, "step": 684 }, { "epoch": 0.08882909314897514, "grad_norm": 0.6095919013023376, "learning_rate": 9.817259831766101e-05, "loss": 12.559283256530762, "step": 685 }, { "epoch": 0.08895877065722183, "grad_norm": 0.6606473326683044, "learning_rate": 9.816712385380031e-05, "loss": 15.58281135559082, "step": 686 }, { "epoch": 0.0890884481654685, "grad_norm": 0.5393222570419312, "learning_rate": 9.816164135519824e-05, "loss": 9.485161781311035, "step": 687 }, { "epoch": 0.08921812567371518, "grad_norm": 0.6846520304679871, "learning_rate": 9.81561508227693e-05, "loss": 13.780858993530273, "step": 688 }, { "epoch": 0.08934780318196185, "grad_norm": 0.60622638463974, "learning_rate": 9.815065225742937e-05, "loss": 12.239559173583984, "step": 689 }, { "epoch": 0.08947748069020854, "grad_norm": 0.6509008407592773, "learning_rate": 9.81451456600957e-05, "loss": 14.381168365478516, "step": 690 }, { "epoch": 0.08960715819845522, "grad_norm": 0.5165316462516785, "learning_rate": 9.81396310316868e-05, "loss": 15.656526565551758, "step": 691 }, { "epoch": 0.08973683570670189, "grad_norm": 0.8474082350730896, "learning_rate": 9.813410837312258e-05, "loss": 13.124181747436523, "step": 692 }, { "epoch": 0.08986651321494857, "grad_norm": 0.6758612990379333, "learning_rate": 9.812857768532428e-05, "loss": 14.02819538116455, "step": 693 }, { "epoch": 0.08999619072319526, "grad_norm": 0.6328012943267822, "learning_rate": 9.812303896921446e-05, "loss": 13.871882438659668, "step": 694 }, { "epoch": 0.09012586823144193, "grad_norm": 0.6074875593185425, "learning_rate": 9.811749222571705e-05, "loss": 14.585973739624023, "step": 695 }, { "epoch": 0.09025554573968861, "grad_norm": 0.5948125720024109, "learning_rate": 9.811193745575729e-05, "loss": 12.231098175048828, "step": 696 }, { "epoch": 0.09038522324793528, "grad_norm": 0.803438127040863, "learning_rate": 9.810637466026174e-05, "loss": 12.405365943908691, "step": 697 }, { "epoch": 0.09051490075618197, "grad_norm": 0.571281373500824, "learning_rate": 9.810080384015838e-05, "loss": 14.156464576721191, "step": 698 }, { "epoch": 0.09064457826442865, "grad_norm": 0.7414058446884155, "learning_rate": 9.809522499637643e-05, "loss": 13.572741508483887, "step": 699 }, { "epoch": 0.09077425577267532, "grad_norm": 0.5125430226325989, "learning_rate": 9.808963812984653e-05, "loss": 13.821239471435547, "step": 700 }, { "epoch": 0.090903933280922, "grad_norm": 0.6254539489746094, "learning_rate": 9.80840432415006e-05, "loss": 14.720401763916016, "step": 701 }, { "epoch": 0.09103361078916869, "grad_norm": 0.5286750793457031, "learning_rate": 9.807844033227194e-05, "loss": 10.949761390686035, "step": 702 }, { "epoch": 0.09116328829741537, "grad_norm": 0.8773366212844849, "learning_rate": 9.807282940309515e-05, "loss": 12.315903663635254, "step": 703 }, { "epoch": 0.09129296580566204, "grad_norm": 0.6060436964035034, "learning_rate": 9.806721045490621e-05, "loss": 12.026006698608398, "step": 704 }, { "epoch": 0.09142264331390872, "grad_norm": 0.8061842918395996, "learning_rate": 9.806158348864238e-05, "loss": 11.338479995727539, "step": 705 }, { "epoch": 0.0915523208221554, "grad_norm": 0.6252006888389587, "learning_rate": 9.805594850524233e-05, "loss": 13.252829551696777, "step": 706 }, { "epoch": 0.09168199833040208, "grad_norm": 0.685161828994751, "learning_rate": 9.805030550564602e-05, "loss": 13.851468086242676, "step": 707 }, { "epoch": 0.09181167583864876, "grad_norm": 0.5933393239974976, "learning_rate": 9.804465449079474e-05, "loss": 15.812424659729004, "step": 708 }, { "epoch": 0.09194135334689543, "grad_norm": 0.6765407919883728, "learning_rate": 9.803899546163116e-05, "loss": 14.904987335205078, "step": 709 }, { "epoch": 0.09207103085514212, "grad_norm": 0.7180079817771912, "learning_rate": 9.803332841909922e-05, "loss": 15.099379539489746, "step": 710 }, { "epoch": 0.0922007083633888, "grad_norm": 0.7323721647262573, "learning_rate": 9.802765336414428e-05, "loss": 15.600417137145996, "step": 711 }, { "epoch": 0.09233038587163547, "grad_norm": 0.5043389797210693, "learning_rate": 9.802197029771297e-05, "loss": 9.966063499450684, "step": 712 }, { "epoch": 0.09246006337988215, "grad_norm": 0.7862127423286438, "learning_rate": 9.80162792207533e-05, "loss": 18.579980850219727, "step": 713 }, { "epoch": 0.09258974088812884, "grad_norm": 0.6705971360206604, "learning_rate": 9.801058013421457e-05, "loss": 14.342391967773438, "step": 714 }, { "epoch": 0.09271941839637551, "grad_norm": 0.6877833604812622, "learning_rate": 9.800487303904746e-05, "loss": 13.394746780395508, "step": 715 }, { "epoch": 0.09284909590462219, "grad_norm": 0.6332268118858337, "learning_rate": 9.799915793620396e-05, "loss": 12.913576126098633, "step": 716 }, { "epoch": 0.09297877341286888, "grad_norm": 0.559180498123169, "learning_rate": 9.79934348266374e-05, "loss": 12.857748031616211, "step": 717 }, { "epoch": 0.09310845092111555, "grad_norm": 0.714386522769928, "learning_rate": 9.79877037113025e-05, "loss": 16.472658157348633, "step": 718 }, { "epoch": 0.09323812842936223, "grad_norm": 0.6064816117286682, "learning_rate": 9.798196459115518e-05, "loss": 11.944611549377441, "step": 719 }, { "epoch": 0.0933678059376089, "grad_norm": 0.681951105594635, "learning_rate": 9.797621746715285e-05, "loss": 16.468502044677734, "step": 720 }, { "epoch": 0.0934974834458556, "grad_norm": 0.6459832191467285, "learning_rate": 9.797046234025416e-05, "loss": 9.987601280212402, "step": 721 }, { "epoch": 0.09362716095410227, "grad_norm": 0.6628790497779846, "learning_rate": 9.796469921141911e-05, "loss": 11.623053550720215, "step": 722 }, { "epoch": 0.09375683846234895, "grad_norm": 0.5207695960998535, "learning_rate": 9.795892808160907e-05, "loss": 10.439223289489746, "step": 723 }, { "epoch": 0.09388651597059562, "grad_norm": 0.6651392579078674, "learning_rate": 9.795314895178668e-05, "loss": 10.708843231201172, "step": 724 }, { "epoch": 0.09401619347884231, "grad_norm": 0.5628407597541809, "learning_rate": 9.794736182291602e-05, "loss": 15.415487289428711, "step": 725 }, { "epoch": 0.09414587098708899, "grad_norm": 0.752558708190918, "learning_rate": 9.794156669596238e-05, "loss": 11.796111106872559, "step": 726 }, { "epoch": 0.09427554849533566, "grad_norm": 0.7408208250999451, "learning_rate": 9.793576357189247e-05, "loss": 15.041296005249023, "step": 727 }, { "epoch": 0.09440522600358234, "grad_norm": 0.5733314156532288, "learning_rate": 9.792995245167428e-05, "loss": 9.858293533325195, "step": 728 }, { "epoch": 0.09453490351182903, "grad_norm": 0.7661930918693542, "learning_rate": 9.79241333362772e-05, "loss": 15.461386680603027, "step": 729 }, { "epoch": 0.0946645810200757, "grad_norm": 0.547805666923523, "learning_rate": 9.79183062266719e-05, "loss": 12.318775177001953, "step": 730 }, { "epoch": 0.09479425852832238, "grad_norm": 0.6686564087867737, "learning_rate": 9.791247112383041e-05, "loss": 13.239184379577637, "step": 731 }, { "epoch": 0.09492393603656905, "grad_norm": 0.7277751564979553, "learning_rate": 9.790662802872605e-05, "loss": 16.605287551879883, "step": 732 }, { "epoch": 0.09505361354481574, "grad_norm": 0.6401767134666443, "learning_rate": 9.790077694233353e-05, "loss": 13.6016206741333, "step": 733 }, { "epoch": 0.09518329105306242, "grad_norm": 0.6414504647254944, "learning_rate": 9.789491786562883e-05, "loss": 14.037013053894043, "step": 734 }, { "epoch": 0.0953129685613091, "grad_norm": 0.8517059683799744, "learning_rate": 9.788905079958937e-05, "loss": 16.573213577270508, "step": 735 }, { "epoch": 0.09544264606955577, "grad_norm": 0.7814995646476746, "learning_rate": 9.788317574519377e-05, "loss": 13.845521926879883, "step": 736 }, { "epoch": 0.09557232357780246, "grad_norm": 0.5258078575134277, "learning_rate": 9.787729270342206e-05, "loss": 9.26827335357666, "step": 737 }, { "epoch": 0.09570200108604913, "grad_norm": 0.5832918286323547, "learning_rate": 9.787140167525561e-05, "loss": 11.729735374450684, "step": 738 }, { "epoch": 0.09583167859429581, "grad_norm": 0.5548670887947083, "learning_rate": 9.786550266167709e-05, "loss": 13.408539772033691, "step": 739 }, { "epoch": 0.09596135610254249, "grad_norm": 0.5889064073562622, "learning_rate": 9.785959566367051e-05, "loss": 9.159322738647461, "step": 740 }, { "epoch": 0.09609103361078918, "grad_norm": 0.6797948479652405, "learning_rate": 9.78536806822212e-05, "loss": 12.699451446533203, "step": 741 }, { "epoch": 0.09622071111903585, "grad_norm": 0.6770082116127014, "learning_rate": 9.784775771831587e-05, "loss": 15.845621109008789, "step": 742 }, { "epoch": 0.09635038862728253, "grad_norm": 0.4858608543872833, "learning_rate": 9.78418267729425e-05, "loss": 16.128198623657227, "step": 743 }, { "epoch": 0.0964800661355292, "grad_norm": 0.5952367186546326, "learning_rate": 9.783588784709042e-05, "loss": 11.797974586486816, "step": 744 }, { "epoch": 0.09660974364377589, "grad_norm": 0.5463916659355164, "learning_rate": 9.782994094175033e-05, "loss": 14.102853775024414, "step": 745 }, { "epoch": 0.09673942115202257, "grad_norm": 0.6869577169418335, "learning_rate": 9.782398605791422e-05, "loss": 12.345647811889648, "step": 746 }, { "epoch": 0.09686909866026924, "grad_norm": 0.5952135920524597, "learning_rate": 9.781802319657543e-05, "loss": 12.165334701538086, "step": 747 }, { "epoch": 0.09699877616851592, "grad_norm": 0.5455002784729004, "learning_rate": 9.781205235872859e-05, "loss": 9.335546493530273, "step": 748 }, { "epoch": 0.09712845367676261, "grad_norm": 0.6000592112541199, "learning_rate": 9.780607354536972e-05, "loss": 12.285622596740723, "step": 749 }, { "epoch": 0.09725813118500928, "grad_norm": 0.6183569431304932, "learning_rate": 9.780008675749616e-05, "loss": 13.249423027038574, "step": 750 }, { "epoch": 0.09738780869325596, "grad_norm": 0.6163614988327026, "learning_rate": 9.779409199610653e-05, "loss": 10.890416145324707, "step": 751 }, { "epoch": 0.09751748620150263, "grad_norm": 0.6314038038253784, "learning_rate": 9.778808926220083e-05, "loss": 15.53564739227295, "step": 752 }, { "epoch": 0.09764716370974932, "grad_norm": 0.5880821943283081, "learning_rate": 9.778207855678036e-05, "loss": 13.542702674865723, "step": 753 }, { "epoch": 0.097776841217996, "grad_norm": 0.641538679599762, "learning_rate": 9.777605988084779e-05, "loss": 9.451089859008789, "step": 754 }, { "epoch": 0.09790651872624268, "grad_norm": 0.7371824383735657, "learning_rate": 9.777003323540708e-05, "loss": 12.114771842956543, "step": 755 }, { "epoch": 0.09803619623448935, "grad_norm": 0.548925518989563, "learning_rate": 9.776399862146354e-05, "loss": 8.315980911254883, "step": 756 }, { "epoch": 0.09816587374273604, "grad_norm": 0.6841121315956116, "learning_rate": 9.775795604002378e-05, "loss": 16.629648208618164, "step": 757 }, { "epoch": 0.09829555125098272, "grad_norm": 0.6645810008049011, "learning_rate": 9.775190549209578e-05, "loss": 13.670251846313477, "step": 758 }, { "epoch": 0.09842522875922939, "grad_norm": 0.8377872109413147, "learning_rate": 9.774584697868882e-05, "loss": 12.070540428161621, "step": 759 }, { "epoch": 0.09855490626747607, "grad_norm": 0.6948733925819397, "learning_rate": 9.773978050081352e-05, "loss": 13.439692497253418, "step": 760 }, { "epoch": 0.09868458377572276, "grad_norm": 0.6475218534469604, "learning_rate": 9.773370605948184e-05, "loss": 16.600915908813477, "step": 761 }, { "epoch": 0.09881426128396943, "grad_norm": 0.6604883670806885, "learning_rate": 9.772762365570704e-05, "loss": 11.12086009979248, "step": 762 }, { "epoch": 0.09894393879221611, "grad_norm": 0.9199029803276062, "learning_rate": 9.772153329050373e-05, "loss": 13.557989120483398, "step": 763 }, { "epoch": 0.09907361630046278, "grad_norm": 0.5849283933639526, "learning_rate": 9.771543496488785e-05, "loss": 12.61538314819336, "step": 764 }, { "epoch": 0.09920329380870947, "grad_norm": 0.5128777027130127, "learning_rate": 9.770932867987663e-05, "loss": 8.702414512634277, "step": 765 }, { "epoch": 0.09933297131695615, "grad_norm": 0.8129996657371521, "learning_rate": 9.77032144364887e-05, "loss": 13.739102363586426, "step": 766 }, { "epoch": 0.09946264882520282, "grad_norm": 0.5558087229728699, "learning_rate": 9.769709223574394e-05, "loss": 12.15659236907959, "step": 767 }, { "epoch": 0.0995923263334495, "grad_norm": 0.6471200585365295, "learning_rate": 9.769096207866361e-05, "loss": 13.40754222869873, "step": 768 }, { "epoch": 0.09972200384169619, "grad_norm": 0.6508768200874329, "learning_rate": 9.768482396627028e-05, "loss": 12.078865051269531, "step": 769 }, { "epoch": 0.09985168134994286, "grad_norm": 0.5520119667053223, "learning_rate": 9.767867789958782e-05, "loss": 7.944957256317139, "step": 770 }, { "epoch": 0.09998135885818954, "grad_norm": 0.49490195512771606, "learning_rate": 9.76725238796415e-05, "loss": 10.722743034362793, "step": 771 }, { "epoch": 0.10011103636643622, "grad_norm": 0.6700094938278198, "learning_rate": 9.766636190745783e-05, "loss": 15.128602981567383, "step": 772 }, { "epoch": 0.1002407138746829, "grad_norm": 0.5630554556846619, "learning_rate": 9.76601919840647e-05, "loss": 11.942176818847656, "step": 773 }, { "epoch": 0.10037039138292958, "grad_norm": 0.7473769187927246, "learning_rate": 9.765401411049131e-05, "loss": 15.505025863647461, "step": 774 }, { "epoch": 0.10050006889117626, "grad_norm": 0.6585121750831604, "learning_rate": 9.764782828776819e-05, "loss": 14.420419692993164, "step": 775 }, { "epoch": 0.10062974639942293, "grad_norm": 0.884878396987915, "learning_rate": 9.764163451692721e-05, "loss": 16.610145568847656, "step": 776 }, { "epoch": 0.10075942390766962, "grad_norm": 0.6321209669113159, "learning_rate": 9.763543279900154e-05, "loss": 11.153735160827637, "step": 777 }, { "epoch": 0.1008891014159163, "grad_norm": 0.5676471590995789, "learning_rate": 9.762922313502567e-05, "loss": 15.081352233886719, "step": 778 }, { "epoch": 0.10101877892416297, "grad_norm": 0.5133286118507385, "learning_rate": 9.762300552603545e-05, "loss": 9.651726722717285, "step": 779 }, { "epoch": 0.10114845643240965, "grad_norm": 0.8029236793518066, "learning_rate": 9.761677997306803e-05, "loss": 16.09563446044922, "step": 780 }, { "epoch": 0.10127813394065634, "grad_norm": 0.7545228600502014, "learning_rate": 9.761054647716193e-05, "loss": 12.619175910949707, "step": 781 }, { "epoch": 0.10140781144890301, "grad_norm": 0.8507408499717712, "learning_rate": 9.760430503935687e-05, "loss": 16.54038429260254, "step": 782 }, { "epoch": 0.10153748895714969, "grad_norm": 0.6967519521713257, "learning_rate": 9.759805566069407e-05, "loss": 12.380276679992676, "step": 783 }, { "epoch": 0.10166716646539636, "grad_norm": 0.6933241486549377, "learning_rate": 9.759179834221595e-05, "loss": 11.505949020385742, "step": 784 }, { "epoch": 0.10179684397364305, "grad_norm": 0.6897650361061096, "learning_rate": 9.758553308496629e-05, "loss": 12.126667976379395, "step": 785 }, { "epoch": 0.10192652148188973, "grad_norm": 0.5836649537086487, "learning_rate": 9.757925988999019e-05, "loss": 10.297802925109863, "step": 786 }, { "epoch": 0.1020561989901364, "grad_norm": 0.8574612140655518, "learning_rate": 9.757297875833409e-05, "loss": 14.831589698791504, "step": 787 }, { "epoch": 0.10218587649838308, "grad_norm": 0.6905381083488464, "learning_rate": 9.756668969104574e-05, "loss": 11.75239086151123, "step": 788 }, { "epoch": 0.10231555400662977, "grad_norm": 0.5907912850379944, "learning_rate": 9.756039268917424e-05, "loss": 12.862861633300781, "step": 789 }, { "epoch": 0.10244523151487644, "grad_norm": 0.5855189561843872, "learning_rate": 9.755408775376996e-05, "loss": 16.242773056030273, "step": 790 }, { "epoch": 0.10257490902312312, "grad_norm": 0.8017807006835938, "learning_rate": 9.754777488588462e-05, "loss": 12.188158988952637, "step": 791 }, { "epoch": 0.1027045865313698, "grad_norm": 0.7124102711677551, "learning_rate": 9.754145408657129e-05, "loss": 14.655255317687988, "step": 792 }, { "epoch": 0.10283426403961649, "grad_norm": 0.7599523663520813, "learning_rate": 9.753512535688432e-05, "loss": 12.894255638122559, "step": 793 }, { "epoch": 0.10296394154786316, "grad_norm": 0.6907903552055359, "learning_rate": 9.752878869787943e-05, "loss": 12.337054252624512, "step": 794 }, { "epoch": 0.10309361905610984, "grad_norm": 0.61981600522995, "learning_rate": 9.752244411061359e-05, "loss": 16.477550506591797, "step": 795 }, { "epoch": 0.10322329656435651, "grad_norm": 0.6604536771774292, "learning_rate": 9.751609159614521e-05, "loss": 11.42963695526123, "step": 796 }, { "epoch": 0.1033529740726032, "grad_norm": 0.6245795488357544, "learning_rate": 9.75097311555339e-05, "loss": 12.521026611328125, "step": 797 }, { "epoch": 0.10348265158084988, "grad_norm": 0.7379133105278015, "learning_rate": 9.750336278984063e-05, "loss": 14.659137725830078, "step": 798 }, { "epoch": 0.10361232908909655, "grad_norm": 0.6140783429145813, "learning_rate": 9.749698650012775e-05, "loss": 12.681082725524902, "step": 799 }, { "epoch": 0.10374200659734323, "grad_norm": 0.46737417578697205, "learning_rate": 9.749060228745886e-05, "loss": 9.464601516723633, "step": 800 }, { "epoch": 0.10387168410558992, "grad_norm": 0.6771970987319946, "learning_rate": 9.74842101528989e-05, "loss": 12.57666015625, "step": 801 }, { "epoch": 0.1040013616138366, "grad_norm": 0.6052643656730652, "learning_rate": 9.747781009751415e-05, "loss": 9.809150695800781, "step": 802 }, { "epoch": 0.10413103912208327, "grad_norm": 0.6388693451881409, "learning_rate": 9.747140212237223e-05, "loss": 11.350627899169922, "step": 803 }, { "epoch": 0.10426071663032994, "grad_norm": 0.5812678337097168, "learning_rate": 9.7464986228542e-05, "loss": 11.53203296661377, "step": 804 }, { "epoch": 0.10439039413857663, "grad_norm": 0.9008743166923523, "learning_rate": 9.745856241709372e-05, "loss": 20.461332321166992, "step": 805 }, { "epoch": 0.10452007164682331, "grad_norm": 0.6958600282669067, "learning_rate": 9.745213068909895e-05, "loss": 12.190463066101074, "step": 806 }, { "epoch": 0.10464974915506998, "grad_norm": 0.6032463312149048, "learning_rate": 9.744569104563056e-05, "loss": 9.958528518676758, "step": 807 }, { "epoch": 0.10477942666331666, "grad_norm": 0.5262093544006348, "learning_rate": 9.743924348776275e-05, "loss": 9.638267517089844, "step": 808 }, { "epoch": 0.10490910417156335, "grad_norm": 0.6702240705490112, "learning_rate": 9.7432788016571e-05, "loss": 15.251382827758789, "step": 809 }, { "epoch": 0.10503878167981003, "grad_norm": 0.6208896636962891, "learning_rate": 9.742632463313218e-05, "loss": 11.840744018554688, "step": 810 }, { "epoch": 0.1051684591880567, "grad_norm": 0.7123780250549316, "learning_rate": 9.741985333852443e-05, "loss": 10.788328170776367, "step": 811 }, { "epoch": 0.10529813669630338, "grad_norm": 0.5054624676704407, "learning_rate": 9.741337413382726e-05, "loss": 12.54277229309082, "step": 812 }, { "epoch": 0.10542781420455007, "grad_norm": 0.45985814929008484, "learning_rate": 9.740688702012141e-05, "loss": 9.639142990112305, "step": 813 }, { "epoch": 0.10555749171279674, "grad_norm": 0.6757042407989502, "learning_rate": 9.740039199848902e-05, "loss": 13.559226989746094, "step": 814 }, { "epoch": 0.10568716922104342, "grad_norm": 0.607757568359375, "learning_rate": 9.739388907001353e-05, "loss": 13.814410209655762, "step": 815 }, { "epoch": 0.10581684672929009, "grad_norm": 0.6802065968513489, "learning_rate": 9.738737823577966e-05, "loss": 12.735308647155762, "step": 816 }, { "epoch": 0.10594652423753678, "grad_norm": 0.5436018109321594, "learning_rate": 9.738085949687351e-05, "loss": 10.14826488494873, "step": 817 }, { "epoch": 0.10607620174578346, "grad_norm": 0.811250627040863, "learning_rate": 9.737433285438246e-05, "loss": 14.060860633850098, "step": 818 }, { "epoch": 0.10620587925403013, "grad_norm": 0.471758633852005, "learning_rate": 9.736779830939522e-05, "loss": 11.118765830993652, "step": 819 }, { "epoch": 0.10633555676227681, "grad_norm": 0.5363497734069824, "learning_rate": 9.736125586300181e-05, "loss": 10.251605033874512, "step": 820 }, { "epoch": 0.1064652342705235, "grad_norm": 0.6470177173614502, "learning_rate": 9.735470551629357e-05, "loss": 13.430367469787598, "step": 821 }, { "epoch": 0.10659491177877017, "grad_norm": 0.5752120018005371, "learning_rate": 9.734814727036317e-05, "loss": 8.062775611877441, "step": 822 }, { "epoch": 0.10672458928701685, "grad_norm": 0.5782566666603088, "learning_rate": 9.734158112630459e-05, "loss": 9.175610542297363, "step": 823 }, { "epoch": 0.10685426679526353, "grad_norm": 0.660672664642334, "learning_rate": 9.733500708521313e-05, "loss": 11.862310409545898, "step": 824 }, { "epoch": 0.10698394430351021, "grad_norm": 0.5285208821296692, "learning_rate": 9.732842514818537e-05, "loss": 10.560565948486328, "step": 825 }, { "epoch": 0.10711362181175689, "grad_norm": 0.7094899415969849, "learning_rate": 9.732183531631929e-05, "loss": 13.989683151245117, "step": 826 }, { "epoch": 0.10724329932000357, "grad_norm": 0.6238594055175781, "learning_rate": 9.73152375907141e-05, "loss": 15.759725570678711, "step": 827 }, { "epoch": 0.10737297682825024, "grad_norm": 0.6978776454925537, "learning_rate": 9.730863197247038e-05, "loss": 12.839484214782715, "step": 828 }, { "epoch": 0.10750265433649693, "grad_norm": 0.5662786960601807, "learning_rate": 9.730201846268999e-05, "loss": 12.367762565612793, "step": 829 }, { "epoch": 0.1076323318447436, "grad_norm": 0.6316700577735901, "learning_rate": 9.729539706247617e-05, "loss": 12.050024032592773, "step": 830 }, { "epoch": 0.10776200935299028, "grad_norm": 0.7700303196907043, "learning_rate": 9.728876777293339e-05, "loss": 13.80044937133789, "step": 831 }, { "epoch": 0.10789168686123696, "grad_norm": 0.6307612061500549, "learning_rate": 9.728213059516751e-05, "loss": 9.864330291748047, "step": 832 }, { "epoch": 0.10802136436948365, "grad_norm": 0.8184398412704468, "learning_rate": 9.727548553028567e-05, "loss": 18.12462043762207, "step": 833 }, { "epoch": 0.10815104187773032, "grad_norm": 0.6562761068344116, "learning_rate": 9.726883257939631e-05, "loss": 11.57369613647461, "step": 834 }, { "epoch": 0.108280719385977, "grad_norm": 0.6642476916313171, "learning_rate": 9.726217174360923e-05, "loss": 11.249893188476562, "step": 835 }, { "epoch": 0.10841039689422367, "grad_norm": 0.620245635509491, "learning_rate": 9.725550302403551e-05, "loss": 11.626629829406738, "step": 836 }, { "epoch": 0.10854007440247036, "grad_norm": 0.5924183130264282, "learning_rate": 9.724882642178756e-05, "loss": 12.772928237915039, "step": 837 }, { "epoch": 0.10866975191071704, "grad_norm": 0.7052860260009766, "learning_rate": 9.72421419379791e-05, "loss": 16.85528564453125, "step": 838 }, { "epoch": 0.10879942941896371, "grad_norm": 0.8585041165351868, "learning_rate": 9.723544957372517e-05, "loss": 14.385931968688965, "step": 839 }, { "epoch": 0.10892910692721039, "grad_norm": 0.8324592709541321, "learning_rate": 9.722874933014212e-05, "loss": 15.435629844665527, "step": 840 }, { "epoch": 0.10905878443545708, "grad_norm": 0.5600746273994446, "learning_rate": 9.722204120834762e-05, "loss": 11.006803512573242, "step": 841 }, { "epoch": 0.10918846194370375, "grad_norm": 0.6207736730575562, "learning_rate": 9.721532520946062e-05, "loss": 11.634012222290039, "step": 842 }, { "epoch": 0.10931813945195043, "grad_norm": 0.615797758102417, "learning_rate": 9.720860133460145e-05, "loss": 12.354187965393066, "step": 843 }, { "epoch": 0.1094478169601971, "grad_norm": 0.7340205311775208, "learning_rate": 9.720186958489173e-05, "loss": 15.968505859375, "step": 844 }, { "epoch": 0.1095774944684438, "grad_norm": 0.472852885723114, "learning_rate": 9.719512996145433e-05, "loss": 9.789281845092773, "step": 845 }, { "epoch": 0.10970717197669047, "grad_norm": 0.5162587761878967, "learning_rate": 9.718838246541351e-05, "loss": 11.53940486907959, "step": 846 }, { "epoch": 0.10983684948493715, "grad_norm": 0.5105847120285034, "learning_rate": 9.718162709789483e-05, "loss": 9.082786560058594, "step": 847 }, { "epoch": 0.10996652699318382, "grad_norm": 0.6916034817695618, "learning_rate": 9.717486386002513e-05, "loss": 11.476656913757324, "step": 848 }, { "epoch": 0.11009620450143051, "grad_norm": 0.7206137180328369, "learning_rate": 9.716809275293257e-05, "loss": 12.337594985961914, "step": 849 }, { "epoch": 0.11022588200967719, "grad_norm": 0.6888196468353271, "learning_rate": 9.716131377774669e-05, "loss": 15.784624099731445, "step": 850 }, { "epoch": 0.11035555951792386, "grad_norm": 0.6877467632293701, "learning_rate": 9.715452693559824e-05, "loss": 12.29951286315918, "step": 851 }, { "epoch": 0.11048523702617054, "grad_norm": 0.5936599373817444, "learning_rate": 9.714773222761934e-05, "loss": 15.580309867858887, "step": 852 }, { "epoch": 0.11061491453441723, "grad_norm": 0.46311649680137634, "learning_rate": 9.714092965494342e-05, "loss": 9.141448974609375, "step": 853 }, { "epoch": 0.1107445920426639, "grad_norm": 0.6047902703285217, "learning_rate": 9.713411921870523e-05, "loss": 12.822632789611816, "step": 854 }, { "epoch": 0.11087426955091058, "grad_norm": 0.6291902661323547, "learning_rate": 9.712730092004079e-05, "loss": 15.306619644165039, "step": 855 }, { "epoch": 0.11100394705915725, "grad_norm": 0.6573824882507324, "learning_rate": 9.712047476008746e-05, "loss": 13.745976448059082, "step": 856 }, { "epoch": 0.11113362456740394, "grad_norm": 0.572220504283905, "learning_rate": 9.711364073998394e-05, "loss": 11.483335494995117, "step": 857 }, { "epoch": 0.11126330207565062, "grad_norm": 0.4829918444156647, "learning_rate": 9.710679886087017e-05, "loss": 9.672606468200684, "step": 858 }, { "epoch": 0.1113929795838973, "grad_norm": 0.6106831431388855, "learning_rate": 9.709994912388746e-05, "loss": 13.094961166381836, "step": 859 }, { "epoch": 0.11152265709214397, "grad_norm": 0.7709701657295227, "learning_rate": 9.709309153017843e-05, "loss": 17.194660186767578, "step": 860 }, { "epoch": 0.11165233460039066, "grad_norm": 0.6334962844848633, "learning_rate": 9.708622608088696e-05, "loss": 11.605995178222656, "step": 861 }, { "epoch": 0.11178201210863734, "grad_norm": 0.7882482409477234, "learning_rate": 9.70793527771583e-05, "loss": 13.777533531188965, "step": 862 }, { "epoch": 0.11191168961688401, "grad_norm": 0.7338902354240417, "learning_rate": 9.707247162013896e-05, "loss": 12.054269790649414, "step": 863 }, { "epoch": 0.11204136712513069, "grad_norm": 0.5085812211036682, "learning_rate": 9.70655826109768e-05, "loss": 12.078034400939941, "step": 864 }, { "epoch": 0.11217104463337738, "grad_norm": 0.42416542768478394, "learning_rate": 9.705868575082097e-05, "loss": 9.10986328125, "step": 865 }, { "epoch": 0.11230072214162405, "grad_norm": 0.5195383429527283, "learning_rate": 9.705178104082193e-05, "loss": 9.210439682006836, "step": 866 }, { "epoch": 0.11243039964987073, "grad_norm": 0.6198639273643494, "learning_rate": 9.704486848213145e-05, "loss": 10.085807800292969, "step": 867 }, { "epoch": 0.1125600771581174, "grad_norm": 0.6022745370864868, "learning_rate": 9.703794807590263e-05, "loss": 12.047049522399902, "step": 868 }, { "epoch": 0.11268975466636409, "grad_norm": 0.8130295276641846, "learning_rate": 9.703101982328983e-05, "loss": 17.913217544555664, "step": 869 }, { "epoch": 0.11281943217461077, "grad_norm": 0.6192635893821716, "learning_rate": 9.702408372544877e-05, "loss": 15.71097183227539, "step": 870 }, { "epoch": 0.11294910968285744, "grad_norm": 0.632990300655365, "learning_rate": 9.701713978353645e-05, "loss": 9.228867530822754, "step": 871 }, { "epoch": 0.11307878719110412, "grad_norm": 0.7902992963790894, "learning_rate": 9.70101879987112e-05, "loss": 14.904610633850098, "step": 872 }, { "epoch": 0.11320846469935081, "grad_norm": 0.46735620498657227, "learning_rate": 9.700322837213263e-05, "loss": 10.316558837890625, "step": 873 }, { "epoch": 0.11333814220759748, "grad_norm": 0.7107702493667603, "learning_rate": 9.699626090496168e-05, "loss": 11.115180969238281, "step": 874 }, { "epoch": 0.11346781971584416, "grad_norm": 0.6412847638130188, "learning_rate": 9.698928559836058e-05, "loss": 11.747369766235352, "step": 875 }, { "epoch": 0.11359749722409083, "grad_norm": 0.6919794678688049, "learning_rate": 9.698230245349289e-05, "loss": 14.00417423248291, "step": 876 }, { "epoch": 0.11372717473233752, "grad_norm": 0.6997156143188477, "learning_rate": 9.697531147152347e-05, "loss": 10.67754077911377, "step": 877 }, { "epoch": 0.1138568522405842, "grad_norm": 0.8397054672241211, "learning_rate": 9.696831265361847e-05, "loss": 18.534452438354492, "step": 878 }, { "epoch": 0.11398652974883088, "grad_norm": 0.6282390356063843, "learning_rate": 9.696130600094538e-05, "loss": 12.755675315856934, "step": 879 }, { "epoch": 0.11411620725707755, "grad_norm": 0.5229089260101318, "learning_rate": 9.695429151467295e-05, "loss": 10.170788764953613, "step": 880 }, { "epoch": 0.11424588476532424, "grad_norm": 0.5616969466209412, "learning_rate": 9.694726919597128e-05, "loss": 10.284127235412598, "step": 881 }, { "epoch": 0.11437556227357092, "grad_norm": 0.5578743815422058, "learning_rate": 9.694023904601175e-05, "loss": 12.389543533325195, "step": 882 }, { "epoch": 0.11450523978181759, "grad_norm": 0.5167187452316284, "learning_rate": 9.693320106596709e-05, "loss": 12.458404541015625, "step": 883 }, { "epoch": 0.11463491729006427, "grad_norm": 0.5814396739006042, "learning_rate": 9.692615525701125e-05, "loss": 12.777783393859863, "step": 884 }, { "epoch": 0.11476459479831096, "grad_norm": 0.4981113374233246, "learning_rate": 9.691910162031958e-05, "loss": 9.137696266174316, "step": 885 }, { "epoch": 0.11489427230655763, "grad_norm": 0.5172182321548462, "learning_rate": 9.691204015706869e-05, "loss": 9.04930591583252, "step": 886 }, { "epoch": 0.11502394981480431, "grad_norm": 0.5797091722488403, "learning_rate": 9.690497086843649e-05, "loss": 12.828479766845703, "step": 887 }, { "epoch": 0.11515362732305098, "grad_norm": 0.7344362735748291, "learning_rate": 9.68978937556022e-05, "loss": 14.91055965423584, "step": 888 }, { "epoch": 0.11528330483129767, "grad_norm": 0.6834031343460083, "learning_rate": 9.689080881974637e-05, "loss": 13.445867538452148, "step": 889 }, { "epoch": 0.11541298233954435, "grad_norm": 0.5165653824806213, "learning_rate": 9.688371606205083e-05, "loss": 9.470730781555176, "step": 890 }, { "epoch": 0.11554265984779102, "grad_norm": 0.6233329772949219, "learning_rate": 9.687661548369869e-05, "loss": 17.371286392211914, "step": 891 }, { "epoch": 0.1156723373560377, "grad_norm": 0.7265377044677734, "learning_rate": 9.686950708587443e-05, "loss": 12.311059951782227, "step": 892 }, { "epoch": 0.11580201486428439, "grad_norm": 0.6248566508293152, "learning_rate": 9.686239086976381e-05, "loss": 12.452751159667969, "step": 893 }, { "epoch": 0.11593169237253106, "grad_norm": 0.5758731365203857, "learning_rate": 9.685526683655384e-05, "loss": 10.428237915039062, "step": 894 }, { "epoch": 0.11606136988077774, "grad_norm": 0.7428364157676697, "learning_rate": 9.68481349874329e-05, "loss": 10.63679027557373, "step": 895 }, { "epoch": 0.11619104738902442, "grad_norm": 0.7119849920272827, "learning_rate": 9.684099532359066e-05, "loss": 10.247482299804688, "step": 896 }, { "epoch": 0.1163207248972711, "grad_norm": 0.5910686254501343, "learning_rate": 9.683384784621808e-05, "loss": 11.295024871826172, "step": 897 }, { "epoch": 0.11645040240551778, "grad_norm": 0.6385067701339722, "learning_rate": 9.682669255650742e-05, "loss": 14.510329246520996, "step": 898 }, { "epoch": 0.11658007991376446, "grad_norm": 0.6635705232620239, "learning_rate": 9.681952945565226e-05, "loss": 15.328779220581055, "step": 899 }, { "epoch": 0.11670975742201113, "grad_norm": 0.5186585187911987, "learning_rate": 9.681235854484747e-05, "loss": 7.8532938957214355, "step": 900 }, { "epoch": 0.11683943493025782, "grad_norm": 0.7379116415977478, "learning_rate": 9.680517982528923e-05, "loss": 13.065644264221191, "step": 901 }, { "epoch": 0.1169691124385045, "grad_norm": 0.7188771367073059, "learning_rate": 9.679799329817501e-05, "loss": 12.852218627929688, "step": 902 }, { "epoch": 0.11709878994675117, "grad_norm": 0.638837993144989, "learning_rate": 9.67907989647036e-05, "loss": 12.138005256652832, "step": 903 }, { "epoch": 0.11722846745499785, "grad_norm": 0.4740491807460785, "learning_rate": 9.678359682607509e-05, "loss": 10.655099868774414, "step": 904 }, { "epoch": 0.11735814496324454, "grad_norm": 0.5896816253662109, "learning_rate": 9.677638688349087e-05, "loss": 14.009065628051758, "step": 905 }, { "epoch": 0.11748782247149121, "grad_norm": 0.5808525681495667, "learning_rate": 9.67691691381536e-05, "loss": 12.520010948181152, "step": 906 }, { "epoch": 0.11761749997973789, "grad_norm": 0.6708912253379822, "learning_rate": 9.67619435912673e-05, "loss": 11.591299057006836, "step": 907 }, { "epoch": 0.11774717748798456, "grad_norm": 0.9095627069473267, "learning_rate": 9.675471024403725e-05, "loss": 16.8852481842041, "step": 908 }, { "epoch": 0.11787685499623125, "grad_norm": 0.6022106409072876, "learning_rate": 9.674746909767002e-05, "loss": 9.797540664672852, "step": 909 }, { "epoch": 0.11800653250447793, "grad_norm": 0.558942973613739, "learning_rate": 9.674022015337356e-05, "loss": 9.799460411071777, "step": 910 }, { "epoch": 0.1181362100127246, "grad_norm": 0.7486589550971985, "learning_rate": 9.6732963412357e-05, "loss": 12.466327667236328, "step": 911 }, { "epoch": 0.11826588752097128, "grad_norm": 0.6443554759025574, "learning_rate": 9.672569887583088e-05, "loss": 13.925772666931152, "step": 912 }, { "epoch": 0.11839556502921797, "grad_norm": 0.7596750259399414, "learning_rate": 9.671842654500698e-05, "loss": 14.620573043823242, "step": 913 }, { "epoch": 0.11852524253746465, "grad_norm": 0.5114801526069641, "learning_rate": 9.671114642109838e-05, "loss": 10.194307327270508, "step": 914 }, { "epoch": 0.11865492004571132, "grad_norm": 0.4305884838104248, "learning_rate": 9.670385850531949e-05, "loss": 8.36527156829834, "step": 915 }, { "epoch": 0.118784597553958, "grad_norm": 0.6832963824272156, "learning_rate": 9.669656279888601e-05, "loss": 18.433712005615234, "step": 916 }, { "epoch": 0.11891427506220469, "grad_norm": 0.5151519775390625, "learning_rate": 9.668925930301491e-05, "loss": 11.95013427734375, "step": 917 }, { "epoch": 0.11904395257045136, "grad_norm": 0.5781393051147461, "learning_rate": 9.668194801892452e-05, "loss": 12.612841606140137, "step": 918 }, { "epoch": 0.11917363007869804, "grad_norm": 0.6457188129425049, "learning_rate": 9.66746289478344e-05, "loss": 13.369162559509277, "step": 919 }, { "epoch": 0.11930330758694471, "grad_norm": 0.7199733853340149, "learning_rate": 9.666730209096546e-05, "loss": 12.7456693649292, "step": 920 }, { "epoch": 0.1194329850951914, "grad_norm": 0.8768038749694824, "learning_rate": 9.665996744953988e-05, "loss": 17.031965255737305, "step": 921 }, { "epoch": 0.11956266260343808, "grad_norm": 0.7003462314605713, "learning_rate": 9.665262502478117e-05, "loss": 15.985292434692383, "step": 922 }, { "epoch": 0.11969234011168475, "grad_norm": 0.6308867931365967, "learning_rate": 9.664527481791408e-05, "loss": 12.865787506103516, "step": 923 }, { "epoch": 0.11982201761993143, "grad_norm": 0.6948814988136292, "learning_rate": 9.663791683016472e-05, "loss": 12.680521965026855, "step": 924 }, { "epoch": 0.11995169512817812, "grad_norm": 0.716060221195221, "learning_rate": 9.663055106276049e-05, "loss": 12.326536178588867, "step": 925 }, { "epoch": 0.1200813726364248, "grad_norm": 0.6682624220848083, "learning_rate": 9.662317751693004e-05, "loss": 15.473153114318848, "step": 926 }, { "epoch": 0.12021105014467147, "grad_norm": 0.47229698300361633, "learning_rate": 9.661579619390335e-05, "loss": 11.450708389282227, "step": 927 }, { "epoch": 0.12034072765291814, "grad_norm": 0.4694766104221344, "learning_rate": 9.660840709491173e-05, "loss": 10.641596794128418, "step": 928 }, { "epoch": 0.12047040516116483, "grad_norm": 0.5459566712379456, "learning_rate": 9.66010102211877e-05, "loss": 12.064996719360352, "step": 929 }, { "epoch": 0.12060008266941151, "grad_norm": 0.635089635848999, "learning_rate": 9.659360557396519e-05, "loss": 12.424580574035645, "step": 930 }, { "epoch": 0.12072976017765819, "grad_norm": 0.6444430947303772, "learning_rate": 9.658619315447932e-05, "loss": 8.701326370239258, "step": 931 }, { "epoch": 0.12085943768590486, "grad_norm": 0.8633102774620056, "learning_rate": 9.657877296396657e-05, "loss": 12.974603652954102, "step": 932 }, { "epoch": 0.12098911519415155, "grad_norm": 0.4805313050746918, "learning_rate": 9.65713450036647e-05, "loss": 12.919297218322754, "step": 933 }, { "epoch": 0.12111879270239823, "grad_norm": 0.6674125790596008, "learning_rate": 9.656390927481276e-05, "loss": 13.879841804504395, "step": 934 }, { "epoch": 0.1212484702106449, "grad_norm": 0.8577249050140381, "learning_rate": 9.655646577865111e-05, "loss": 12.785417556762695, "step": 935 }, { "epoch": 0.12137814771889158, "grad_norm": 0.5894284844398499, "learning_rate": 9.65490145164214e-05, "loss": 13.361734390258789, "step": 936 }, { "epoch": 0.12150782522713827, "grad_norm": 1.1208934783935547, "learning_rate": 9.654155548936655e-05, "loss": 15.747013092041016, "step": 937 }, { "epoch": 0.12163750273538494, "grad_norm": 0.836283802986145, "learning_rate": 9.653408869873082e-05, "loss": 13.144194602966309, "step": 938 }, { "epoch": 0.12176718024363162, "grad_norm": 0.6978098750114441, "learning_rate": 9.652661414575974e-05, "loss": 14.961515426635742, "step": 939 }, { "epoch": 0.1218968577518783, "grad_norm": 0.5303219556808472, "learning_rate": 9.651913183170013e-05, "loss": 12.089550018310547, "step": 940 }, { "epoch": 0.12202653526012498, "grad_norm": 0.5927948951721191, "learning_rate": 9.651164175780011e-05, "loss": 13.50830364227295, "step": 941 }, { "epoch": 0.12215621276837166, "grad_norm": 0.7233521938323975, "learning_rate": 9.650414392530911e-05, "loss": 14.460704803466797, "step": 942 }, { "epoch": 0.12228589027661833, "grad_norm": 0.46084868907928467, "learning_rate": 9.649663833547781e-05, "loss": 11.039809226989746, "step": 943 }, { "epoch": 0.12241556778486501, "grad_norm": 0.718779444694519, "learning_rate": 9.648912498955827e-05, "loss": 17.009357452392578, "step": 944 }, { "epoch": 0.1225452452931117, "grad_norm": 0.5727702975273132, "learning_rate": 9.648160388880374e-05, "loss": 10.662954330444336, "step": 945 }, { "epoch": 0.12267492280135837, "grad_norm": 0.5547502636909485, "learning_rate": 9.647407503446883e-05, "loss": 13.606513023376465, "step": 946 }, { "epoch": 0.12280460030960505, "grad_norm": 0.7273021936416626, "learning_rate": 9.646653842780944e-05, "loss": 14.694541931152344, "step": 947 }, { "epoch": 0.12293427781785173, "grad_norm": 0.6268500685691833, "learning_rate": 9.645899407008272e-05, "loss": 12.559924125671387, "step": 948 }, { "epoch": 0.12306395532609841, "grad_norm": 0.6714907288551331, "learning_rate": 9.645144196254714e-05, "loss": 13.996879577636719, "step": 949 }, { "epoch": 0.12319363283434509, "grad_norm": 0.575838029384613, "learning_rate": 9.64438821064625e-05, "loss": 13.904638290405273, "step": 950 }, { "epoch": 0.12332331034259177, "grad_norm": 0.495877206325531, "learning_rate": 9.643631450308983e-05, "loss": 10.038643836975098, "step": 951 }, { "epoch": 0.12345298785083844, "grad_norm": 0.6878101825714111, "learning_rate": 9.642873915369148e-05, "loss": 14.698320388793945, "step": 952 }, { "epoch": 0.12358266535908513, "grad_norm": 0.49068236351013184, "learning_rate": 9.64211560595311e-05, "loss": 8.682990074157715, "step": 953 }, { "epoch": 0.1237123428673318, "grad_norm": 0.5082986354827881, "learning_rate": 9.641356522187363e-05, "loss": 11.188288688659668, "step": 954 }, { "epoch": 0.12384202037557848, "grad_norm": 0.5607951879501343, "learning_rate": 9.640596664198528e-05, "loss": 12.149499893188477, "step": 955 }, { "epoch": 0.12397169788382516, "grad_norm": 0.6467757225036621, "learning_rate": 9.639836032113356e-05, "loss": 9.55885124206543, "step": 956 }, { "epoch": 0.12410137539207185, "grad_norm": 0.40439373254776, "learning_rate": 9.63907462605873e-05, "loss": 10.189956665039062, "step": 957 }, { "epoch": 0.12423105290031852, "grad_norm": 0.838201105594635, "learning_rate": 9.638312446161659e-05, "loss": 10.753410339355469, "step": 958 }, { "epoch": 0.1243607304085652, "grad_norm": 0.7667984962463379, "learning_rate": 9.637549492549283e-05, "loss": 11.0311279296875, "step": 959 }, { "epoch": 0.12449040791681187, "grad_norm": 0.5604751110076904, "learning_rate": 9.636785765348865e-05, "loss": 11.272706985473633, "step": 960 }, { "epoch": 0.12462008542505856, "grad_norm": 0.7735433578491211, "learning_rate": 9.63602126468781e-05, "loss": 15.900496482849121, "step": 961 }, { "epoch": 0.12474976293330524, "grad_norm": 0.7438057661056519, "learning_rate": 9.63525599069364e-05, "loss": 14.443717956542969, "step": 962 }, { "epoch": 0.12487944044155191, "grad_norm": 0.7678650617599487, "learning_rate": 9.634489943494007e-05, "loss": 10.566226959228516, "step": 963 }, { "epoch": 0.1250091179497986, "grad_norm": 0.6947063207626343, "learning_rate": 9.6337231232167e-05, "loss": 15.5138578414917, "step": 964 }, { "epoch": 0.12513879545804527, "grad_norm": 0.5355461239814758, "learning_rate": 9.63295552998963e-05, "loss": 12.228760719299316, "step": 965 }, { "epoch": 0.12526847296629195, "grad_norm": 0.899102509021759, "learning_rate": 9.632187163940838e-05, "loss": 18.893531799316406, "step": 966 }, { "epoch": 0.12539815047453864, "grad_norm": 0.7717065215110779, "learning_rate": 9.631418025198498e-05, "loss": 15.2034330368042, "step": 967 }, { "epoch": 0.1255278279827853, "grad_norm": 0.754851222038269, "learning_rate": 9.630648113890905e-05, "loss": 14.065054893493652, "step": 968 }, { "epoch": 0.125657505491032, "grad_norm": 0.6519491076469421, "learning_rate": 9.629877430146494e-05, "loss": 11.152877807617188, "step": 969 }, { "epoch": 0.12578718299927866, "grad_norm": 0.9380288124084473, "learning_rate": 9.629105974093817e-05, "loss": 14.674327850341797, "step": 970 }, { "epoch": 0.12591686050752535, "grad_norm": 1.2388032674789429, "learning_rate": 9.628333745861562e-05, "loss": 19.733627319335938, "step": 971 }, { "epoch": 0.12604653801577204, "grad_norm": 0.8247560858726501, "learning_rate": 9.627560745578546e-05, "loss": 14.89880657196045, "step": 972 }, { "epoch": 0.1261762155240187, "grad_norm": 0.7927754521369934, "learning_rate": 9.626786973373709e-05, "loss": 15.696523666381836, "step": 973 }, { "epoch": 0.1263058930322654, "grad_norm": 0.4998933672904968, "learning_rate": 9.626012429376128e-05, "loss": 11.024713516235352, "step": 974 }, { "epoch": 0.12643557054051208, "grad_norm": 0.5627827048301697, "learning_rate": 9.625237113715e-05, "loss": 13.52882194519043, "step": 975 }, { "epoch": 0.12656524804875874, "grad_norm": 0.5583561062812805, "learning_rate": 9.62446102651966e-05, "loss": 14.141764640808105, "step": 976 }, { "epoch": 0.12669492555700543, "grad_norm": 0.514876663684845, "learning_rate": 9.623684167919564e-05, "loss": 10.600132942199707, "step": 977 }, { "epoch": 0.1268246030652521, "grad_norm": 0.6047519445419312, "learning_rate": 9.622906538044298e-05, "loss": 12.042130470275879, "step": 978 }, { "epoch": 0.12695428057349878, "grad_norm": 0.6365256309509277, "learning_rate": 9.622128137023581e-05, "loss": 11.082721710205078, "step": 979 }, { "epoch": 0.12708395808174547, "grad_norm": 0.6143451929092407, "learning_rate": 9.621348964987258e-05, "loss": 11.788333892822266, "step": 980 }, { "epoch": 0.12721363558999213, "grad_norm": 0.7299305200576782, "learning_rate": 9.6205690220653e-05, "loss": 12.526213645935059, "step": 981 }, { "epoch": 0.12734331309823882, "grad_norm": 0.5531489849090576, "learning_rate": 9.619788308387812e-05, "loss": 8.538188934326172, "step": 982 }, { "epoch": 0.1274729906064855, "grad_norm": 0.63649982213974, "learning_rate": 9.619006824085021e-05, "loss": 12.064722061157227, "step": 983 }, { "epoch": 0.12760266811473217, "grad_norm": 0.6863217949867249, "learning_rate": 9.618224569287289e-05, "loss": 10.869258880615234, "step": 984 }, { "epoch": 0.12773234562297886, "grad_norm": 0.7698781490325928, "learning_rate": 9.617441544125103e-05, "loss": 13.790416717529297, "step": 985 }, { "epoch": 0.12786202313122552, "grad_norm": 0.6326967477798462, "learning_rate": 9.616657748729076e-05, "loss": 13.188835144042969, "step": 986 }, { "epoch": 0.1279917006394722, "grad_norm": 0.5523319244384766, "learning_rate": 9.615873183229959e-05, "loss": 11.690300941467285, "step": 987 }, { "epoch": 0.1281213781477189, "grad_norm": 0.5714508891105652, "learning_rate": 9.615087847758619e-05, "loss": 10.207776069641113, "step": 988 }, { "epoch": 0.12825105565596556, "grad_norm": 0.6594138145446777, "learning_rate": 9.614301742446061e-05, "loss": 14.324613571166992, "step": 989 }, { "epoch": 0.12838073316421225, "grad_norm": 0.5401836037635803, "learning_rate": 9.613514867423414e-05, "loss": 11.142324447631836, "step": 990 }, { "epoch": 0.12851041067245894, "grad_norm": 0.49129852652549744, "learning_rate": 9.612727222821937e-05, "loss": 9.342357635498047, "step": 991 }, { "epoch": 0.1286400881807056, "grad_norm": 0.6331088542938232, "learning_rate": 9.611938808773017e-05, "loss": 12.890392303466797, "step": 992 }, { "epoch": 0.1287697656889523, "grad_norm": 0.547965407371521, "learning_rate": 9.611149625408167e-05, "loss": 11.727015495300293, "step": 993 }, { "epoch": 0.12889944319719895, "grad_norm": 0.6551260352134705, "learning_rate": 9.610359672859031e-05, "loss": 14.982718467712402, "step": 994 }, { "epoch": 0.12902912070544564, "grad_norm": 0.8083457946777344, "learning_rate": 9.609568951257382e-05, "loss": 16.734758377075195, "step": 995 }, { "epoch": 0.12915879821369233, "grad_norm": 0.674416720867157, "learning_rate": 9.60877746073512e-05, "loss": 13.576701164245605, "step": 996 }, { "epoch": 0.129288475721939, "grad_norm": 0.5614551305770874, "learning_rate": 9.607985201424273e-05, "loss": 8.999975204467773, "step": 997 }, { "epoch": 0.12941815323018568, "grad_norm": 0.686526894569397, "learning_rate": 9.607192173456993e-05, "loss": 11.620552062988281, "step": 998 }, { "epoch": 0.12954783073843237, "grad_norm": 0.46830132603645325, "learning_rate": 9.606398376965574e-05, "loss": 9.37678050994873, "step": 999 }, { "epoch": 0.12967750824667904, "grad_norm": 0.7091798782348633, "learning_rate": 9.605603812082421e-05, "loss": 12.59516429901123, "step": 1000 }, { "epoch": 0.12980718575492572, "grad_norm": 0.5891287326812744, "learning_rate": 9.604808478940079e-05, "loss": 11.433989524841309, "step": 1001 }, { "epoch": 0.1299368632631724, "grad_norm": 0.856380820274353, "learning_rate": 9.604012377671214e-05, "loss": 19.403169631958008, "step": 1002 }, { "epoch": 0.13006654077141908, "grad_norm": 0.5762919783592224, "learning_rate": 9.603215508408627e-05, "loss": 12.932489395141602, "step": 1003 }, { "epoch": 0.13019621827966577, "grad_norm": 0.7384105920791626, "learning_rate": 9.602417871285243e-05, "loss": 10.928237915039062, "step": 1004 }, { "epoch": 0.13032589578791243, "grad_norm": 0.5286999344825745, "learning_rate": 9.601619466434113e-05, "loss": 10.348069190979004, "step": 1005 }, { "epoch": 0.13045557329615912, "grad_norm": 0.7535045146942139, "learning_rate": 9.60082029398842e-05, "loss": 12.624205589294434, "step": 1006 }, { "epoch": 0.1305852508044058, "grad_norm": 0.5708901882171631, "learning_rate": 9.600020354081474e-05, "loss": 11.839516639709473, "step": 1007 }, { "epoch": 0.13071492831265247, "grad_norm": 0.640329897403717, "learning_rate": 9.599219646846713e-05, "loss": 13.455423355102539, "step": 1008 }, { "epoch": 0.13084460582089916, "grad_norm": 0.6536089777946472, "learning_rate": 9.598418172417703e-05, "loss": 13.46671199798584, "step": 1009 }, { "epoch": 0.13097428332914582, "grad_norm": 0.7068425416946411, "learning_rate": 9.597615930928136e-05, "loss": 14.378619194030762, "step": 1010 }, { "epoch": 0.1311039608373925, "grad_norm": 0.5162143111228943, "learning_rate": 9.596812922511834e-05, "loss": 10.264287948608398, "step": 1011 }, { "epoch": 0.1312336383456392, "grad_norm": 0.55985426902771, "learning_rate": 9.596009147302749e-05, "loss": 11.776264190673828, "step": 1012 }, { "epoch": 0.13136331585388586, "grad_norm": 0.4664520025253296, "learning_rate": 9.595204605434955e-05, "loss": 11.40167236328125, "step": 1013 }, { "epoch": 0.13149299336213255, "grad_norm": 0.48571932315826416, "learning_rate": 9.594399297042657e-05, "loss": 9.108915328979492, "step": 1014 }, { "epoch": 0.13162267087037924, "grad_norm": 0.6579462289810181, "learning_rate": 9.593593222260194e-05, "loss": 13.917731285095215, "step": 1015 }, { "epoch": 0.1317523483786259, "grad_norm": 0.5955867171287537, "learning_rate": 9.59278638122202e-05, "loss": 11.212127685546875, "step": 1016 }, { "epoch": 0.1318820258868726, "grad_norm": 0.6023703813552856, "learning_rate": 9.59197877406273e-05, "loss": 9.696388244628906, "step": 1017 }, { "epoch": 0.13201170339511925, "grad_norm": 0.7262200117111206, "learning_rate": 9.591170400917036e-05, "loss": 13.144492149353027, "step": 1018 }, { "epoch": 0.13214138090336594, "grad_norm": 0.6956731081008911, "learning_rate": 9.590361261919785e-05, "loss": 14.025407791137695, "step": 1019 }, { "epoch": 0.13227105841161263, "grad_norm": 0.6839199662208557, "learning_rate": 9.589551357205948e-05, "loss": 13.162237167358398, "step": 1020 }, { "epoch": 0.1324007359198593, "grad_norm": 0.5407817959785461, "learning_rate": 9.588740686910625e-05, "loss": 12.730395317077637, "step": 1021 }, { "epoch": 0.13253041342810598, "grad_norm": 0.8312307000160217, "learning_rate": 9.587929251169043e-05, "loss": 11.689886093139648, "step": 1022 }, { "epoch": 0.13266009093635267, "grad_norm": 0.6938460469245911, "learning_rate": 9.587117050116561e-05, "loss": 8.977513313293457, "step": 1023 }, { "epoch": 0.13278976844459933, "grad_norm": 0.5759596824645996, "learning_rate": 9.586304083888658e-05, "loss": 9.615765571594238, "step": 1024 }, { "epoch": 0.13291944595284602, "grad_norm": 0.3996327817440033, "learning_rate": 9.585490352620945e-05, "loss": 10.224505424499512, "step": 1025 }, { "epoch": 0.13304912346109268, "grad_norm": 0.7119802236557007, "learning_rate": 9.58467585644916e-05, "loss": 15.749842643737793, "step": 1026 }, { "epoch": 0.13317880096933937, "grad_norm": 0.6728116273880005, "learning_rate": 9.583860595509173e-05, "loss": 17.03350067138672, "step": 1027 }, { "epoch": 0.13330847847758606, "grad_norm": 0.5790900588035583, "learning_rate": 9.583044569936971e-05, "loss": 9.908719062805176, "step": 1028 }, { "epoch": 0.13343815598583272, "grad_norm": 0.6479288935661316, "learning_rate": 9.582227779868682e-05, "loss": 12.625658988952637, "step": 1029 }, { "epoch": 0.1335678334940794, "grad_norm": 0.513470470905304, "learning_rate": 9.581410225440548e-05, "loss": 13.680508613586426, "step": 1030 }, { "epoch": 0.1336975110023261, "grad_norm": 0.5733829140663147, "learning_rate": 9.58059190678895e-05, "loss": 8.27120304107666, "step": 1031 }, { "epoch": 0.13382718851057276, "grad_norm": 0.7473448514938354, "learning_rate": 9.579772824050387e-05, "loss": 10.737751007080078, "step": 1032 }, { "epoch": 0.13395686601881945, "grad_norm": 0.6297739148139954, "learning_rate": 9.578952977361492e-05, "loss": 11.573152542114258, "step": 1033 }, { "epoch": 0.13408654352706612, "grad_norm": 0.5468558073043823, "learning_rate": 9.578132366859024e-05, "loss": 10.367084503173828, "step": 1034 }, { "epoch": 0.1342162210353128, "grad_norm": 0.5922073721885681, "learning_rate": 9.577310992679868e-05, "loss": 10.607857704162598, "step": 1035 }, { "epoch": 0.1343458985435595, "grad_norm": 0.5199747681617737, "learning_rate": 9.576488854961038e-05, "loss": 14.23222827911377, "step": 1036 }, { "epoch": 0.13447557605180616, "grad_norm": 0.6061374545097351, "learning_rate": 9.575665953839674e-05, "loss": 11.332438468933105, "step": 1037 }, { "epoch": 0.13460525356005285, "grad_norm": 0.6829493045806885, "learning_rate": 9.574842289453043e-05, "loss": 12.948652267456055, "step": 1038 }, { "epoch": 0.13473493106829953, "grad_norm": 0.7503253817558289, "learning_rate": 9.574017861938542e-05, "loss": 13.212285041809082, "step": 1039 }, { "epoch": 0.1348646085765462, "grad_norm": 0.6017504334449768, "learning_rate": 9.573192671433691e-05, "loss": 13.157572746276855, "step": 1040 }, { "epoch": 0.13499428608479289, "grad_norm": 0.7614055871963501, "learning_rate": 9.572366718076142e-05, "loss": 14.435025215148926, "step": 1041 }, { "epoch": 0.13512396359303955, "grad_norm": 0.6406972408294678, "learning_rate": 9.571540002003671e-05, "loss": 12.037311553955078, "step": 1042 }, { "epoch": 0.13525364110128624, "grad_norm": 0.5759994983673096, "learning_rate": 9.570712523354182e-05, "loss": 11.021997451782227, "step": 1043 }, { "epoch": 0.13538331860953293, "grad_norm": 0.5208097696304321, "learning_rate": 9.569884282265706e-05, "loss": 10.497994422912598, "step": 1044 }, { "epoch": 0.1355129961177796, "grad_norm": 0.7134842872619629, "learning_rate": 9.569055278876402e-05, "loss": 15.087265014648438, "step": 1045 }, { "epoch": 0.13564267362602628, "grad_norm": 0.7748257517814636, "learning_rate": 9.568225513324558e-05, "loss": 11.201611518859863, "step": 1046 }, { "epoch": 0.13577235113427297, "grad_norm": 0.695395290851593, "learning_rate": 9.567394985748583e-05, "loss": 13.319302558898926, "step": 1047 }, { "epoch": 0.13590202864251963, "grad_norm": 0.5450520515441895, "learning_rate": 9.566563696287021e-05, "loss": 12.749044418334961, "step": 1048 }, { "epoch": 0.13603170615076632, "grad_norm": 0.5977713465690613, "learning_rate": 9.565731645078534e-05, "loss": 14.913856506347656, "step": 1049 }, { "epoch": 0.13616138365901298, "grad_norm": 0.6356267333030701, "learning_rate": 9.564898832261924e-05, "loss": 11.485332489013672, "step": 1050 }, { "epoch": 0.13629106116725967, "grad_norm": 0.7852414846420288, "learning_rate": 9.564065257976105e-05, "loss": 11.385254859924316, "step": 1051 }, { "epoch": 0.13642073867550636, "grad_norm": 0.7264748215675354, "learning_rate": 9.563230922360127e-05, "loss": 11.639116287231445, "step": 1052 }, { "epoch": 0.13655041618375302, "grad_norm": 0.5905643105506897, "learning_rate": 9.562395825553168e-05, "loss": 10.216331481933594, "step": 1053 }, { "epoch": 0.1366800936919997, "grad_norm": 0.45453789830207825, "learning_rate": 9.561559967694527e-05, "loss": 8.626541137695312, "step": 1054 }, { "epoch": 0.1368097712002464, "grad_norm": 0.5758622288703918, "learning_rate": 9.560723348923632e-05, "loss": 9.278189659118652, "step": 1055 }, { "epoch": 0.13693944870849306, "grad_norm": 0.8829402923583984, "learning_rate": 9.559885969380044e-05, "loss": 13.48403263092041, "step": 1056 }, { "epoch": 0.13706912621673975, "grad_norm": 0.7009143829345703, "learning_rate": 9.559047829203445e-05, "loss": 7.8426008224487305, "step": 1057 }, { "epoch": 0.1371988037249864, "grad_norm": 0.6606895923614502, "learning_rate": 9.55820892853364e-05, "loss": 13.011465072631836, "step": 1058 }, { "epoch": 0.1373284812332331, "grad_norm": 0.6473771929740906, "learning_rate": 9.557369267510572e-05, "loss": 10.61316967010498, "step": 1059 }, { "epoch": 0.1374581587414798, "grad_norm": 1.0125131607055664, "learning_rate": 9.5565288462743e-05, "loss": 14.723230361938477, "step": 1060 }, { "epoch": 0.13758783624972645, "grad_norm": 0.5434773564338684, "learning_rate": 9.555687664965016e-05, "loss": 10.127298355102539, "step": 1061 }, { "epoch": 0.13771751375797314, "grad_norm": 0.5315357446670532, "learning_rate": 9.554845723723038e-05, "loss": 9.233654022216797, "step": 1062 }, { "epoch": 0.13784719126621983, "grad_norm": 0.6883785724639893, "learning_rate": 9.554003022688809e-05, "loss": 11.103659629821777, "step": 1063 }, { "epoch": 0.1379768687744665, "grad_norm": 0.6169447302818298, "learning_rate": 9.5531595620029e-05, "loss": 10.131103515625, "step": 1064 }, { "epoch": 0.13810654628271318, "grad_norm": 0.612446665763855, "learning_rate": 9.552315341806008e-05, "loss": 12.537229537963867, "step": 1065 }, { "epoch": 0.13823622379095984, "grad_norm": 0.6995895504951477, "learning_rate": 9.551470362238957e-05, "loss": 12.192423820495605, "step": 1066 }, { "epoch": 0.13836590129920653, "grad_norm": 0.4509081244468689, "learning_rate": 9.550624623442698e-05, "loss": 6.907068729400635, "step": 1067 }, { "epoch": 0.13849557880745322, "grad_norm": 0.9927897453308105, "learning_rate": 9.549778125558309e-05, "loss": 13.715683937072754, "step": 1068 }, { "epoch": 0.13862525631569989, "grad_norm": 0.5450388193130493, "learning_rate": 9.548930868726991e-05, "loss": 13.964339256286621, "step": 1069 }, { "epoch": 0.13875493382394657, "grad_norm": 0.7825160622596741, "learning_rate": 9.548082853090079e-05, "loss": 13.682096481323242, "step": 1070 }, { "epoch": 0.13888461133219326, "grad_norm": 0.7308781147003174, "learning_rate": 9.54723407878903e-05, "loss": 9.50837230682373, "step": 1071 }, { "epoch": 0.13901428884043993, "grad_norm": 0.6370099186897278, "learning_rate": 9.546384545965424e-05, "loss": 10.435580253601074, "step": 1072 }, { "epoch": 0.13914396634868662, "grad_norm": 0.8046977519989014, "learning_rate": 9.545534254760973e-05, "loss": 14.3733549118042, "step": 1073 }, { "epoch": 0.13927364385693328, "grad_norm": 0.5703807473182678, "learning_rate": 9.544683205317513e-05, "loss": 12.86377239227295, "step": 1074 }, { "epoch": 0.13940332136517997, "grad_norm": 0.5358613133430481, "learning_rate": 9.54383139777701e-05, "loss": 10.45273208618164, "step": 1075 }, { "epoch": 0.13953299887342666, "grad_norm": 0.588448166847229, "learning_rate": 9.542978832281552e-05, "loss": 11.872001647949219, "step": 1076 }, { "epoch": 0.13966267638167332, "grad_norm": 0.6156410574913025, "learning_rate": 9.542125508973355e-05, "loss": 12.235819816589355, "step": 1077 }, { "epoch": 0.13979235388992, "grad_norm": 0.7391674518585205, "learning_rate": 9.541271427994762e-05, "loss": 14.483527183532715, "step": 1078 }, { "epoch": 0.1399220313981667, "grad_norm": 0.7976967096328735, "learning_rate": 9.540416589488241e-05, "loss": 17.023147583007812, "step": 1079 }, { "epoch": 0.14005170890641336, "grad_norm": 0.6818749308586121, "learning_rate": 9.53956099359639e-05, "loss": 13.203322410583496, "step": 1080 }, { "epoch": 0.14018138641466005, "grad_norm": 0.5764489769935608, "learning_rate": 9.538704640461926e-05, "loss": 13.491050720214844, "step": 1081 }, { "epoch": 0.1403110639229067, "grad_norm": 0.8126029372215271, "learning_rate": 9.537847530227701e-05, "loss": 14.342537879943848, "step": 1082 }, { "epoch": 0.1404407414311534, "grad_norm": 0.6402326822280884, "learning_rate": 9.536989663036688e-05, "loss": 13.488175392150879, "step": 1083 }, { "epoch": 0.1405704189394001, "grad_norm": 0.6205629706382751, "learning_rate": 9.536131039031988e-05, "loss": 13.968840599060059, "step": 1084 }, { "epoch": 0.14070009644764675, "grad_norm": 0.6666122674942017, "learning_rate": 9.535271658356826e-05, "loss": 15.582221984863281, "step": 1085 }, { "epoch": 0.14082977395589344, "grad_norm": 0.5984408855438232, "learning_rate": 9.534411521154556e-05, "loss": 11.441217422485352, "step": 1086 }, { "epoch": 0.14095945146414013, "grad_norm": 0.6656485795974731, "learning_rate": 9.53355062756866e-05, "loss": 13.926955223083496, "step": 1087 }, { "epoch": 0.1410891289723868, "grad_norm": 0.6960499882698059, "learning_rate": 9.532688977742738e-05, "loss": 17.913652420043945, "step": 1088 }, { "epoch": 0.14121880648063348, "grad_norm": 0.594423234462738, "learning_rate": 9.531826571820526e-05, "loss": 12.063305854797363, "step": 1089 }, { "epoch": 0.14134848398888014, "grad_norm": 0.5682513117790222, "learning_rate": 9.530963409945879e-05, "loss": 11.249244689941406, "step": 1090 }, { "epoch": 0.14147816149712683, "grad_norm": 0.5767633318901062, "learning_rate": 9.530099492262782e-05, "loss": 9.503552436828613, "step": 1091 }, { "epoch": 0.14160783900537352, "grad_norm": 0.7053566575050354, "learning_rate": 9.529234818915345e-05, "loss": 12.820785522460938, "step": 1092 }, { "epoch": 0.14173751651362018, "grad_norm": 0.7517769932746887, "learning_rate": 9.528369390047803e-05, "loss": 13.309176445007324, "step": 1093 }, { "epoch": 0.14186719402186687, "grad_norm": 0.5630333423614502, "learning_rate": 9.527503205804517e-05, "loss": 14.02785587310791, "step": 1094 }, { "epoch": 0.14199687153011356, "grad_norm": 0.49478745460510254, "learning_rate": 9.526636266329977e-05, "loss": 11.870808601379395, "step": 1095 }, { "epoch": 0.14212654903836022, "grad_norm": 0.5605244636535645, "learning_rate": 9.525768571768797e-05, "loss": 14.740020751953125, "step": 1096 }, { "epoch": 0.1422562265466069, "grad_norm": 0.7449425458908081, "learning_rate": 9.524900122265714e-05, "loss": 14.10329818725586, "step": 1097 }, { "epoch": 0.14238590405485357, "grad_norm": 0.6239170432090759, "learning_rate": 9.524030917965596e-05, "loss": 11.834142684936523, "step": 1098 }, { "epoch": 0.14251558156310026, "grad_norm": 0.7019071578979492, "learning_rate": 9.523160959013435e-05, "loss": 11.900226593017578, "step": 1099 }, { "epoch": 0.14264525907134695, "grad_norm": 0.6994392275810242, "learning_rate": 9.522290245554349e-05, "loss": 10.346997261047363, "step": 1100 }, { "epoch": 0.14277493657959361, "grad_norm": 0.6645036935806274, "learning_rate": 9.52141877773358e-05, "loss": 12.982048034667969, "step": 1101 }, { "epoch": 0.1429046140878403, "grad_norm": 0.4235362708568573, "learning_rate": 9.520546555696495e-05, "loss": 14.481728553771973, "step": 1102 }, { "epoch": 0.143034291596087, "grad_norm": 0.5193394422531128, "learning_rate": 9.519673579588593e-05, "loss": 11.1940336227417, "step": 1103 }, { "epoch": 0.14316396910433365, "grad_norm": 0.6468207836151123, "learning_rate": 9.518799849555494e-05, "loss": 12.121329307556152, "step": 1104 }, { "epoch": 0.14329364661258034, "grad_norm": 0.765026867389679, "learning_rate": 9.517925365742943e-05, "loss": 14.68029499053955, "step": 1105 }, { "epoch": 0.143423324120827, "grad_norm": 0.6515055894851685, "learning_rate": 9.517050128296814e-05, "loss": 11.621413230895996, "step": 1106 }, { "epoch": 0.1435530016290737, "grad_norm": 0.6749771237373352, "learning_rate": 9.516174137363104e-05, "loss": 16.870845794677734, "step": 1107 }, { "epoch": 0.14368267913732038, "grad_norm": 0.648826539516449, "learning_rate": 9.515297393087937e-05, "loss": 11.556873321533203, "step": 1108 }, { "epoch": 0.14381235664556705, "grad_norm": 0.6102558970451355, "learning_rate": 9.514419895617564e-05, "loss": 13.042947769165039, "step": 1109 }, { "epoch": 0.14394203415381374, "grad_norm": 1.1032278537750244, "learning_rate": 9.513541645098358e-05, "loss": 19.146102905273438, "step": 1110 }, { "epoch": 0.14407171166206043, "grad_norm": 0.7934659123420715, "learning_rate": 9.51266264167682e-05, "loss": 13.65077018737793, "step": 1111 }, { "epoch": 0.1442013891703071, "grad_norm": 0.5926424264907837, "learning_rate": 9.511782885499576e-05, "loss": 11.773767471313477, "step": 1112 }, { "epoch": 0.14433106667855378, "grad_norm": 0.7616752982139587, "learning_rate": 9.510902376713377e-05, "loss": 14.691789627075195, "step": 1113 }, { "epoch": 0.14446074418680044, "grad_norm": 0.7652319073677063, "learning_rate": 9.510021115465104e-05, "loss": 15.971230506896973, "step": 1114 }, { "epoch": 0.14459042169504713, "grad_norm": 0.7517275214195251, "learning_rate": 9.509139101901758e-05, "loss": 13.902227401733398, "step": 1115 }, { "epoch": 0.14472009920329382, "grad_norm": 0.6345762610435486, "learning_rate": 9.508256336170467e-05, "loss": 11.696782112121582, "step": 1116 }, { "epoch": 0.14484977671154048, "grad_norm": 0.7453287839889526, "learning_rate": 9.507372818418483e-05, "loss": 13.412357330322266, "step": 1117 }, { "epoch": 0.14497945421978717, "grad_norm": 0.8161035180091858, "learning_rate": 9.506488548793189e-05, "loss": 13.604275703430176, "step": 1118 }, { "epoch": 0.14510913172803386, "grad_norm": 0.7014778852462769, "learning_rate": 9.505603527442087e-05, "loss": 12.35749626159668, "step": 1119 }, { "epoch": 0.14523880923628052, "grad_norm": 0.6049841642379761, "learning_rate": 9.504717754512807e-05, "loss": 12.67331314086914, "step": 1120 }, { "epoch": 0.1453684867445272, "grad_norm": 0.6172060370445251, "learning_rate": 9.503831230153106e-05, "loss": 12.063820838928223, "step": 1121 }, { "epoch": 0.14549816425277387, "grad_norm": 0.4367067515850067, "learning_rate": 9.502943954510866e-05, "loss": 9.192913055419922, "step": 1122 }, { "epoch": 0.14562784176102056, "grad_norm": 0.6508811116218567, "learning_rate": 9.50205592773409e-05, "loss": 12.262340545654297, "step": 1123 }, { "epoch": 0.14575751926926725, "grad_norm": 0.7973765730857849, "learning_rate": 9.501167149970911e-05, "loss": 14.249886512756348, "step": 1124 }, { "epoch": 0.1458871967775139, "grad_norm": 0.5146879553794861, "learning_rate": 9.500277621369585e-05, "loss": 10.264135360717773, "step": 1125 }, { "epoch": 0.1460168742857606, "grad_norm": 0.6099331974983215, "learning_rate": 9.499387342078495e-05, "loss": 13.457747459411621, "step": 1126 }, { "epoch": 0.1461465517940073, "grad_norm": 0.6891277432441711, "learning_rate": 9.498496312246147e-05, "loss": 12.310956954956055, "step": 1127 }, { "epoch": 0.14627622930225395, "grad_norm": 0.6130430698394775, "learning_rate": 9.497604532021176e-05, "loss": 11.46591567993164, "step": 1128 }, { "epoch": 0.14640590681050064, "grad_norm": 0.5342187285423279, "learning_rate": 9.496712001552337e-05, "loss": 11.226677894592285, "step": 1129 }, { "epoch": 0.1465355843187473, "grad_norm": 0.7886817455291748, "learning_rate": 9.495818720988512e-05, "loss": 13.857114791870117, "step": 1130 }, { "epoch": 0.146665261826994, "grad_norm": 0.5771041512489319, "learning_rate": 9.494924690478712e-05, "loss": 13.915203094482422, "step": 1131 }, { "epoch": 0.14679493933524068, "grad_norm": 0.79150390625, "learning_rate": 9.494029910172067e-05, "loss": 14.606945991516113, "step": 1132 }, { "epoch": 0.14692461684348734, "grad_norm": 0.651750922203064, "learning_rate": 9.493134380217837e-05, "loss": 13.42309856414795, "step": 1133 }, { "epoch": 0.14705429435173403, "grad_norm": 0.9537623524665833, "learning_rate": 9.492238100765403e-05, "loss": 14.786742210388184, "step": 1134 }, { "epoch": 0.14718397185998072, "grad_norm": 0.7145496010780334, "learning_rate": 9.491341071964273e-05, "loss": 14.317926406860352, "step": 1135 }, { "epoch": 0.14731364936822738, "grad_norm": 0.6517206430435181, "learning_rate": 9.490443293964084e-05, "loss": 14.772457122802734, "step": 1136 }, { "epoch": 0.14744332687647407, "grad_norm": 0.8459444642066956, "learning_rate": 9.48954476691459e-05, "loss": 13.339004516601562, "step": 1137 }, { "epoch": 0.14757300438472074, "grad_norm": 0.9165939688682556, "learning_rate": 9.488645490965675e-05, "loss": 13.943110466003418, "step": 1138 }, { "epoch": 0.14770268189296742, "grad_norm": 0.5371766090393066, "learning_rate": 9.487745466267348e-05, "loss": 12.210506439208984, "step": 1139 }, { "epoch": 0.14783235940121411, "grad_norm": 0.8053990602493286, "learning_rate": 9.486844692969741e-05, "loss": 12.892284393310547, "step": 1140 }, { "epoch": 0.14796203690946078, "grad_norm": 0.39018678665161133, "learning_rate": 9.48594317122311e-05, "loss": 7.737568378448486, "step": 1141 }, { "epoch": 0.14809171441770747, "grad_norm": 0.5785089135169983, "learning_rate": 9.485040901177841e-05, "loss": 11.073189735412598, "step": 1142 }, { "epoch": 0.14822139192595415, "grad_norm": 0.5855242013931274, "learning_rate": 9.48413788298444e-05, "loss": 9.237961769104004, "step": 1143 }, { "epoch": 0.14835106943420082, "grad_norm": 0.7164289951324463, "learning_rate": 9.483234116793538e-05, "loss": 14.8562650680542, "step": 1144 }, { "epoch": 0.1484807469424475, "grad_norm": 0.7006059288978577, "learning_rate": 9.482329602755892e-05, "loss": 13.280426979064941, "step": 1145 }, { "epoch": 0.14861042445069417, "grad_norm": 0.4463295042514801, "learning_rate": 9.481424341022385e-05, "loss": 6.643946647644043, "step": 1146 }, { "epoch": 0.14874010195894086, "grad_norm": 0.7406589388847351, "learning_rate": 9.480518331744023e-05, "loss": 14.66064167022705, "step": 1147 }, { "epoch": 0.14886977946718755, "grad_norm": 0.743232011795044, "learning_rate": 9.479611575071936e-05, "loss": 13.285927772521973, "step": 1148 }, { "epoch": 0.1489994569754342, "grad_norm": 0.6988601088523865, "learning_rate": 9.478704071157381e-05, "loss": 13.136412620544434, "step": 1149 }, { "epoch": 0.1491291344836809, "grad_norm": 0.6875441074371338, "learning_rate": 9.477795820151739e-05, "loss": 14.052318572998047, "step": 1150 }, { "epoch": 0.1492588119919276, "grad_norm": 0.5115428566932678, "learning_rate": 9.476886822206514e-05, "loss": 9.858247756958008, "step": 1151 }, { "epoch": 0.14938848950017425, "grad_norm": 0.6641473174095154, "learning_rate": 9.475977077473335e-05, "loss": 10.109060287475586, "step": 1152 }, { "epoch": 0.14951816700842094, "grad_norm": 0.498160183429718, "learning_rate": 9.475066586103955e-05, "loss": 11.566919326782227, "step": 1153 }, { "epoch": 0.1496478445166676, "grad_norm": 0.6405898332595825, "learning_rate": 9.474155348250258e-05, "loss": 16.455394744873047, "step": 1154 }, { "epoch": 0.1497775220249143, "grad_norm": 0.628951370716095, "learning_rate": 9.473243364064241e-05, "loss": 15.222400665283203, "step": 1155 }, { "epoch": 0.14990719953316098, "grad_norm": 0.540979266166687, "learning_rate": 9.472330633698038e-05, "loss": 9.636613845825195, "step": 1156 }, { "epoch": 0.15003687704140764, "grad_norm": 0.7593186497688293, "learning_rate": 9.471417157303895e-05, "loss": 13.788069725036621, "step": 1157 }, { "epoch": 0.15016655454965433, "grad_norm": 0.5467612147331238, "learning_rate": 9.470502935034194e-05, "loss": 9.669466972351074, "step": 1158 }, { "epoch": 0.15029623205790102, "grad_norm": 0.5210986137390137, "learning_rate": 9.469587967041432e-05, "loss": 10.116228103637695, "step": 1159 }, { "epoch": 0.15042590956614768, "grad_norm": 0.6332812309265137, "learning_rate": 9.468672253478237e-05, "loss": 11.247475624084473, "step": 1160 }, { "epoch": 0.15055558707439437, "grad_norm": 0.6525443196296692, "learning_rate": 9.467755794497358e-05, "loss": 14.133341789245605, "step": 1161 }, { "epoch": 0.15068526458264103, "grad_norm": 0.7775889039039612, "learning_rate": 9.466838590251668e-05, "loss": 17.86797523498535, "step": 1162 }, { "epoch": 0.15081494209088772, "grad_norm": 0.6810725331306458, "learning_rate": 9.465920640894169e-05, "loss": 15.249818801879883, "step": 1163 }, { "epoch": 0.1509446195991344, "grad_norm": 0.6617209911346436, "learning_rate": 9.46500194657798e-05, "loss": 9.770066261291504, "step": 1164 }, { "epoch": 0.15107429710738107, "grad_norm": 0.6724355220794678, "learning_rate": 9.464082507456351e-05, "loss": 12.949036598205566, "step": 1165 }, { "epoch": 0.15120397461562776, "grad_norm": 0.4076022803783417, "learning_rate": 9.463162323682651e-05, "loss": 9.580194473266602, "step": 1166 }, { "epoch": 0.15133365212387445, "grad_norm": 0.6431264281272888, "learning_rate": 9.462241395410377e-05, "loss": 11.371747016906738, "step": 1167 }, { "epoch": 0.1514633296321211, "grad_norm": 0.6753648519515991, "learning_rate": 9.461319722793148e-05, "loss": 12.46013069152832, "step": 1168 }, { "epoch": 0.1515930071403678, "grad_norm": 0.8519299626350403, "learning_rate": 9.46039730598471e-05, "loss": 12.309142112731934, "step": 1169 }, { "epoch": 0.15172268464861446, "grad_norm": 0.6200892925262451, "learning_rate": 9.459474145138927e-05, "loss": 10.723628044128418, "step": 1170 }, { "epoch": 0.15185236215686115, "grad_norm": 0.6157596707344055, "learning_rate": 9.458550240409794e-05, "loss": 15.268046379089355, "step": 1171 }, { "epoch": 0.15198203966510784, "grad_norm": 0.5113491415977478, "learning_rate": 9.457625591951427e-05, "loss": 11.122304916381836, "step": 1172 }, { "epoch": 0.1521117171733545, "grad_norm": 0.7457115650177002, "learning_rate": 9.456700199918065e-05, "loss": 16.50860023498535, "step": 1173 }, { "epoch": 0.1522413946816012, "grad_norm": 0.6027665138244629, "learning_rate": 9.455774064464073e-05, "loss": 11.988386154174805, "step": 1174 }, { "epoch": 0.15237107218984788, "grad_norm": 0.6642643809318542, "learning_rate": 9.454847185743942e-05, "loss": 13.628084182739258, "step": 1175 }, { "epoch": 0.15250074969809455, "grad_norm": 0.6081165075302124, "learning_rate": 9.45391956391228e-05, "loss": 11.177910804748535, "step": 1176 }, { "epoch": 0.15263042720634123, "grad_norm": 0.5412510633468628, "learning_rate": 9.452991199123827e-05, "loss": 8.879996299743652, "step": 1177 }, { "epoch": 0.1527601047145879, "grad_norm": 0.6964301466941833, "learning_rate": 9.452062091533438e-05, "loss": 15.741020202636719, "step": 1178 }, { "epoch": 0.15288978222283459, "grad_norm": 0.6068300604820251, "learning_rate": 9.451132241296104e-05, "loss": 8.90910816192627, "step": 1179 }, { "epoch": 0.15301945973108128, "grad_norm": 0.7203145027160645, "learning_rate": 9.450201648566927e-05, "loss": 16.530853271484375, "step": 1180 }, { "epoch": 0.15314913723932794, "grad_norm": 0.7462804913520813, "learning_rate": 9.449270313501141e-05, "loss": 16.409332275390625, "step": 1181 }, { "epoch": 0.15327881474757463, "grad_norm": 0.6281173229217529, "learning_rate": 9.448338236254104e-05, "loss": 10.86018180847168, "step": 1182 }, { "epoch": 0.15340849225582132, "grad_norm": 0.45214900374412537, "learning_rate": 9.447405416981293e-05, "loss": 9.200676918029785, "step": 1183 }, { "epoch": 0.15353816976406798, "grad_norm": 0.8850328922271729, "learning_rate": 9.446471855838312e-05, "loss": 11.900320053100586, "step": 1184 }, { "epoch": 0.15366784727231467, "grad_norm": 0.7209997773170471, "learning_rate": 9.445537552980887e-05, "loss": 15.383081436157227, "step": 1185 }, { "epoch": 0.15379752478056133, "grad_norm": 0.5450252890586853, "learning_rate": 9.444602508564871e-05, "loss": 12.6364164352417, "step": 1186 }, { "epoch": 0.15392720228880802, "grad_norm": 0.5607177019119263, "learning_rate": 9.443666722746236e-05, "loss": 10.937962532043457, "step": 1187 }, { "epoch": 0.1540568797970547, "grad_norm": 0.8547936081886292, "learning_rate": 9.44273019568108e-05, "loss": 17.51538848876953, "step": 1188 }, { "epoch": 0.15418655730530137, "grad_norm": 0.5718072056770325, "learning_rate": 9.441792927525627e-05, "loss": 9.429852485656738, "step": 1189 }, { "epoch": 0.15431623481354806, "grad_norm": 0.6320536136627197, "learning_rate": 9.440854918436222e-05, "loss": 10.24947452545166, "step": 1190 }, { "epoch": 0.15444591232179475, "grad_norm": 0.55560302734375, "learning_rate": 9.439916168569332e-05, "loss": 10.320053100585938, "step": 1191 }, { "epoch": 0.1545755898300414, "grad_norm": 0.9212626218795776, "learning_rate": 9.43897667808155e-05, "loss": 15.830717086791992, "step": 1192 }, { "epoch": 0.1547052673382881, "grad_norm": 0.7748242020606995, "learning_rate": 9.438036447129595e-05, "loss": 13.7052583694458, "step": 1193 }, { "epoch": 0.15483494484653476, "grad_norm": 0.577587902545929, "learning_rate": 9.437095475870304e-05, "loss": 9.09989070892334, "step": 1194 }, { "epoch": 0.15496462235478145, "grad_norm": 0.6759243607521057, "learning_rate": 9.43615376446064e-05, "loss": 13.190268516540527, "step": 1195 }, { "epoch": 0.15509429986302814, "grad_norm": 0.5319687128067017, "learning_rate": 9.43521131305769e-05, "loss": 13.13029670715332, "step": 1196 }, { "epoch": 0.1552239773712748, "grad_norm": 0.48444533348083496, "learning_rate": 9.434268121818664e-05, "loss": 9.194229125976562, "step": 1197 }, { "epoch": 0.1553536548795215, "grad_norm": 0.6541350483894348, "learning_rate": 9.433324190900894e-05, "loss": 10.483979225158691, "step": 1198 }, { "epoch": 0.15548333238776818, "grad_norm": 0.6547751426696777, "learning_rate": 9.43237952046184e-05, "loss": 12.8245849609375, "step": 1199 }, { "epoch": 0.15561300989601484, "grad_norm": 0.6263282895088196, "learning_rate": 9.431434110659081e-05, "loss": 13.64683723449707, "step": 1200 }, { "epoch": 0.15574268740426153, "grad_norm": 0.5942660570144653, "learning_rate": 9.430487961650318e-05, "loss": 14.011165618896484, "step": 1201 }, { "epoch": 0.1558723649125082, "grad_norm": 0.6357228755950928, "learning_rate": 9.429541073593381e-05, "loss": 13.32821273803711, "step": 1202 }, { "epoch": 0.15600204242075488, "grad_norm": 0.5915622711181641, "learning_rate": 9.428593446646219e-05, "loss": 8.663605690002441, "step": 1203 }, { "epoch": 0.15613171992900157, "grad_norm": 0.6303569078445435, "learning_rate": 9.427645080966904e-05, "loss": 10.247987747192383, "step": 1204 }, { "epoch": 0.15626139743724823, "grad_norm": 0.868641197681427, "learning_rate": 9.426695976713635e-05, "loss": 15.447882652282715, "step": 1205 }, { "epoch": 0.15639107494549492, "grad_norm": 0.6854186058044434, "learning_rate": 9.425746134044729e-05, "loss": 17.067039489746094, "step": 1206 }, { "epoch": 0.1565207524537416, "grad_norm": 0.6120694279670715, "learning_rate": 9.424795553118631e-05, "loss": 12.283736228942871, "step": 1207 }, { "epoch": 0.15665042996198827, "grad_norm": 0.6431418657302856, "learning_rate": 9.423844234093907e-05, "loss": 15.80728530883789, "step": 1208 }, { "epoch": 0.15678010747023496, "grad_norm": 0.636968195438385, "learning_rate": 9.422892177129245e-05, "loss": 13.378993034362793, "step": 1209 }, { "epoch": 0.15690978497848163, "grad_norm": 0.5784395933151245, "learning_rate": 9.421939382383457e-05, "loss": 10.096490859985352, "step": 1210 }, { "epoch": 0.15703946248672832, "grad_norm": 0.628153383731842, "learning_rate": 9.42098585001548e-05, "loss": 9.203752517700195, "step": 1211 }, { "epoch": 0.157169139994975, "grad_norm": 0.6443082690238953, "learning_rate": 9.420031580184373e-05, "loss": 9.453516960144043, "step": 1212 }, { "epoch": 0.15729881750322167, "grad_norm": 0.5284234881401062, "learning_rate": 9.419076573049315e-05, "loss": 8.683065414428711, "step": 1213 }, { "epoch": 0.15742849501146836, "grad_norm": 0.7083219885826111, "learning_rate": 9.418120828769612e-05, "loss": 10.11889934539795, "step": 1214 }, { "epoch": 0.15755817251971505, "grad_norm": 0.7683936953544617, "learning_rate": 9.41716434750469e-05, "loss": 13.956297874450684, "step": 1215 }, { "epoch": 0.1576878500279617, "grad_norm": 0.7834264636039734, "learning_rate": 9.416207129414101e-05, "loss": 13.305187225341797, "step": 1216 }, { "epoch": 0.1578175275362084, "grad_norm": 0.5660114288330078, "learning_rate": 9.415249174657517e-05, "loss": 10.609109878540039, "step": 1217 }, { "epoch": 0.15794720504445506, "grad_norm": 0.6107883453369141, "learning_rate": 9.414290483394735e-05, "loss": 12.73965835571289, "step": 1218 }, { "epoch": 0.15807688255270175, "grad_norm": 0.7461848855018616, "learning_rate": 9.413331055785672e-05, "loss": 14.079699516296387, "step": 1219 }, { "epoch": 0.15820656006094844, "grad_norm": 0.7927287220954895, "learning_rate": 9.412370891990372e-05, "loss": 16.733671188354492, "step": 1220 }, { "epoch": 0.1583362375691951, "grad_norm": 0.7910512685775757, "learning_rate": 9.411409992169e-05, "loss": 16.136322021484375, "step": 1221 }, { "epoch": 0.1584659150774418, "grad_norm": 0.6589934825897217, "learning_rate": 9.410448356481842e-05, "loss": 15.048563003540039, "step": 1222 }, { "epoch": 0.15859559258568848, "grad_norm": 0.7604432702064514, "learning_rate": 9.409485985089307e-05, "loss": 14.255594253540039, "step": 1223 }, { "epoch": 0.15872527009393514, "grad_norm": 0.7368545532226562, "learning_rate": 9.408522878151931e-05, "loss": 11.885909080505371, "step": 1224 }, { "epoch": 0.15885494760218183, "grad_norm": 0.7528782486915588, "learning_rate": 9.407559035830366e-05, "loss": 14.27896499633789, "step": 1225 }, { "epoch": 0.1589846251104285, "grad_norm": 0.5413593649864197, "learning_rate": 9.406594458285391e-05, "loss": 10.799860000610352, "step": 1226 }, { "epoch": 0.15911430261867518, "grad_norm": 0.6633049249649048, "learning_rate": 9.405629145677912e-05, "loss": 15.39102840423584, "step": 1227 }, { "epoch": 0.15924398012692187, "grad_norm": 0.7310099601745605, "learning_rate": 9.404663098168944e-05, "loss": 17.368101119995117, "step": 1228 }, { "epoch": 0.15937365763516853, "grad_norm": 0.8963406085968018, "learning_rate": 9.403696315919639e-05, "loss": 16.158876419067383, "step": 1229 }, { "epoch": 0.15950333514341522, "grad_norm": 0.5810767412185669, "learning_rate": 9.402728799091265e-05, "loss": 11.518445014953613, "step": 1230 }, { "epoch": 0.1596330126516619, "grad_norm": 0.7358455657958984, "learning_rate": 9.40176054784521e-05, "loss": 9.940085411071777, "step": 1231 }, { "epoch": 0.15976269015990857, "grad_norm": 0.6795528531074524, "learning_rate": 9.400791562342991e-05, "loss": 15.36703109741211, "step": 1232 }, { "epoch": 0.15989236766815526, "grad_norm": 0.7616089582443237, "learning_rate": 9.399821842746243e-05, "loss": 15.164130210876465, "step": 1233 }, { "epoch": 0.16002204517640192, "grad_norm": 0.6156619787216187, "learning_rate": 9.398851389216725e-05, "loss": 10.676051139831543, "step": 1234 }, { "epoch": 0.1601517226846486, "grad_norm": 0.6592935919761658, "learning_rate": 9.397880201916316e-05, "loss": 12.825885772705078, "step": 1235 }, { "epoch": 0.1602814001928953, "grad_norm": 0.7505614161491394, "learning_rate": 9.396908281007021e-05, "loss": 12.422921180725098, "step": 1236 }, { "epoch": 0.16041107770114196, "grad_norm": 0.5052564144134521, "learning_rate": 9.395935626650967e-05, "loss": 7.575169563293457, "step": 1237 }, { "epoch": 0.16054075520938865, "grad_norm": 0.4236067533493042, "learning_rate": 9.394962239010399e-05, "loss": 10.005026817321777, "step": 1238 }, { "epoch": 0.16067043271763534, "grad_norm": 0.6504272222518921, "learning_rate": 9.393988118247689e-05, "loss": 17.68712043762207, "step": 1239 }, { "epoch": 0.160800110225882, "grad_norm": 0.8075551390647888, "learning_rate": 9.393013264525332e-05, "loss": 15.676973342895508, "step": 1240 }, { "epoch": 0.1609297877341287, "grad_norm": 0.701305091381073, "learning_rate": 9.39203767800594e-05, "loss": 13.98523235321045, "step": 1241 }, { "epoch": 0.16105946524237535, "grad_norm": 0.6076581478118896, "learning_rate": 9.391061358852253e-05, "loss": 9.070703506469727, "step": 1242 }, { "epoch": 0.16118914275062204, "grad_norm": 0.6790268421173096, "learning_rate": 9.390084307227128e-05, "loss": 12.108288764953613, "step": 1243 }, { "epoch": 0.16131882025886873, "grad_norm": 0.6585282683372498, "learning_rate": 9.389106523293545e-05, "loss": 15.442532539367676, "step": 1244 }, { "epoch": 0.1614484977671154, "grad_norm": 0.7309683561325073, "learning_rate": 9.388128007214614e-05, "loss": 14.422286987304688, "step": 1245 }, { "epoch": 0.16157817527536208, "grad_norm": 0.7612535953521729, "learning_rate": 9.387148759153554e-05, "loss": 12.393383026123047, "step": 1246 }, { "epoch": 0.16170785278360877, "grad_norm": 0.6766488552093506, "learning_rate": 9.386168779273718e-05, "loss": 15.090743064880371, "step": 1247 }, { "epoch": 0.16183753029185544, "grad_norm": 0.7126681804656982, "learning_rate": 9.385188067738573e-05, "loss": 12.64419174194336, "step": 1248 }, { "epoch": 0.16196720780010213, "grad_norm": 0.6725571751594543, "learning_rate": 9.384206624711713e-05, "loss": 14.50971794128418, "step": 1249 }, { "epoch": 0.1620968853083488, "grad_norm": 0.7033421397209167, "learning_rate": 9.383224450356852e-05, "loss": 13.885799407958984, "step": 1250 }, { "epoch": 0.16222656281659548, "grad_norm": 0.6463679671287537, "learning_rate": 9.382241544837827e-05, "loss": 12.773669242858887, "step": 1251 }, { "epoch": 0.16235624032484217, "grad_norm": 0.6437786221504211, "learning_rate": 9.381257908318592e-05, "loss": 13.38596248626709, "step": 1252 }, { "epoch": 0.16248591783308883, "grad_norm": 0.6366758942604065, "learning_rate": 9.380273540963231e-05, "loss": 12.837656021118164, "step": 1253 }, { "epoch": 0.16261559534133552, "grad_norm": 0.5274977684020996, "learning_rate": 9.379288442935946e-05, "loss": 12.38036823272705, "step": 1254 }, { "epoch": 0.1627452728495822, "grad_norm": 0.823564887046814, "learning_rate": 9.37830261440106e-05, "loss": 12.887914657592773, "step": 1255 }, { "epoch": 0.16287495035782887, "grad_norm": 0.6230447888374329, "learning_rate": 9.377316055523017e-05, "loss": 9.676289558410645, "step": 1256 }, { "epoch": 0.16300462786607556, "grad_norm": 0.7015088200569153, "learning_rate": 9.376328766466386e-05, "loss": 15.111750602722168, "step": 1257 }, { "epoch": 0.16313430537432222, "grad_norm": 0.5434131026268005, "learning_rate": 9.375340747395858e-05, "loss": 11.320969581604004, "step": 1258 }, { "epoch": 0.1632639828825689, "grad_norm": 0.7234469056129456, "learning_rate": 9.374351998476243e-05, "loss": 12.242509841918945, "step": 1259 }, { "epoch": 0.1633936603908156, "grad_norm": 0.5345122814178467, "learning_rate": 9.373362519872473e-05, "loss": 10.657807350158691, "step": 1260 }, { "epoch": 0.16352333789906226, "grad_norm": 0.7301766276359558, "learning_rate": 9.372372311749601e-05, "loss": 13.719015121459961, "step": 1261 }, { "epoch": 0.16365301540730895, "grad_norm": 0.7636396288871765, "learning_rate": 9.371381374272807e-05, "loss": 12.098258018493652, "step": 1262 }, { "epoch": 0.16378269291555564, "grad_norm": 0.7117213010787964, "learning_rate": 9.370389707607387e-05, "loss": 13.793091773986816, "step": 1263 }, { "epoch": 0.1639123704238023, "grad_norm": 0.7449361681938171, "learning_rate": 9.369397311918762e-05, "loss": 10.341382026672363, "step": 1264 }, { "epoch": 0.164042047932049, "grad_norm": 0.6840626001358032, "learning_rate": 9.368404187372471e-05, "loss": 13.828246116638184, "step": 1265 }, { "epoch": 0.16417172544029565, "grad_norm": 0.8678398132324219, "learning_rate": 9.367410334134176e-05, "loss": 15.672152519226074, "step": 1266 }, { "epoch": 0.16430140294854234, "grad_norm": 0.7837735414505005, "learning_rate": 9.366415752369665e-05, "loss": 15.273831367492676, "step": 1267 }, { "epoch": 0.16443108045678903, "grad_norm": 0.688663125038147, "learning_rate": 9.36542044224484e-05, "loss": 12.244056701660156, "step": 1268 }, { "epoch": 0.1645607579650357, "grad_norm": 0.7809090614318848, "learning_rate": 9.364424403925734e-05, "loss": 13.978997230529785, "step": 1269 }, { "epoch": 0.16469043547328238, "grad_norm": 0.6833919286727905, "learning_rate": 9.363427637578489e-05, "loss": 11.110675811767578, "step": 1270 }, { "epoch": 0.16482011298152907, "grad_norm": 0.8828254342079163, "learning_rate": 9.36243014336938e-05, "loss": 12.349454879760742, "step": 1271 }, { "epoch": 0.16494979048977573, "grad_norm": 0.6666484475135803, "learning_rate": 9.361431921464796e-05, "loss": 13.572114944458008, "step": 1272 }, { "epoch": 0.16507946799802242, "grad_norm": 0.73006671667099, "learning_rate": 9.360432972031252e-05, "loss": 14.872614860534668, "step": 1273 }, { "epoch": 0.16520914550626908, "grad_norm": 0.7772067785263062, "learning_rate": 9.35943329523538e-05, "loss": 10.173829078674316, "step": 1274 }, { "epoch": 0.16533882301451577, "grad_norm": 0.8376923203468323, "learning_rate": 9.358432891243938e-05, "loss": 13.376483917236328, "step": 1275 }, { "epoch": 0.16546850052276246, "grad_norm": 0.7773094773292542, "learning_rate": 9.357431760223802e-05, "loss": 15.141493797302246, "step": 1276 }, { "epoch": 0.16559817803100912, "grad_norm": 0.5517333149909973, "learning_rate": 9.35642990234197e-05, "loss": 10.635115623474121, "step": 1277 }, { "epoch": 0.16572785553925581, "grad_norm": 0.585989236831665, "learning_rate": 9.355427317765563e-05, "loss": 13.094246864318848, "step": 1278 }, { "epoch": 0.1658575330475025, "grad_norm": 0.6466255187988281, "learning_rate": 9.354424006661822e-05, "loss": 11.794665336608887, "step": 1279 }, { "epoch": 0.16598721055574917, "grad_norm": 0.5273731350898743, "learning_rate": 9.353419969198105e-05, "loss": 11.445046424865723, "step": 1280 }, { "epoch": 0.16611688806399585, "grad_norm": 1.3155585527420044, "learning_rate": 9.3524152055419e-05, "loss": 16.178770065307617, "step": 1281 }, { "epoch": 0.16624656557224252, "grad_norm": 0.5467811226844788, "learning_rate": 9.351409715860809e-05, "loss": 9.667154312133789, "step": 1282 }, { "epoch": 0.1663762430804892, "grad_norm": 0.6696975827217102, "learning_rate": 9.350403500322558e-05, "loss": 12.24773120880127, "step": 1283 }, { "epoch": 0.1665059205887359, "grad_norm": 0.8096140623092651, "learning_rate": 9.349396559094994e-05, "loss": 12.440237045288086, "step": 1284 }, { "epoch": 0.16663559809698256, "grad_norm": 0.524750292301178, "learning_rate": 9.348388892346083e-05, "loss": 12.006431579589844, "step": 1285 }, { "epoch": 0.16676527560522925, "grad_norm": 0.9727802276611328, "learning_rate": 9.347380500243913e-05, "loss": 15.055575370788574, "step": 1286 }, { "epoch": 0.16689495311347594, "grad_norm": 1.0331013202667236, "learning_rate": 9.346371382956696e-05, "loss": 13.94388198852539, "step": 1287 }, { "epoch": 0.1670246306217226, "grad_norm": 0.7825690507888794, "learning_rate": 9.345361540652762e-05, "loss": 11.754390716552734, "step": 1288 }, { "epoch": 0.1671543081299693, "grad_norm": 0.6277890801429749, "learning_rate": 9.34435097350056e-05, "loss": 12.27373218536377, "step": 1289 }, { "epoch": 0.16728398563821595, "grad_norm": 0.6464993357658386, "learning_rate": 9.343339681668664e-05, "loss": 12.484064102172852, "step": 1290 }, { "epoch": 0.16741366314646264, "grad_norm": 0.5722995400428772, "learning_rate": 9.342327665325769e-05, "loss": 12.724472999572754, "step": 1291 }, { "epoch": 0.16754334065470933, "grad_norm": 0.7374557852745056, "learning_rate": 9.341314924640687e-05, "loss": 15.421234130859375, "step": 1292 }, { "epoch": 0.167673018162956, "grad_norm": 0.5530861020088196, "learning_rate": 9.340301459782352e-05, "loss": 14.442209243774414, "step": 1293 }, { "epoch": 0.16780269567120268, "grad_norm": 0.5111709833145142, "learning_rate": 9.339287270919823e-05, "loss": 9.630996704101562, "step": 1294 }, { "epoch": 0.16793237317944937, "grad_norm": 0.6884410381317139, "learning_rate": 9.338272358222274e-05, "loss": 12.006231307983398, "step": 1295 }, { "epoch": 0.16806205068769603, "grad_norm": 0.5460083484649658, "learning_rate": 9.337256721859002e-05, "loss": 10.01119327545166, "step": 1296 }, { "epoch": 0.16819172819594272, "grad_norm": 0.6940889358520508, "learning_rate": 9.336240361999427e-05, "loss": 12.154088020324707, "step": 1297 }, { "epoch": 0.16832140570418938, "grad_norm": 0.6406136751174927, "learning_rate": 9.335223278813085e-05, "loss": 11.480722427368164, "step": 1298 }, { "epoch": 0.16845108321243607, "grad_norm": 0.8503709435462952, "learning_rate": 9.334205472469639e-05, "loss": 12.682260513305664, "step": 1299 }, { "epoch": 0.16858076072068276, "grad_norm": 0.5450230836868286, "learning_rate": 9.333186943138865e-05, "loss": 11.910852432250977, "step": 1300 }, { "epoch": 0.16871043822892942, "grad_norm": 0.4846251904964447, "learning_rate": 9.332167690990665e-05, "loss": 10.41724967956543, "step": 1301 }, { "epoch": 0.1688401157371761, "grad_norm": 0.9759328365325928, "learning_rate": 9.33114771619506e-05, "loss": 15.404868125915527, "step": 1302 }, { "epoch": 0.1689697932454228, "grad_norm": 0.5456883907318115, "learning_rate": 9.330127018922194e-05, "loss": 10.77624797821045, "step": 1303 }, { "epoch": 0.16909947075366946, "grad_norm": 0.5805171728134155, "learning_rate": 9.329105599342325e-05, "loss": 10.158660888671875, "step": 1304 }, { "epoch": 0.16922914826191615, "grad_norm": 0.7417058348655701, "learning_rate": 9.328083457625838e-05, "loss": 16.347139358520508, "step": 1305 }, { "epoch": 0.1693588257701628, "grad_norm": 0.7736333012580872, "learning_rate": 9.327060593943237e-05, "loss": 12.86513614654541, "step": 1306 }, { "epoch": 0.1694885032784095, "grad_norm": 0.5943130254745483, "learning_rate": 9.326037008465144e-05, "loss": 10.35800552368164, "step": 1307 }, { "epoch": 0.1696181807866562, "grad_norm": 0.7041255235671997, "learning_rate": 9.325012701362304e-05, "loss": 16.65458106994629, "step": 1308 }, { "epoch": 0.16974785829490285, "grad_norm": 0.7432065010070801, "learning_rate": 9.323987672805579e-05, "loss": 7.939420700073242, "step": 1309 }, { "epoch": 0.16987753580314954, "grad_norm": 0.42237842082977295, "learning_rate": 9.322961922965956e-05, "loss": 8.500224113464355, "step": 1310 }, { "epoch": 0.17000721331139623, "grad_norm": 0.6497122049331665, "learning_rate": 9.321935452014538e-05, "loss": 10.485608100891113, "step": 1311 }, { "epoch": 0.1701368908196429, "grad_norm": 0.5293042063713074, "learning_rate": 9.320908260122554e-05, "loss": 11.333552360534668, "step": 1312 }, { "epoch": 0.17026656832788958, "grad_norm": 0.5583896636962891, "learning_rate": 9.319880347461346e-05, "loss": 9.046578407287598, "step": 1313 }, { "epoch": 0.17039624583613625, "grad_norm": 0.7829743027687073, "learning_rate": 9.318851714202381e-05, "loss": 18.010570526123047, "step": 1314 }, { "epoch": 0.17052592334438293, "grad_norm": 0.7149142622947693, "learning_rate": 9.317822360517243e-05, "loss": 10.78217887878418, "step": 1315 }, { "epoch": 0.17065560085262962, "grad_norm": 0.6210273504257202, "learning_rate": 9.316792286577643e-05, "loss": 9.961250305175781, "step": 1316 }, { "epoch": 0.17078527836087629, "grad_norm": 0.5404316186904907, "learning_rate": 9.315761492555402e-05, "loss": 11.766264915466309, "step": 1317 }, { "epoch": 0.17091495586912298, "grad_norm": 0.7484415769577026, "learning_rate": 9.31472997862247e-05, "loss": 14.698514938354492, "step": 1318 }, { "epoch": 0.17104463337736966, "grad_norm": 0.5971083641052246, "learning_rate": 9.31369774495091e-05, "loss": 16.247167587280273, "step": 1319 }, { "epoch": 0.17117431088561633, "grad_norm": 0.633971631526947, "learning_rate": 9.312664791712913e-05, "loss": 13.685760498046875, "step": 1320 }, { "epoch": 0.17130398839386302, "grad_norm": 0.610241174697876, "learning_rate": 9.311631119080782e-05, "loss": 11.408214569091797, "step": 1321 }, { "epoch": 0.17143366590210968, "grad_norm": 0.6450475454330444, "learning_rate": 9.310596727226945e-05, "loss": 12.77410888671875, "step": 1322 }, { "epoch": 0.17156334341035637, "grad_norm": 0.8601217865943909, "learning_rate": 9.309561616323948e-05, "loss": 14.637548446655273, "step": 1323 }, { "epoch": 0.17169302091860306, "grad_norm": 0.6130483150482178, "learning_rate": 9.308525786544458e-05, "loss": 12.29831314086914, "step": 1324 }, { "epoch": 0.17182269842684972, "grad_norm": 0.5402876138687134, "learning_rate": 9.307489238061263e-05, "loss": 10.057293891906738, "step": 1325 }, { "epoch": 0.1719523759350964, "grad_norm": 0.5119245648384094, "learning_rate": 9.306451971047266e-05, "loss": 8.232065200805664, "step": 1326 }, { "epoch": 0.1720820534433431, "grad_norm": 0.687891960144043, "learning_rate": 9.305413985675493e-05, "loss": 11.111883163452148, "step": 1327 }, { "epoch": 0.17221173095158976, "grad_norm": 0.6160717010498047, "learning_rate": 9.304375282119095e-05, "loss": 13.71507740020752, "step": 1328 }, { "epoch": 0.17234140845983645, "grad_norm": 0.6544666290283203, "learning_rate": 9.303335860551331e-05, "loss": 11.459029197692871, "step": 1329 }, { "epoch": 0.1724710859680831, "grad_norm": 0.9377602338790894, "learning_rate": 9.302295721145592e-05, "loss": 10.423369407653809, "step": 1330 }, { "epoch": 0.1726007634763298, "grad_norm": 0.6434646844863892, "learning_rate": 9.301254864075381e-05, "loss": 12.910067558288574, "step": 1331 }, { "epoch": 0.1727304409845765, "grad_norm": 0.6167563796043396, "learning_rate": 9.300213289514324e-05, "loss": 13.415725708007812, "step": 1332 }, { "epoch": 0.17286011849282315, "grad_norm": 0.7026565074920654, "learning_rate": 9.299170997636164e-05, "loss": 14.007767677307129, "step": 1333 }, { "epoch": 0.17298979600106984, "grad_norm": 0.6950235366821289, "learning_rate": 9.298127988614767e-05, "loss": 12.474652290344238, "step": 1334 }, { "epoch": 0.17311947350931653, "grad_norm": 0.9199247360229492, "learning_rate": 9.297084262624116e-05, "loss": 16.023454666137695, "step": 1335 }, { "epoch": 0.1732491510175632, "grad_norm": 0.694179117679596, "learning_rate": 9.296039819838315e-05, "loss": 17.563983917236328, "step": 1336 }, { "epoch": 0.17337882852580988, "grad_norm": 0.46529248356819153, "learning_rate": 9.294994660431588e-05, "loss": 13.464468955993652, "step": 1337 }, { "epoch": 0.17350850603405654, "grad_norm": 0.6055808663368225, "learning_rate": 9.293948784578277e-05, "loss": 14.964914321899414, "step": 1338 }, { "epoch": 0.17363818354230323, "grad_norm": 0.590668797492981, "learning_rate": 9.292902192452841e-05, "loss": 10.481353759765625, "step": 1339 }, { "epoch": 0.17376786105054992, "grad_norm": 0.5549517869949341, "learning_rate": 9.291854884229867e-05, "loss": 11.481959342956543, "step": 1340 }, { "epoch": 0.17389753855879658, "grad_norm": 0.45666268467903137, "learning_rate": 9.290806860084054e-05, "loss": 10.71753215789795, "step": 1341 }, { "epoch": 0.17402721606704327, "grad_norm": 0.6731905341148376, "learning_rate": 9.289758120190221e-05, "loss": 12.373437881469727, "step": 1342 }, { "epoch": 0.17415689357528996, "grad_norm": 0.7714097499847412, "learning_rate": 9.28870866472331e-05, "loss": 15.385411262512207, "step": 1343 }, { "epoch": 0.17428657108353662, "grad_norm": 0.5945069193840027, "learning_rate": 9.287658493858379e-05, "loss": 14.226012229919434, "step": 1344 }, { "epoch": 0.1744162485917833, "grad_norm": 0.6546865701675415, "learning_rate": 9.286607607770608e-05, "loss": 15.656988143920898, "step": 1345 }, { "epoch": 0.17454592610002997, "grad_norm": 0.6275337338447571, "learning_rate": 9.285556006635292e-05, "loss": 14.363018035888672, "step": 1346 }, { "epoch": 0.17467560360827666, "grad_norm": 0.6028327345848083, "learning_rate": 9.284503690627853e-05, "loss": 12.630115509033203, "step": 1347 }, { "epoch": 0.17480528111652335, "grad_norm": 0.9415167570114136, "learning_rate": 9.283450659923824e-05, "loss": 12.90896987915039, "step": 1348 }, { "epoch": 0.17493495862477002, "grad_norm": 0.7310294508934021, "learning_rate": 9.282396914698862e-05, "loss": 13.3305025100708, "step": 1349 }, { "epoch": 0.1750646361330167, "grad_norm": 0.8122154474258423, "learning_rate": 9.281342455128739e-05, "loss": 15.318249702453613, "step": 1350 }, { "epoch": 0.1751943136412634, "grad_norm": 0.6382533311843872, "learning_rate": 9.280287281389353e-05, "loss": 11.935372352600098, "step": 1351 }, { "epoch": 0.17532399114951006, "grad_norm": 0.8864010572433472, "learning_rate": 9.279231393656716e-05, "loss": 15.96236515045166, "step": 1352 }, { "epoch": 0.17545366865775675, "grad_norm": 0.6325123906135559, "learning_rate": 9.278174792106959e-05, "loss": 14.236640930175781, "step": 1353 }, { "epoch": 0.1755833461660034, "grad_norm": 0.7743614315986633, "learning_rate": 9.277117476916334e-05, "loss": 13.263042449951172, "step": 1354 }, { "epoch": 0.1757130236742501, "grad_norm": 0.6668550968170166, "learning_rate": 9.276059448261213e-05, "loss": 11.340957641601562, "step": 1355 }, { "epoch": 0.17584270118249679, "grad_norm": 0.742553174495697, "learning_rate": 9.275000706318083e-05, "loss": 12.261076927185059, "step": 1356 }, { "epoch": 0.17597237869074345, "grad_norm": 0.5938116312026978, "learning_rate": 9.273941251263554e-05, "loss": 9.524772644042969, "step": 1357 }, { "epoch": 0.17610205619899014, "grad_norm": 0.7218917012214661, "learning_rate": 9.272881083274351e-05, "loss": 14.27734375, "step": 1358 }, { "epoch": 0.17623173370723683, "grad_norm": 0.7124038338661194, "learning_rate": 9.271820202527324e-05, "loss": 12.460880279541016, "step": 1359 }, { "epoch": 0.1763614112154835, "grad_norm": 0.700262725353241, "learning_rate": 9.270758609199435e-05, "loss": 14.241331100463867, "step": 1360 }, { "epoch": 0.17649108872373018, "grad_norm": 0.7188980579376221, "learning_rate": 9.26969630346777e-05, "loss": 13.208131790161133, "step": 1361 }, { "epoch": 0.17662076623197684, "grad_norm": 0.8945969939231873, "learning_rate": 9.26863328550953e-05, "loss": 12.406533241271973, "step": 1362 }, { "epoch": 0.17675044374022353, "grad_norm": 0.7927029728889465, "learning_rate": 9.267569555502038e-05, "loss": 12.68778133392334, "step": 1363 }, { "epoch": 0.17688012124847022, "grad_norm": 0.7558659315109253, "learning_rate": 9.266505113622734e-05, "loss": 15.014171600341797, "step": 1364 }, { "epoch": 0.17700979875671688, "grad_norm": 0.7239084243774414, "learning_rate": 9.265439960049179e-05, "loss": 16.604833602905273, "step": 1365 }, { "epoch": 0.17713947626496357, "grad_norm": 0.7772908806800842, "learning_rate": 9.264374094959046e-05, "loss": 11.703886985778809, "step": 1366 }, { "epoch": 0.17726915377321026, "grad_norm": 0.5988128781318665, "learning_rate": 9.263307518530136e-05, "loss": 11.543061256408691, "step": 1367 }, { "epoch": 0.17739883128145692, "grad_norm": 0.6761249303817749, "learning_rate": 9.262240230940364e-05, "loss": 12.25294303894043, "step": 1368 }, { "epoch": 0.1775285087897036, "grad_norm": 0.7357465028762817, "learning_rate": 9.261172232367764e-05, "loss": 12.46107006072998, "step": 1369 }, { "epoch": 0.17765818629795027, "grad_norm": 0.670656144618988, "learning_rate": 9.260103522990484e-05, "loss": 14.176430702209473, "step": 1370 }, { "epoch": 0.17778786380619696, "grad_norm": 0.5684402585029602, "learning_rate": 9.2590341029868e-05, "loss": 10.381136894226074, "step": 1371 }, { "epoch": 0.17791754131444365, "grad_norm": 0.45136240124702454, "learning_rate": 9.2579639725351e-05, "loss": 8.420106887817383, "step": 1372 }, { "epoch": 0.1780472188226903, "grad_norm": 0.4679040014743805, "learning_rate": 9.256893131813891e-05, "loss": 9.109930038452148, "step": 1373 }, { "epoch": 0.178176896330937, "grad_norm": 0.7862672209739685, "learning_rate": 9.255821581001803e-05, "loss": 13.467483520507812, "step": 1374 }, { "epoch": 0.1783065738391837, "grad_norm": 0.6829008460044861, "learning_rate": 9.254749320277576e-05, "loss": 14.230264663696289, "step": 1375 }, { "epoch": 0.17843625134743035, "grad_norm": 0.5759653449058533, "learning_rate": 9.253676349820077e-05, "loss": 13.505045890808105, "step": 1376 }, { "epoch": 0.17856592885567704, "grad_norm": 0.6500594615936279, "learning_rate": 9.252602669808287e-05, "loss": 11.10610294342041, "step": 1377 }, { "epoch": 0.1786956063639237, "grad_norm": 0.6187829971313477, "learning_rate": 9.251528280421308e-05, "loss": 10.097752571105957, "step": 1378 }, { "epoch": 0.1788252838721704, "grad_norm": 0.8455429673194885, "learning_rate": 9.250453181838354e-05, "loss": 13.381839752197266, "step": 1379 }, { "epoch": 0.17895496138041708, "grad_norm": 0.661544144153595, "learning_rate": 9.249377374238766e-05, "loss": 14.507281303405762, "step": 1380 }, { "epoch": 0.17908463888866374, "grad_norm": 0.5836288332939148, "learning_rate": 9.248300857801998e-05, "loss": 12.026863098144531, "step": 1381 }, { "epoch": 0.17921431639691043, "grad_norm": 0.6297969818115234, "learning_rate": 9.247223632707623e-05, "loss": 12.852378845214844, "step": 1382 }, { "epoch": 0.17934399390515712, "grad_norm": 0.6510768532752991, "learning_rate": 9.246145699135331e-05, "loss": 16.3175106048584, "step": 1383 }, { "epoch": 0.17947367141340378, "grad_norm": 0.6630101799964905, "learning_rate": 9.245067057264934e-05, "loss": 11.594355583190918, "step": 1384 }, { "epoch": 0.17960334892165047, "grad_norm": 0.6286320686340332, "learning_rate": 9.243987707276358e-05, "loss": 13.202811241149902, "step": 1385 }, { "epoch": 0.17973302642989714, "grad_norm": 0.8958710432052612, "learning_rate": 9.242907649349652e-05, "loss": 17.463224411010742, "step": 1386 }, { "epoch": 0.17986270393814383, "grad_norm": 0.5237955451011658, "learning_rate": 9.241826883664977e-05, "loss": 11.061890602111816, "step": 1387 }, { "epoch": 0.17999238144639051, "grad_norm": 0.8546047806739807, "learning_rate": 9.240745410402616e-05, "loss": 16.174203872680664, "step": 1388 }, { "epoch": 0.18012205895463718, "grad_norm": 0.8268042802810669, "learning_rate": 9.23966322974297e-05, "loss": 14.177323341369629, "step": 1389 }, { "epoch": 0.18025173646288387, "grad_norm": 0.6288954615592957, "learning_rate": 9.238580341866556e-05, "loss": 12.096598625183105, "step": 1390 }, { "epoch": 0.18038141397113056, "grad_norm": 0.6194995045661926, "learning_rate": 9.237496746954013e-05, "loss": 8.419069290161133, "step": 1391 }, { "epoch": 0.18051109147937722, "grad_norm": 0.7334689497947693, "learning_rate": 9.236412445186091e-05, "loss": 13.046028137207031, "step": 1392 }, { "epoch": 0.1806407689876239, "grad_norm": 0.6612405180931091, "learning_rate": 9.235327436743664e-05, "loss": 15.217581748962402, "step": 1393 }, { "epoch": 0.18077044649587057, "grad_norm": 0.7460924983024597, "learning_rate": 9.234241721807723e-05, "loss": 17.105817794799805, "step": 1394 }, { "epoch": 0.18090012400411726, "grad_norm": 0.5901540517807007, "learning_rate": 9.233155300559373e-05, "loss": 11.076627731323242, "step": 1395 }, { "epoch": 0.18102980151236395, "grad_norm": 0.49996599555015564, "learning_rate": 9.232068173179842e-05, "loss": 7.448858261108398, "step": 1396 }, { "epoch": 0.1811594790206106, "grad_norm": 0.6440379619598389, "learning_rate": 9.230980339850472e-05, "loss": 7.255945682525635, "step": 1397 }, { "epoch": 0.1812891565288573, "grad_norm": 0.5605825781822205, "learning_rate": 9.229891800752726e-05, "loss": 13.12735366821289, "step": 1398 }, { "epoch": 0.181418834037104, "grad_norm": 0.7139384150505066, "learning_rate": 9.228802556068181e-05, "loss": 14.111385345458984, "step": 1399 }, { "epoch": 0.18154851154535065, "grad_norm": 0.5070477724075317, "learning_rate": 9.227712605978533e-05, "loss": 9.269268989562988, "step": 1400 }, { "epoch": 0.18167818905359734, "grad_norm": 0.7652726173400879, "learning_rate": 9.226621950665598e-05, "loss": 12.372930526733398, "step": 1401 }, { "epoch": 0.181807866561844, "grad_norm": 0.5441699028015137, "learning_rate": 9.225530590311305e-05, "loss": 12.628063201904297, "step": 1402 }, { "epoch": 0.1819375440700907, "grad_norm": 0.6414621472358704, "learning_rate": 9.224438525097707e-05, "loss": 10.852192878723145, "step": 1403 }, { "epoch": 0.18206722157833738, "grad_norm": 0.5689049959182739, "learning_rate": 9.22334575520697e-05, "loss": 10.118568420410156, "step": 1404 }, { "epoch": 0.18219689908658404, "grad_norm": 0.5330605506896973, "learning_rate": 9.222252280821378e-05, "loss": 12.887304306030273, "step": 1405 }, { "epoch": 0.18232657659483073, "grad_norm": 0.48922979831695557, "learning_rate": 9.221158102123331e-05, "loss": 10.37822437286377, "step": 1406 }, { "epoch": 0.18245625410307742, "grad_norm": 0.6082305312156677, "learning_rate": 9.220063219295352e-05, "loss": 10.992140769958496, "step": 1407 }, { "epoch": 0.18258593161132408, "grad_norm": 0.6081309914588928, "learning_rate": 9.218967632520078e-05, "loss": 10.738456726074219, "step": 1408 }, { "epoch": 0.18271560911957077, "grad_norm": 0.5715590119361877, "learning_rate": 9.217871341980261e-05, "loss": 9.581045150756836, "step": 1409 }, { "epoch": 0.18284528662781743, "grad_norm": 0.5112420320510864, "learning_rate": 9.216774347858774e-05, "loss": 8.6530122756958, "step": 1410 }, { "epoch": 0.18297496413606412, "grad_norm": 0.6608807444572449, "learning_rate": 9.215676650338606e-05, "loss": 9.139410972595215, "step": 1411 }, { "epoch": 0.1831046416443108, "grad_norm": 0.4972688555717468, "learning_rate": 9.214578249602865e-05, "loss": 9.980537414550781, "step": 1412 }, { "epoch": 0.18323431915255747, "grad_norm": 0.690727174282074, "learning_rate": 9.213479145834771e-05, "loss": 12.741731643676758, "step": 1413 }, { "epoch": 0.18336399666080416, "grad_norm": 0.6441090106964111, "learning_rate": 9.212379339217669e-05, "loss": 11.556694030761719, "step": 1414 }, { "epoch": 0.18349367416905085, "grad_norm": 0.6661235690116882, "learning_rate": 9.211278829935017e-05, "loss": 9.744851112365723, "step": 1415 }, { "epoch": 0.1836233516772975, "grad_norm": 0.4710608422756195, "learning_rate": 9.21017761817039e-05, "loss": 11.935672760009766, "step": 1416 }, { "epoch": 0.1837530291855442, "grad_norm": 0.6765261888504028, "learning_rate": 9.209075704107479e-05, "loss": 15.660700798034668, "step": 1417 }, { "epoch": 0.18388270669379086, "grad_norm": 0.49937519431114197, "learning_rate": 9.207973087930097e-05, "loss": 10.209214210510254, "step": 1418 }, { "epoch": 0.18401238420203755, "grad_norm": 0.4769105315208435, "learning_rate": 9.206869769822168e-05, "loss": 9.577349662780762, "step": 1419 }, { "epoch": 0.18414206171028424, "grad_norm": 0.576227605342865, "learning_rate": 9.205765749967737e-05, "loss": 11.584311485290527, "step": 1420 }, { "epoch": 0.1842717392185309, "grad_norm": 0.5907102227210999, "learning_rate": 9.204661028550967e-05, "loss": 10.134978294372559, "step": 1421 }, { "epoch": 0.1844014167267776, "grad_norm": 0.5885890126228333, "learning_rate": 9.203555605756133e-05, "loss": 12.14838981628418, "step": 1422 }, { "epoch": 0.18453109423502428, "grad_norm": 0.7133529186248779, "learning_rate": 9.202449481767635e-05, "loss": 12.901793479919434, "step": 1423 }, { "epoch": 0.18466077174327095, "grad_norm": 0.528286874294281, "learning_rate": 9.20134265676998e-05, "loss": 8.251922607421875, "step": 1424 }, { "epoch": 0.18479044925151764, "grad_norm": 0.7257168292999268, "learning_rate": 9.2002351309478e-05, "loss": 11.533385276794434, "step": 1425 }, { "epoch": 0.1849201267597643, "grad_norm": 0.794270932674408, "learning_rate": 9.19912690448584e-05, "loss": 13.743913650512695, "step": 1426 }, { "epoch": 0.185049804268011, "grad_norm": 0.6720030307769775, "learning_rate": 9.198017977568964e-05, "loss": 12.462919235229492, "step": 1427 }, { "epoch": 0.18517948177625768, "grad_norm": 0.5930571556091309, "learning_rate": 9.196908350382149e-05, "loss": 8.767064094543457, "step": 1428 }, { "epoch": 0.18530915928450434, "grad_norm": 0.4627765417098999, "learning_rate": 9.195798023110496e-05, "loss": 10.091083526611328, "step": 1429 }, { "epoch": 0.18543883679275103, "grad_norm": 0.5284252762794495, "learning_rate": 9.194686995939214e-05, "loss": 9.515521049499512, "step": 1430 }, { "epoch": 0.18556851430099772, "grad_norm": 0.7696150541305542, "learning_rate": 9.193575269053634e-05, "loss": 13.674155235290527, "step": 1431 }, { "epoch": 0.18569819180924438, "grad_norm": 0.8078033924102783, "learning_rate": 9.192462842639206e-05, "loss": 10.315352439880371, "step": 1432 }, { "epoch": 0.18582786931749107, "grad_norm": 0.8757477402687073, "learning_rate": 9.19134971688149e-05, "loss": 14.491497993469238, "step": 1433 }, { "epoch": 0.18595754682573776, "grad_norm": 0.7253326177597046, "learning_rate": 9.190235891966165e-05, "loss": 12.990209579467773, "step": 1434 }, { "epoch": 0.18608722433398442, "grad_norm": 0.6654911041259766, "learning_rate": 9.189121368079034e-05, "loss": 16.186498641967773, "step": 1435 }, { "epoch": 0.1862169018422311, "grad_norm": 0.7656073570251465, "learning_rate": 9.188006145406004e-05, "loss": 14.838546752929688, "step": 1436 }, { "epoch": 0.18634657935047777, "grad_norm": 0.6865411996841431, "learning_rate": 9.186890224133107e-05, "loss": 12.344573020935059, "step": 1437 }, { "epoch": 0.18647625685872446, "grad_norm": 0.735233724117279, "learning_rate": 9.18577360444649e-05, "loss": 13.367915153503418, "step": 1438 }, { "epoch": 0.18660593436697115, "grad_norm": 0.6912984848022461, "learning_rate": 9.184656286532416e-05, "loss": 11.981517791748047, "step": 1439 }, { "epoch": 0.1867356118752178, "grad_norm": 0.8728822469711304, "learning_rate": 9.183538270577263e-05, "loss": 14.598284721374512, "step": 1440 }, { "epoch": 0.1868652893834645, "grad_norm": 0.68165123462677, "learning_rate": 9.18241955676753e-05, "loss": 11.303400039672852, "step": 1441 }, { "epoch": 0.1869949668917112, "grad_norm": 0.5787253379821777, "learning_rate": 9.181300145289825e-05, "loss": 8.93808650970459, "step": 1442 }, { "epoch": 0.18712464439995785, "grad_norm": 0.8155090808868408, "learning_rate": 9.180180036330879e-05, "loss": 12.97868824005127, "step": 1443 }, { "epoch": 0.18725432190820454, "grad_norm": 0.6859812140464783, "learning_rate": 9.179059230077538e-05, "loss": 12.988605499267578, "step": 1444 }, { "epoch": 0.1873839994164512, "grad_norm": 0.6460896730422974, "learning_rate": 9.177937726716761e-05, "loss": 12.77868938446045, "step": 1445 }, { "epoch": 0.1875136769246979, "grad_norm": 0.8472962379455566, "learning_rate": 9.176815526435627e-05, "loss": 14.49406623840332, "step": 1446 }, { "epoch": 0.18764335443294458, "grad_norm": 0.48805472254753113, "learning_rate": 9.175692629421328e-05, "loss": 9.280369758605957, "step": 1447 }, { "epoch": 0.18777303194119124, "grad_norm": 0.6868558526039124, "learning_rate": 9.174569035861179e-05, "loss": 11.807720184326172, "step": 1448 }, { "epoch": 0.18790270944943793, "grad_norm": 0.49863356351852417, "learning_rate": 9.1734447459426e-05, "loss": 11.610671997070312, "step": 1449 }, { "epoch": 0.18803238695768462, "grad_norm": 0.5559812784194946, "learning_rate": 9.172319759853136e-05, "loss": 8.376751899719238, "step": 1450 }, { "epoch": 0.18816206446593128, "grad_norm": 0.7484858632087708, "learning_rate": 9.171194077780447e-05, "loss": 15.115728378295898, "step": 1451 }, { "epoch": 0.18829174197417797, "grad_norm": 0.5664128065109253, "learning_rate": 9.170067699912307e-05, "loss": 12.201033592224121, "step": 1452 }, { "epoch": 0.18842141948242463, "grad_norm": 0.6655452847480774, "learning_rate": 9.168940626436606e-05, "loss": 14.538114547729492, "step": 1453 }, { "epoch": 0.18855109699067132, "grad_norm": 0.58143150806427, "learning_rate": 9.167812857541348e-05, "loss": 8.351481437683105, "step": 1454 }, { "epoch": 0.188680774498918, "grad_norm": 0.6838927865028381, "learning_rate": 9.166684393414662e-05, "loss": 13.773444175720215, "step": 1455 }, { "epoch": 0.18881045200716468, "grad_norm": 0.6134893894195557, "learning_rate": 9.165555234244782e-05, "loss": 9.62977409362793, "step": 1456 }, { "epoch": 0.18894012951541136, "grad_norm": 0.906107485294342, "learning_rate": 9.164425380220066e-05, "loss": 12.355607032775879, "step": 1457 }, { "epoch": 0.18906980702365805, "grad_norm": 0.600614607334137, "learning_rate": 9.163294831528979e-05, "loss": 12.686518669128418, "step": 1458 }, { "epoch": 0.18919948453190472, "grad_norm": 0.6147540807723999, "learning_rate": 9.162163588360113e-05, "loss": 11.627294540405273, "step": 1459 }, { "epoch": 0.1893291620401514, "grad_norm": 0.833236038684845, "learning_rate": 9.161031650902168e-05, "loss": 14.152546882629395, "step": 1460 }, { "epoch": 0.18945883954839807, "grad_norm": 0.6368781924247742, "learning_rate": 9.159899019343961e-05, "loss": 9.087470054626465, "step": 1461 }, { "epoch": 0.18958851705664476, "grad_norm": 0.5099127888679504, "learning_rate": 9.158765693874429e-05, "loss": 8.440114974975586, "step": 1462 }, { "epoch": 0.18971819456489145, "grad_norm": 0.6340752243995667, "learning_rate": 9.15763167468262e-05, "loss": 14.84581184387207, "step": 1463 }, { "epoch": 0.1898478720731381, "grad_norm": 0.5994228720664978, "learning_rate": 9.156496961957699e-05, "loss": 11.063112258911133, "step": 1464 }, { "epoch": 0.1899775495813848, "grad_norm": 0.5864614844322205, "learning_rate": 9.155361555888946e-05, "loss": 8.544054985046387, "step": 1465 }, { "epoch": 0.1901072270896315, "grad_norm": 0.7108174562454224, "learning_rate": 9.154225456665759e-05, "loss": 15.115482330322266, "step": 1466 }, { "epoch": 0.19023690459787815, "grad_norm": 0.5828776359558105, "learning_rate": 9.153088664477652e-05, "loss": 10.79585075378418, "step": 1467 }, { "epoch": 0.19036658210612484, "grad_norm": 0.575954258441925, "learning_rate": 9.151951179514251e-05, "loss": 10.180570602416992, "step": 1468 }, { "epoch": 0.1904962596143715, "grad_norm": 0.961477518081665, "learning_rate": 9.150813001965297e-05, "loss": 10.609498977661133, "step": 1469 }, { "epoch": 0.1906259371226182, "grad_norm": 0.7313794493675232, "learning_rate": 9.149674132020653e-05, "loss": 13.700319290161133, "step": 1470 }, { "epoch": 0.19075561463086488, "grad_norm": 0.7349101901054382, "learning_rate": 9.148534569870294e-05, "loss": 13.326370239257812, "step": 1471 }, { "epoch": 0.19088529213911154, "grad_norm": 0.5534580945968628, "learning_rate": 9.147394315704308e-05, "loss": 12.669428825378418, "step": 1472 }, { "epoch": 0.19101496964735823, "grad_norm": 0.5872385501861572, "learning_rate": 9.146253369712898e-05, "loss": 9.775083541870117, "step": 1473 }, { "epoch": 0.19114464715560492, "grad_norm": 0.6410551071166992, "learning_rate": 9.145111732086388e-05, "loss": 9.034079551696777, "step": 1474 }, { "epoch": 0.19127432466385158, "grad_norm": 0.6384986639022827, "learning_rate": 9.143969403015216e-05, "loss": 12.259352684020996, "step": 1475 }, { "epoch": 0.19140400217209827, "grad_norm": 0.7286280989646912, "learning_rate": 9.142826382689928e-05, "loss": 9.936528205871582, "step": 1476 }, { "epoch": 0.19153367968034493, "grad_norm": 0.6420879364013672, "learning_rate": 9.141682671301197e-05, "loss": 10.235933303833008, "step": 1477 }, { "epoch": 0.19166335718859162, "grad_norm": 0.6943111419677734, "learning_rate": 9.1405382690398e-05, "loss": 12.648186683654785, "step": 1478 }, { "epoch": 0.1917930346968383, "grad_norm": 0.702782928943634, "learning_rate": 9.139393176096637e-05, "loss": 11.193832397460938, "step": 1479 }, { "epoch": 0.19192271220508497, "grad_norm": 0.7596161365509033, "learning_rate": 9.13824739266272e-05, "loss": 16.130722045898438, "step": 1480 }, { "epoch": 0.19205238971333166, "grad_norm": 0.5655343532562256, "learning_rate": 9.137100918929176e-05, "loss": 8.598804473876953, "step": 1481 }, { "epoch": 0.19218206722157835, "grad_norm": 0.5812882781028748, "learning_rate": 9.135953755087247e-05, "loss": 9.840352058410645, "step": 1482 }, { "epoch": 0.192311744729825, "grad_norm": 0.5376072525978088, "learning_rate": 9.134805901328296e-05, "loss": 10.173566818237305, "step": 1483 }, { "epoch": 0.1924414222380717, "grad_norm": 0.7057245969772339, "learning_rate": 9.13365735784379e-05, "loss": 10.444578170776367, "step": 1484 }, { "epoch": 0.19257109974631836, "grad_norm": 0.6273888349533081, "learning_rate": 9.132508124825321e-05, "loss": 11.455193519592285, "step": 1485 }, { "epoch": 0.19270077725456505, "grad_norm": 0.5335233211517334, "learning_rate": 9.131358202464591e-05, "loss": 10.03559398651123, "step": 1486 }, { "epoch": 0.19283045476281174, "grad_norm": 0.7883339524269104, "learning_rate": 9.130207590953417e-05, "loss": 12.41603946685791, "step": 1487 }, { "epoch": 0.1929601322710584, "grad_norm": 0.6281414031982422, "learning_rate": 9.129056290483734e-05, "loss": 10.838114738464355, "step": 1488 }, { "epoch": 0.1930898097793051, "grad_norm": 0.5841782093048096, "learning_rate": 9.12790430124759e-05, "loss": 9.51553726196289, "step": 1489 }, { "epoch": 0.19321948728755178, "grad_norm": 0.7726184725761414, "learning_rate": 9.126751623437146e-05, "loss": 13.289560317993164, "step": 1490 }, { "epoch": 0.19334916479579844, "grad_norm": 0.8546269536018372, "learning_rate": 9.12559825724468e-05, "loss": 11.12166690826416, "step": 1491 }, { "epoch": 0.19347884230404513, "grad_norm": 0.7734465599060059, "learning_rate": 9.124444202862587e-05, "loss": 14.693683624267578, "step": 1492 }, { "epoch": 0.1936085198122918, "grad_norm": 0.6344971060752869, "learning_rate": 9.123289460483376e-05, "loss": 10.529915809631348, "step": 1493 }, { "epoch": 0.19373819732053849, "grad_norm": 0.7176871299743652, "learning_rate": 9.122134030299661e-05, "loss": 12.174091339111328, "step": 1494 }, { "epoch": 0.19386787482878517, "grad_norm": 0.6637526154518127, "learning_rate": 9.120977912504187e-05, "loss": 13.23165225982666, "step": 1495 }, { "epoch": 0.19399755233703184, "grad_norm": 0.6573871970176697, "learning_rate": 9.119821107289801e-05, "loss": 10.585233688354492, "step": 1496 }, { "epoch": 0.19412722984527853, "grad_norm": 0.8914831876754761, "learning_rate": 9.118663614849473e-05, "loss": 13.304950714111328, "step": 1497 }, { "epoch": 0.19425690735352522, "grad_norm": 0.6298442482948303, "learning_rate": 9.11750543537628e-05, "loss": 7.174648284912109, "step": 1498 }, { "epoch": 0.19438658486177188, "grad_norm": 0.5234120488166809, "learning_rate": 9.116346569063422e-05, "loss": 13.017308235168457, "step": 1499 }, { "epoch": 0.19451626237001857, "grad_norm": 0.6638168096542358, "learning_rate": 9.115187016104203e-05, "loss": 10.295900344848633, "step": 1500 }, { "epoch": 0.19464593987826523, "grad_norm": 0.561923086643219, "learning_rate": 9.114026776692053e-05, "loss": 9.346524238586426, "step": 1501 }, { "epoch": 0.19477561738651192, "grad_norm": 0.8472296595573425, "learning_rate": 9.112865851020509e-05, "loss": 9.37609577178955, "step": 1502 }, { "epoch": 0.1949052948947586, "grad_norm": 0.7833178043365479, "learning_rate": 9.111704239283225e-05, "loss": 14.39186954498291, "step": 1503 }, { "epoch": 0.19503497240300527, "grad_norm": 0.6236695647239685, "learning_rate": 9.110541941673969e-05, "loss": 11.720149993896484, "step": 1504 }, { "epoch": 0.19516464991125196, "grad_norm": 0.6463998556137085, "learning_rate": 9.109378958386623e-05, "loss": 8.839883804321289, "step": 1505 }, { "epoch": 0.19529432741949865, "grad_norm": 0.7422481179237366, "learning_rate": 9.108215289615185e-05, "loss": 12.624589920043945, "step": 1506 }, { "epoch": 0.1954240049277453, "grad_norm": 0.7067066431045532, "learning_rate": 9.107050935553765e-05, "loss": 13.493709564208984, "step": 1507 }, { "epoch": 0.195553682435992, "grad_norm": 0.7801182270050049, "learning_rate": 9.105885896396588e-05, "loss": 12.765161514282227, "step": 1508 }, { "epoch": 0.19568335994423866, "grad_norm": 0.9372808337211609, "learning_rate": 9.104720172337997e-05, "loss": 10.762984275817871, "step": 1509 }, { "epoch": 0.19581303745248535, "grad_norm": 0.6606913208961487, "learning_rate": 9.103553763572442e-05, "loss": 12.76807689666748, "step": 1510 }, { "epoch": 0.19594271496073204, "grad_norm": 0.56807941198349, "learning_rate": 9.102386670294494e-05, "loss": 10.070831298828125, "step": 1511 }, { "epoch": 0.1960723924689787, "grad_norm": 0.6742000579833984, "learning_rate": 9.101218892698833e-05, "loss": 13.568520545959473, "step": 1512 }, { "epoch": 0.1962020699772254, "grad_norm": 0.7761964201927185, "learning_rate": 9.100050430980258e-05, "loss": 12.640206336975098, "step": 1513 }, { "epoch": 0.19633174748547208, "grad_norm": 0.6498401761054993, "learning_rate": 9.09888128533368e-05, "loss": 11.074994087219238, "step": 1514 }, { "epoch": 0.19646142499371874, "grad_norm": 1.0799564123153687, "learning_rate": 9.097711455954121e-05, "loss": 16.062397003173828, "step": 1515 }, { "epoch": 0.19659110250196543, "grad_norm": 0.6544893383979797, "learning_rate": 9.096540943036721e-05, "loss": 12.535140991210938, "step": 1516 }, { "epoch": 0.1967207800102121, "grad_norm": 0.6086090803146362, "learning_rate": 9.095369746776735e-05, "loss": 10.00190544128418, "step": 1517 }, { "epoch": 0.19685045751845878, "grad_norm": 0.572943389415741, "learning_rate": 9.094197867369525e-05, "loss": 9.520103454589844, "step": 1518 }, { "epoch": 0.19698013502670547, "grad_norm": 0.9407594799995422, "learning_rate": 9.093025305010576e-05, "loss": 13.234639167785645, "step": 1519 }, { "epoch": 0.19710981253495213, "grad_norm": 0.6991309523582458, "learning_rate": 9.091852059895483e-05, "loss": 13.84670352935791, "step": 1520 }, { "epoch": 0.19723949004319882, "grad_norm": 0.6662551760673523, "learning_rate": 9.090678132219953e-05, "loss": 9.836332321166992, "step": 1521 }, { "epoch": 0.1973691675514455, "grad_norm": 0.6144741177558899, "learning_rate": 9.089503522179807e-05, "loss": 12.370819091796875, "step": 1522 }, { "epoch": 0.19749884505969217, "grad_norm": 0.720703125, "learning_rate": 9.088328229970984e-05, "loss": 12.42149543762207, "step": 1523 }, { "epoch": 0.19762852256793886, "grad_norm": 0.7816282510757446, "learning_rate": 9.087152255789533e-05, "loss": 10.161006927490234, "step": 1524 }, { "epoch": 0.19775820007618553, "grad_norm": 0.5323138236999512, "learning_rate": 9.085975599831617e-05, "loss": 9.876378059387207, "step": 1525 }, { "epoch": 0.19788787758443221, "grad_norm": 0.5860947966575623, "learning_rate": 9.084798262293515e-05, "loss": 12.557092666625977, "step": 1526 }, { "epoch": 0.1980175550926789, "grad_norm": 0.6860262751579285, "learning_rate": 9.083620243371617e-05, "loss": 13.345734596252441, "step": 1527 }, { "epoch": 0.19814723260092557, "grad_norm": 0.7406566143035889, "learning_rate": 9.08244154326243e-05, "loss": 10.589510917663574, "step": 1528 }, { "epoch": 0.19827691010917226, "grad_norm": 0.6944018602371216, "learning_rate": 9.081262162162571e-05, "loss": 12.986042022705078, "step": 1529 }, { "epoch": 0.19840658761741894, "grad_norm": 0.6463449597358704, "learning_rate": 9.080082100268773e-05, "loss": 13.66496753692627, "step": 1530 }, { "epoch": 0.1985362651256656, "grad_norm": 0.5040141940116882, "learning_rate": 9.078901357777881e-05, "loss": 10.225909233093262, "step": 1531 }, { "epoch": 0.1986659426339123, "grad_norm": 0.9202936291694641, "learning_rate": 9.077719934886852e-05, "loss": 15.837057113647461, "step": 1532 }, { "epoch": 0.19879562014215896, "grad_norm": 0.6268218159675598, "learning_rate": 9.076537831792765e-05, "loss": 11.545486450195312, "step": 1533 }, { "epoch": 0.19892529765040565, "grad_norm": 0.5875880718231201, "learning_rate": 9.075355048692799e-05, "loss": 12.106127738952637, "step": 1534 }, { "epoch": 0.19905497515865234, "grad_norm": 0.6364083290100098, "learning_rate": 9.074171585784259e-05, "loss": 11.27253246307373, "step": 1535 }, { "epoch": 0.199184652666899, "grad_norm": 0.7905003428459167, "learning_rate": 9.072987443264553e-05, "loss": 11.051615715026855, "step": 1536 }, { "epoch": 0.1993143301751457, "grad_norm": 0.6551885604858398, "learning_rate": 9.071802621331214e-05, "loss": 14.3912935256958, "step": 1537 }, { "epoch": 0.19944400768339238, "grad_norm": 1.0175048112869263, "learning_rate": 9.070617120181875e-05, "loss": 16.05755615234375, "step": 1538 }, { "epoch": 0.19957368519163904, "grad_norm": 0.9059445858001709, "learning_rate": 9.069430940014294e-05, "loss": 15.156665802001953, "step": 1539 }, { "epoch": 0.19970336269988573, "grad_norm": 0.770634651184082, "learning_rate": 9.068244081026336e-05, "loss": 11.932726860046387, "step": 1540 }, { "epoch": 0.1998330402081324, "grad_norm": 0.8647704124450684, "learning_rate": 9.067056543415978e-05, "loss": 13.658349990844727, "step": 1541 }, { "epoch": 0.19996271771637908, "grad_norm": 0.9889906048774719, "learning_rate": 9.065868327381316e-05, "loss": 16.79731559753418, "step": 1542 }, { "epoch": 0.20009239522462577, "grad_norm": 0.6785969734191895, "learning_rate": 9.064679433120555e-05, "loss": 11.253240585327148, "step": 1543 }, { "epoch": 0.20022207273287243, "grad_norm": 0.765724778175354, "learning_rate": 9.063489860832013e-05, "loss": 15.30463695526123, "step": 1544 }, { "epoch": 0.20035175024111912, "grad_norm": 0.6809710264205933, "learning_rate": 9.062299610714122e-05, "loss": 13.062958717346191, "step": 1545 }, { "epoch": 0.2004814277493658, "grad_norm": 0.6565216779708862, "learning_rate": 9.061108682965431e-05, "loss": 12.817317962646484, "step": 1546 }, { "epoch": 0.20061110525761247, "grad_norm": 0.5961976647377014, "learning_rate": 9.059917077784593e-05, "loss": 11.478717803955078, "step": 1547 }, { "epoch": 0.20074078276585916, "grad_norm": 0.6583444476127625, "learning_rate": 9.058724795370381e-05, "loss": 11.014421463012695, "step": 1548 }, { "epoch": 0.20087046027410582, "grad_norm": 0.7402170300483704, "learning_rate": 9.057531835921682e-05, "loss": 13.21375846862793, "step": 1549 }, { "epoch": 0.2010001377823525, "grad_norm": 0.5725599527359009, "learning_rate": 9.05633819963749e-05, "loss": 12.21378231048584, "step": 1550 }, { "epoch": 0.2011298152905992, "grad_norm": 0.6095179319381714, "learning_rate": 9.055143886716916e-05, "loss": 11.014069557189941, "step": 1551 }, { "epoch": 0.20125949279884586, "grad_norm": 0.7074750661849976, "learning_rate": 9.053948897359182e-05, "loss": 11.38670539855957, "step": 1552 }, { "epoch": 0.20138917030709255, "grad_norm": 0.8611879944801331, "learning_rate": 9.052753231763627e-05, "loss": 12.449548721313477, "step": 1553 }, { "epoch": 0.20151884781533924, "grad_norm": 0.8180712461471558, "learning_rate": 9.051556890129696e-05, "loss": 8.916596412658691, "step": 1554 }, { "epoch": 0.2016485253235859, "grad_norm": 0.5621833801269531, "learning_rate": 9.05035987265695e-05, "loss": 10.209124565124512, "step": 1555 }, { "epoch": 0.2017782028318326, "grad_norm": 0.7132371664047241, "learning_rate": 9.049162179545066e-05, "loss": 10.869843482971191, "step": 1556 }, { "epoch": 0.20190788034007925, "grad_norm": 0.7466142177581787, "learning_rate": 9.047963810993829e-05, "loss": 8.52811050415039, "step": 1557 }, { "epoch": 0.20203755784832594, "grad_norm": 0.601719081401825, "learning_rate": 9.04676476720314e-05, "loss": 9.879520416259766, "step": 1558 }, { "epoch": 0.20216723535657263, "grad_norm": 0.5703020691871643, "learning_rate": 9.045565048373009e-05, "loss": 14.496537208557129, "step": 1559 }, { "epoch": 0.2022969128648193, "grad_norm": 0.7588315010070801, "learning_rate": 9.044364654703562e-05, "loss": 15.301981925964355, "step": 1560 }, { "epoch": 0.20242659037306598, "grad_norm": 0.9586737155914307, "learning_rate": 9.043163586395036e-05, "loss": 12.814654350280762, "step": 1561 }, { "epoch": 0.20255626788131267, "grad_norm": 0.6463690996170044, "learning_rate": 9.041961843647781e-05, "loss": 12.508281707763672, "step": 1562 }, { "epoch": 0.20268594538955934, "grad_norm": 0.7561817765235901, "learning_rate": 9.040759426662258e-05, "loss": 13.482619285583496, "step": 1563 }, { "epoch": 0.20281562289780602, "grad_norm": 0.7021132111549377, "learning_rate": 9.039556335639045e-05, "loss": 13.366328239440918, "step": 1564 }, { "epoch": 0.2029453004060527, "grad_norm": 0.5238489508628845, "learning_rate": 9.038352570778825e-05, "loss": 11.444581985473633, "step": 1565 }, { "epoch": 0.20307497791429938, "grad_norm": 0.7657593488693237, "learning_rate": 9.0371481322824e-05, "loss": 12.808454513549805, "step": 1566 }, { "epoch": 0.20320465542254607, "grad_norm": 0.5069196224212646, "learning_rate": 9.035943020350682e-05, "loss": 6.566989898681641, "step": 1567 }, { "epoch": 0.20333433293079273, "grad_norm": 0.8695650100708008, "learning_rate": 9.034737235184694e-05, "loss": 12.012117385864258, "step": 1568 }, { "epoch": 0.20346401043903942, "grad_norm": 1.0565775632858276, "learning_rate": 9.033530776985572e-05, "loss": 17.558666229248047, "step": 1569 }, { "epoch": 0.2035936879472861, "grad_norm": 0.784661054611206, "learning_rate": 9.03232364595457e-05, "loss": 11.942277908325195, "step": 1570 }, { "epoch": 0.20372336545553277, "grad_norm": 0.8549710512161255, "learning_rate": 9.031115842293044e-05, "loss": 15.247854232788086, "step": 1571 }, { "epoch": 0.20385304296377946, "grad_norm": 0.8779823780059814, "learning_rate": 9.029907366202468e-05, "loss": 14.399682998657227, "step": 1572 }, { "epoch": 0.20398272047202612, "grad_norm": 0.7998301982879639, "learning_rate": 9.028698217884429e-05, "loss": 12.746413230895996, "step": 1573 }, { "epoch": 0.2041123979802728, "grad_norm": 0.6085580587387085, "learning_rate": 9.027488397540622e-05, "loss": 11.821823120117188, "step": 1574 }, { "epoch": 0.2042420754885195, "grad_norm": 0.6361148953437805, "learning_rate": 9.026277905372862e-05, "loss": 16.086442947387695, "step": 1575 }, { "epoch": 0.20437175299676616, "grad_norm": 0.6275867819786072, "learning_rate": 9.025066741583065e-05, "loss": 12.40379524230957, "step": 1576 }, { "epoch": 0.20450143050501285, "grad_norm": 0.7052647471427917, "learning_rate": 9.023854906373268e-05, "loss": 14.82239818572998, "step": 1577 }, { "epoch": 0.20463110801325954, "grad_norm": 0.5889415144920349, "learning_rate": 9.022642399945615e-05, "loss": 9.740594863891602, "step": 1578 }, { "epoch": 0.2047607855215062, "grad_norm": 0.5358594059944153, "learning_rate": 9.021429222502365e-05, "loss": 10.370218276977539, "step": 1579 }, { "epoch": 0.2048904630297529, "grad_norm": 0.6187496185302734, "learning_rate": 9.020215374245888e-05, "loss": 12.145465850830078, "step": 1580 }, { "epoch": 0.20502014053799955, "grad_norm": 0.5815461874008179, "learning_rate": 9.019000855378666e-05, "loss": 11.901853561401367, "step": 1581 }, { "epoch": 0.20514981804624624, "grad_norm": 0.6598948836326599, "learning_rate": 9.017785666103291e-05, "loss": 11.458927154541016, "step": 1582 }, { "epoch": 0.20527949555449293, "grad_norm": 0.7155345678329468, "learning_rate": 9.016569806622469e-05, "loss": 13.943390846252441, "step": 1583 }, { "epoch": 0.2054091730627396, "grad_norm": 0.6531734466552734, "learning_rate": 9.015353277139016e-05, "loss": 11.971179962158203, "step": 1584 }, { "epoch": 0.20553885057098628, "grad_norm": 0.7013847231864929, "learning_rate": 9.014136077855862e-05, "loss": 12.98316478729248, "step": 1585 }, { "epoch": 0.20566852807923297, "grad_norm": 0.6810510158538818, "learning_rate": 9.012918208976047e-05, "loss": 14.404476165771484, "step": 1586 }, { "epoch": 0.20579820558747963, "grad_norm": 0.6803292632102966, "learning_rate": 9.011699670702724e-05, "loss": 13.533109664916992, "step": 1587 }, { "epoch": 0.20592788309572632, "grad_norm": 0.5757567882537842, "learning_rate": 9.010480463239158e-05, "loss": 12.11300277709961, "step": 1588 }, { "epoch": 0.20605756060397298, "grad_norm": 1.1852636337280273, "learning_rate": 9.009260586788724e-05, "loss": 12.514708518981934, "step": 1589 }, { "epoch": 0.20618723811221967, "grad_norm": 0.658931314945221, "learning_rate": 9.008040041554906e-05, "loss": 14.757561683654785, "step": 1590 }, { "epoch": 0.20631691562046636, "grad_norm": 0.6950938701629639, "learning_rate": 9.006818827741306e-05, "loss": 12.782118797302246, "step": 1591 }, { "epoch": 0.20644659312871302, "grad_norm": 0.6865368485450745, "learning_rate": 9.005596945551633e-05, "loss": 10.28868293762207, "step": 1592 }, { "epoch": 0.2065762706369597, "grad_norm": 0.6287180185317993, "learning_rate": 9.00437439518971e-05, "loss": 9.450379371643066, "step": 1593 }, { "epoch": 0.2067059481452064, "grad_norm": 0.6907688975334167, "learning_rate": 9.003151176859468e-05, "loss": 13.520181655883789, "step": 1594 }, { "epoch": 0.20683562565345306, "grad_norm": 0.7440113425254822, "learning_rate": 9.001927290764956e-05, "loss": 16.248252868652344, "step": 1595 }, { "epoch": 0.20696530316169975, "grad_norm": 0.4977211654186249, "learning_rate": 9.000702737110325e-05, "loss": 7.844747543334961, "step": 1596 }, { "epoch": 0.20709498066994642, "grad_norm": 0.8279597759246826, "learning_rate": 8.999477516099844e-05, "loss": 15.992016792297363, "step": 1597 }, { "epoch": 0.2072246581781931, "grad_norm": 0.5670040249824524, "learning_rate": 8.998251627937893e-05, "loss": 10.254974365234375, "step": 1598 }, { "epoch": 0.2073543356864398, "grad_norm": 0.7180371284484863, "learning_rate": 8.997025072828959e-05, "loss": 11.776030540466309, "step": 1599 }, { "epoch": 0.20748401319468646, "grad_norm": 0.6788244843482971, "learning_rate": 8.995797850977648e-05, "loss": 13.004737854003906, "step": 1600 }, { "epoch": 0.20761369070293315, "grad_norm": 0.47047632932662964, "learning_rate": 8.994569962588669e-05, "loss": 9.937625885009766, "step": 1601 }, { "epoch": 0.20774336821117984, "grad_norm": 0.5724894404411316, "learning_rate": 8.993341407866847e-05, "loss": 12.523484230041504, "step": 1602 }, { "epoch": 0.2078730457194265, "grad_norm": 0.9178207516670227, "learning_rate": 8.992112187017115e-05, "loss": 13.329151153564453, "step": 1603 }, { "epoch": 0.2080027232276732, "grad_norm": 0.6468197107315063, "learning_rate": 8.990882300244521e-05, "loss": 11.739006042480469, "step": 1604 }, { "epoch": 0.20813240073591985, "grad_norm": 0.6022270321846008, "learning_rate": 8.98965174775422e-05, "loss": 16.124340057373047, "step": 1605 }, { "epoch": 0.20826207824416654, "grad_norm": 0.6932059526443481, "learning_rate": 8.988420529751482e-05, "loss": 15.367348670959473, "step": 1606 }, { "epoch": 0.20839175575241323, "grad_norm": 0.547297477722168, "learning_rate": 8.987188646441684e-05, "loss": 11.22227954864502, "step": 1607 }, { "epoch": 0.2085214332606599, "grad_norm": 0.6009548306465149, "learning_rate": 8.98595609803032e-05, "loss": 9.307907104492188, "step": 1608 }, { "epoch": 0.20865111076890658, "grad_norm": 0.7092543840408325, "learning_rate": 8.984722884722984e-05, "loss": 12.697361946105957, "step": 1609 }, { "epoch": 0.20878078827715327, "grad_norm": 0.7918151617050171, "learning_rate": 8.983489006725396e-05, "loss": 12.922826766967773, "step": 1610 }, { "epoch": 0.20891046578539993, "grad_norm": 0.7953419089317322, "learning_rate": 8.982254464243374e-05, "loss": 9.089729309082031, "step": 1611 }, { "epoch": 0.20904014329364662, "grad_norm": 0.8130795359611511, "learning_rate": 8.981019257482852e-05, "loss": 12.588829040527344, "step": 1612 }, { "epoch": 0.20916982080189328, "grad_norm": 0.5939633846282959, "learning_rate": 8.979783386649874e-05, "loss": 10.100300788879395, "step": 1613 }, { "epoch": 0.20929949831013997, "grad_norm": 0.6527661681175232, "learning_rate": 8.978546851950596e-05, "loss": 10.23585319519043, "step": 1614 }, { "epoch": 0.20942917581838666, "grad_norm": 0.7686752080917358, "learning_rate": 8.977309653591284e-05, "loss": 8.20825481414795, "step": 1615 }, { "epoch": 0.20955885332663332, "grad_norm": 0.815982460975647, "learning_rate": 8.976071791778317e-05, "loss": 12.774003982543945, "step": 1616 }, { "epoch": 0.20968853083488, "grad_norm": 0.7714358568191528, "learning_rate": 8.974833266718179e-05, "loss": 13.653331756591797, "step": 1617 }, { "epoch": 0.2098182083431267, "grad_norm": 0.7632856369018555, "learning_rate": 8.973594078617467e-05, "loss": 12.958029747009277, "step": 1618 }, { "epoch": 0.20994788585137336, "grad_norm": 0.6203066110610962, "learning_rate": 8.972354227682893e-05, "loss": 11.87983226776123, "step": 1619 }, { "epoch": 0.21007756335962005, "grad_norm": 0.658543586730957, "learning_rate": 8.971113714121274e-05, "loss": 14.123905181884766, "step": 1620 }, { "epoch": 0.2102072408678667, "grad_norm": 0.7922609448432922, "learning_rate": 8.96987253813954e-05, "loss": 10.623311042785645, "step": 1621 }, { "epoch": 0.2103369183761134, "grad_norm": 0.5844242572784424, "learning_rate": 8.968630699944732e-05, "loss": 13.25768756866455, "step": 1622 }, { "epoch": 0.2104665958843601, "grad_norm": 0.6896833777427673, "learning_rate": 8.967388199743999e-05, "loss": 11.860840797424316, "step": 1623 }, { "epoch": 0.21059627339260675, "grad_norm": 0.7463036179542542, "learning_rate": 8.966145037744604e-05, "loss": 14.52295970916748, "step": 1624 }, { "epoch": 0.21072595090085344, "grad_norm": 0.7888243794441223, "learning_rate": 8.964901214153915e-05, "loss": 13.633590698242188, "step": 1625 }, { "epoch": 0.21085562840910013, "grad_norm": 0.6123294830322266, "learning_rate": 8.96365672917942e-05, "loss": 11.377408981323242, "step": 1626 }, { "epoch": 0.2109853059173468, "grad_norm": 0.6823068261146545, "learning_rate": 8.962411583028705e-05, "loss": 13.371599197387695, "step": 1627 }, { "epoch": 0.21111498342559348, "grad_norm": 0.716727077960968, "learning_rate": 8.961165775909474e-05, "loss": 11.282048225402832, "step": 1628 }, { "epoch": 0.21124466093384014, "grad_norm": 0.8405282497406006, "learning_rate": 8.959919308029541e-05, "loss": 14.40808391571045, "step": 1629 }, { "epoch": 0.21137433844208683, "grad_norm": 0.682370662689209, "learning_rate": 8.95867217959683e-05, "loss": 11.672012329101562, "step": 1630 }, { "epoch": 0.21150401595033352, "grad_norm": 0.6083051562309265, "learning_rate": 8.95742439081937e-05, "loss": 12.719683647155762, "step": 1631 }, { "epoch": 0.21163369345858019, "grad_norm": 0.8536683320999146, "learning_rate": 8.956175941905307e-05, "loss": 13.631380081176758, "step": 1632 }, { "epoch": 0.21176337096682687, "grad_norm": 0.5970229506492615, "learning_rate": 8.954926833062895e-05, "loss": 15.003937721252441, "step": 1633 }, { "epoch": 0.21189304847507356, "grad_norm": 0.7182809710502625, "learning_rate": 8.953677064500494e-05, "loss": 13.249626159667969, "step": 1634 }, { "epoch": 0.21202272598332023, "grad_norm": 0.7835289835929871, "learning_rate": 8.952426636426581e-05, "loss": 10.483060836791992, "step": 1635 }, { "epoch": 0.21215240349156692, "grad_norm": 0.7268707156181335, "learning_rate": 8.951175549049738e-05, "loss": 13.239021301269531, "step": 1636 }, { "epoch": 0.21228208099981358, "grad_norm": 0.6725872755050659, "learning_rate": 8.949923802578659e-05, "loss": 12.272345542907715, "step": 1637 }, { "epoch": 0.21241175850806027, "grad_norm": 0.5015957355499268, "learning_rate": 8.948671397222148e-05, "loss": 10.41127872467041, "step": 1638 }, { "epoch": 0.21254143601630696, "grad_norm": 0.6637904047966003, "learning_rate": 8.947418333189115e-05, "loss": 13.386845588684082, "step": 1639 }, { "epoch": 0.21267111352455362, "grad_norm": 0.5487120747566223, "learning_rate": 8.946164610688586e-05, "loss": 9.025837898254395, "step": 1640 }, { "epoch": 0.2128007910328003, "grad_norm": 0.6025679707527161, "learning_rate": 8.944910229929695e-05, "loss": 10.092341423034668, "step": 1641 }, { "epoch": 0.212930468541047, "grad_norm": 0.6243714094161987, "learning_rate": 8.943655191121683e-05, "loss": 13.449179649353027, "step": 1642 }, { "epoch": 0.21306014604929366, "grad_norm": 0.7367640137672424, "learning_rate": 8.942399494473901e-05, "loss": 13.750883102416992, "step": 1643 }, { "epoch": 0.21318982355754035, "grad_norm": 0.6160461902618408, "learning_rate": 8.941143140195816e-05, "loss": 13.500097274780273, "step": 1644 }, { "epoch": 0.213319501065787, "grad_norm": 0.7261243462562561, "learning_rate": 8.939886128496995e-05, "loss": 16.752233505249023, "step": 1645 }, { "epoch": 0.2134491785740337, "grad_norm": 0.6753074526786804, "learning_rate": 8.938628459587124e-05, "loss": 10.983277320861816, "step": 1646 }, { "epoch": 0.2135788560822804, "grad_norm": 0.7394240498542786, "learning_rate": 8.937370133675991e-05, "loss": 12.259293556213379, "step": 1647 }, { "epoch": 0.21370853359052705, "grad_norm": 0.6772772073745728, "learning_rate": 8.936111150973499e-05, "loss": 13.410226821899414, "step": 1648 }, { "epoch": 0.21383821109877374, "grad_norm": 0.7191148400306702, "learning_rate": 8.934851511689657e-05, "loss": 14.144783020019531, "step": 1649 }, { "epoch": 0.21396788860702043, "grad_norm": 0.607700526714325, "learning_rate": 8.933591216034587e-05, "loss": 10.703924179077148, "step": 1650 }, { "epoch": 0.2140975661152671, "grad_norm": 0.6509566903114319, "learning_rate": 8.932330264218516e-05, "loss": 9.041886329650879, "step": 1651 }, { "epoch": 0.21422724362351378, "grad_norm": 0.90484219789505, "learning_rate": 8.931068656451784e-05, "loss": 14.413201332092285, "step": 1652 }, { "epoch": 0.21435692113176044, "grad_norm": 0.646018922328949, "learning_rate": 8.929806392944839e-05, "loss": 12.356951713562012, "step": 1653 }, { "epoch": 0.21448659864000713, "grad_norm": 0.8465625643730164, "learning_rate": 8.928543473908239e-05, "loss": 12.686493873596191, "step": 1654 }, { "epoch": 0.21461627614825382, "grad_norm": 0.5633190870285034, "learning_rate": 8.927279899552654e-05, "loss": 11.965429306030273, "step": 1655 }, { "epoch": 0.21474595365650048, "grad_norm": 0.6344377398490906, "learning_rate": 8.926015670088856e-05, "loss": 11.881157875061035, "step": 1656 }, { "epoch": 0.21487563116474717, "grad_norm": 0.5797353982925415, "learning_rate": 8.924750785727732e-05, "loss": 11.718330383300781, "step": 1657 }, { "epoch": 0.21500530867299386, "grad_norm": 0.5872472524642944, "learning_rate": 8.923485246680279e-05, "loss": 10.621222496032715, "step": 1658 }, { "epoch": 0.21513498618124052, "grad_norm": 0.6316848993301392, "learning_rate": 8.922219053157597e-05, "loss": 13.655874252319336, "step": 1659 }, { "epoch": 0.2152646636894872, "grad_norm": 0.6993003487586975, "learning_rate": 8.920952205370904e-05, "loss": 11.034409523010254, "step": 1660 }, { "epoch": 0.21539434119773387, "grad_norm": 0.7872515916824341, "learning_rate": 8.919684703531518e-05, "loss": 13.771505355834961, "step": 1661 }, { "epoch": 0.21552401870598056, "grad_norm": 1.0384994745254517, "learning_rate": 8.918416547850874e-05, "loss": 10.998003959655762, "step": 1662 }, { "epoch": 0.21565369621422725, "grad_norm": 0.6742407083511353, "learning_rate": 8.917147738540512e-05, "loss": 12.922550201416016, "step": 1663 }, { "epoch": 0.21578337372247391, "grad_norm": 0.7151258587837219, "learning_rate": 8.91587827581208e-05, "loss": 11.493577003479004, "step": 1664 }, { "epoch": 0.2159130512307206, "grad_norm": 0.6267984509468079, "learning_rate": 8.914608159877338e-05, "loss": 11.706442832946777, "step": 1665 }, { "epoch": 0.2160427287389673, "grad_norm": 0.8885658383369446, "learning_rate": 8.913337390948153e-05, "loss": 12.45224380493164, "step": 1666 }, { "epoch": 0.21617240624721396, "grad_norm": 0.7480044960975647, "learning_rate": 8.912065969236503e-05, "loss": 11.307336807250977, "step": 1667 }, { "epoch": 0.21630208375546064, "grad_norm": 0.635460615158081, "learning_rate": 8.910793894954472e-05, "loss": 10.021745681762695, "step": 1668 }, { "epoch": 0.2164317612637073, "grad_norm": 0.8476467728614807, "learning_rate": 8.909521168314253e-05, "loss": 10.309342384338379, "step": 1669 }, { "epoch": 0.216561438771954, "grad_norm": 0.7421507835388184, "learning_rate": 8.908247789528152e-05, "loss": 10.197660446166992, "step": 1670 }, { "epoch": 0.21669111628020069, "grad_norm": 0.7507616281509399, "learning_rate": 8.90697375880858e-05, "loss": 11.58251667022705, "step": 1671 }, { "epoch": 0.21682079378844735, "grad_norm": 0.6935054063796997, "learning_rate": 8.905699076368055e-05, "loss": 10.716676712036133, "step": 1672 }, { "epoch": 0.21695047129669404, "grad_norm": 0.867216944694519, "learning_rate": 8.904423742419208e-05, "loss": 16.690555572509766, "step": 1673 }, { "epoch": 0.21708014880494073, "grad_norm": 0.7041745781898499, "learning_rate": 8.90314775717478e-05, "loss": 13.295406341552734, "step": 1674 }, { "epoch": 0.2172098263131874, "grad_norm": 0.5380417704582214, "learning_rate": 8.901871120847614e-05, "loss": 9.28615951538086, "step": 1675 }, { "epoch": 0.21733950382143408, "grad_norm": 0.5598165988922119, "learning_rate": 8.900593833650666e-05, "loss": 13.38121509552002, "step": 1676 }, { "epoch": 0.21746918132968074, "grad_norm": 0.5396419763565063, "learning_rate": 8.899315895797e-05, "loss": 10.368081092834473, "step": 1677 }, { "epoch": 0.21759885883792743, "grad_norm": 0.7194083333015442, "learning_rate": 8.898037307499789e-05, "loss": 10.910897254943848, "step": 1678 }, { "epoch": 0.21772853634617412, "grad_norm": 0.8518515825271606, "learning_rate": 8.896758068972313e-05, "loss": 19.22791862487793, "step": 1679 }, { "epoch": 0.21785821385442078, "grad_norm": 0.6682881116867065, "learning_rate": 8.895478180427963e-05, "loss": 14.75747299194336, "step": 1680 }, { "epoch": 0.21798789136266747, "grad_norm": 0.6234079599380493, "learning_rate": 8.894197642080235e-05, "loss": 12.655137062072754, "step": 1681 }, { "epoch": 0.21811756887091416, "grad_norm": 0.9033676385879517, "learning_rate": 8.892916454142735e-05, "loss": 12.303539276123047, "step": 1682 }, { "epoch": 0.21824724637916082, "grad_norm": 0.597225546836853, "learning_rate": 8.891634616829179e-05, "loss": 9.848550796508789, "step": 1683 }, { "epoch": 0.2183769238874075, "grad_norm": 0.8636020421981812, "learning_rate": 8.890352130353388e-05, "loss": 11.636129379272461, "step": 1684 }, { "epoch": 0.21850660139565417, "grad_norm": 0.7011860013008118, "learning_rate": 8.889068994929295e-05, "loss": 12.16611099243164, "step": 1685 }, { "epoch": 0.21863627890390086, "grad_norm": 0.5898152589797974, "learning_rate": 8.887785210770941e-05, "loss": 8.98620891571045, "step": 1686 }, { "epoch": 0.21876595641214755, "grad_norm": 0.6612181663513184, "learning_rate": 8.886500778092468e-05, "loss": 12.264548301696777, "step": 1687 }, { "epoch": 0.2188956339203942, "grad_norm": 0.7979411482810974, "learning_rate": 8.885215697108135e-05, "loss": 15.393608093261719, "step": 1688 }, { "epoch": 0.2190253114286409, "grad_norm": 0.8622323274612427, "learning_rate": 8.883929968032307e-05, "loss": 14.824735641479492, "step": 1689 }, { "epoch": 0.2191549889368876, "grad_norm": 0.6295353770256042, "learning_rate": 8.882643591079455e-05, "loss": 11.79133129119873, "step": 1690 }, { "epoch": 0.21928466644513425, "grad_norm": 0.7216342687606812, "learning_rate": 8.881356566464158e-05, "loss": 13.201586723327637, "step": 1691 }, { "epoch": 0.21941434395338094, "grad_norm": 0.4478859305381775, "learning_rate": 8.880068894401107e-05, "loss": 11.871737480163574, "step": 1692 }, { "epoch": 0.2195440214616276, "grad_norm": 0.539999783039093, "learning_rate": 8.878780575105094e-05, "loss": 9.182251930236816, "step": 1693 }, { "epoch": 0.2196736989698743, "grad_norm": 1.1810773611068726, "learning_rate": 8.877491608791027e-05, "loss": 18.72719383239746, "step": 1694 }, { "epoch": 0.21980337647812098, "grad_norm": 0.5468450784683228, "learning_rate": 8.876201995673916e-05, "loss": 12.687163352966309, "step": 1695 }, { "epoch": 0.21993305398636764, "grad_norm": 0.7331029772758484, "learning_rate": 8.874911735968882e-05, "loss": 15.442745208740234, "step": 1696 }, { "epoch": 0.22006273149461433, "grad_norm": 1.1368120908737183, "learning_rate": 8.873620829891151e-05, "loss": 12.947010040283203, "step": 1697 }, { "epoch": 0.22019240900286102, "grad_norm": 0.5767549276351929, "learning_rate": 8.87232927765606e-05, "loss": 10.722047805786133, "step": 1698 }, { "epoch": 0.22032208651110768, "grad_norm": 0.6418279409408569, "learning_rate": 8.871037079479052e-05, "loss": 10.895471572875977, "step": 1699 }, { "epoch": 0.22045176401935437, "grad_norm": 0.5942485332489014, "learning_rate": 8.869744235575678e-05, "loss": 8.81638240814209, "step": 1700 }, { "epoch": 0.22058144152760104, "grad_norm": 0.7690417170524597, "learning_rate": 8.868450746161598e-05, "loss": 16.018939971923828, "step": 1701 }, { "epoch": 0.22071111903584772, "grad_norm": 0.5795240998268127, "learning_rate": 8.867156611452576e-05, "loss": 12.114575386047363, "step": 1702 }, { "epoch": 0.22084079654409441, "grad_norm": 0.5049484372138977, "learning_rate": 8.865861831664489e-05, "loss": 10.628402709960938, "step": 1703 }, { "epoch": 0.22097047405234108, "grad_norm": 0.6281123757362366, "learning_rate": 8.864566407013317e-05, "loss": 12.908574104309082, "step": 1704 }, { "epoch": 0.22110015156058777, "grad_norm": 0.648095428943634, "learning_rate": 8.86327033771515e-05, "loss": 12.2579984664917, "step": 1705 }, { "epoch": 0.22122982906883445, "grad_norm": 0.6145463585853577, "learning_rate": 8.861973623986185e-05, "loss": 10.835212707519531, "step": 1706 }, { "epoch": 0.22135950657708112, "grad_norm": 0.8468241095542908, "learning_rate": 8.860676266042727e-05, "loss": 16.220325469970703, "step": 1707 }, { "epoch": 0.2214891840853278, "grad_norm": 0.6341016292572021, "learning_rate": 8.859378264101186e-05, "loss": 12.386012077331543, "step": 1708 }, { "epoch": 0.22161886159357447, "grad_norm": 0.6203516125679016, "learning_rate": 8.85807961837808e-05, "loss": 9.466768264770508, "step": 1709 }, { "epoch": 0.22174853910182116, "grad_norm": 0.6413520574569702, "learning_rate": 8.856780329090043e-05, "loss": 16.032102584838867, "step": 1710 }, { "epoch": 0.22187821661006785, "grad_norm": 0.761562705039978, "learning_rate": 8.855480396453799e-05, "loss": 12.343029022216797, "step": 1711 }, { "epoch": 0.2220078941183145, "grad_norm": 0.7222144603729248, "learning_rate": 8.854179820686197e-05, "loss": 10.11255168914795, "step": 1712 }, { "epoch": 0.2221375716265612, "grad_norm": 0.7958528399467468, "learning_rate": 8.852878602004181e-05, "loss": 12.200928688049316, "step": 1713 }, { "epoch": 0.2222672491348079, "grad_norm": 0.5644139051437378, "learning_rate": 8.85157674062481e-05, "loss": 12.601908683776855, "step": 1714 }, { "epoch": 0.22239692664305455, "grad_norm": 0.7132745981216431, "learning_rate": 8.850274236765246e-05, "loss": 10.597861289978027, "step": 1715 }, { "epoch": 0.22252660415130124, "grad_norm": 0.6833050847053528, "learning_rate": 8.848971090642757e-05, "loss": 11.080493927001953, "step": 1716 }, { "epoch": 0.2226562816595479, "grad_norm": 0.7232903838157654, "learning_rate": 8.847667302474723e-05, "loss": 15.43028450012207, "step": 1717 }, { "epoch": 0.2227859591677946, "grad_norm": 0.8762456178665161, "learning_rate": 8.846362872478629e-05, "loss": 11.744348526000977, "step": 1718 }, { "epoch": 0.22291563667604128, "grad_norm": 0.6517814993858337, "learning_rate": 8.845057800872063e-05, "loss": 11.719131469726562, "step": 1719 }, { "epoch": 0.22304531418428794, "grad_norm": 0.5962242484092712, "learning_rate": 8.843752087872727e-05, "loss": 8.49143123626709, "step": 1720 }, { "epoch": 0.22317499169253463, "grad_norm": 0.7399935126304626, "learning_rate": 8.842445733698426e-05, "loss": 15.093520164489746, "step": 1721 }, { "epoch": 0.22330466920078132, "grad_norm": 0.4596463739871979, "learning_rate": 8.841138738567069e-05, "loss": 8.851478576660156, "step": 1722 }, { "epoch": 0.22343434670902798, "grad_norm": 0.7914230823516846, "learning_rate": 8.839831102696681e-05, "loss": 12.539246559143066, "step": 1723 }, { "epoch": 0.22356402421727467, "grad_norm": 0.6415688395500183, "learning_rate": 8.838522826305385e-05, "loss": 11.309405326843262, "step": 1724 }, { "epoch": 0.22369370172552133, "grad_norm": 0.8104710578918457, "learning_rate": 8.837213909611414e-05, "loss": 9.192031860351562, "step": 1725 }, { "epoch": 0.22382337923376802, "grad_norm": 0.7047544717788696, "learning_rate": 8.835904352833108e-05, "loss": 14.365622520446777, "step": 1726 }, { "epoch": 0.2239530567420147, "grad_norm": 0.7027789354324341, "learning_rate": 8.834594156188916e-05, "loss": 13.659486770629883, "step": 1727 }, { "epoch": 0.22408273425026137, "grad_norm": 0.4114655554294586, "learning_rate": 8.83328331989739e-05, "loss": 6.492047309875488, "step": 1728 }, { "epoch": 0.22421241175850806, "grad_norm": 0.6235030889511108, "learning_rate": 8.831971844177189e-05, "loss": 12.655466079711914, "step": 1729 }, { "epoch": 0.22434208926675475, "grad_norm": 0.6964524984359741, "learning_rate": 8.830659729247079e-05, "loss": 12.0545654296875, "step": 1730 }, { "epoch": 0.2244717667750014, "grad_norm": 0.6448138952255249, "learning_rate": 8.829346975325937e-05, "loss": 9.571343421936035, "step": 1731 }, { "epoch": 0.2246014442832481, "grad_norm": 0.6381599307060242, "learning_rate": 8.828033582632741e-05, "loss": 14.1815185546875, "step": 1732 }, { "epoch": 0.22473112179149476, "grad_norm": 0.5990659594535828, "learning_rate": 8.826719551386577e-05, "loss": 8.961389541625977, "step": 1733 }, { "epoch": 0.22486079929974145, "grad_norm": 0.7614125609397888, "learning_rate": 8.825404881806641e-05, "loss": 12.775397300720215, "step": 1734 }, { "epoch": 0.22499047680798814, "grad_norm": 0.6867539882659912, "learning_rate": 8.824089574112228e-05, "loss": 10.714093208312988, "step": 1735 }, { "epoch": 0.2251201543162348, "grad_norm": 0.6508955955505371, "learning_rate": 8.822773628522747e-05, "loss": 10.169445037841797, "step": 1736 }, { "epoch": 0.2252498318244815, "grad_norm": 0.4957810938358307, "learning_rate": 8.821457045257711e-05, "loss": 8.299983978271484, "step": 1737 }, { "epoch": 0.22537950933272818, "grad_norm": 0.659947395324707, "learning_rate": 8.820139824536735e-05, "loss": 11.236351013183594, "step": 1738 }, { "epoch": 0.22550918684097485, "grad_norm": 0.6419908404350281, "learning_rate": 8.818821966579549e-05, "loss": 13.30324935913086, "step": 1739 }, { "epoch": 0.22563886434922154, "grad_norm": 0.7386071681976318, "learning_rate": 8.817503471605984e-05, "loss": 12.23758316040039, "step": 1740 }, { "epoch": 0.2257685418574682, "grad_norm": 0.6275028586387634, "learning_rate": 8.816184339835974e-05, "loss": 12.367587089538574, "step": 1741 }, { "epoch": 0.2258982193657149, "grad_norm": 0.7042737007141113, "learning_rate": 8.814864571489564e-05, "loss": 12.173375129699707, "step": 1742 }, { "epoch": 0.22602789687396158, "grad_norm": 0.6963284611701965, "learning_rate": 8.813544166786906e-05, "loss": 11.970600128173828, "step": 1743 }, { "epoch": 0.22615757438220824, "grad_norm": 0.5699915885925293, "learning_rate": 8.812223125948256e-05, "loss": 8.380672454833984, "step": 1744 }, { "epoch": 0.22628725189045493, "grad_norm": 0.5003979802131653, "learning_rate": 8.810901449193973e-05, "loss": 11.375907897949219, "step": 1745 }, { "epoch": 0.22641692939870162, "grad_norm": 0.4961988925933838, "learning_rate": 8.80957913674453e-05, "loss": 8.194334030151367, "step": 1746 }, { "epoch": 0.22654660690694828, "grad_norm": 0.8932235240936279, "learning_rate": 8.808256188820499e-05, "loss": 11.190705299377441, "step": 1747 }, { "epoch": 0.22667628441519497, "grad_norm": 0.43648093938827515, "learning_rate": 8.806932605642562e-05, "loss": 10.540812492370605, "step": 1748 }, { "epoch": 0.22680596192344163, "grad_norm": 0.6451976895332336, "learning_rate": 8.805608387431503e-05, "loss": 12.697786331176758, "step": 1749 }, { "epoch": 0.22693563943168832, "grad_norm": 0.729385495185852, "learning_rate": 8.804283534408218e-05, "loss": 12.915703773498535, "step": 1750 }, { "epoch": 0.227065316939935, "grad_norm": 0.59619140625, "learning_rate": 8.802958046793702e-05, "loss": 9.181918144226074, "step": 1751 }, { "epoch": 0.22719499444818167, "grad_norm": 0.6484664678573608, "learning_rate": 8.801631924809059e-05, "loss": 12.199084281921387, "step": 1752 }, { "epoch": 0.22732467195642836, "grad_norm": 0.6962026357650757, "learning_rate": 8.800305168675502e-05, "loss": 11.558329582214355, "step": 1753 }, { "epoch": 0.22745434946467505, "grad_norm": 0.5839061141014099, "learning_rate": 8.798977778614343e-05, "loss": 10.913931846618652, "step": 1754 }, { "epoch": 0.2275840269729217, "grad_norm": 0.6474003791809082, "learning_rate": 8.797649754847009e-05, "loss": 13.038875579833984, "step": 1755 }, { "epoch": 0.2277137044811684, "grad_norm": 1.487755298614502, "learning_rate": 8.79632109759502e-05, "loss": 12.574057579040527, "step": 1756 }, { "epoch": 0.22784338198941506, "grad_norm": 0.5773067474365234, "learning_rate": 8.794991807080014e-05, "loss": 12.923177719116211, "step": 1757 }, { "epoch": 0.22797305949766175, "grad_norm": 0.7320139408111572, "learning_rate": 8.793661883523728e-05, "loss": 11.262589454650879, "step": 1758 }, { "epoch": 0.22810273700590844, "grad_norm": 0.6119062900543213, "learning_rate": 8.792331327148006e-05, "loss": 13.466171264648438, "step": 1759 }, { "epoch": 0.2282324145141551, "grad_norm": 0.901630699634552, "learning_rate": 8.791000138174799e-05, "loss": 12.800196647644043, "step": 1760 }, { "epoch": 0.2283620920224018, "grad_norm": 0.835319459438324, "learning_rate": 8.789668316826159e-05, "loss": 11.694427490234375, "step": 1761 }, { "epoch": 0.22849176953064848, "grad_norm": 0.6477036476135254, "learning_rate": 8.788335863324249e-05, "loss": 9.675407409667969, "step": 1762 }, { "epoch": 0.22862144703889514, "grad_norm": 1.0634242296218872, "learning_rate": 8.787002777891335e-05, "loss": 13.76517105102539, "step": 1763 }, { "epoch": 0.22875112454714183, "grad_norm": 0.5753464698791504, "learning_rate": 8.785669060749789e-05, "loss": 10.752676963806152, "step": 1764 }, { "epoch": 0.2288808020553885, "grad_norm": 0.704602062702179, "learning_rate": 8.784334712122084e-05, "loss": 8.776040077209473, "step": 1765 }, { "epoch": 0.22901047956363518, "grad_norm": 0.8362288475036621, "learning_rate": 8.782999732230807e-05, "loss": 14.375875473022461, "step": 1766 }, { "epoch": 0.22914015707188187, "grad_norm": 0.6662283539772034, "learning_rate": 8.781664121298645e-05, "loss": 11.859092712402344, "step": 1767 }, { "epoch": 0.22926983458012853, "grad_norm": 0.8219818472862244, "learning_rate": 8.780327879548389e-05, "loss": 12.475353240966797, "step": 1768 }, { "epoch": 0.22939951208837522, "grad_norm": 0.6604505777359009, "learning_rate": 8.77899100720294e-05, "loss": 11.078831672668457, "step": 1769 }, { "epoch": 0.2295291895966219, "grad_norm": 0.7594690322875977, "learning_rate": 8.777653504485295e-05, "loss": 12.475851058959961, "step": 1770 }, { "epoch": 0.22965886710486857, "grad_norm": 1.01527738571167, "learning_rate": 8.776315371618569e-05, "loss": 15.37595272064209, "step": 1771 }, { "epoch": 0.22978854461311526, "grad_norm": 0.7536696195602417, "learning_rate": 8.774976608825971e-05, "loss": 12.872720718383789, "step": 1772 }, { "epoch": 0.22991822212136193, "grad_norm": 0.727749228477478, "learning_rate": 8.773637216330823e-05, "loss": 10.983240127563477, "step": 1773 }, { "epoch": 0.23004789962960862, "grad_norm": 0.6852672100067139, "learning_rate": 8.772297194356545e-05, "loss": 12.453330039978027, "step": 1774 }, { "epoch": 0.2301775771378553, "grad_norm": 0.8625197410583496, "learning_rate": 8.770956543126669e-05, "loss": 13.403694152832031, "step": 1775 }, { "epoch": 0.23030725464610197, "grad_norm": 0.8345258831977844, "learning_rate": 8.769615262864828e-05, "loss": 11.752748489379883, "step": 1776 }, { "epoch": 0.23043693215434866, "grad_norm": 0.636264979839325, "learning_rate": 8.768273353794758e-05, "loss": 12.267659187316895, "step": 1777 }, { "epoch": 0.23056660966259535, "grad_norm": 0.6981654167175293, "learning_rate": 8.766930816140304e-05, "loss": 8.84027099609375, "step": 1778 }, { "epoch": 0.230696287170842, "grad_norm": 0.8728288412094116, "learning_rate": 8.765587650125415e-05, "loss": 14.430648803710938, "step": 1779 }, { "epoch": 0.2308259646790887, "grad_norm": 0.4442475140094757, "learning_rate": 8.764243855974142e-05, "loss": 11.633868217468262, "step": 1780 }, { "epoch": 0.23095564218733536, "grad_norm": 0.6867178678512573, "learning_rate": 8.762899433910645e-05, "loss": 12.935770034790039, "step": 1781 }, { "epoch": 0.23108531969558205, "grad_norm": 0.7784473299980164, "learning_rate": 8.761554384159186e-05, "loss": 16.834335327148438, "step": 1782 }, { "epoch": 0.23121499720382874, "grad_norm": 0.6316193342208862, "learning_rate": 8.760208706944133e-05, "loss": 13.963565826416016, "step": 1783 }, { "epoch": 0.2313446747120754, "grad_norm": 0.7231908440589905, "learning_rate": 8.758862402489956e-05, "loss": 9.5066499710083, "step": 1784 }, { "epoch": 0.2314743522203221, "grad_norm": 0.5137810111045837, "learning_rate": 8.757515471021232e-05, "loss": 10.06749439239502, "step": 1785 }, { "epoch": 0.23160402972856878, "grad_norm": 0.4700356721878052, "learning_rate": 8.756167912762643e-05, "loss": 9.27486515045166, "step": 1786 }, { "epoch": 0.23173370723681544, "grad_norm": 0.6942423582077026, "learning_rate": 8.754819727938975e-05, "loss": 11.362563133239746, "step": 1787 }, { "epoch": 0.23186338474506213, "grad_norm": 0.6051609516143799, "learning_rate": 8.753470916775117e-05, "loss": 11.915658950805664, "step": 1788 }, { "epoch": 0.2319930622533088, "grad_norm": 0.8548623323440552, "learning_rate": 8.752121479496064e-05, "loss": 13.673174858093262, "step": 1789 }, { "epoch": 0.23212273976155548, "grad_norm": 0.7171939015388489, "learning_rate": 8.750771416326917e-05, "loss": 11.74157428741455, "step": 1790 }, { "epoch": 0.23225241726980217, "grad_norm": 0.6439371705055237, "learning_rate": 8.749420727492877e-05, "loss": 10.834334373474121, "step": 1791 }, { "epoch": 0.23238209477804883, "grad_norm": 0.7574618458747864, "learning_rate": 8.748069413219254e-05, "loss": 9.604107856750488, "step": 1792 }, { "epoch": 0.23251177228629552, "grad_norm": 0.7725687026977539, "learning_rate": 8.746717473731459e-05, "loss": 14.815180778503418, "step": 1793 }, { "epoch": 0.2326414497945422, "grad_norm": 0.6563302278518677, "learning_rate": 8.74536490925501e-05, "loss": 14.629069328308105, "step": 1794 }, { "epoch": 0.23277112730278887, "grad_norm": 0.8696727752685547, "learning_rate": 8.744011720015526e-05, "loss": 17.21469497680664, "step": 1795 }, { "epoch": 0.23290080481103556, "grad_norm": 0.6485320329666138, "learning_rate": 8.742657906238731e-05, "loss": 12.505608558654785, "step": 1796 }, { "epoch": 0.23303048231928222, "grad_norm": 0.732072114944458, "learning_rate": 8.741303468150459e-05, "loss": 11.869950294494629, "step": 1797 }, { "epoch": 0.2331601598275289, "grad_norm": 0.5574063062667847, "learning_rate": 8.739948405976639e-05, "loss": 8.374719619750977, "step": 1798 }, { "epoch": 0.2332898373357756, "grad_norm": 0.7435089945793152, "learning_rate": 8.738592719943311e-05, "loss": 15.835776329040527, "step": 1799 }, { "epoch": 0.23341951484402226, "grad_norm": 0.6068323254585266, "learning_rate": 8.737236410276613e-05, "loss": 13.201475143432617, "step": 1800 }, { "epoch": 0.23354919235226895, "grad_norm": 0.7444971799850464, "learning_rate": 8.735879477202795e-05, "loss": 11.162436485290527, "step": 1801 }, { "epoch": 0.23367886986051564, "grad_norm": 0.6332582831382751, "learning_rate": 8.734521920948203e-05, "loss": 11.091094970703125, "step": 1802 }, { "epoch": 0.2338085473687623, "grad_norm": 0.8287270069122314, "learning_rate": 8.733163741739292e-05, "loss": 10.388476371765137, "step": 1803 }, { "epoch": 0.233938224877009, "grad_norm": 0.6207584738731384, "learning_rate": 8.73180493980262e-05, "loss": 12.210397720336914, "step": 1804 }, { "epoch": 0.23406790238525566, "grad_norm": 0.6431903839111328, "learning_rate": 8.730445515364847e-05, "loss": 13.102352142333984, "step": 1805 }, { "epoch": 0.23419757989350234, "grad_norm": 0.6658808588981628, "learning_rate": 8.72908546865274e-05, "loss": 9.008322715759277, "step": 1806 }, { "epoch": 0.23432725740174903, "grad_norm": 0.6149439811706543, "learning_rate": 8.727724799893164e-05, "loss": 10.889787673950195, "step": 1807 }, { "epoch": 0.2344569349099957, "grad_norm": 0.6196738481521606, "learning_rate": 8.726363509313098e-05, "loss": 13.424072265625, "step": 1808 }, { "epoch": 0.23458661241824239, "grad_norm": 0.65341717004776, "learning_rate": 8.725001597139613e-05, "loss": 12.958677291870117, "step": 1809 }, { "epoch": 0.23471628992648907, "grad_norm": 0.6258528828620911, "learning_rate": 8.72363906359989e-05, "loss": 10.112517356872559, "step": 1810 }, { "epoch": 0.23484596743473574, "grad_norm": 0.6090853214263916, "learning_rate": 8.722275908921214e-05, "loss": 10.489947319030762, "step": 1811 }, { "epoch": 0.23497564494298243, "grad_norm": 0.8244277834892273, "learning_rate": 8.720912133330974e-05, "loss": 12.832710266113281, "step": 1812 }, { "epoch": 0.2351053224512291, "grad_norm": 0.7395213842391968, "learning_rate": 8.719547737056657e-05, "loss": 12.87496280670166, "step": 1813 }, { "epoch": 0.23523499995947578, "grad_norm": 0.700577974319458, "learning_rate": 8.71818272032586e-05, "loss": 14.294862747192383, "step": 1814 }, { "epoch": 0.23536467746772247, "grad_norm": 0.7407236099243164, "learning_rate": 8.71681708336628e-05, "loss": 13.092572212219238, "step": 1815 }, { "epoch": 0.23549435497596913, "grad_norm": 0.7150853276252747, "learning_rate": 8.715450826405718e-05, "loss": 12.89279556274414, "step": 1816 }, { "epoch": 0.23562403248421582, "grad_norm": 0.7282084822654724, "learning_rate": 8.714083949672082e-05, "loss": 11.48042106628418, "step": 1817 }, { "epoch": 0.2357537099924625, "grad_norm": 0.7301826477050781, "learning_rate": 8.712716453393377e-05, "loss": 14.790639877319336, "step": 1818 }, { "epoch": 0.23588338750070917, "grad_norm": 0.8354873657226562, "learning_rate": 8.711348337797714e-05, "loss": 14.93883228302002, "step": 1819 }, { "epoch": 0.23601306500895586, "grad_norm": 0.5937610864639282, "learning_rate": 8.70997960311331e-05, "loss": 14.033829689025879, "step": 1820 }, { "epoch": 0.23614274251720252, "grad_norm": 0.6896460652351379, "learning_rate": 8.708610249568483e-05, "loss": 11.295072555541992, "step": 1821 }, { "epoch": 0.2362724200254492, "grad_norm": 0.4680429995059967, "learning_rate": 8.707240277391655e-05, "loss": 8.796066284179688, "step": 1822 }, { "epoch": 0.2364020975336959, "grad_norm": 0.5686751008033752, "learning_rate": 8.705869686811347e-05, "loss": 12.044391632080078, "step": 1823 }, { "epoch": 0.23653177504194256, "grad_norm": 0.629165768623352, "learning_rate": 8.704498478056191e-05, "loss": 10.525214195251465, "step": 1824 }, { "epoch": 0.23666145255018925, "grad_norm": 0.7030778527259827, "learning_rate": 8.703126651354916e-05, "loss": 10.137918472290039, "step": 1825 }, { "epoch": 0.23679113005843594, "grad_norm": 0.5715411901473999, "learning_rate": 8.701754206936357e-05, "loss": 10.442448616027832, "step": 1826 }, { "epoch": 0.2369208075666826, "grad_norm": 0.7503668069839478, "learning_rate": 8.700381145029448e-05, "loss": 15.279800415039062, "step": 1827 }, { "epoch": 0.2370504850749293, "grad_norm": 0.7279126048088074, "learning_rate": 8.699007465863232e-05, "loss": 12.463824272155762, "step": 1828 }, { "epoch": 0.23718016258317595, "grad_norm": 0.7563701272010803, "learning_rate": 8.69763316966685e-05, "loss": 12.162338256835938, "step": 1829 }, { "epoch": 0.23730984009142264, "grad_norm": 0.9858978390693665, "learning_rate": 8.696258256669551e-05, "loss": 14.185425758361816, "step": 1830 }, { "epoch": 0.23743951759966933, "grad_norm": 0.5390335917472839, "learning_rate": 8.69488272710068e-05, "loss": 10.367169380187988, "step": 1831 }, { "epoch": 0.237569195107916, "grad_norm": 0.9063987731933594, "learning_rate": 8.693506581189691e-05, "loss": 11.738272666931152, "step": 1832 }, { "epoch": 0.23769887261616268, "grad_norm": 0.7385785579681396, "learning_rate": 8.692129819166138e-05, "loss": 10.264101028442383, "step": 1833 }, { "epoch": 0.23782855012440937, "grad_norm": 0.6729455590248108, "learning_rate": 8.690752441259678e-05, "loss": 12.287409782409668, "step": 1834 }, { "epoch": 0.23795822763265603, "grad_norm": 0.7152612209320068, "learning_rate": 8.68937444770007e-05, "loss": 13.161144256591797, "step": 1835 }, { "epoch": 0.23808790514090272, "grad_norm": 0.8152037858963013, "learning_rate": 8.687995838717175e-05, "loss": 13.305368423461914, "step": 1836 }, { "epoch": 0.23821758264914938, "grad_norm": 0.9462242722511292, "learning_rate": 8.686616614540965e-05, "loss": 14.84829330444336, "step": 1837 }, { "epoch": 0.23834726015739607, "grad_norm": 0.6859889030456543, "learning_rate": 8.685236775401501e-05, "loss": 11.92328929901123, "step": 1838 }, { "epoch": 0.23847693766564276, "grad_norm": 0.5349240303039551, "learning_rate": 8.683856321528958e-05, "loss": 11.1326265335083, "step": 1839 }, { "epoch": 0.23860661517388942, "grad_norm": 0.7502342462539673, "learning_rate": 8.682475253153606e-05, "loss": 11.910209655761719, "step": 1840 }, { "epoch": 0.23873629268213611, "grad_norm": 0.9185617566108704, "learning_rate": 8.681093570505821e-05, "loss": 15.698657035827637, "step": 1841 }, { "epoch": 0.2388659701903828, "grad_norm": 0.8301330804824829, "learning_rate": 8.679711273816084e-05, "loss": 13.081841468811035, "step": 1842 }, { "epoch": 0.23899564769862947, "grad_norm": 0.607754647731781, "learning_rate": 8.678328363314971e-05, "loss": 13.580853462219238, "step": 1843 }, { "epoch": 0.23912532520687615, "grad_norm": 0.7603875994682312, "learning_rate": 8.676944839233167e-05, "loss": 11.539278030395508, "step": 1844 }, { "epoch": 0.23925500271512282, "grad_norm": 0.6248161792755127, "learning_rate": 8.675560701801458e-05, "loss": 9.513325691223145, "step": 1845 }, { "epoch": 0.2393846802233695, "grad_norm": 0.7460484504699707, "learning_rate": 8.674175951250732e-05, "loss": 11.024402618408203, "step": 1846 }, { "epoch": 0.2395143577316162, "grad_norm": 0.6699086427688599, "learning_rate": 8.672790587811975e-05, "loss": 11.224020004272461, "step": 1847 }, { "epoch": 0.23964403523986286, "grad_norm": 0.6608891487121582, "learning_rate": 8.671404611716283e-05, "loss": 11.003885269165039, "step": 1848 }, { "epoch": 0.23977371274810955, "grad_norm": 0.5973469018936157, "learning_rate": 8.670018023194847e-05, "loss": 8.450674057006836, "step": 1849 }, { "epoch": 0.23990339025635624, "grad_norm": 0.8045300841331482, "learning_rate": 8.668630822478966e-05, "loss": 10.189862251281738, "step": 1850 }, { "epoch": 0.2400330677646029, "grad_norm": 0.6731282472610474, "learning_rate": 8.667243009800037e-05, "loss": 13.536876678466797, "step": 1851 }, { "epoch": 0.2401627452728496, "grad_norm": 0.5557497143745422, "learning_rate": 8.665854585389561e-05, "loss": 8.698407173156738, "step": 1852 }, { "epoch": 0.24029242278109625, "grad_norm": 0.6539098024368286, "learning_rate": 8.664465549479141e-05, "loss": 9.48214340209961, "step": 1853 }, { "epoch": 0.24042210028934294, "grad_norm": 0.7225672602653503, "learning_rate": 8.663075902300481e-05, "loss": 12.344056129455566, "step": 1854 }, { "epoch": 0.24055177779758963, "grad_norm": 0.7461941838264465, "learning_rate": 8.661685644085389e-05, "loss": 14.204588890075684, "step": 1855 }, { "epoch": 0.2406814553058363, "grad_norm": 0.5504805445671082, "learning_rate": 8.660294775065772e-05, "loss": 9.782940864562988, "step": 1856 }, { "epoch": 0.24081113281408298, "grad_norm": 0.7142102718353271, "learning_rate": 8.65890329547364e-05, "loss": 14.339794158935547, "step": 1857 }, { "epoch": 0.24094081032232967, "grad_norm": 0.8392682075500488, "learning_rate": 8.657511205541104e-05, "loss": 16.360586166381836, "step": 1858 }, { "epoch": 0.24107048783057633, "grad_norm": 0.6115514636039734, "learning_rate": 8.656118505500382e-05, "loss": 12.509719848632812, "step": 1859 }, { "epoch": 0.24120016533882302, "grad_norm": 0.5450913310050964, "learning_rate": 8.654725195583788e-05, "loss": 8.14717960357666, "step": 1860 }, { "epoch": 0.24132984284706968, "grad_norm": 0.7143703103065491, "learning_rate": 8.653331276023738e-05, "loss": 11.046260833740234, "step": 1861 }, { "epoch": 0.24145952035531637, "grad_norm": 0.7359424233436584, "learning_rate": 8.651936747052753e-05, "loss": 13.092482566833496, "step": 1862 }, { "epoch": 0.24158919786356306, "grad_norm": 0.6625141501426697, "learning_rate": 8.650541608903452e-05, "loss": 12.909417152404785, "step": 1863 }, { "epoch": 0.24171887537180972, "grad_norm": 0.6119987368583679, "learning_rate": 8.649145861808562e-05, "loss": 12.638877868652344, "step": 1864 }, { "epoch": 0.2418485528800564, "grad_norm": 0.8231302499771118, "learning_rate": 8.647749506000901e-05, "loss": 13.60190486907959, "step": 1865 }, { "epoch": 0.2419782303883031, "grad_norm": 0.766485869884491, "learning_rate": 8.646352541713401e-05, "loss": 14.412537574768066, "step": 1866 }, { "epoch": 0.24210790789654976, "grad_norm": 0.6648708581924438, "learning_rate": 8.644954969179085e-05, "loss": 10.005895614624023, "step": 1867 }, { "epoch": 0.24223758540479645, "grad_norm": 0.5357049703598022, "learning_rate": 8.643556788631079e-05, "loss": 9.67330551147461, "step": 1868 }, { "epoch": 0.2423672629130431, "grad_norm": 0.6238895654678345, "learning_rate": 8.64215800030262e-05, "loss": 7.307522296905518, "step": 1869 }, { "epoch": 0.2424969404212898, "grad_norm": 0.696179211139679, "learning_rate": 8.640758604427035e-05, "loss": 11.49434757232666, "step": 1870 }, { "epoch": 0.2426266179295365, "grad_norm": 0.7108768820762634, "learning_rate": 8.639358601237756e-05, "loss": 12.518309593200684, "step": 1871 }, { "epoch": 0.24275629543778315, "grad_norm": 0.6269614696502686, "learning_rate": 8.63795799096832e-05, "loss": 15.243969917297363, "step": 1872 }, { "epoch": 0.24288597294602984, "grad_norm": 0.6393557786941528, "learning_rate": 8.636556773852361e-05, "loss": 14.11015796661377, "step": 1873 }, { "epoch": 0.24301565045427653, "grad_norm": 0.7116354703903198, "learning_rate": 8.635154950123617e-05, "loss": 13.893865585327148, "step": 1874 }, { "epoch": 0.2431453279625232, "grad_norm": 0.7216200232505798, "learning_rate": 8.633752520015921e-05, "loss": 9.761881828308105, "step": 1875 }, { "epoch": 0.24327500547076988, "grad_norm": 0.9212732911109924, "learning_rate": 8.632349483763215e-05, "loss": 15.424827575683594, "step": 1876 }, { "epoch": 0.24340468297901655, "grad_norm": 0.6831506490707397, "learning_rate": 8.630945841599542e-05, "loss": 11.756932258605957, "step": 1877 }, { "epoch": 0.24353436048726324, "grad_norm": 0.7551479339599609, "learning_rate": 8.629541593759039e-05, "loss": 12.144585609436035, "step": 1878 }, { "epoch": 0.24366403799550992, "grad_norm": 0.6901412606239319, "learning_rate": 8.62813674047595e-05, "loss": 13.446249008178711, "step": 1879 }, { "epoch": 0.2437937155037566, "grad_norm": 0.7128893733024597, "learning_rate": 8.626731281984615e-05, "loss": 9.010891914367676, "step": 1880 }, { "epoch": 0.24392339301200328, "grad_norm": 0.6487711668014526, "learning_rate": 8.625325218519482e-05, "loss": 14.39896011352539, "step": 1881 }, { "epoch": 0.24405307052024997, "grad_norm": 0.6542515754699707, "learning_rate": 8.623918550315094e-05, "loss": 14.087400436401367, "step": 1882 }, { "epoch": 0.24418274802849663, "grad_norm": 0.7718406319618225, "learning_rate": 8.622511277606097e-05, "loss": 14.964822769165039, "step": 1883 }, { "epoch": 0.24431242553674332, "grad_norm": 0.6008180379867554, "learning_rate": 8.621103400627239e-05, "loss": 12.739925384521484, "step": 1884 }, { "epoch": 0.24444210304498998, "grad_norm": 1.921428918838501, "learning_rate": 8.619694919613365e-05, "loss": 12.595832824707031, "step": 1885 }, { "epoch": 0.24457178055323667, "grad_norm": 0.5536103248596191, "learning_rate": 8.618285834799425e-05, "loss": 10.186102867126465, "step": 1886 }, { "epoch": 0.24470145806148336, "grad_norm": 0.5123682022094727, "learning_rate": 8.616876146420467e-05, "loss": 8.525142669677734, "step": 1887 }, { "epoch": 0.24483113556973002, "grad_norm": 0.6781495809555054, "learning_rate": 8.615465854711643e-05, "loss": 9.602754592895508, "step": 1888 }, { "epoch": 0.2449608130779767, "grad_norm": 0.7048407196998596, "learning_rate": 8.6140549599082e-05, "loss": 9.967129707336426, "step": 1889 }, { "epoch": 0.2450904905862234, "grad_norm": 0.6818579435348511, "learning_rate": 8.612643462245491e-05, "loss": 10.205965995788574, "step": 1890 }, { "epoch": 0.24522016809447006, "grad_norm": 0.5813664197921753, "learning_rate": 8.611231361958967e-05, "loss": 9.377582550048828, "step": 1891 }, { "epoch": 0.24534984560271675, "grad_norm": 0.8208697438240051, "learning_rate": 8.609818659284178e-05, "loss": 11.94628620147705, "step": 1892 }, { "epoch": 0.2454795231109634, "grad_norm": 0.4378076195716858, "learning_rate": 8.608405354456778e-05, "loss": 8.228214263916016, "step": 1893 }, { "epoch": 0.2456092006192101, "grad_norm": 0.626184344291687, "learning_rate": 8.606991447712523e-05, "loss": 8.337555885314941, "step": 1894 }, { "epoch": 0.2457388781274568, "grad_norm": 0.5907244086265564, "learning_rate": 8.605576939287262e-05, "loss": 10.300039291381836, "step": 1895 }, { "epoch": 0.24586855563570345, "grad_norm": 0.8287622332572937, "learning_rate": 8.60416182941695e-05, "loss": 12.549995422363281, "step": 1896 }, { "epoch": 0.24599823314395014, "grad_norm": 0.6373730897903442, "learning_rate": 8.60274611833764e-05, "loss": 11.44729995727539, "step": 1897 }, { "epoch": 0.24612791065219683, "grad_norm": 0.5443737506866455, "learning_rate": 8.601329806285488e-05, "loss": 7.662676811218262, "step": 1898 }, { "epoch": 0.2462575881604435, "grad_norm": 0.7046663165092468, "learning_rate": 8.59991289349675e-05, "loss": 12.941583633422852, "step": 1899 }, { "epoch": 0.24638726566869018, "grad_norm": 0.7045317888259888, "learning_rate": 8.598495380207775e-05, "loss": 11.874075889587402, "step": 1900 }, { "epoch": 0.24651694317693684, "grad_norm": 0.7100753784179688, "learning_rate": 8.597077266655023e-05, "loss": 12.046585083007812, "step": 1901 }, { "epoch": 0.24664662068518353, "grad_norm": 0.6644909381866455, "learning_rate": 8.595658553075047e-05, "loss": 10.86623764038086, "step": 1902 }, { "epoch": 0.24677629819343022, "grad_norm": 0.8099959492683411, "learning_rate": 8.594239239704503e-05, "loss": 15.571656227111816, "step": 1903 }, { "epoch": 0.24690597570167688, "grad_norm": 0.7770164012908936, "learning_rate": 8.592819326780146e-05, "loss": 11.090977668762207, "step": 1904 }, { "epoch": 0.24703565320992357, "grad_norm": 0.7841039299964905, "learning_rate": 8.591398814538831e-05, "loss": 11.977729797363281, "step": 1905 }, { "epoch": 0.24716533071817026, "grad_norm": 0.5670809149742126, "learning_rate": 8.589977703217513e-05, "loss": 9.615684509277344, "step": 1906 }, { "epoch": 0.24729500822641692, "grad_norm": 0.577992856502533, "learning_rate": 8.588555993053248e-05, "loss": 9.152704238891602, "step": 1907 }, { "epoch": 0.2474246857346636, "grad_norm": 0.7843447923660278, "learning_rate": 8.58713368428319e-05, "loss": 11.39846134185791, "step": 1908 }, { "epoch": 0.24755436324291027, "grad_norm": 0.6193442940711975, "learning_rate": 8.585710777144593e-05, "loss": 13.478943824768066, "step": 1909 }, { "epoch": 0.24768404075115696, "grad_norm": 0.595418393611908, "learning_rate": 8.584287271874813e-05, "loss": 13.787156105041504, "step": 1910 }, { "epoch": 0.24781371825940365, "grad_norm": 0.6272754073143005, "learning_rate": 8.582863168711305e-05, "loss": 9.424260139465332, "step": 1911 }, { "epoch": 0.24794339576765032, "grad_norm": 0.5829334259033203, "learning_rate": 8.581438467891623e-05, "loss": 11.831160545349121, "step": 1912 }, { "epoch": 0.248073073275897, "grad_norm": 0.6481611728668213, "learning_rate": 8.580013169653418e-05, "loss": 13.763066291809082, "step": 1913 }, { "epoch": 0.2482027507841437, "grad_norm": 0.9439581036567688, "learning_rate": 8.578587274234446e-05, "loss": 12.70805835723877, "step": 1914 }, { "epoch": 0.24833242829239036, "grad_norm": 0.7340753078460693, "learning_rate": 8.577160781872561e-05, "loss": 15.589029312133789, "step": 1915 }, { "epoch": 0.24846210580063705, "grad_norm": 0.6041817665100098, "learning_rate": 8.575733692805714e-05, "loss": 10.071195602416992, "step": 1916 }, { "epoch": 0.2485917833088837, "grad_norm": 0.45537540316581726, "learning_rate": 8.574306007271957e-05, "loss": 9.339165687561035, "step": 1917 }, { "epoch": 0.2487214608171304, "grad_norm": 0.546275794506073, "learning_rate": 8.572877725509444e-05, "loss": 13.132081985473633, "step": 1918 }, { "epoch": 0.24885113832537709, "grad_norm": 0.639792799949646, "learning_rate": 8.571448847756424e-05, "loss": 10.60291576385498, "step": 1919 }, { "epoch": 0.24898081583362375, "grad_norm": 0.46188628673553467, "learning_rate": 8.570019374251248e-05, "loss": 9.217141151428223, "step": 1920 }, { "epoch": 0.24911049334187044, "grad_norm": 0.6431860327720642, "learning_rate": 8.568589305232366e-05, "loss": 12.542508125305176, "step": 1921 }, { "epoch": 0.24924017085011713, "grad_norm": 0.6483737826347351, "learning_rate": 8.567158640938328e-05, "loss": 12.47518539428711, "step": 1922 }, { "epoch": 0.2493698483583638, "grad_norm": 0.7825607061386108, "learning_rate": 8.565727381607783e-05, "loss": 12.549982070922852, "step": 1923 }, { "epoch": 0.24949952586661048, "grad_norm": 0.825536847114563, "learning_rate": 8.564295527479475e-05, "loss": 14.17496109008789, "step": 1924 }, { "epoch": 0.24962920337485714, "grad_norm": 0.7041105628013611, "learning_rate": 8.562863078792259e-05, "loss": 10.795089721679688, "step": 1925 }, { "epoch": 0.24975888088310383, "grad_norm": 0.5982131361961365, "learning_rate": 8.561430035785072e-05, "loss": 13.142247200012207, "step": 1926 }, { "epoch": 0.24988855839135052, "grad_norm": 0.6700454950332642, "learning_rate": 8.559996398696965e-05, "loss": 11.883116722106934, "step": 1927 }, { "epoch": 0.2500182358995972, "grad_norm": 0.48981335759162903, "learning_rate": 8.558562167767084e-05, "loss": 9.56857967376709, "step": 1928 }, { "epoch": 0.25014791340784387, "grad_norm": 0.69077068567276, "learning_rate": 8.557127343234668e-05, "loss": 9.670564651489258, "step": 1929 }, { "epoch": 0.25027759091609053, "grad_norm": 0.6086869835853577, "learning_rate": 8.55569192533906e-05, "loss": 14.649310111999512, "step": 1930 }, { "epoch": 0.25040726842433725, "grad_norm": 0.6263576149940491, "learning_rate": 8.554255914319704e-05, "loss": 10.99242877960205, "step": 1931 }, { "epoch": 0.2505369459325839, "grad_norm": 0.6866093873977661, "learning_rate": 8.55281931041614e-05, "loss": 13.210405349731445, "step": 1932 }, { "epoch": 0.25066662344083057, "grad_norm": 0.6579293012619019, "learning_rate": 8.551382113868007e-05, "loss": 12.424637794494629, "step": 1933 }, { "epoch": 0.2507963009490773, "grad_norm": 0.6084567904472351, "learning_rate": 8.549944324915044e-05, "loss": 9.754288673400879, "step": 1934 }, { "epoch": 0.25092597845732395, "grad_norm": 0.5878382325172424, "learning_rate": 8.548505943797087e-05, "loss": 9.875222206115723, "step": 1935 }, { "epoch": 0.2510556559655706, "grad_norm": 0.9216770529747009, "learning_rate": 8.547066970754072e-05, "loss": 12.060429573059082, "step": 1936 }, { "epoch": 0.2511853334738173, "grad_norm": 0.6971606612205505, "learning_rate": 8.545627406026033e-05, "loss": 9.843399047851562, "step": 1937 }, { "epoch": 0.251315010982064, "grad_norm": 0.7904328107833862, "learning_rate": 8.544187249853105e-05, "loss": 16.956701278686523, "step": 1938 }, { "epoch": 0.25144468849031065, "grad_norm": 0.6836439967155457, "learning_rate": 8.542746502475519e-05, "loss": 12.464620590209961, "step": 1939 }, { "epoch": 0.2515743659985573, "grad_norm": 0.7423340082168579, "learning_rate": 8.541305164133605e-05, "loss": 13.96185302734375, "step": 1940 }, { "epoch": 0.25170404350680403, "grad_norm": 0.5973422527313232, "learning_rate": 8.539863235067794e-05, "loss": 9.62091064453125, "step": 1941 }, { "epoch": 0.2518337210150507, "grad_norm": 0.7759988307952881, "learning_rate": 8.538420715518612e-05, "loss": 13.891312599182129, "step": 1942 }, { "epoch": 0.25196339852329735, "grad_norm": 0.6469017863273621, "learning_rate": 8.536977605726683e-05, "loss": 12.918745994567871, "step": 1943 }, { "epoch": 0.25209307603154407, "grad_norm": 0.629660964012146, "learning_rate": 8.535533905932738e-05, "loss": 13.995966911315918, "step": 1944 }, { "epoch": 0.25222275353979073, "grad_norm": 0.6291027069091797, "learning_rate": 8.534089616377595e-05, "loss": 11.727378845214844, "step": 1945 }, { "epoch": 0.2523524310480374, "grad_norm": 0.5315154194831848, "learning_rate": 8.532644737302177e-05, "loss": 8.699665069580078, "step": 1946 }, { "epoch": 0.2524821085562841, "grad_norm": 0.6027202010154724, "learning_rate": 8.531199268947502e-05, "loss": 6.70100212097168, "step": 1947 }, { "epoch": 0.2526117860645308, "grad_norm": 0.6287987232208252, "learning_rate": 8.529753211554692e-05, "loss": 9.755228042602539, "step": 1948 }, { "epoch": 0.25274146357277744, "grad_norm": 0.7304128408432007, "learning_rate": 8.528306565364961e-05, "loss": 12.572982788085938, "step": 1949 }, { "epoch": 0.25287114108102415, "grad_norm": 0.6917598843574524, "learning_rate": 8.526859330619623e-05, "loss": 8.68743896484375, "step": 1950 }, { "epoch": 0.2530008185892708, "grad_norm": 0.6194168925285339, "learning_rate": 8.52541150756009e-05, "loss": 9.890412330627441, "step": 1951 }, { "epoch": 0.2531304960975175, "grad_norm": 0.787004292011261, "learning_rate": 8.523963096427876e-05, "loss": 13.610334396362305, "step": 1952 }, { "epoch": 0.25326017360576414, "grad_norm": 1.0031218528747559, "learning_rate": 8.522514097464587e-05, "loss": 14.685165405273438, "step": 1953 }, { "epoch": 0.25338985111401086, "grad_norm": 0.673936128616333, "learning_rate": 8.521064510911931e-05, "loss": 15.225625991821289, "step": 1954 }, { "epoch": 0.2535195286222575, "grad_norm": 0.7850061655044556, "learning_rate": 8.519614337011714e-05, "loss": 10.873791694641113, "step": 1955 }, { "epoch": 0.2536492061305042, "grad_norm": 0.765802264213562, "learning_rate": 8.518163576005838e-05, "loss": 12.35264778137207, "step": 1956 }, { "epoch": 0.2537788836387509, "grad_norm": 0.9000613689422607, "learning_rate": 8.516712228136303e-05, "loss": 16.21163558959961, "step": 1957 }, { "epoch": 0.25390856114699756, "grad_norm": 0.7859221696853638, "learning_rate": 8.515260293645211e-05, "loss": 11.128219604492188, "step": 1958 }, { "epoch": 0.2540382386552442, "grad_norm": 0.8817822933197021, "learning_rate": 8.513807772774756e-05, "loss": 11.995159149169922, "step": 1959 }, { "epoch": 0.25416791616349094, "grad_norm": 0.6124944090843201, "learning_rate": 8.512354665767233e-05, "loss": 8.609406471252441, "step": 1960 }, { "epoch": 0.2542975936717376, "grad_norm": 0.7229989767074585, "learning_rate": 8.510900972865034e-05, "loss": 9.929194450378418, "step": 1961 }, { "epoch": 0.25442727117998426, "grad_norm": 0.5114035606384277, "learning_rate": 8.50944669431065e-05, "loss": 12.351701736450195, "step": 1962 }, { "epoch": 0.254556948688231, "grad_norm": 0.9517144560813904, "learning_rate": 8.507991830346668e-05, "loss": 13.354283332824707, "step": 1963 }, { "epoch": 0.25468662619647764, "grad_norm": 0.7206177711486816, "learning_rate": 8.506536381215773e-05, "loss": 13.353277206420898, "step": 1964 }, { "epoch": 0.2548163037047243, "grad_norm": 0.4930883049964905, "learning_rate": 8.50508034716075e-05, "loss": 7.903234004974365, "step": 1965 }, { "epoch": 0.254945981212971, "grad_norm": 1.094601035118103, "learning_rate": 8.503623728424477e-05, "loss": 11.611764907836914, "step": 1966 }, { "epoch": 0.2550756587212177, "grad_norm": 0.8509446382522583, "learning_rate": 8.502166525249933e-05, "loss": 13.929329872131348, "step": 1967 }, { "epoch": 0.25520533622946434, "grad_norm": 0.775931179523468, "learning_rate": 8.500708737880194e-05, "loss": 15.490164756774902, "step": 1968 }, { "epoch": 0.255335013737711, "grad_norm": 0.7369164824485779, "learning_rate": 8.499250366558433e-05, "loss": 12.306568145751953, "step": 1969 }, { "epoch": 0.2554646912459577, "grad_norm": 0.6350170969963074, "learning_rate": 8.49779141152792e-05, "loss": 12.683173179626465, "step": 1970 }, { "epoch": 0.2555943687542044, "grad_norm": 0.5943461656570435, "learning_rate": 8.496331873032022e-05, "loss": 15.419387817382812, "step": 1971 }, { "epoch": 0.25572404626245104, "grad_norm": 0.8223320841789246, "learning_rate": 8.494871751314207e-05, "loss": 15.283839225769043, "step": 1972 }, { "epoch": 0.25585372377069776, "grad_norm": 0.527834951877594, "learning_rate": 8.493411046618032e-05, "loss": 8.261953353881836, "step": 1973 }, { "epoch": 0.2559834012789444, "grad_norm": 0.5261927843093872, "learning_rate": 8.491949759187163e-05, "loss": 9.826887130737305, "step": 1974 }, { "epoch": 0.2561130787871911, "grad_norm": 0.6494563221931458, "learning_rate": 8.490487889265353e-05, "loss": 11.283431053161621, "step": 1975 }, { "epoch": 0.2562427562954378, "grad_norm": 0.8187502026557922, "learning_rate": 8.489025437096457e-05, "loss": 13.136253356933594, "step": 1976 }, { "epoch": 0.25637243380368446, "grad_norm": 0.7190262079238892, "learning_rate": 8.487562402924424e-05, "loss": 10.333415985107422, "step": 1977 }, { "epoch": 0.2565021113119311, "grad_norm": 0.594196617603302, "learning_rate": 8.486098786993308e-05, "loss": 9.952869415283203, "step": 1978 }, { "epoch": 0.25663178882017784, "grad_norm": 0.5694220662117004, "learning_rate": 8.48463458954725e-05, "loss": 6.937562465667725, "step": 1979 }, { "epoch": 0.2567614663284245, "grad_norm": 0.5055859684944153, "learning_rate": 8.483169810830492e-05, "loss": 8.273555755615234, "step": 1980 }, { "epoch": 0.25689114383667117, "grad_norm": 0.6350362300872803, "learning_rate": 8.481704451087373e-05, "loss": 10.007567405700684, "step": 1981 }, { "epoch": 0.2570208213449179, "grad_norm": 0.7836266160011292, "learning_rate": 8.480238510562331e-05, "loss": 10.7615327835083, "step": 1982 }, { "epoch": 0.25715049885316454, "grad_norm": 0.7335644960403442, "learning_rate": 8.478771989499899e-05, "loss": 14.554079055786133, "step": 1983 }, { "epoch": 0.2572801763614112, "grad_norm": 0.8174175024032593, "learning_rate": 8.477304888144706e-05, "loss": 13.588091850280762, "step": 1984 }, { "epoch": 0.25740985386965787, "grad_norm": 0.9096839427947998, "learning_rate": 8.475837206741478e-05, "loss": 13.353771209716797, "step": 1985 }, { "epoch": 0.2575395313779046, "grad_norm": 0.5301013588905334, "learning_rate": 8.474368945535041e-05, "loss": 10.853979110717773, "step": 1986 }, { "epoch": 0.25766920888615125, "grad_norm": 0.6085160374641418, "learning_rate": 8.472900104770311e-05, "loss": 10.067267417907715, "step": 1987 }, { "epoch": 0.2577988863943979, "grad_norm": 0.8053773045539856, "learning_rate": 8.471430684692309e-05, "loss": 9.9290189743042, "step": 1988 }, { "epoch": 0.2579285639026446, "grad_norm": 0.9268636703491211, "learning_rate": 8.469960685546145e-05, "loss": 14.328280448913574, "step": 1989 }, { "epoch": 0.2580582414108913, "grad_norm": 0.692773163318634, "learning_rate": 8.468490107577033e-05, "loss": 12.367878913879395, "step": 1990 }, { "epoch": 0.25818791891913795, "grad_norm": 0.7269583940505981, "learning_rate": 8.467018951030277e-05, "loss": 11.004417419433594, "step": 1991 }, { "epoch": 0.25831759642738467, "grad_norm": 0.7704991698265076, "learning_rate": 8.465547216151279e-05, "loss": 14.80712604522705, "step": 1992 }, { "epoch": 0.2584472739356313, "grad_norm": 0.774367094039917, "learning_rate": 8.46407490318554e-05, "loss": 11.427129745483398, "step": 1993 }, { "epoch": 0.258576951443878, "grad_norm": 0.8460425734519958, "learning_rate": 8.462602012378657e-05, "loss": 11.269445419311523, "step": 1994 }, { "epoch": 0.2587066289521247, "grad_norm": 0.8426439166069031, "learning_rate": 8.461128543976322e-05, "loss": 14.345656394958496, "step": 1995 }, { "epoch": 0.25883630646037137, "grad_norm": 0.5650263428688049, "learning_rate": 8.459654498224321e-05, "loss": 15.001227378845215, "step": 1996 }, { "epoch": 0.25896598396861803, "grad_norm": 0.7786371111869812, "learning_rate": 8.458179875368544e-05, "loss": 12.539131164550781, "step": 1997 }, { "epoch": 0.25909566147686475, "grad_norm": 0.7228203415870667, "learning_rate": 8.456704675654969e-05, "loss": 14.968958854675293, "step": 1998 }, { "epoch": 0.2592253389851114, "grad_norm": 0.6506767272949219, "learning_rate": 8.455228899329674e-05, "loss": 12.942978858947754, "step": 1999 }, { "epoch": 0.25935501649335807, "grad_norm": 0.6308930516242981, "learning_rate": 8.453752546638832e-05, "loss": 10.40558910369873, "step": 2000 }, { "epoch": 0.25948469400160473, "grad_norm": 0.6582802534103394, "learning_rate": 8.452275617828714e-05, "loss": 11.446812629699707, "step": 2001 }, { "epoch": 0.25961437150985145, "grad_norm": 0.6499994397163391, "learning_rate": 8.450798113145688e-05, "loss": 8.474571228027344, "step": 2002 }, { "epoch": 0.2597440490180981, "grad_norm": 0.7378000617027283, "learning_rate": 8.449320032836211e-05, "loss": 14.491745948791504, "step": 2003 }, { "epoch": 0.2598737265263448, "grad_norm": 0.5804948210716248, "learning_rate": 8.447841377146845e-05, "loss": 7.68303108215332, "step": 2004 }, { "epoch": 0.2600034040345915, "grad_norm": 0.6137380599975586, "learning_rate": 8.446362146324243e-05, "loss": 12.137666702270508, "step": 2005 }, { "epoch": 0.26013308154283815, "grad_norm": 0.7706668972969055, "learning_rate": 8.444882340615156e-05, "loss": 10.359502792358398, "step": 2006 }, { "epoch": 0.2602627590510848, "grad_norm": 0.6433854103088379, "learning_rate": 8.443401960266428e-05, "loss": 11.097594261169434, "step": 2007 }, { "epoch": 0.26039243655933153, "grad_norm": 0.6280234456062317, "learning_rate": 8.441921005525001e-05, "loss": 12.462971687316895, "step": 2008 }, { "epoch": 0.2605221140675782, "grad_norm": 0.7136460542678833, "learning_rate": 8.440439476637915e-05, "loss": 16.07013511657715, "step": 2009 }, { "epoch": 0.26065179157582485, "grad_norm": 0.698839008808136, "learning_rate": 8.4389573738523e-05, "loss": 11.51048469543457, "step": 2010 }, { "epoch": 0.26078146908407157, "grad_norm": 0.7584715485572815, "learning_rate": 8.437474697415387e-05, "loss": 14.006267547607422, "step": 2011 }, { "epoch": 0.26091114659231823, "grad_norm": 0.7459684610366821, "learning_rate": 8.435991447574499e-05, "loss": 12.64613151550293, "step": 2012 }, { "epoch": 0.2610408241005649, "grad_norm": 0.6471083164215088, "learning_rate": 8.434507624577058e-05, "loss": 10.807415962219238, "step": 2013 }, { "epoch": 0.2611705016088116, "grad_norm": 0.8141928911209106, "learning_rate": 8.433023228670581e-05, "loss": 13.607616424560547, "step": 2014 }, { "epoch": 0.2613001791170583, "grad_norm": 0.6327809691429138, "learning_rate": 8.431538260102677e-05, "loss": 10.137073516845703, "step": 2015 }, { "epoch": 0.26142985662530493, "grad_norm": 0.550706148147583, "learning_rate": 8.430052719121056e-05, "loss": 9.68172550201416, "step": 2016 }, { "epoch": 0.2615595341335516, "grad_norm": 0.6609383821487427, "learning_rate": 8.428566605973517e-05, "loss": 10.083186149597168, "step": 2017 }, { "epoch": 0.2616892116417983, "grad_norm": 0.5764419436454773, "learning_rate": 8.42707992090796e-05, "loss": 11.270689964294434, "step": 2018 }, { "epoch": 0.261818889150045, "grad_norm": 0.6200392246246338, "learning_rate": 8.42559266417238e-05, "loss": 8.57076644897461, "step": 2019 }, { "epoch": 0.26194856665829164, "grad_norm": 0.6630744934082031, "learning_rate": 8.424104836014863e-05, "loss": 10.468070983886719, "step": 2020 }, { "epoch": 0.26207824416653835, "grad_norm": 0.5987918376922607, "learning_rate": 8.422616436683594e-05, "loss": 12.476861953735352, "step": 2021 }, { "epoch": 0.262207921674785, "grad_norm": 0.7438915371894836, "learning_rate": 8.421127466426855e-05, "loss": 11.045446395874023, "step": 2022 }, { "epoch": 0.2623375991830317, "grad_norm": 0.7537006735801697, "learning_rate": 8.419637925493016e-05, "loss": 13.006157875061035, "step": 2023 }, { "epoch": 0.2624672766912784, "grad_norm": 0.6677997708320618, "learning_rate": 8.41814781413055e-05, "loss": 9.916667938232422, "step": 2024 }, { "epoch": 0.26259695419952506, "grad_norm": 0.7696019411087036, "learning_rate": 8.41665713258802e-05, "loss": 13.547637939453125, "step": 2025 }, { "epoch": 0.2627266317077717, "grad_norm": 0.8158983588218689, "learning_rate": 8.415165881114088e-05, "loss": 12.251374244689941, "step": 2026 }, { "epoch": 0.26285630921601844, "grad_norm": 0.9332865476608276, "learning_rate": 8.413674059957508e-05, "loss": 9.799736976623535, "step": 2027 }, { "epoch": 0.2629859867242651, "grad_norm": 0.5915097594261169, "learning_rate": 8.41218166936713e-05, "loss": 9.908133506774902, "step": 2028 }, { "epoch": 0.26311566423251176, "grad_norm": 0.5593510270118713, "learning_rate": 8.4106887095919e-05, "loss": 7.901637554168701, "step": 2029 }, { "epoch": 0.2632453417407585, "grad_norm": 0.8394868969917297, "learning_rate": 8.409195180880857e-05, "loss": 13.478996276855469, "step": 2030 }, { "epoch": 0.26337501924900514, "grad_norm": 0.7471985816955566, "learning_rate": 8.407701083483137e-05, "loss": 11.04188346862793, "step": 2031 }, { "epoch": 0.2635046967572518, "grad_norm": 0.7216101884841919, "learning_rate": 8.40620641764797e-05, "loss": 12.389622688293457, "step": 2032 }, { "epoch": 0.26363437426549846, "grad_norm": 0.5327571630477905, "learning_rate": 8.404711183624681e-05, "loss": 10.169551849365234, "step": 2033 }, { "epoch": 0.2637640517737452, "grad_norm": 0.6544854640960693, "learning_rate": 8.403215381662687e-05, "loss": 12.55286693572998, "step": 2034 }, { "epoch": 0.26389372928199184, "grad_norm": 0.7795811891555786, "learning_rate": 8.401719012011506e-05, "loss": 9.434366226196289, "step": 2035 }, { "epoch": 0.2640234067902385, "grad_norm": 0.687797486782074, "learning_rate": 8.400222074920745e-05, "loss": 9.809443473815918, "step": 2036 }, { "epoch": 0.2641530842984852, "grad_norm": 0.9135170578956604, "learning_rate": 8.398724570640106e-05, "loss": 14.40373706817627, "step": 2037 }, { "epoch": 0.2642827618067319, "grad_norm": 0.535999596118927, "learning_rate": 8.397226499419388e-05, "loss": 7.728545188903809, "step": 2038 }, { "epoch": 0.26441243931497854, "grad_norm": 0.7151860594749451, "learning_rate": 8.395727861508486e-05, "loss": 13.249258041381836, "step": 2039 }, { "epoch": 0.26454211682322526, "grad_norm": 0.6067606806755066, "learning_rate": 8.394228657157385e-05, "loss": 7.885074615478516, "step": 2040 }, { "epoch": 0.2646717943314719, "grad_norm": 0.5584544539451599, "learning_rate": 8.392728886616167e-05, "loss": 11.623702049255371, "step": 2041 }, { "epoch": 0.2648014718397186, "grad_norm": 0.6829777956008911, "learning_rate": 8.391228550135008e-05, "loss": 10.333831787109375, "step": 2042 }, { "epoch": 0.2649311493479653, "grad_norm": 1.1775058507919312, "learning_rate": 8.38972764796418e-05, "loss": 13.42650318145752, "step": 2043 }, { "epoch": 0.26506082685621196, "grad_norm": 0.6175543665885925, "learning_rate": 8.388226180354048e-05, "loss": 12.997391700744629, "step": 2044 }, { "epoch": 0.2651905043644586, "grad_norm": 0.5348457098007202, "learning_rate": 8.38672414755507e-05, "loss": 10.802690505981445, "step": 2045 }, { "epoch": 0.26532018187270534, "grad_norm": 0.6474521160125732, "learning_rate": 8.3852215498178e-05, "loss": 11.18921947479248, "step": 2046 }, { "epoch": 0.265449859380952, "grad_norm": 0.6177282929420471, "learning_rate": 8.383718387392885e-05, "loss": 10.048498153686523, "step": 2047 }, { "epoch": 0.26557953688919866, "grad_norm": 1.063300609588623, "learning_rate": 8.38221466053107e-05, "loss": 12.713533401489258, "step": 2048 }, { "epoch": 0.2657092143974453, "grad_norm": 0.6187648177146912, "learning_rate": 8.380710369483186e-05, "loss": 11.975892066955566, "step": 2049 }, { "epoch": 0.26583889190569204, "grad_norm": 0.8391211032867432, "learning_rate": 8.379205514500168e-05, "loss": 13.731158256530762, "step": 2050 }, { "epoch": 0.2659685694139387, "grad_norm": 0.6771062016487122, "learning_rate": 8.377700095833039e-05, "loss": 15.200596809387207, "step": 2051 }, { "epoch": 0.26609824692218537, "grad_norm": 0.7074764966964722, "learning_rate": 8.376194113732917e-05, "loss": 12.175086975097656, "step": 2052 }, { "epoch": 0.2662279244304321, "grad_norm": 0.7708380222320557, "learning_rate": 8.374687568451014e-05, "loss": 15.218220710754395, "step": 2053 }, { "epoch": 0.26635760193867875, "grad_norm": 0.6453943848609924, "learning_rate": 8.373180460238636e-05, "loss": 12.046737670898438, "step": 2054 }, { "epoch": 0.2664872794469254, "grad_norm": 0.7774943709373474, "learning_rate": 8.371672789347185e-05, "loss": 13.882299423217773, "step": 2055 }, { "epoch": 0.2666169569551721, "grad_norm": 0.6601316928863525, "learning_rate": 8.370164556028153e-05, "loss": 10.46221923828125, "step": 2056 }, { "epoch": 0.2667466344634188, "grad_norm": 0.835559606552124, "learning_rate": 8.36865576053313e-05, "loss": 7.1836838722229, "step": 2057 }, { "epoch": 0.26687631197166545, "grad_norm": 0.7185377478599548, "learning_rate": 8.367146403113797e-05, "loss": 12.101648330688477, "step": 2058 }, { "epoch": 0.26700598947991216, "grad_norm": 0.7427223324775696, "learning_rate": 8.365636484021929e-05, "loss": 12.601179122924805, "step": 2059 }, { "epoch": 0.2671356669881588, "grad_norm": 0.8312908411026001, "learning_rate": 8.364126003509392e-05, "loss": 13.860396385192871, "step": 2060 }, { "epoch": 0.2672653444964055, "grad_norm": 1.1239203214645386, "learning_rate": 8.362614961828154e-05, "loss": 18.190345764160156, "step": 2061 }, { "epoch": 0.2673950220046522, "grad_norm": 0.7828530669212341, "learning_rate": 8.36110335923027e-05, "loss": 12.356629371643066, "step": 2062 }, { "epoch": 0.26752469951289887, "grad_norm": 0.5695507526397705, "learning_rate": 8.359591195967886e-05, "loss": 10.636547088623047, "step": 2063 }, { "epoch": 0.26765437702114553, "grad_norm": 0.8692725896835327, "learning_rate": 8.35807847229325e-05, "loss": 12.787074089050293, "step": 2064 }, { "epoch": 0.2677840545293922, "grad_norm": 0.7029805183410645, "learning_rate": 8.356565188458698e-05, "loss": 8.387994766235352, "step": 2065 }, { "epoch": 0.2679137320376389, "grad_norm": 0.7633159160614014, "learning_rate": 8.355051344716658e-05, "loss": 12.018895149230957, "step": 2066 }, { "epoch": 0.26804340954588557, "grad_norm": 0.6986936330795288, "learning_rate": 8.353536941319657e-05, "loss": 9.343421936035156, "step": 2067 }, { "epoch": 0.26817308705413223, "grad_norm": 0.7726527452468872, "learning_rate": 8.35202197852031e-05, "loss": 9.658863067626953, "step": 2068 }, { "epoch": 0.26830276456237895, "grad_norm": 0.6344983577728271, "learning_rate": 8.350506456571327e-05, "loss": 12.330727577209473, "step": 2069 }, { "epoch": 0.2684324420706256, "grad_norm": 0.7097104787826538, "learning_rate": 8.348990375725514e-05, "loss": 12.38107967376709, "step": 2070 }, { "epoch": 0.26856211957887227, "grad_norm": 0.6533455848693848, "learning_rate": 8.347473736235765e-05, "loss": 13.7022705078125, "step": 2071 }, { "epoch": 0.268691797087119, "grad_norm": 0.7157232165336609, "learning_rate": 8.345956538355073e-05, "loss": 11.293140411376953, "step": 2072 }, { "epoch": 0.26882147459536565, "grad_norm": 0.6378682851791382, "learning_rate": 8.34443878233652e-05, "loss": 13.078734397888184, "step": 2073 }, { "epoch": 0.2689511521036123, "grad_norm": 0.611926794052124, "learning_rate": 8.342920468433281e-05, "loss": 12.52904987335205, "step": 2074 }, { "epoch": 0.26908082961185903, "grad_norm": 0.5575952529907227, "learning_rate": 8.341401596898629e-05, "loss": 8.735437393188477, "step": 2075 }, { "epoch": 0.2692105071201057, "grad_norm": 0.5747816562652588, "learning_rate": 8.339882167985923e-05, "loss": 10.956219673156738, "step": 2076 }, { "epoch": 0.26934018462835235, "grad_norm": 0.7433188557624817, "learning_rate": 8.33836218194862e-05, "loss": 11.453983306884766, "step": 2077 }, { "epoch": 0.26946986213659907, "grad_norm": 0.725731611251831, "learning_rate": 8.336841639040267e-05, "loss": 9.243450164794922, "step": 2078 }, { "epoch": 0.26959953964484573, "grad_norm": 0.5729081034660339, "learning_rate": 8.335320539514508e-05, "loss": 7.3077168464660645, "step": 2079 }, { "epoch": 0.2697292171530924, "grad_norm": 0.564704179763794, "learning_rate": 8.333798883625072e-05, "loss": 10.889991760253906, "step": 2080 }, { "epoch": 0.26985889466133905, "grad_norm": 0.9255096912384033, "learning_rate": 8.332276671625792e-05, "loss": 11.176395416259766, "step": 2081 }, { "epoch": 0.26998857216958577, "grad_norm": 0.679451584815979, "learning_rate": 8.330753903770585e-05, "loss": 12.121262550354004, "step": 2082 }, { "epoch": 0.27011824967783243, "grad_norm": 0.5782216191291809, "learning_rate": 8.32923058031346e-05, "loss": 8.002052307128906, "step": 2083 }, { "epoch": 0.2702479271860791, "grad_norm": 0.44841307401657104, "learning_rate": 8.327706701508529e-05, "loss": 8.519682884216309, "step": 2084 }, { "epoch": 0.2703776046943258, "grad_norm": 0.6786438822746277, "learning_rate": 8.326182267609984e-05, "loss": 12.356032371520996, "step": 2085 }, { "epoch": 0.2705072822025725, "grad_norm": 0.7501733303070068, "learning_rate": 8.324657278872118e-05, "loss": 11.133890151977539, "step": 2086 }, { "epoch": 0.27063695971081914, "grad_norm": 0.8791986703872681, "learning_rate": 8.323131735549312e-05, "loss": 9.715494155883789, "step": 2087 }, { "epoch": 0.27076663721906585, "grad_norm": 0.6392830014228821, "learning_rate": 8.321605637896044e-05, "loss": 10.254255294799805, "step": 2088 }, { "epoch": 0.2708963147273125, "grad_norm": 0.6988325715065002, "learning_rate": 8.320078986166879e-05, "loss": 9.02662467956543, "step": 2089 }, { "epoch": 0.2710259922355592, "grad_norm": 0.8744861483573914, "learning_rate": 8.318551780616479e-05, "loss": 10.999966621398926, "step": 2090 }, { "epoch": 0.2711556697438059, "grad_norm": 0.7454221248626709, "learning_rate": 8.317024021499597e-05, "loss": 10.909263610839844, "step": 2091 }, { "epoch": 0.27128534725205256, "grad_norm": 0.7721489667892456, "learning_rate": 8.315495709071076e-05, "loss": 13.849482536315918, "step": 2092 }, { "epoch": 0.2714150247602992, "grad_norm": 0.6215622425079346, "learning_rate": 8.313966843585855e-05, "loss": 10.334853172302246, "step": 2093 }, { "epoch": 0.27154470226854593, "grad_norm": 0.6602717638015747, "learning_rate": 8.312437425298963e-05, "loss": 13.170195579528809, "step": 2094 }, { "epoch": 0.2716743797767926, "grad_norm": 0.7389291524887085, "learning_rate": 8.310907454465522e-05, "loss": 9.372159004211426, "step": 2095 }, { "epoch": 0.27180405728503926, "grad_norm": 0.7334312200546265, "learning_rate": 8.309376931340744e-05, "loss": 12.983746528625488, "step": 2096 }, { "epoch": 0.2719337347932859, "grad_norm": 0.7292169332504272, "learning_rate": 8.307845856179939e-05, "loss": 15.017955780029297, "step": 2097 }, { "epoch": 0.27206341230153264, "grad_norm": 0.9759877324104309, "learning_rate": 8.306314229238502e-05, "loss": 13.426356315612793, "step": 2098 }, { "epoch": 0.2721930898097793, "grad_norm": 0.7637549042701721, "learning_rate": 8.304782050771922e-05, "loss": 15.666253089904785, "step": 2099 }, { "epoch": 0.27232276731802596, "grad_norm": 0.649971067905426, "learning_rate": 8.303249321035784e-05, "loss": 12.23762035369873, "step": 2100 }, { "epoch": 0.2724524448262727, "grad_norm": 0.8023431897163391, "learning_rate": 8.301716040285763e-05, "loss": 12.747941970825195, "step": 2101 }, { "epoch": 0.27258212233451934, "grad_norm": 0.6409677863121033, "learning_rate": 8.300182208777622e-05, "loss": 10.402667045593262, "step": 2102 }, { "epoch": 0.272711799842766, "grad_norm": 0.5165101885795593, "learning_rate": 8.298647826767219e-05, "loss": 10.727303504943848, "step": 2103 }, { "epoch": 0.2728414773510127, "grad_norm": 0.6321370005607605, "learning_rate": 8.297112894510506e-05, "loss": 12.953300476074219, "step": 2104 }, { "epoch": 0.2729711548592594, "grad_norm": 0.8374330997467041, "learning_rate": 8.295577412263524e-05, "loss": 13.169899940490723, "step": 2105 }, { "epoch": 0.27310083236750604, "grad_norm": 0.7028535604476929, "learning_rate": 8.294041380282404e-05, "loss": 10.443083763122559, "step": 2106 }, { "epoch": 0.27323050987575276, "grad_norm": 0.7210525274276733, "learning_rate": 8.292504798823374e-05, "loss": 12.862833023071289, "step": 2107 }, { "epoch": 0.2733601873839994, "grad_norm": 0.5417824387550354, "learning_rate": 8.29096766814275e-05, "loss": 13.225142478942871, "step": 2108 }, { "epoch": 0.2734898648922461, "grad_norm": 0.683577299118042, "learning_rate": 8.289429988496938e-05, "loss": 10.997913360595703, "step": 2109 }, { "epoch": 0.2736195424004928, "grad_norm": 0.43682894110679626, "learning_rate": 8.287891760142438e-05, "loss": 9.166959762573242, "step": 2110 }, { "epoch": 0.27374921990873946, "grad_norm": 0.6845377683639526, "learning_rate": 8.286352983335846e-05, "loss": 11.707590103149414, "step": 2111 }, { "epoch": 0.2738788974169861, "grad_norm": 0.67735356092453, "learning_rate": 8.28481365833384e-05, "loss": 12.87865924835205, "step": 2112 }, { "epoch": 0.2740085749252328, "grad_norm": 0.5951511859893799, "learning_rate": 8.283273785393195e-05, "loss": 10.755433082580566, "step": 2113 }, { "epoch": 0.2741382524334795, "grad_norm": 0.680622935295105, "learning_rate": 8.281733364770779e-05, "loss": 12.756124496459961, "step": 2114 }, { "epoch": 0.27426792994172616, "grad_norm": 0.6974573731422424, "learning_rate": 8.280192396723547e-05, "loss": 14.298799514770508, "step": 2115 }, { "epoch": 0.2743976074499728, "grad_norm": 0.6232139468193054, "learning_rate": 8.278650881508547e-05, "loss": 13.372669219970703, "step": 2116 }, { "epoch": 0.27452728495821954, "grad_norm": 0.5834039449691772, "learning_rate": 8.27710881938292e-05, "loss": 10.801093101501465, "step": 2117 }, { "epoch": 0.2746569624664662, "grad_norm": 0.716361403465271, "learning_rate": 8.275566210603897e-05, "loss": 13.005693435668945, "step": 2118 }, { "epoch": 0.27478663997471287, "grad_norm": 0.8052172064781189, "learning_rate": 8.274023055428799e-05, "loss": 9.52921199798584, "step": 2119 }, { "epoch": 0.2749163174829596, "grad_norm": 0.612117350101471, "learning_rate": 8.27247935411504e-05, "loss": 9.754392623901367, "step": 2120 }, { "epoch": 0.27504599499120624, "grad_norm": 0.7231287956237793, "learning_rate": 8.270935106920121e-05, "loss": 12.603260040283203, "step": 2121 }, { "epoch": 0.2751756724994529, "grad_norm": 0.44953230023384094, "learning_rate": 8.269390314101644e-05, "loss": 7.266402244567871, "step": 2122 }, { "epoch": 0.2753053500076996, "grad_norm": 0.4815821945667267, "learning_rate": 8.26784497591729e-05, "loss": 7.641336917877197, "step": 2123 }, { "epoch": 0.2754350275159463, "grad_norm": 0.6074222326278687, "learning_rate": 8.266299092624837e-05, "loss": 11.040993690490723, "step": 2124 }, { "epoch": 0.27556470502419295, "grad_norm": 0.6430336236953735, "learning_rate": 8.264752664482154e-05, "loss": 11.181418418884277, "step": 2125 }, { "epoch": 0.27569438253243966, "grad_norm": 1.0353482961654663, "learning_rate": 8.263205691747201e-05, "loss": 15.450273513793945, "step": 2126 }, { "epoch": 0.2758240600406863, "grad_norm": 0.6592825055122375, "learning_rate": 8.261658174678026e-05, "loss": 11.852901458740234, "step": 2127 }, { "epoch": 0.275953737548933, "grad_norm": 0.8711482286453247, "learning_rate": 8.260110113532772e-05, "loss": 14.451459884643555, "step": 2128 }, { "epoch": 0.27608341505717965, "grad_norm": 0.7109101414680481, "learning_rate": 8.258561508569671e-05, "loss": 16.53739356994629, "step": 2129 }, { "epoch": 0.27621309256542637, "grad_norm": 0.6114353537559509, "learning_rate": 8.257012360047043e-05, "loss": 9.441962242126465, "step": 2130 }, { "epoch": 0.276342770073673, "grad_norm": 0.5141479969024658, "learning_rate": 8.255462668223301e-05, "loss": 12.669251441955566, "step": 2131 }, { "epoch": 0.2764724475819197, "grad_norm": 0.8160276412963867, "learning_rate": 8.25391243335695e-05, "loss": 12.148252487182617, "step": 2132 }, { "epoch": 0.2766021250901664, "grad_norm": 0.6738105416297913, "learning_rate": 8.252361655706584e-05, "loss": 12.890656471252441, "step": 2133 }, { "epoch": 0.27673180259841307, "grad_norm": 0.6883546710014343, "learning_rate": 8.250810335530885e-05, "loss": 11.623973846435547, "step": 2134 }, { "epoch": 0.27686148010665973, "grad_norm": 0.5695280432701111, "learning_rate": 8.249258473088632e-05, "loss": 12.622435569763184, "step": 2135 }, { "epoch": 0.27699115761490645, "grad_norm": 0.5996898412704468, "learning_rate": 8.24770606863869e-05, "loss": 7.625167369842529, "step": 2136 }, { "epoch": 0.2771208351231531, "grad_norm": 1.090914011001587, "learning_rate": 8.246153122440013e-05, "loss": 16.126380920410156, "step": 2137 }, { "epoch": 0.27725051263139977, "grad_norm": 0.8309890031814575, "learning_rate": 8.244599634751648e-05, "loss": 13.935903549194336, "step": 2138 }, { "epoch": 0.2773801901396465, "grad_norm": 0.8768637180328369, "learning_rate": 8.243045605832733e-05, "loss": 14.071968078613281, "step": 2139 }, { "epoch": 0.27750986764789315, "grad_norm": 0.8119418621063232, "learning_rate": 8.241491035942494e-05, "loss": 14.571698188781738, "step": 2140 }, { "epoch": 0.2776395451561398, "grad_norm": 0.6414729356765747, "learning_rate": 8.239935925340248e-05, "loss": 7.702983856201172, "step": 2141 }, { "epoch": 0.27776922266438653, "grad_norm": 0.9426267147064209, "learning_rate": 8.238380274285404e-05, "loss": 11.368194580078125, "step": 2142 }, { "epoch": 0.2778989001726332, "grad_norm": 0.6882365345954895, "learning_rate": 8.236824083037457e-05, "loss": 14.438769340515137, "step": 2143 }, { "epoch": 0.27802857768087985, "grad_norm": 0.6821517944335938, "learning_rate": 8.235267351855999e-05, "loss": 14.083334922790527, "step": 2144 }, { "epoch": 0.27815825518912657, "grad_norm": 0.6484436392784119, "learning_rate": 8.233710081000702e-05, "loss": 10.288558959960938, "step": 2145 }, { "epoch": 0.27828793269737323, "grad_norm": 0.6597533822059631, "learning_rate": 8.232152270731339e-05, "loss": 13.143777847290039, "step": 2146 }, { "epoch": 0.2784176102056199, "grad_norm": 0.6778373718261719, "learning_rate": 8.230593921307765e-05, "loss": 10.845544815063477, "step": 2147 }, { "epoch": 0.27854728771386655, "grad_norm": 0.5256010293960571, "learning_rate": 8.229035032989929e-05, "loss": 6.866443634033203, "step": 2148 }, { "epoch": 0.27867696522211327, "grad_norm": 0.46212634444236755, "learning_rate": 8.227475606037868e-05, "loss": 7.586957931518555, "step": 2149 }, { "epoch": 0.27880664273035993, "grad_norm": 0.6996514201164246, "learning_rate": 8.22591564071171e-05, "loss": 13.484865188598633, "step": 2150 }, { "epoch": 0.2789363202386066, "grad_norm": 0.4512571394443512, "learning_rate": 8.224355137271671e-05, "loss": 7.932498931884766, "step": 2151 }, { "epoch": 0.2790659977468533, "grad_norm": 0.9067168235778809, "learning_rate": 8.222794095978059e-05, "loss": 13.028471946716309, "step": 2152 }, { "epoch": 0.2791956752551, "grad_norm": 0.6972395777702332, "learning_rate": 8.22123251709127e-05, "loss": 10.676907539367676, "step": 2153 }, { "epoch": 0.27932535276334663, "grad_norm": 0.8940801620483398, "learning_rate": 8.219670400871792e-05, "loss": 13.039308547973633, "step": 2154 }, { "epoch": 0.27945503027159335, "grad_norm": 0.6217517852783203, "learning_rate": 8.218107747580201e-05, "loss": 10.662459373474121, "step": 2155 }, { "epoch": 0.27958470777984, "grad_norm": 0.57402503490448, "learning_rate": 8.216544557477162e-05, "loss": 8.909627914428711, "step": 2156 }, { "epoch": 0.2797143852880867, "grad_norm": 0.6355392932891846, "learning_rate": 8.214980830823429e-05, "loss": 9.382281303405762, "step": 2157 }, { "epoch": 0.2798440627963334, "grad_norm": 0.8423065543174744, "learning_rate": 8.213416567879848e-05, "loss": 12.698709487915039, "step": 2158 }, { "epoch": 0.27997374030458005, "grad_norm": 0.6319965124130249, "learning_rate": 8.211851768907352e-05, "loss": 7.0760111808776855, "step": 2159 }, { "epoch": 0.2801034178128267, "grad_norm": 0.6278969645500183, "learning_rate": 8.210286434166967e-05, "loss": 9.698594093322754, "step": 2160 }, { "epoch": 0.28023309532107343, "grad_norm": 0.7033721804618835, "learning_rate": 8.208720563919803e-05, "loss": 10.123296737670898, "step": 2161 }, { "epoch": 0.2803627728293201, "grad_norm": 0.6207185983657837, "learning_rate": 8.207154158427064e-05, "loss": 6.600024700164795, "step": 2162 }, { "epoch": 0.28049245033756676, "grad_norm": 0.8101865649223328, "learning_rate": 8.205587217950043e-05, "loss": 13.134916305541992, "step": 2163 }, { "epoch": 0.2806221278458134, "grad_norm": 0.5856488943099976, "learning_rate": 8.204019742750118e-05, "loss": 7.277475357055664, "step": 2164 }, { "epoch": 0.28075180535406014, "grad_norm": 0.6303049921989441, "learning_rate": 8.20245173308876e-05, "loss": 10.408206939697266, "step": 2165 }, { "epoch": 0.2808814828623068, "grad_norm": 0.613039493560791, "learning_rate": 8.200883189227526e-05, "loss": 13.600939750671387, "step": 2166 }, { "epoch": 0.28101116037055346, "grad_norm": 0.6407514214515686, "learning_rate": 8.199314111428069e-05, "loss": 9.515862464904785, "step": 2167 }, { "epoch": 0.2811408378788002, "grad_norm": 0.7877413630485535, "learning_rate": 8.197744499952123e-05, "loss": 12.692706108093262, "step": 2168 }, { "epoch": 0.28127051538704684, "grad_norm": 0.648443877696991, "learning_rate": 8.196174355061517e-05, "loss": 9.578510284423828, "step": 2169 }, { "epoch": 0.2814001928952935, "grad_norm": 0.6163367629051208, "learning_rate": 8.194603677018162e-05, "loss": 10.894989967346191, "step": 2170 }, { "epoch": 0.2815298704035402, "grad_norm": 0.7965658903121948, "learning_rate": 8.193032466084066e-05, "loss": 13.22610092163086, "step": 2171 }, { "epoch": 0.2816595479117869, "grad_norm": 0.6641227006912231, "learning_rate": 8.19146072252132e-05, "loss": 9.81505298614502, "step": 2172 }, { "epoch": 0.28178922542003354, "grad_norm": 0.734188437461853, "learning_rate": 8.189888446592108e-05, "loss": 12.436387062072754, "step": 2173 }, { "epoch": 0.28191890292828026, "grad_norm": 1.0703577995300293, "learning_rate": 8.188315638558702e-05, "loss": 13.586700439453125, "step": 2174 }, { "epoch": 0.2820485804365269, "grad_norm": 0.4850704073905945, "learning_rate": 8.186742298683456e-05, "loss": 13.643156051635742, "step": 2175 }, { "epoch": 0.2821782579447736, "grad_norm": 0.6838684678077698, "learning_rate": 8.185168427228822e-05, "loss": 12.646702766418457, "step": 2176 }, { "epoch": 0.2823079354530203, "grad_norm": 0.5259536504745483, "learning_rate": 8.183594024457337e-05, "loss": 9.59619426727295, "step": 2177 }, { "epoch": 0.28243761296126696, "grad_norm": 0.6688492298126221, "learning_rate": 8.182019090631625e-05, "loss": 13.071324348449707, "step": 2178 }, { "epoch": 0.2825672904695136, "grad_norm": 0.5948039889335632, "learning_rate": 8.180443626014402e-05, "loss": 11.076905250549316, "step": 2179 }, { "epoch": 0.2826969679777603, "grad_norm": 0.5637724995613098, "learning_rate": 8.178867630868472e-05, "loss": 7.728979110717773, "step": 2180 }, { "epoch": 0.282826645486007, "grad_norm": 0.7004666328430176, "learning_rate": 8.177291105456723e-05, "loss": 9.61047649383545, "step": 2181 }, { "epoch": 0.28295632299425366, "grad_norm": 0.6834388375282288, "learning_rate": 8.175714050042135e-05, "loss": 13.142511367797852, "step": 2182 }, { "epoch": 0.2830860005025003, "grad_norm": 0.658105731010437, "learning_rate": 8.174136464887777e-05, "loss": 8.00838851928711, "step": 2183 }, { "epoch": 0.28321567801074704, "grad_norm": 0.783419132232666, "learning_rate": 8.172558350256806e-05, "loss": 12.088726043701172, "step": 2184 }, { "epoch": 0.2833453555189937, "grad_norm": 0.7556414008140564, "learning_rate": 8.170979706412465e-05, "loss": 10.922674179077148, "step": 2185 }, { "epoch": 0.28347503302724036, "grad_norm": 0.6196784377098083, "learning_rate": 8.16940053361809e-05, "loss": 9.699724197387695, "step": 2186 }, { "epoch": 0.2836047105354871, "grad_norm": 0.9107887148857117, "learning_rate": 8.167820832137099e-05, "loss": 13.51695728302002, "step": 2187 }, { "epoch": 0.28373438804373374, "grad_norm": 0.5644358992576599, "learning_rate": 8.166240602233003e-05, "loss": 12.26561450958252, "step": 2188 }, { "epoch": 0.2838640655519804, "grad_norm": 0.5871463418006897, "learning_rate": 8.164659844169402e-05, "loss": 10.159242630004883, "step": 2189 }, { "epoch": 0.2839937430602271, "grad_norm": 0.628125786781311, "learning_rate": 8.163078558209977e-05, "loss": 12.37322998046875, "step": 2190 }, { "epoch": 0.2841234205684738, "grad_norm": 0.6737461090087891, "learning_rate": 8.161496744618505e-05, "loss": 11.134757041931152, "step": 2191 }, { "epoch": 0.28425309807672045, "grad_norm": 0.8320892453193665, "learning_rate": 8.159914403658846e-05, "loss": 11.90713882446289, "step": 2192 }, { "epoch": 0.28438277558496716, "grad_norm": 0.5641530156135559, "learning_rate": 8.158331535594949e-05, "loss": 10.621912002563477, "step": 2193 }, { "epoch": 0.2845124530932138, "grad_norm": 0.5582205057144165, "learning_rate": 8.156748140690856e-05, "loss": 8.03309154510498, "step": 2194 }, { "epoch": 0.2846421306014605, "grad_norm": 0.6949870586395264, "learning_rate": 8.15516421921069e-05, "loss": 9.94751262664795, "step": 2195 }, { "epoch": 0.28477180810970715, "grad_norm": 0.7809083461761475, "learning_rate": 8.153579771418663e-05, "loss": 14.138643264770508, "step": 2196 }, { "epoch": 0.28490148561795386, "grad_norm": 0.831329345703125, "learning_rate": 8.151994797579076e-05, "loss": 10.959732055664062, "step": 2197 }, { "epoch": 0.2850311631262005, "grad_norm": 0.746967077255249, "learning_rate": 8.15040929795632e-05, "loss": 14.471299171447754, "step": 2198 }, { "epoch": 0.2851608406344472, "grad_norm": 0.8694673180580139, "learning_rate": 8.148823272814873e-05, "loss": 11.608105659484863, "step": 2199 }, { "epoch": 0.2852905181426939, "grad_norm": 0.6722474694252014, "learning_rate": 8.147236722419296e-05, "loss": 12.215981483459473, "step": 2200 }, { "epoch": 0.28542019565094057, "grad_norm": 0.7845466136932373, "learning_rate": 8.145649647034242e-05, "loss": 12.16789722442627, "step": 2201 }, { "epoch": 0.28554987315918723, "grad_norm": 0.6890038251876831, "learning_rate": 8.14406204692445e-05, "loss": 8.925004959106445, "step": 2202 }, { "epoch": 0.28567955066743395, "grad_norm": 0.6628761887550354, "learning_rate": 8.14247392235475e-05, "loss": 13.974894523620605, "step": 2203 }, { "epoch": 0.2858092281756806, "grad_norm": 0.66399747133255, "learning_rate": 8.14088527359005e-05, "loss": 12.350266456604004, "step": 2204 }, { "epoch": 0.28593890568392727, "grad_norm": 0.46381524205207825, "learning_rate": 8.13929610089536e-05, "loss": 8.548779487609863, "step": 2205 }, { "epoch": 0.286068583192174, "grad_norm": 0.7594000101089478, "learning_rate": 8.137706404535766e-05, "loss": 13.01740550994873, "step": 2206 }, { "epoch": 0.28619826070042065, "grad_norm": 0.5919092893600464, "learning_rate": 8.136116184776442e-05, "loss": 8.479385375976562, "step": 2207 }, { "epoch": 0.2863279382086673, "grad_norm": 0.6434085369110107, "learning_rate": 8.134525441882654e-05, "loss": 16.128419876098633, "step": 2208 }, { "epoch": 0.286457615716914, "grad_norm": 0.6688454747200012, "learning_rate": 8.132934176119756e-05, "loss": 13.810240745544434, "step": 2209 }, { "epoch": 0.2865872932251607, "grad_norm": 0.6283459067344666, "learning_rate": 8.131342387753183e-05, "loss": 13.0489501953125, "step": 2210 }, { "epoch": 0.28671697073340735, "grad_norm": 0.7628545761108398, "learning_rate": 8.129750077048461e-05, "loss": 13.168370246887207, "step": 2211 }, { "epoch": 0.286846648241654, "grad_norm": 0.7366133332252502, "learning_rate": 8.128157244271204e-05, "loss": 13.305808067321777, "step": 2212 }, { "epoch": 0.28697632574990073, "grad_norm": 0.5232159495353699, "learning_rate": 8.12656388968711e-05, "loss": 8.451262474060059, "step": 2213 }, { "epoch": 0.2871060032581474, "grad_norm": 0.7445816993713379, "learning_rate": 8.12497001356197e-05, "loss": 10.93831729888916, "step": 2214 }, { "epoch": 0.28723568076639405, "grad_norm": 0.7058645486831665, "learning_rate": 8.123375616161653e-05, "loss": 10.316866874694824, "step": 2215 }, { "epoch": 0.28736535827464077, "grad_norm": 0.800112247467041, "learning_rate": 8.121780697752124e-05, "loss": 10.257439613342285, "step": 2216 }, { "epoch": 0.28749503578288743, "grad_norm": 0.6787742376327515, "learning_rate": 8.120185258599427e-05, "loss": 11.321284294128418, "step": 2217 }, { "epoch": 0.2876247132911341, "grad_norm": 0.6358707547187805, "learning_rate": 8.118589298969702e-05, "loss": 11.25201416015625, "step": 2218 }, { "epoch": 0.2877543907993808, "grad_norm": 0.8476402759552002, "learning_rate": 8.116992819129163e-05, "loss": 11.154780387878418, "step": 2219 }, { "epoch": 0.28788406830762747, "grad_norm": 0.7203444242477417, "learning_rate": 8.115395819344123e-05, "loss": 13.610236167907715, "step": 2220 }, { "epoch": 0.28801374581587413, "grad_norm": 0.8136402368545532, "learning_rate": 8.113798299880978e-05, "loss": 9.398224830627441, "step": 2221 }, { "epoch": 0.28814342332412085, "grad_norm": 0.5490216612815857, "learning_rate": 8.112200261006207e-05, "loss": 9.718914031982422, "step": 2222 }, { "epoch": 0.2882731008323675, "grad_norm": 0.7658852934837341, "learning_rate": 8.110601702986379e-05, "loss": 10.528417587280273, "step": 2223 }, { "epoch": 0.2884027783406142, "grad_norm": 0.7843996286392212, "learning_rate": 8.109002626088149e-05, "loss": 10.351995468139648, "step": 2224 }, { "epoch": 0.2885324558488609, "grad_norm": 0.654265284538269, "learning_rate": 8.10740303057826e-05, "loss": 9.06071949005127, "step": 2225 }, { "epoch": 0.28866213335710755, "grad_norm": 0.8149685263633728, "learning_rate": 8.105802916723538e-05, "loss": 16.1425838470459, "step": 2226 }, { "epoch": 0.2887918108653542, "grad_norm": 0.5844225287437439, "learning_rate": 8.104202284790898e-05, "loss": 8.332655906677246, "step": 2227 }, { "epoch": 0.2889214883736009, "grad_norm": 0.5673436522483826, "learning_rate": 8.102601135047342e-05, "loss": 9.099457740783691, "step": 2228 }, { "epoch": 0.2890511658818476, "grad_norm": 0.7787759304046631, "learning_rate": 8.100999467759956e-05, "loss": 15.05141830444336, "step": 2229 }, { "epoch": 0.28918084339009426, "grad_norm": 0.5015734434127808, "learning_rate": 8.099397283195913e-05, "loss": 8.752985000610352, "step": 2230 }, { "epoch": 0.2893105208983409, "grad_norm": 0.9686205387115479, "learning_rate": 8.097794581622473e-05, "loss": 13.650016784667969, "step": 2231 }, { "epoch": 0.28944019840658763, "grad_norm": 0.7429532408714294, "learning_rate": 8.096191363306985e-05, "loss": 10.268174171447754, "step": 2232 }, { "epoch": 0.2895698759148343, "grad_norm": 0.8198251724243164, "learning_rate": 8.094587628516877e-05, "loss": 15.467318534851074, "step": 2233 }, { "epoch": 0.28969955342308096, "grad_norm": 0.6545774936676025, "learning_rate": 8.09298337751967e-05, "loss": 10.852417945861816, "step": 2234 }, { "epoch": 0.2898292309313277, "grad_norm": 0.5892894864082336, "learning_rate": 8.091378610582967e-05, "loss": 13.787775993347168, "step": 2235 }, { "epoch": 0.28995890843957434, "grad_norm": 0.8339118361473083, "learning_rate": 8.089773327974462e-05, "loss": 14.837820053100586, "step": 2236 }, { "epoch": 0.290088585947821, "grad_norm": 0.6600664258003235, "learning_rate": 8.088167529961927e-05, "loss": 12.87485122680664, "step": 2237 }, { "epoch": 0.2902182634560677, "grad_norm": 0.8776029944419861, "learning_rate": 8.086561216813229e-05, "loss": 10.239957809448242, "step": 2238 }, { "epoch": 0.2903479409643144, "grad_norm": 0.5974510908126831, "learning_rate": 8.084954388796313e-05, "loss": 11.641497611999512, "step": 2239 }, { "epoch": 0.29047761847256104, "grad_norm": 0.7453067302703857, "learning_rate": 8.083347046179215e-05, "loss": 10.4866361618042, "step": 2240 }, { "epoch": 0.29060729598080776, "grad_norm": 0.6192340850830078, "learning_rate": 8.081739189230055e-05, "loss": 9.87906265258789, "step": 2241 }, { "epoch": 0.2907369734890544, "grad_norm": 0.5970487594604492, "learning_rate": 8.08013081821704e-05, "loss": 9.876864433288574, "step": 2242 }, { "epoch": 0.2908666509973011, "grad_norm": 0.6882340908050537, "learning_rate": 8.07852193340846e-05, "loss": 10.191789627075195, "step": 2243 }, { "epoch": 0.29099632850554774, "grad_norm": 0.7787739038467407, "learning_rate": 8.076912535072694e-05, "loss": 12.443458557128906, "step": 2244 }, { "epoch": 0.29112600601379446, "grad_norm": 0.5560675859451294, "learning_rate": 8.075302623478205e-05, "loss": 8.0094633102417, "step": 2245 }, { "epoch": 0.2912556835220411, "grad_norm": 0.6159926652908325, "learning_rate": 8.073692198893541e-05, "loss": 10.02580738067627, "step": 2246 }, { "epoch": 0.2913853610302878, "grad_norm": 0.8974068760871887, "learning_rate": 8.072081261587338e-05, "loss": 11.980730056762695, "step": 2247 }, { "epoch": 0.2915150385385345, "grad_norm": 0.7061436176300049, "learning_rate": 8.070469811828314e-05, "loss": 13.107787132263184, "step": 2248 }, { "epoch": 0.29164471604678116, "grad_norm": 0.7783455848693848, "learning_rate": 8.068857849885277e-05, "loss": 15.643610000610352, "step": 2249 }, { "epoch": 0.2917743935550278, "grad_norm": 0.6090824007987976, "learning_rate": 8.067245376027113e-05, "loss": 11.247447967529297, "step": 2250 }, { "epoch": 0.29190407106327454, "grad_norm": 0.6498343348503113, "learning_rate": 8.065632390522803e-05, "loss": 10.931214332580566, "step": 2251 }, { "epoch": 0.2920337485715212, "grad_norm": 0.6601024866104126, "learning_rate": 8.064018893641405e-05, "loss": 10.087970733642578, "step": 2252 }, { "epoch": 0.29216342607976786, "grad_norm": 0.9280720353126526, "learning_rate": 8.062404885652071e-05, "loss": 11.673754692077637, "step": 2253 }, { "epoch": 0.2922931035880146, "grad_norm": 0.6817254424095154, "learning_rate": 8.060790366824028e-05, "loss": 12.432238578796387, "step": 2254 }, { "epoch": 0.29242278109626124, "grad_norm": 0.6728682518005371, "learning_rate": 8.059175337426594e-05, "loss": 15.137382507324219, "step": 2255 }, { "epoch": 0.2925524586045079, "grad_norm": 0.5435169339179993, "learning_rate": 8.057559797729173e-05, "loss": 8.766786575317383, "step": 2256 }, { "epoch": 0.2926821361127546, "grad_norm": 0.8207765817642212, "learning_rate": 8.055943748001253e-05, "loss": 13.009720802307129, "step": 2257 }, { "epoch": 0.2928118136210013, "grad_norm": 0.7492731213569641, "learning_rate": 8.054327188512405e-05, "loss": 10.459925651550293, "step": 2258 }, { "epoch": 0.29294149112924794, "grad_norm": 0.4589632451534271, "learning_rate": 8.05271011953229e-05, "loss": 7.805881500244141, "step": 2259 }, { "epoch": 0.2930711686374946, "grad_norm": 0.8905314803123474, "learning_rate": 8.051092541330645e-05, "loss": 9.491729736328125, "step": 2260 }, { "epoch": 0.2932008461457413, "grad_norm": 0.6963414549827576, "learning_rate": 8.049474454177303e-05, "loss": 11.812762260437012, "step": 2261 }, { "epoch": 0.293330523653988, "grad_norm": 0.941692054271698, "learning_rate": 8.047855858342174e-05, "loss": 13.330869674682617, "step": 2262 }, { "epoch": 0.29346020116223465, "grad_norm": 0.7269774079322815, "learning_rate": 8.046236754095256e-05, "loss": 10.746745109558105, "step": 2263 }, { "epoch": 0.29358987867048136, "grad_norm": 0.6802989840507507, "learning_rate": 8.044617141706631e-05, "loss": 9.715348243713379, "step": 2264 }, { "epoch": 0.293719556178728, "grad_norm": 0.9130289554595947, "learning_rate": 8.042997021446468e-05, "loss": 16.858633041381836, "step": 2265 }, { "epoch": 0.2938492336869747, "grad_norm": 0.57892906665802, "learning_rate": 8.041376393585016e-05, "loss": 7.351332187652588, "step": 2266 }, { "epoch": 0.2939789111952214, "grad_norm": 0.6080766916275024, "learning_rate": 8.039755258392612e-05, "loss": 9.615988731384277, "step": 2267 }, { "epoch": 0.29410858870346807, "grad_norm": 0.709852933883667, "learning_rate": 8.038133616139676e-05, "loss": 13.117509841918945, "step": 2268 }, { "epoch": 0.2942382662117147, "grad_norm": 0.7879102826118469, "learning_rate": 8.036511467096717e-05, "loss": 10.915599822998047, "step": 2269 }, { "epoch": 0.29436794371996144, "grad_norm": 0.818655252456665, "learning_rate": 8.034888811534323e-05, "loss": 13.899003982543945, "step": 2270 }, { "epoch": 0.2944976212282081, "grad_norm": 0.8003055453300476, "learning_rate": 8.033265649723167e-05, "loss": 13.56237506866455, "step": 2271 }, { "epoch": 0.29462729873645477, "grad_norm": 0.6719357371330261, "learning_rate": 8.031641981934011e-05, "loss": 10.163810729980469, "step": 2272 }, { "epoch": 0.2947569762447015, "grad_norm": 0.7103961706161499, "learning_rate": 8.030017808437696e-05, "loss": 10.550228118896484, "step": 2273 }, { "epoch": 0.29488665375294815, "grad_norm": 0.5129273533821106, "learning_rate": 8.028393129505152e-05, "loss": 8.987150192260742, "step": 2274 }, { "epoch": 0.2950163312611948, "grad_norm": 0.5931435823440552, "learning_rate": 8.026767945407389e-05, "loss": 10.41243839263916, "step": 2275 }, { "epoch": 0.29514600876944147, "grad_norm": 0.7799476385116577, "learning_rate": 8.025142256415506e-05, "loss": 12.4266996383667, "step": 2276 }, { "epoch": 0.2952756862776882, "grad_norm": 0.8348907828330994, "learning_rate": 8.02351606280068e-05, "loss": 11.011489868164062, "step": 2277 }, { "epoch": 0.29540536378593485, "grad_norm": 0.6570531129837036, "learning_rate": 8.02188936483418e-05, "loss": 6.8659539222717285, "step": 2278 }, { "epoch": 0.2955350412941815, "grad_norm": 0.9027970433235168, "learning_rate": 8.020262162787353e-05, "loss": 10.151517868041992, "step": 2279 }, { "epoch": 0.29566471880242823, "grad_norm": 0.5774563550949097, "learning_rate": 8.018634456931631e-05, "loss": 10.129830360412598, "step": 2280 }, { "epoch": 0.2957943963106749, "grad_norm": 0.7403742074966431, "learning_rate": 8.017006247538533e-05, "loss": 11.18375015258789, "step": 2281 }, { "epoch": 0.29592407381892155, "grad_norm": 0.5919867753982544, "learning_rate": 8.015377534879655e-05, "loss": 9.942337989807129, "step": 2282 }, { "epoch": 0.29605375132716827, "grad_norm": 0.9055217504501343, "learning_rate": 8.01374831922669e-05, "loss": 14.283909797668457, "step": 2283 }, { "epoch": 0.29618342883541493, "grad_norm": 0.6525554060935974, "learning_rate": 8.012118600851403e-05, "loss": 12.45134449005127, "step": 2284 }, { "epoch": 0.2963131063436616, "grad_norm": 0.8691684603691101, "learning_rate": 8.010488380025645e-05, "loss": 12.480253219604492, "step": 2285 }, { "epoch": 0.2964427838519083, "grad_norm": 0.6498176455497742, "learning_rate": 8.008857657021355e-05, "loss": 8.47542953491211, "step": 2286 }, { "epoch": 0.29657246136015497, "grad_norm": 0.6903596520423889, "learning_rate": 8.007226432110553e-05, "loss": 11.742609024047852, "step": 2287 }, { "epoch": 0.29670213886840163, "grad_norm": 0.8407562375068665, "learning_rate": 8.005594705565343e-05, "loss": 13.33034896850586, "step": 2288 }, { "epoch": 0.29683181637664835, "grad_norm": 0.9200408458709717, "learning_rate": 8.00396247765791e-05, "loss": 12.859521865844727, "step": 2289 }, { "epoch": 0.296961493884895, "grad_norm": 0.6678513288497925, "learning_rate": 8.00232974866053e-05, "loss": 12.323067665100098, "step": 2290 }, { "epoch": 0.2970911713931417, "grad_norm": 0.6490641832351685, "learning_rate": 8.000696518845555e-05, "loss": 11.916465759277344, "step": 2291 }, { "epoch": 0.29722084890138833, "grad_norm": 0.9965459108352661, "learning_rate": 7.999062788485423e-05, "loss": 14.468018531799316, "step": 2292 }, { "epoch": 0.29735052640963505, "grad_norm": 0.6990786194801331, "learning_rate": 7.99742855785266e-05, "loss": 12.1216402053833, "step": 2293 }, { "epoch": 0.2974802039178817, "grad_norm": 0.8446210026741028, "learning_rate": 7.995793827219866e-05, "loss": 17.69558334350586, "step": 2294 }, { "epoch": 0.2976098814261284, "grad_norm": 0.8298749327659607, "learning_rate": 7.994158596859733e-05, "loss": 14.4332914352417, "step": 2295 }, { "epoch": 0.2977395589343751, "grad_norm": 0.7818044424057007, "learning_rate": 7.992522867045034e-05, "loss": 12.385486602783203, "step": 2296 }, { "epoch": 0.29786923644262175, "grad_norm": 0.8266396522521973, "learning_rate": 7.990886638048623e-05, "loss": 12.781532287597656, "step": 2297 }, { "epoch": 0.2979989139508684, "grad_norm": 0.7083802223205566, "learning_rate": 7.989249910143439e-05, "loss": 9.550651550292969, "step": 2298 }, { "epoch": 0.29812859145911513, "grad_norm": 0.9961304664611816, "learning_rate": 7.987612683602503e-05, "loss": 15.56836986541748, "step": 2299 }, { "epoch": 0.2982582689673618, "grad_norm": 0.6494882702827454, "learning_rate": 7.985974958698924e-05, "loss": 8.844062805175781, "step": 2300 }, { "epoch": 0.29838794647560846, "grad_norm": 0.9316782355308533, "learning_rate": 7.984336735705884e-05, "loss": 14.232274055480957, "step": 2301 }, { "epoch": 0.2985176239838552, "grad_norm": 1.0273010730743408, "learning_rate": 7.98269801489666e-05, "loss": 13.613138198852539, "step": 2302 }, { "epoch": 0.29864730149210184, "grad_norm": 0.6080825924873352, "learning_rate": 7.981058796544605e-05, "loss": 6.724747180938721, "step": 2303 }, { "epoch": 0.2987769790003485, "grad_norm": 0.600173830986023, "learning_rate": 7.979419080923155e-05, "loss": 9.499631881713867, "step": 2304 }, { "epoch": 0.2989066565085952, "grad_norm": 0.8187645077705383, "learning_rate": 7.977778868305832e-05, "loss": 13.12850570678711, "step": 2305 }, { "epoch": 0.2990363340168419, "grad_norm": 0.6506132483482361, "learning_rate": 7.976138158966237e-05, "loss": 11.848321914672852, "step": 2306 }, { "epoch": 0.29916601152508854, "grad_norm": 0.7387018203735352, "learning_rate": 7.97449695317806e-05, "loss": 14.357027053833008, "step": 2307 }, { "epoch": 0.2992956890333352, "grad_norm": 0.45946237444877625, "learning_rate": 7.972855251215066e-05, "loss": 8.19558048248291, "step": 2308 }, { "epoch": 0.2994253665415819, "grad_norm": 0.7866737246513367, "learning_rate": 7.97121305335111e-05, "loss": 12.614139556884766, "step": 2309 }, { "epoch": 0.2995550440498286, "grad_norm": 1.0785000324249268, "learning_rate": 7.969570359860124e-05, "loss": 11.097677230834961, "step": 2310 }, { "epoch": 0.29968472155807524, "grad_norm": 0.6368347406387329, "learning_rate": 7.967927171016127e-05, "loss": 12.257821083068848, "step": 2311 }, { "epoch": 0.29981439906632196, "grad_norm": 0.5769362449645996, "learning_rate": 7.966283487093218e-05, "loss": 9.468317985534668, "step": 2312 }, { "epoch": 0.2999440765745686, "grad_norm": 0.5452999472618103, "learning_rate": 7.964639308365578e-05, "loss": 7.9016218185424805, "step": 2313 }, { "epoch": 0.3000737540828153, "grad_norm": 0.7499622702598572, "learning_rate": 7.962994635107474e-05, "loss": 12.615060806274414, "step": 2314 }, { "epoch": 0.300203431591062, "grad_norm": 0.6577298641204834, "learning_rate": 7.961349467593253e-05, "loss": 9.4760160446167, "step": 2315 }, { "epoch": 0.30033310909930866, "grad_norm": 0.4665195047855377, "learning_rate": 7.959703806097343e-05, "loss": 10.250565528869629, "step": 2316 }, { "epoch": 0.3004627866075553, "grad_norm": 0.7688223719596863, "learning_rate": 7.958057650894259e-05, "loss": 10.172545433044434, "step": 2317 }, { "epoch": 0.30059246411580204, "grad_norm": 0.6713089346885681, "learning_rate": 7.956411002258593e-05, "loss": 14.158344268798828, "step": 2318 }, { "epoch": 0.3007221416240487, "grad_norm": 0.7877426147460938, "learning_rate": 7.954763860465022e-05, "loss": 13.291820526123047, "step": 2319 }, { "epoch": 0.30085181913229536, "grad_norm": 0.6134392023086548, "learning_rate": 7.953116225788308e-05, "loss": 7.879059791564941, "step": 2320 }, { "epoch": 0.3009814966405421, "grad_norm": 0.9153854846954346, "learning_rate": 7.95146809850329e-05, "loss": 12.598148345947266, "step": 2321 }, { "epoch": 0.30111117414878874, "grad_norm": 0.8542314171791077, "learning_rate": 7.949819478884893e-05, "loss": 14.536649703979492, "step": 2322 }, { "epoch": 0.3012408516570354, "grad_norm": 0.608068585395813, "learning_rate": 7.948170367208121e-05, "loss": 9.63839340209961, "step": 2323 }, { "epoch": 0.30137052916528206, "grad_norm": 0.64136803150177, "learning_rate": 7.946520763748063e-05, "loss": 11.561647415161133, "step": 2324 }, { "epoch": 0.3015002066735288, "grad_norm": 0.5039787888526917, "learning_rate": 7.944870668779887e-05, "loss": 6.758794784545898, "step": 2325 }, { "epoch": 0.30162988418177544, "grad_norm": 0.7074934244155884, "learning_rate": 7.943220082578847e-05, "loss": 11.204939842224121, "step": 2326 }, { "epoch": 0.3017595616900221, "grad_norm": 0.4386429190635681, "learning_rate": 7.941569005420277e-05, "loss": 6.531087398529053, "step": 2327 }, { "epoch": 0.3018892391982688, "grad_norm": 0.6509313583374023, "learning_rate": 7.939917437579591e-05, "loss": 11.160441398620605, "step": 2328 }, { "epoch": 0.3020189167065155, "grad_norm": 0.623520016670227, "learning_rate": 7.938265379332286e-05, "loss": 9.552468299865723, "step": 2329 }, { "epoch": 0.30214859421476215, "grad_norm": 0.7379948496818542, "learning_rate": 7.936612830953942e-05, "loss": 12.348896026611328, "step": 2330 }, { "epoch": 0.30227827172300886, "grad_norm": 0.6451578736305237, "learning_rate": 7.93495979272022e-05, "loss": 10.629106521606445, "step": 2331 }, { "epoch": 0.3024079492312555, "grad_norm": 0.764434278011322, "learning_rate": 7.933306264906864e-05, "loss": 13.691828727722168, "step": 2332 }, { "epoch": 0.3025376267395022, "grad_norm": 0.8456186652183533, "learning_rate": 7.931652247789698e-05, "loss": 14.508393287658691, "step": 2333 }, { "epoch": 0.3026673042477489, "grad_norm": 0.930772066116333, "learning_rate": 7.929997741644626e-05, "loss": 12.871192932128906, "step": 2334 }, { "epoch": 0.30279698175599556, "grad_norm": 0.8266646265983582, "learning_rate": 7.928342746747637e-05, "loss": 9.83505916595459, "step": 2335 }, { "epoch": 0.3029266592642422, "grad_norm": 0.7116612792015076, "learning_rate": 7.9266872633748e-05, "loss": 9.499577522277832, "step": 2336 }, { "epoch": 0.30305633677248894, "grad_norm": 0.691737949848175, "learning_rate": 7.925031291802266e-05, "loss": 12.452277183532715, "step": 2337 }, { "epoch": 0.3031860142807356, "grad_norm": 0.46770358085632324, "learning_rate": 7.923374832306267e-05, "loss": 4.904686450958252, "step": 2338 }, { "epoch": 0.30331569178898227, "grad_norm": 0.566026508808136, "learning_rate": 7.921717885163116e-05, "loss": 11.306918144226074, "step": 2339 }, { "epoch": 0.30344536929722893, "grad_norm": 0.9036741256713867, "learning_rate": 7.920060450649208e-05, "loss": 11.309521675109863, "step": 2340 }, { "epoch": 0.30357504680547565, "grad_norm": 0.7196448445320129, "learning_rate": 7.918402529041021e-05, "loss": 12.91974925994873, "step": 2341 }, { "epoch": 0.3037047243137223, "grad_norm": 0.8266157507896423, "learning_rate": 7.916744120615108e-05, "loss": 13.942825317382812, "step": 2342 }, { "epoch": 0.30383440182196897, "grad_norm": 0.6960545182228088, "learning_rate": 7.915085225648109e-05, "loss": 10.685009956359863, "step": 2343 }, { "epoch": 0.3039640793302157, "grad_norm": 0.6185094714164734, "learning_rate": 7.913425844416747e-05, "loss": 11.833401679992676, "step": 2344 }, { "epoch": 0.30409375683846235, "grad_norm": 0.609112560749054, "learning_rate": 7.91176597719782e-05, "loss": 12.503843307495117, "step": 2345 }, { "epoch": 0.304223434346709, "grad_norm": 0.5816333293914795, "learning_rate": 7.910105624268209e-05, "loss": 7.958792209625244, "step": 2346 }, { "epoch": 0.3043531118549557, "grad_norm": 0.6648733019828796, "learning_rate": 7.90844478590488e-05, "loss": 12.100004196166992, "step": 2347 }, { "epoch": 0.3044827893632024, "grad_norm": 0.7110784649848938, "learning_rate": 7.906783462384875e-05, "loss": 11.390891075134277, "step": 2348 }, { "epoch": 0.30461246687144905, "grad_norm": 0.6911570429801941, "learning_rate": 7.905121653985317e-05, "loss": 12.361507415771484, "step": 2349 }, { "epoch": 0.30474214437969577, "grad_norm": 0.8440008759498596, "learning_rate": 7.903459360983417e-05, "loss": 14.719866752624512, "step": 2350 }, { "epoch": 0.30487182188794243, "grad_norm": 0.6258646845817566, "learning_rate": 7.901796583656454e-05, "loss": 11.74431037902832, "step": 2351 }, { "epoch": 0.3050014993961891, "grad_norm": 0.7022781372070312, "learning_rate": 7.900133322281802e-05, "loss": 8.956109046936035, "step": 2352 }, { "epoch": 0.3051311769044358, "grad_norm": 0.566819429397583, "learning_rate": 7.898469577136907e-05, "loss": 8.185734748840332, "step": 2353 }, { "epoch": 0.30526085441268247, "grad_norm": 0.7127084732055664, "learning_rate": 7.896805348499296e-05, "loss": 9.321802139282227, "step": 2354 }, { "epoch": 0.30539053192092913, "grad_norm": 0.641377866268158, "learning_rate": 7.89514063664658e-05, "loss": 12.077469825744629, "step": 2355 }, { "epoch": 0.3055202094291758, "grad_norm": 0.7452883720397949, "learning_rate": 7.893475441856448e-05, "loss": 15.026433944702148, "step": 2356 }, { "epoch": 0.3056498869374225, "grad_norm": 0.7616062760353088, "learning_rate": 7.891809764406672e-05, "loss": 14.312169075012207, "step": 2357 }, { "epoch": 0.30577956444566917, "grad_norm": 0.8221332430839539, "learning_rate": 7.890143604575102e-05, "loss": 13.395547866821289, "step": 2358 }, { "epoch": 0.30590924195391583, "grad_norm": 0.7932479977607727, "learning_rate": 7.888476962639671e-05, "loss": 11.780078887939453, "step": 2359 }, { "epoch": 0.30603891946216255, "grad_norm": 0.714265763759613, "learning_rate": 7.886809838878389e-05, "loss": 10.01285171508789, "step": 2360 }, { "epoch": 0.3061685969704092, "grad_norm": 0.5171470046043396, "learning_rate": 7.88514223356935e-05, "loss": 10.174489974975586, "step": 2361 }, { "epoch": 0.3062982744786559, "grad_norm": 0.5623322129249573, "learning_rate": 7.883474146990724e-05, "loss": 10.809536933898926, "step": 2362 }, { "epoch": 0.3064279519869026, "grad_norm": 0.7556122541427612, "learning_rate": 7.881805579420766e-05, "loss": 12.449546813964844, "step": 2363 }, { "epoch": 0.30655762949514925, "grad_norm": 0.6598160862922668, "learning_rate": 7.88013653113781e-05, "loss": 10.221400260925293, "step": 2364 }, { "epoch": 0.3066873070033959, "grad_norm": 0.7128664255142212, "learning_rate": 7.878467002420269e-05, "loss": 10.155682563781738, "step": 2365 }, { "epoch": 0.30681698451164263, "grad_norm": 0.790968656539917, "learning_rate": 7.876796993546636e-05, "loss": 12.218090057373047, "step": 2366 }, { "epoch": 0.3069466620198893, "grad_norm": 0.614660382270813, "learning_rate": 7.875126504795482e-05, "loss": 9.300291061401367, "step": 2367 }, { "epoch": 0.30707633952813596, "grad_norm": 0.7648331522941589, "learning_rate": 7.873455536445467e-05, "loss": 10.208044052124023, "step": 2368 }, { "epoch": 0.3072060170363827, "grad_norm": 0.6479147672653198, "learning_rate": 7.871784088775319e-05, "loss": 7.915867328643799, "step": 2369 }, { "epoch": 0.30733569454462933, "grad_norm": 0.6149949431419373, "learning_rate": 7.870112162063852e-05, "loss": 12.103139877319336, "step": 2370 }, { "epoch": 0.307465372052876, "grad_norm": 0.6902816891670227, "learning_rate": 7.868439756589963e-05, "loss": 9.2225980758667, "step": 2371 }, { "epoch": 0.30759504956112266, "grad_norm": 0.6454277634620667, "learning_rate": 7.866766872632622e-05, "loss": 12.25428295135498, "step": 2372 }, { "epoch": 0.3077247270693694, "grad_norm": 0.5349610447883606, "learning_rate": 7.865093510470885e-05, "loss": 6.4781975746154785, "step": 2373 }, { "epoch": 0.30785440457761604, "grad_norm": 0.8632481694221497, "learning_rate": 7.863419670383881e-05, "loss": 14.935667037963867, "step": 2374 }, { "epoch": 0.3079840820858627, "grad_norm": 0.7590436339378357, "learning_rate": 7.861745352650827e-05, "loss": 14.068364143371582, "step": 2375 }, { "epoch": 0.3081137595941094, "grad_norm": 0.8066757321357727, "learning_rate": 7.860070557551014e-05, "loss": 11.653023719787598, "step": 2376 }, { "epoch": 0.3082434371023561, "grad_norm": 0.6393242478370667, "learning_rate": 7.85839528536381e-05, "loss": 8.599366188049316, "step": 2377 }, { "epoch": 0.30837311461060274, "grad_norm": 0.6165448427200317, "learning_rate": 7.856719536368673e-05, "loss": 10.98147201538086, "step": 2378 }, { "epoch": 0.30850279211884946, "grad_norm": 0.635371208190918, "learning_rate": 7.855043310845131e-05, "loss": 12.451214790344238, "step": 2379 }, { "epoch": 0.3086324696270961, "grad_norm": 0.6941056251525879, "learning_rate": 7.853366609072796e-05, "loss": 12.069828987121582, "step": 2380 }, { "epoch": 0.3087621471353428, "grad_norm": 0.7351772785186768, "learning_rate": 7.851689431331355e-05, "loss": 10.588016510009766, "step": 2381 }, { "epoch": 0.3088918246435895, "grad_norm": 0.9142374396324158, "learning_rate": 7.850011777900581e-05, "loss": 9.143996238708496, "step": 2382 }, { "epoch": 0.30902150215183616, "grad_norm": 0.6710877418518066, "learning_rate": 7.84833364906032e-05, "loss": 12.304362297058105, "step": 2383 }, { "epoch": 0.3091511796600828, "grad_norm": 0.7337384819984436, "learning_rate": 7.846655045090501e-05, "loss": 11.558765411376953, "step": 2384 }, { "epoch": 0.30928085716832954, "grad_norm": 1.2938934564590454, "learning_rate": 7.844975966271132e-05, "loss": 14.874632835388184, "step": 2385 }, { "epoch": 0.3094105346765762, "grad_norm": 0.5864127278327942, "learning_rate": 7.8432964128823e-05, "loss": 11.982558250427246, "step": 2386 }, { "epoch": 0.30954021218482286, "grad_norm": 0.8478800654411316, "learning_rate": 7.841616385204169e-05, "loss": 14.628116607666016, "step": 2387 }, { "epoch": 0.3096698896930695, "grad_norm": 0.813897967338562, "learning_rate": 7.839935883516985e-05, "loss": 16.086870193481445, "step": 2388 }, { "epoch": 0.30979956720131624, "grad_norm": 0.8470439314842224, "learning_rate": 7.83825490810107e-05, "loss": 12.427142143249512, "step": 2389 }, { "epoch": 0.3099292447095629, "grad_norm": 0.9002448916435242, "learning_rate": 7.836573459236831e-05, "loss": 7.786073684692383, "step": 2390 }, { "epoch": 0.31005892221780956, "grad_norm": 0.6226637959480286, "learning_rate": 7.834891537204745e-05, "loss": 9.629162788391113, "step": 2391 }, { "epoch": 0.3101885997260563, "grad_norm": 0.6270474791526794, "learning_rate": 7.833209142285377e-05, "loss": 10.02519702911377, "step": 2392 }, { "epoch": 0.31031827723430294, "grad_norm": 0.6360914707183838, "learning_rate": 7.831526274759365e-05, "loss": 12.070863723754883, "step": 2393 }, { "epoch": 0.3104479547425496, "grad_norm": 0.7473006248474121, "learning_rate": 7.829842934907428e-05, "loss": 12.907389640808105, "step": 2394 }, { "epoch": 0.3105776322507963, "grad_norm": 0.8291186690330505, "learning_rate": 7.828159123010362e-05, "loss": 9.43190860748291, "step": 2395 }, { "epoch": 0.310707309759043, "grad_norm": 0.5753152370452881, "learning_rate": 7.826474839349043e-05, "loss": 7.332087516784668, "step": 2396 }, { "epoch": 0.31083698726728964, "grad_norm": 0.7039026021957397, "learning_rate": 7.824790084204427e-05, "loss": 9.040315628051758, "step": 2397 }, { "epoch": 0.31096666477553636, "grad_norm": 0.6834849119186401, "learning_rate": 7.823104857857548e-05, "loss": 12.33242416381836, "step": 2398 }, { "epoch": 0.311096342283783, "grad_norm": 0.7559219598770142, "learning_rate": 7.821419160589517e-05, "loss": 12.494054794311523, "step": 2399 }, { "epoch": 0.3112260197920297, "grad_norm": 0.6694706678390503, "learning_rate": 7.819732992681525e-05, "loss": 13.084250450134277, "step": 2400 }, { "epoch": 0.3113556973002764, "grad_norm": 0.9975526332855225, "learning_rate": 7.81804635441484e-05, "loss": 11.981632232666016, "step": 2401 }, { "epoch": 0.31148537480852306, "grad_norm": 0.6508364081382751, "learning_rate": 7.816359246070813e-05, "loss": 12.19582748413086, "step": 2402 }, { "epoch": 0.3116150523167697, "grad_norm": 0.7138308882713318, "learning_rate": 7.814671667930864e-05, "loss": 11.402647018432617, "step": 2403 }, { "epoch": 0.3117447298250164, "grad_norm": 0.7171677350997925, "learning_rate": 7.812983620276503e-05, "loss": 11.600951194763184, "step": 2404 }, { "epoch": 0.3118744073332631, "grad_norm": 0.5974873304367065, "learning_rate": 7.811295103389309e-05, "loss": 8.92506217956543, "step": 2405 }, { "epoch": 0.31200408484150977, "grad_norm": 0.4602392315864563, "learning_rate": 7.809606117550945e-05, "loss": 8.765012741088867, "step": 2406 }, { "epoch": 0.3121337623497564, "grad_norm": 0.4656739830970764, "learning_rate": 7.807916663043151e-05, "loss": 9.522829055786133, "step": 2407 }, { "epoch": 0.31226343985800314, "grad_norm": 0.6416405439376831, "learning_rate": 7.806226740147741e-05, "loss": 9.184703826904297, "step": 2408 }, { "epoch": 0.3123931173662498, "grad_norm": 0.7476564049720764, "learning_rate": 7.804536349146612e-05, "loss": 12.166413307189941, "step": 2409 }, { "epoch": 0.31252279487449647, "grad_norm": 0.6576550602912903, "learning_rate": 7.802845490321738e-05, "loss": 10.650235176086426, "step": 2410 }, { "epoch": 0.3126524723827432, "grad_norm": 0.6554632186889648, "learning_rate": 7.801154163955171e-05, "loss": 9.221234321594238, "step": 2411 }, { "epoch": 0.31278214989098985, "grad_norm": 0.6126965284347534, "learning_rate": 7.799462370329039e-05, "loss": 8.746149063110352, "step": 2412 }, { "epoch": 0.3129118273992365, "grad_norm": 0.7415833473205566, "learning_rate": 7.79777010972555e-05, "loss": 9.994813919067383, "step": 2413 }, { "epoch": 0.3130415049074832, "grad_norm": 0.5564342141151428, "learning_rate": 7.796077382426993e-05, "loss": 10.767133712768555, "step": 2414 }, { "epoch": 0.3131711824157299, "grad_norm": 0.6224269866943359, "learning_rate": 7.794384188715726e-05, "loss": 10.545703887939453, "step": 2415 }, { "epoch": 0.31330085992397655, "grad_norm": 0.7090111374855042, "learning_rate": 7.792690528874192e-05, "loss": 10.126873016357422, "step": 2416 }, { "epoch": 0.31343053743222327, "grad_norm": 0.8574581146240234, "learning_rate": 7.790996403184909e-05, "loss": 12.188175201416016, "step": 2417 }, { "epoch": 0.31356021494046993, "grad_norm": 0.7534857392311096, "learning_rate": 7.789301811930476e-05, "loss": 10.314202308654785, "step": 2418 }, { "epoch": 0.3136898924487166, "grad_norm": 0.5217606425285339, "learning_rate": 7.787606755393564e-05, "loss": 5.756511688232422, "step": 2419 }, { "epoch": 0.31381956995696325, "grad_norm": 0.6380245685577393, "learning_rate": 7.785911233856929e-05, "loss": 9.447843551635742, "step": 2420 }, { "epoch": 0.31394924746520997, "grad_norm": 0.6216638684272766, "learning_rate": 7.784215247603397e-05, "loss": 9.135584831237793, "step": 2421 }, { "epoch": 0.31407892497345663, "grad_norm": 0.9236554503440857, "learning_rate": 7.782518796915875e-05, "loss": 13.007694244384766, "step": 2422 }, { "epoch": 0.3142086024817033, "grad_norm": 0.9616803526878357, "learning_rate": 7.780821882077348e-05, "loss": 13.005162239074707, "step": 2423 }, { "epoch": 0.31433827998995, "grad_norm": 0.868322491645813, "learning_rate": 7.779124503370878e-05, "loss": 11.406587600708008, "step": 2424 }, { "epoch": 0.31446795749819667, "grad_norm": 0.632693886756897, "learning_rate": 7.777426661079605e-05, "loss": 8.869176864624023, "step": 2425 }, { "epoch": 0.31459763500644333, "grad_norm": 1.0383471250534058, "learning_rate": 7.775728355486745e-05, "loss": 11.088496208190918, "step": 2426 }, { "epoch": 0.31472731251469005, "grad_norm": 0.7254782915115356, "learning_rate": 7.774029586875588e-05, "loss": 11.726234436035156, "step": 2427 }, { "epoch": 0.3148569900229367, "grad_norm": 0.7166110277175903, "learning_rate": 7.772330355529512e-05, "loss": 12.215804100036621, "step": 2428 }, { "epoch": 0.3149866675311834, "grad_norm": 0.766669511795044, "learning_rate": 7.77063066173196e-05, "loss": 9.955460548400879, "step": 2429 }, { "epoch": 0.3151163450394301, "grad_norm": 0.9464349150657654, "learning_rate": 7.768930505766459e-05, "loss": 16.926876068115234, "step": 2430 }, { "epoch": 0.31524602254767675, "grad_norm": 0.7592201232910156, "learning_rate": 7.767229887916611e-05, "loss": 12.119441032409668, "step": 2431 }, { "epoch": 0.3153757000559234, "grad_norm": 0.832558810710907, "learning_rate": 7.765528808466096e-05, "loss": 13.379895210266113, "step": 2432 }, { "epoch": 0.31550537756417013, "grad_norm": 1.0076414346694946, "learning_rate": 7.763827267698671e-05, "loss": 11.960622787475586, "step": 2433 }, { "epoch": 0.3156350550724168, "grad_norm": 0.7097036242485046, "learning_rate": 7.762125265898168e-05, "loss": 9.809938430786133, "step": 2434 }, { "epoch": 0.31576473258066345, "grad_norm": 0.6057538390159607, "learning_rate": 7.760422803348496e-05, "loss": 6.615105152130127, "step": 2435 }, { "epoch": 0.3158944100889101, "grad_norm": 0.6639454960823059, "learning_rate": 7.758719880333646e-05, "loss": 9.714582443237305, "step": 2436 }, { "epoch": 0.31602408759715683, "grad_norm": 0.8198614120483398, "learning_rate": 7.757016497137681e-05, "loss": 11.239343643188477, "step": 2437 }, { "epoch": 0.3161537651054035, "grad_norm": 0.6808931231498718, "learning_rate": 7.75531265404474e-05, "loss": 9.269477844238281, "step": 2438 }, { "epoch": 0.31628344261365016, "grad_norm": 0.7789075970649719, "learning_rate": 7.753608351339041e-05, "loss": 10.970255851745605, "step": 2439 }, { "epoch": 0.3164131201218969, "grad_norm": 0.9660606384277344, "learning_rate": 7.75190358930488e-05, "loss": 13.895998001098633, "step": 2440 }, { "epoch": 0.31654279763014354, "grad_norm": 0.8419876098632812, "learning_rate": 7.750198368226626e-05, "loss": 16.44295310974121, "step": 2441 }, { "epoch": 0.3166724751383902, "grad_norm": 0.5751250386238098, "learning_rate": 7.748492688388726e-05, "loss": 10.748305320739746, "step": 2442 }, { "epoch": 0.3168021526466369, "grad_norm": 0.8578277826309204, "learning_rate": 7.746786550075704e-05, "loss": 12.898271560668945, "step": 2443 }, { "epoch": 0.3169318301548836, "grad_norm": 0.8366395235061646, "learning_rate": 7.745079953572163e-05, "loss": 11.532986640930176, "step": 2444 }, { "epoch": 0.31706150766313024, "grad_norm": 0.7926024198532104, "learning_rate": 7.743372899162777e-05, "loss": 10.085549354553223, "step": 2445 }, { "epoch": 0.31719118517137695, "grad_norm": 0.7813683748245239, "learning_rate": 7.741665387132299e-05, "loss": 12.584650993347168, "step": 2446 }, { "epoch": 0.3173208626796236, "grad_norm": 0.8581256866455078, "learning_rate": 7.73995741776556e-05, "loss": 13.060358047485352, "step": 2447 }, { "epoch": 0.3174505401878703, "grad_norm": 0.8057459592819214, "learning_rate": 7.738248991347464e-05, "loss": 12.036735534667969, "step": 2448 }, { "epoch": 0.317580217696117, "grad_norm": 0.8125506639480591, "learning_rate": 7.736540108162995e-05, "loss": 12.450698852539062, "step": 2449 }, { "epoch": 0.31770989520436366, "grad_norm": 0.685786783695221, "learning_rate": 7.73483076849721e-05, "loss": 10.782018661499023, "step": 2450 }, { "epoch": 0.3178395727126103, "grad_norm": 0.6824828386306763, "learning_rate": 7.733120972635245e-05, "loss": 7.840118408203125, "step": 2451 }, { "epoch": 0.317969250220857, "grad_norm": 0.7509633302688599, "learning_rate": 7.731410720862308e-05, "loss": 9.101160049438477, "step": 2452 }, { "epoch": 0.3180989277291037, "grad_norm": 0.7505804300308228, "learning_rate": 7.729700013463688e-05, "loss": 10.114521980285645, "step": 2453 }, { "epoch": 0.31822860523735036, "grad_norm": 0.5353032350540161, "learning_rate": 7.727988850724743e-05, "loss": 11.439088821411133, "step": 2454 }, { "epoch": 0.318358282745597, "grad_norm": 0.5906164646148682, "learning_rate": 7.726277232930918e-05, "loss": 12.824570655822754, "step": 2455 }, { "epoch": 0.31848796025384374, "grad_norm": 0.532585084438324, "learning_rate": 7.724565160367722e-05, "loss": 7.245076656341553, "step": 2456 }, { "epoch": 0.3186176377620904, "grad_norm": 0.5620623230934143, "learning_rate": 7.72285263332075e-05, "loss": 8.672289848327637, "step": 2457 }, { "epoch": 0.31874731527033706, "grad_norm": 0.8136072754859924, "learning_rate": 7.721139652075664e-05, "loss": 10.374195098876953, "step": 2458 }, { "epoch": 0.3188769927785838, "grad_norm": 0.8283575177192688, "learning_rate": 7.719426216918207e-05, "loss": 11.187417030334473, "step": 2459 }, { "epoch": 0.31900667028683044, "grad_norm": 0.5974694490432739, "learning_rate": 7.717712328134196e-05, "loss": 8.948607444763184, "step": 2460 }, { "epoch": 0.3191363477950771, "grad_norm": 0.566228985786438, "learning_rate": 7.715997986009525e-05, "loss": 10.625014305114746, "step": 2461 }, { "epoch": 0.3192660253033238, "grad_norm": 0.948981523513794, "learning_rate": 7.714283190830163e-05, "loss": 13.824980735778809, "step": 2462 }, { "epoch": 0.3193957028115705, "grad_norm": 0.9244624376296997, "learning_rate": 7.712567942882154e-05, "loss": 9.961275100708008, "step": 2463 }, { "epoch": 0.31952538031981714, "grad_norm": 0.6181317567825317, "learning_rate": 7.71085224245162e-05, "loss": 12.234574317932129, "step": 2464 }, { "epoch": 0.31965505782806386, "grad_norm": 0.6042324900627136, "learning_rate": 7.709136089824752e-05, "loss": 10.220845222473145, "step": 2465 }, { "epoch": 0.3197847353363105, "grad_norm": 0.39088788628578186, "learning_rate": 7.707419485287825e-05, "loss": 6.688786506652832, "step": 2466 }, { "epoch": 0.3199144128445572, "grad_norm": 0.773052990436554, "learning_rate": 7.705702429127181e-05, "loss": 15.526487350463867, "step": 2467 }, { "epoch": 0.32004409035280384, "grad_norm": 0.7629469633102417, "learning_rate": 7.703984921629245e-05, "loss": 10.299196243286133, "step": 2468 }, { "epoch": 0.32017376786105056, "grad_norm": 0.6349610090255737, "learning_rate": 7.702266963080516e-05, "loss": 11.213680267333984, "step": 2469 }, { "epoch": 0.3203034453692972, "grad_norm": 0.5514646768569946, "learning_rate": 7.700548553767559e-05, "loss": 7.691679000854492, "step": 2470 }, { "epoch": 0.3204331228775439, "grad_norm": 0.5934799909591675, "learning_rate": 7.698829693977027e-05, "loss": 11.780736923217773, "step": 2471 }, { "epoch": 0.3205628003857906, "grad_norm": 0.9609545469284058, "learning_rate": 7.69711038399564e-05, "loss": 15.107919692993164, "step": 2472 }, { "epoch": 0.32069247789403726, "grad_norm": 0.7391193509101868, "learning_rate": 7.695390624110197e-05, "loss": 14.04498291015625, "step": 2473 }, { "epoch": 0.3208221554022839, "grad_norm": 0.9207913279533386, "learning_rate": 7.693670414607567e-05, "loss": 13.998570442199707, "step": 2474 }, { "epoch": 0.32095183291053064, "grad_norm": 0.7167978882789612, "learning_rate": 7.691949755774704e-05, "loss": 9.053775787353516, "step": 2475 }, { "epoch": 0.3210815104187773, "grad_norm": 0.5982667803764343, "learning_rate": 7.690228647898625e-05, "loss": 8.415836334228516, "step": 2476 }, { "epoch": 0.32121118792702397, "grad_norm": 0.7542736530303955, "learning_rate": 7.688507091266428e-05, "loss": 16.051803588867188, "step": 2477 }, { "epoch": 0.3213408654352707, "grad_norm": 0.7848616242408752, "learning_rate": 7.686785086165288e-05, "loss": 10.68651294708252, "step": 2478 }, { "epoch": 0.32147054294351735, "grad_norm": 0.8174179792404175, "learning_rate": 7.685062632882451e-05, "loss": 13.893732070922852, "step": 2479 }, { "epoch": 0.321600220451764, "grad_norm": 0.7298468351364136, "learning_rate": 7.683339731705237e-05, "loss": 8.341760635375977, "step": 2480 }, { "epoch": 0.3217298979600107, "grad_norm": 0.5475309491157532, "learning_rate": 7.681616382921044e-05, "loss": 8.365582466125488, "step": 2481 }, { "epoch": 0.3218595754682574, "grad_norm": 0.7394405007362366, "learning_rate": 7.679892586817345e-05, "loss": 12.952528953552246, "step": 2482 }, { "epoch": 0.32198925297650405, "grad_norm": 0.8487951159477234, "learning_rate": 7.678168343681682e-05, "loss": 14.797469139099121, "step": 2483 }, { "epoch": 0.3221189304847507, "grad_norm": 0.760181725025177, "learning_rate": 7.676443653801677e-05, "loss": 13.184206008911133, "step": 2484 }, { "epoch": 0.3222486079929974, "grad_norm": 0.6648114919662476, "learning_rate": 7.674718517465027e-05, "loss": 8.944740295410156, "step": 2485 }, { "epoch": 0.3223782855012441, "grad_norm": 0.7661712169647217, "learning_rate": 7.672992934959499e-05, "loss": 12.033675193786621, "step": 2486 }, { "epoch": 0.32250796300949075, "grad_norm": 0.7209144830703735, "learning_rate": 7.671266906572937e-05, "loss": 11.40950870513916, "step": 2487 }, { "epoch": 0.32263764051773747, "grad_norm": 0.6084060668945312, "learning_rate": 7.669540432593261e-05, "loss": 12.274618148803711, "step": 2488 }, { "epoch": 0.32276731802598413, "grad_norm": 0.713497519493103, "learning_rate": 7.66781351330846e-05, "loss": 11.379110336303711, "step": 2489 }, { "epoch": 0.3228969955342308, "grad_norm": 0.8629701733589172, "learning_rate": 7.666086149006607e-05, "loss": 12.162210464477539, "step": 2490 }, { "epoch": 0.3230266730424775, "grad_norm": 0.525316596031189, "learning_rate": 7.664358339975834e-05, "loss": 8.477457046508789, "step": 2491 }, { "epoch": 0.32315635055072417, "grad_norm": 0.7805837988853455, "learning_rate": 7.662630086504365e-05, "loss": 11.688116073608398, "step": 2492 }, { "epoch": 0.32328602805897083, "grad_norm": 0.736089289188385, "learning_rate": 7.660901388880483e-05, "loss": 13.441675186157227, "step": 2493 }, { "epoch": 0.32341570556721755, "grad_norm": 0.6789936423301697, "learning_rate": 7.659172247392556e-05, "loss": 12.760255813598633, "step": 2494 }, { "epoch": 0.3235453830754642, "grad_norm": 0.7373749613761902, "learning_rate": 7.657442662329019e-05, "loss": 13.23768138885498, "step": 2495 }, { "epoch": 0.32367506058371087, "grad_norm": 0.6755347847938538, "learning_rate": 7.655712633978384e-05, "loss": 11.973817825317383, "step": 2496 }, { "epoch": 0.3238047380919576, "grad_norm": 0.5907647013664246, "learning_rate": 7.653982162629239e-05, "loss": 11.84168529510498, "step": 2497 }, { "epoch": 0.32393441560020425, "grad_norm": 0.789211094379425, "learning_rate": 7.652251248570239e-05, "loss": 14.843419075012207, "step": 2498 }, { "epoch": 0.3240640931084509, "grad_norm": 0.6732943654060364, "learning_rate": 7.650519892090118e-05, "loss": 10.602937698364258, "step": 2499 }, { "epoch": 0.3241937706166976, "grad_norm": 0.5881267189979553, "learning_rate": 7.648788093477686e-05, "loss": 8.702668190002441, "step": 2500 }, { "epoch": 0.3243234481249443, "grad_norm": 0.6916697025299072, "learning_rate": 7.64705585302182e-05, "loss": 13.826309204101562, "step": 2501 }, { "epoch": 0.32445312563319095, "grad_norm": 0.5613880157470703, "learning_rate": 7.645323171011478e-05, "loss": 10.913728713989258, "step": 2502 }, { "epoch": 0.3245828031414376, "grad_norm": 0.9655318856239319, "learning_rate": 7.643590047735686e-05, "loss": 13.391138076782227, "step": 2503 }, { "epoch": 0.32471248064968433, "grad_norm": 0.5742948651313782, "learning_rate": 7.641856483483545e-05, "loss": 11.686311721801758, "step": 2504 }, { "epoch": 0.324842158157931, "grad_norm": 0.6502830982208252, "learning_rate": 7.640122478544231e-05, "loss": 9.0734224319458, "step": 2505 }, { "epoch": 0.32497183566617766, "grad_norm": 0.5922211408615112, "learning_rate": 7.638388033206995e-05, "loss": 12.104394912719727, "step": 2506 }, { "epoch": 0.3251015131744244, "grad_norm": 0.6959969401359558, "learning_rate": 7.636653147761158e-05, "loss": 14.46042251586914, "step": 2507 }, { "epoch": 0.32523119068267103, "grad_norm": 0.6495524048805237, "learning_rate": 7.634917822496113e-05, "loss": 10.076828956604004, "step": 2508 }, { "epoch": 0.3253608681909177, "grad_norm": 0.7678934931755066, "learning_rate": 7.63318205770133e-05, "loss": 11.310890197753906, "step": 2509 }, { "epoch": 0.3254905456991644, "grad_norm": 0.6574892401695251, "learning_rate": 7.631445853666354e-05, "loss": 11.299195289611816, "step": 2510 }, { "epoch": 0.3256202232074111, "grad_norm": 0.7357890605926514, "learning_rate": 7.629709210680797e-05, "loss": 13.620697021484375, "step": 2511 }, { "epoch": 0.32574990071565774, "grad_norm": 1.0221747159957886, "learning_rate": 7.627972129034351e-05, "loss": 14.312030792236328, "step": 2512 }, { "epoch": 0.32587957822390445, "grad_norm": 0.7796796560287476, "learning_rate": 7.626234609016778e-05, "loss": 15.120580673217773, "step": 2513 }, { "epoch": 0.3260092557321511, "grad_norm": 0.5248708128929138, "learning_rate": 7.624496650917911e-05, "loss": 10.04013729095459, "step": 2514 }, { "epoch": 0.3261389332403978, "grad_norm": 0.5238862037658691, "learning_rate": 7.622758255027657e-05, "loss": 6.8929572105407715, "step": 2515 }, { "epoch": 0.32626861074864444, "grad_norm": 0.7477911114692688, "learning_rate": 7.621019421636003e-05, "loss": 11.848374366760254, "step": 2516 }, { "epoch": 0.32639828825689116, "grad_norm": 0.8225433826446533, "learning_rate": 7.619280151032995e-05, "loss": 11.602853775024414, "step": 2517 }, { "epoch": 0.3265279657651378, "grad_norm": 0.5237151980400085, "learning_rate": 7.617540443508768e-05, "loss": 8.830142974853516, "step": 2518 }, { "epoch": 0.3266576432733845, "grad_norm": 0.7464258670806885, "learning_rate": 7.615800299353517e-05, "loss": 11.162107467651367, "step": 2519 }, { "epoch": 0.3267873207816312, "grad_norm": 1.0533933639526367, "learning_rate": 7.614059718857517e-05, "loss": 11.482982635498047, "step": 2520 }, { "epoch": 0.32691699828987786, "grad_norm": 0.6367322206497192, "learning_rate": 7.612318702311113e-05, "loss": 8.574080467224121, "step": 2521 }, { "epoch": 0.3270466757981245, "grad_norm": 0.7852044105529785, "learning_rate": 7.610577250004724e-05, "loss": 9.718038558959961, "step": 2522 }, { "epoch": 0.32717635330637124, "grad_norm": 0.8744147419929504, "learning_rate": 7.608835362228839e-05, "loss": 10.485644340515137, "step": 2523 }, { "epoch": 0.3273060308146179, "grad_norm": 0.6353866457939148, "learning_rate": 7.607093039274024e-05, "loss": 7.4098687171936035, "step": 2524 }, { "epoch": 0.32743570832286456, "grad_norm": 0.9429799318313599, "learning_rate": 7.605350281430914e-05, "loss": 12.481285095214844, "step": 2525 }, { "epoch": 0.3275653858311113, "grad_norm": 0.7752252817153931, "learning_rate": 7.603607088990219e-05, "loss": 11.080114364624023, "step": 2526 }, { "epoch": 0.32769506333935794, "grad_norm": 0.6696032285690308, "learning_rate": 7.60186346224272e-05, "loss": 8.2035551071167, "step": 2527 }, { "epoch": 0.3278247408476046, "grad_norm": 0.861785888671875, "learning_rate": 7.600119401479268e-05, "loss": 13.950725555419922, "step": 2528 }, { "epoch": 0.3279544183558513, "grad_norm": 0.869144082069397, "learning_rate": 7.598374906990796e-05, "loss": 10.837080955505371, "step": 2529 }, { "epoch": 0.328084095864098, "grad_norm": 0.7865067720413208, "learning_rate": 7.596629979068294e-05, "loss": 7.650613307952881, "step": 2530 }, { "epoch": 0.32821377337234464, "grad_norm": 1.13351571559906, "learning_rate": 7.594884618002838e-05, "loss": 10.178976058959961, "step": 2531 }, { "epoch": 0.3283434508805913, "grad_norm": 0.8661725521087646, "learning_rate": 7.59313882408557e-05, "loss": 13.993091583251953, "step": 2532 }, { "epoch": 0.328473128388838, "grad_norm": 0.7022091746330261, "learning_rate": 7.591392597607707e-05, "loss": 11.138649940490723, "step": 2533 }, { "epoch": 0.3286028058970847, "grad_norm": 0.6611803770065308, "learning_rate": 7.589645938860533e-05, "loss": 14.585701942443848, "step": 2534 }, { "epoch": 0.32873248340533134, "grad_norm": 0.9053117036819458, "learning_rate": 7.587898848135411e-05, "loss": 11.901663780212402, "step": 2535 }, { "epoch": 0.32886216091357806, "grad_norm": 0.8045965433120728, "learning_rate": 7.586151325723769e-05, "loss": 11.763940811157227, "step": 2536 }, { "epoch": 0.3289918384218247, "grad_norm": 0.7266232371330261, "learning_rate": 7.584403371917113e-05, "loss": 8.566043853759766, "step": 2537 }, { "epoch": 0.3291215159300714, "grad_norm": 0.6702025532722473, "learning_rate": 7.582654987007019e-05, "loss": 9.835824966430664, "step": 2538 }, { "epoch": 0.3292511934383181, "grad_norm": 0.6764283180236816, "learning_rate": 7.580906171285133e-05, "loss": 9.630273818969727, "step": 2539 }, { "epoch": 0.32938087094656476, "grad_norm": 0.843699038028717, "learning_rate": 7.579156925043177e-05, "loss": 11.195584297180176, "step": 2540 }, { "epoch": 0.3295105484548114, "grad_norm": 0.7763442993164062, "learning_rate": 7.577407248572937e-05, "loss": 11.011824607849121, "step": 2541 }, { "epoch": 0.32964022596305814, "grad_norm": 0.9245157837867737, "learning_rate": 7.575657142166279e-05, "loss": 14.073286056518555, "step": 2542 }, { "epoch": 0.3297699034713048, "grad_norm": 0.8861099481582642, "learning_rate": 7.57390660611514e-05, "loss": 14.022058486938477, "step": 2543 }, { "epoch": 0.32989958097955147, "grad_norm": 0.6344727277755737, "learning_rate": 7.572155640711522e-05, "loss": 7.948250770568848, "step": 2544 }, { "epoch": 0.3300292584877982, "grad_norm": 0.7135047912597656, "learning_rate": 7.570404246247507e-05, "loss": 9.666872024536133, "step": 2545 }, { "epoch": 0.33015893599604484, "grad_norm": 0.7898240685462952, "learning_rate": 7.568652423015242e-05, "loss": 9.09388256072998, "step": 2546 }, { "epoch": 0.3302886135042915, "grad_norm": 0.7003114819526672, "learning_rate": 7.566900171306948e-05, "loss": 9.583375930786133, "step": 2547 }, { "epoch": 0.33041829101253817, "grad_norm": 0.4931774437427521, "learning_rate": 7.565147491414917e-05, "loss": 5.748340606689453, "step": 2548 }, { "epoch": 0.3305479685207849, "grad_norm": 0.6477763652801514, "learning_rate": 7.563394383631515e-05, "loss": 11.136693954467773, "step": 2549 }, { "epoch": 0.33067764602903155, "grad_norm": 0.6244880557060242, "learning_rate": 7.561640848249174e-05, "loss": 11.880775451660156, "step": 2550 }, { "epoch": 0.3308073235372782, "grad_norm": 0.7008768916130066, "learning_rate": 7.559886885560406e-05, "loss": 11.466910362243652, "step": 2551 }, { "epoch": 0.3309370010455249, "grad_norm": 0.6726774573326111, "learning_rate": 7.558132495857784e-05, "loss": 8.046242713928223, "step": 2552 }, { "epoch": 0.3310666785537716, "grad_norm": 0.6513617038726807, "learning_rate": 7.55637767943396e-05, "loss": 11.13694953918457, "step": 2553 }, { "epoch": 0.33119635606201825, "grad_norm": 1.0696431398391724, "learning_rate": 7.554622436581652e-05, "loss": 10.701986312866211, "step": 2554 }, { "epoch": 0.33132603357026497, "grad_norm": 0.7020120024681091, "learning_rate": 7.552866767593654e-05, "loss": 10.828343391418457, "step": 2555 }, { "epoch": 0.33145571107851163, "grad_norm": 0.750329852104187, "learning_rate": 7.551110672762827e-05, "loss": 13.079314231872559, "step": 2556 }, { "epoch": 0.3315853885867583, "grad_norm": 0.8527426719665527, "learning_rate": 7.549354152382104e-05, "loss": 11.564291954040527, "step": 2557 }, { "epoch": 0.331715066095005, "grad_norm": 0.8962405323982239, "learning_rate": 7.54759720674449e-05, "loss": 11.803115844726562, "step": 2558 }, { "epoch": 0.33184474360325167, "grad_norm": 1.3274990320205688, "learning_rate": 7.545839836143063e-05, "loss": 9.9227876663208, "step": 2559 }, { "epoch": 0.33197442111149833, "grad_norm": 0.7540264129638672, "learning_rate": 7.544082040870966e-05, "loss": 9.109070777893066, "step": 2560 }, { "epoch": 0.33210409861974505, "grad_norm": 0.836241602897644, "learning_rate": 7.542323821221418e-05, "loss": 11.356927871704102, "step": 2561 }, { "epoch": 0.3322337761279917, "grad_norm": 0.8091151118278503, "learning_rate": 7.540565177487704e-05, "loss": 11.085162162780762, "step": 2562 }, { "epoch": 0.33236345363623837, "grad_norm": 0.6518629193305969, "learning_rate": 7.538806109963188e-05, "loss": 11.474190711975098, "step": 2563 }, { "epoch": 0.33249313114448503, "grad_norm": 0.9016504883766174, "learning_rate": 7.537046618941295e-05, "loss": 13.593611717224121, "step": 2564 }, { "epoch": 0.33262280865273175, "grad_norm": 0.7179001569747925, "learning_rate": 7.535286704715528e-05, "loss": 11.094253540039062, "step": 2565 }, { "epoch": 0.3327524861609784, "grad_norm": 0.9471331238746643, "learning_rate": 7.533526367579457e-05, "loss": 15.866927146911621, "step": 2566 }, { "epoch": 0.3328821636692251, "grad_norm": 0.6655042171478271, "learning_rate": 7.531765607826722e-05, "loss": 11.815473556518555, "step": 2567 }, { "epoch": 0.3330118411774718, "grad_norm": 0.7790369391441345, "learning_rate": 7.530004425751036e-05, "loss": 12.850154876708984, "step": 2568 }, { "epoch": 0.33314151868571845, "grad_norm": 0.6428310871124268, "learning_rate": 7.528242821646179e-05, "loss": 8.817361831665039, "step": 2569 }, { "epoch": 0.3332711961939651, "grad_norm": 0.6961901783943176, "learning_rate": 7.526480795806005e-05, "loss": 14.791611671447754, "step": 2570 }, { "epoch": 0.33340087370221183, "grad_norm": 0.9177014231681824, "learning_rate": 7.524718348524439e-05, "loss": 13.375146865844727, "step": 2571 }, { "epoch": 0.3335305512104585, "grad_norm": 0.5637293457984924, "learning_rate": 7.522955480095472e-05, "loss": 6.777241230010986, "step": 2572 }, { "epoch": 0.33366022871870515, "grad_norm": 0.5886304974555969, "learning_rate": 7.521192190813168e-05, "loss": 10.425134658813477, "step": 2573 }, { "epoch": 0.33378990622695187, "grad_norm": 0.6885266304016113, "learning_rate": 7.51942848097166e-05, "loss": 11.688619613647461, "step": 2574 }, { "epoch": 0.33391958373519853, "grad_norm": 0.7771115303039551, "learning_rate": 7.517664350865152e-05, "loss": 11.317683219909668, "step": 2575 }, { "epoch": 0.3340492612434452, "grad_norm": 0.7814350724220276, "learning_rate": 7.51589980078792e-05, "loss": 9.75332260131836, "step": 2576 }, { "epoch": 0.3341789387516919, "grad_norm": 0.8822901248931885, "learning_rate": 7.514134831034306e-05, "loss": 11.598734855651855, "step": 2577 }, { "epoch": 0.3343086162599386, "grad_norm": 0.8052982091903687, "learning_rate": 7.512369441898722e-05, "loss": 12.174822807312012, "step": 2578 }, { "epoch": 0.33443829376818524, "grad_norm": 0.7123537063598633, "learning_rate": 7.510603633675654e-05, "loss": 13.04548454284668, "step": 2579 }, { "epoch": 0.3345679712764319, "grad_norm": 0.8393014073371887, "learning_rate": 7.508837406659658e-05, "loss": 14.214309692382812, "step": 2580 }, { "epoch": 0.3346976487846786, "grad_norm": 0.7539219260215759, "learning_rate": 7.507070761145356e-05, "loss": 11.008098602294922, "step": 2581 }, { "epoch": 0.3348273262929253, "grad_norm": 0.62894606590271, "learning_rate": 7.505303697427442e-05, "loss": 10.98798942565918, "step": 2582 }, { "epoch": 0.33495700380117194, "grad_norm": 0.6762627363204956, "learning_rate": 7.503536215800678e-05, "loss": 8.095002174377441, "step": 2583 }, { "epoch": 0.33508668130941865, "grad_norm": 1.0038352012634277, "learning_rate": 7.501768316559896e-05, "loss": 12.301258087158203, "step": 2584 }, { "epoch": 0.3352163588176653, "grad_norm": 0.6334065794944763, "learning_rate": 7.500000000000001e-05, "loss": 7.498077869415283, "step": 2585 }, { "epoch": 0.335346036325912, "grad_norm": 1.0800472497940063, "learning_rate": 7.498231266415965e-05, "loss": 12.028641700744629, "step": 2586 }, { "epoch": 0.3354757138341587, "grad_norm": 1.0077104568481445, "learning_rate": 7.496462116102829e-05, "loss": 11.611696243286133, "step": 2587 }, { "epoch": 0.33560539134240536, "grad_norm": 0.7406539916992188, "learning_rate": 7.494692549355705e-05, "loss": 12.474344253540039, "step": 2588 }, { "epoch": 0.335735068850652, "grad_norm": 0.7977684736251831, "learning_rate": 7.492922566469773e-05, "loss": 14.567309379577637, "step": 2589 }, { "epoch": 0.33586474635889874, "grad_norm": 0.7595228552818298, "learning_rate": 7.491152167740286e-05, "loss": 10.128293991088867, "step": 2590 }, { "epoch": 0.3359944238671454, "grad_norm": 0.5965685248374939, "learning_rate": 7.489381353462559e-05, "loss": 6.255321025848389, "step": 2591 }, { "epoch": 0.33612410137539206, "grad_norm": 0.8919810652732849, "learning_rate": 7.487610123931984e-05, "loss": 10.857194900512695, "step": 2592 }, { "epoch": 0.3362537788836388, "grad_norm": 0.7775075435638428, "learning_rate": 7.48583847944402e-05, "loss": 9.720535278320312, "step": 2593 }, { "epoch": 0.33638345639188544, "grad_norm": 0.6063661575317383, "learning_rate": 7.484066420294191e-05, "loss": 7.589887619018555, "step": 2594 }, { "epoch": 0.3365131339001321, "grad_norm": 0.9701308012008667, "learning_rate": 7.482293946778096e-05, "loss": 13.767412185668945, "step": 2595 }, { "epoch": 0.33664281140837876, "grad_norm": 0.7621440887451172, "learning_rate": 7.480521059191401e-05, "loss": 12.463765144348145, "step": 2596 }, { "epoch": 0.3367724889166255, "grad_norm": 0.6648926138877869, "learning_rate": 7.47874775782984e-05, "loss": 12.338261604309082, "step": 2597 }, { "epoch": 0.33690216642487214, "grad_norm": 0.9004234075546265, "learning_rate": 7.476974042989216e-05, "loss": 10.845465660095215, "step": 2598 }, { "epoch": 0.3370318439331188, "grad_norm": 0.7615066170692444, "learning_rate": 7.475199914965403e-05, "loss": 12.919178009033203, "step": 2599 }, { "epoch": 0.3371615214413655, "grad_norm": 0.6981407403945923, "learning_rate": 7.473425374054344e-05, "loss": 10.032975196838379, "step": 2600 }, { "epoch": 0.3372911989496122, "grad_norm": 0.8330899477005005, "learning_rate": 7.471650420552047e-05, "loss": 13.186771392822266, "step": 2601 }, { "epoch": 0.33742087645785884, "grad_norm": 0.5437802076339722, "learning_rate": 7.46987505475459e-05, "loss": 8.978972434997559, "step": 2602 }, { "epoch": 0.33755055396610556, "grad_norm": 0.7386367321014404, "learning_rate": 7.468099276958125e-05, "loss": 10.684298515319824, "step": 2603 }, { "epoch": 0.3376802314743522, "grad_norm": 0.6254516243934631, "learning_rate": 7.466323087458868e-05, "loss": 10.56350040435791, "step": 2604 }, { "epoch": 0.3378099089825989, "grad_norm": 0.6733736991882324, "learning_rate": 7.464546486553103e-05, "loss": 12.62476634979248, "step": 2605 }, { "epoch": 0.3379395864908456, "grad_norm": 0.8910006284713745, "learning_rate": 7.462769474537183e-05, "loss": 17.321529388427734, "step": 2606 }, { "epoch": 0.33806926399909226, "grad_norm": 0.6400598883628845, "learning_rate": 7.460992051707536e-05, "loss": 8.230490684509277, "step": 2607 }, { "epoch": 0.3381989415073389, "grad_norm": 0.6484809517860413, "learning_rate": 7.459214218360648e-05, "loss": 8.373310089111328, "step": 2608 }, { "epoch": 0.33832861901558564, "grad_norm": 0.6892209649085999, "learning_rate": 7.457435974793081e-05, "loss": 10.677600860595703, "step": 2609 }, { "epoch": 0.3384582965238323, "grad_norm": 0.7941356301307678, "learning_rate": 7.455657321301463e-05, "loss": 12.647974967956543, "step": 2610 }, { "epoch": 0.33858797403207896, "grad_norm": 0.6206890940666199, "learning_rate": 7.45387825818249e-05, "loss": 12.197265625, "step": 2611 }, { "epoch": 0.3387176515403256, "grad_norm": 0.589826226234436, "learning_rate": 7.452098785732929e-05, "loss": 9.130228042602539, "step": 2612 }, { "epoch": 0.33884732904857234, "grad_norm": 0.9186633229255676, "learning_rate": 7.450318904249611e-05, "loss": 13.933650970458984, "step": 2613 }, { "epoch": 0.338977006556819, "grad_norm": 0.7773815989494324, "learning_rate": 7.448538614029438e-05, "loss": 11.998332977294922, "step": 2614 }, { "epoch": 0.33910668406506567, "grad_norm": 0.663353681564331, "learning_rate": 7.446757915369381e-05, "loss": 10.657685279846191, "step": 2615 }, { "epoch": 0.3392363615733124, "grad_norm": 0.757463812828064, "learning_rate": 7.444976808566475e-05, "loss": 10.89553165435791, "step": 2616 }, { "epoch": 0.33936603908155905, "grad_norm": 0.9438915252685547, "learning_rate": 7.443195293917828e-05, "loss": 16.038022994995117, "step": 2617 }, { "epoch": 0.3394957165898057, "grad_norm": 0.6364865303039551, "learning_rate": 7.441413371720613e-05, "loss": 10.098727226257324, "step": 2618 }, { "epoch": 0.3396253940980524, "grad_norm": 0.8467127084732056, "learning_rate": 7.439631042272072e-05, "loss": 9.262540817260742, "step": 2619 }, { "epoch": 0.3397550716062991, "grad_norm": 0.650062620639801, "learning_rate": 7.437848305869513e-05, "loss": 9.739880561828613, "step": 2620 }, { "epoch": 0.33988474911454575, "grad_norm": 0.6622211337089539, "learning_rate": 7.436065162810317e-05, "loss": 8.715133666992188, "step": 2621 }, { "epoch": 0.34001442662279246, "grad_norm": 1.0230462551116943, "learning_rate": 7.434281613391928e-05, "loss": 13.183433532714844, "step": 2622 }, { "epoch": 0.3401441041310391, "grad_norm": 0.7448149919509888, "learning_rate": 7.432497657911859e-05, "loss": 9.148551940917969, "step": 2623 }, { "epoch": 0.3402737816392858, "grad_norm": 0.8325015902519226, "learning_rate": 7.430713296667691e-05, "loss": 8.025534629821777, "step": 2624 }, { "epoch": 0.3404034591475325, "grad_norm": 0.8290857672691345, "learning_rate": 7.428928529957072e-05, "loss": 13.817534446716309, "step": 2625 }, { "epoch": 0.34053313665577917, "grad_norm": 0.7175450325012207, "learning_rate": 7.427143358077719e-05, "loss": 12.388943672180176, "step": 2626 }, { "epoch": 0.34066281416402583, "grad_norm": 0.9599714875221252, "learning_rate": 7.425357781327418e-05, "loss": 14.432882308959961, "step": 2627 }, { "epoch": 0.3407924916722725, "grad_norm": 0.7004045248031616, "learning_rate": 7.423571800004017e-05, "loss": 11.208866119384766, "step": 2628 }, { "epoch": 0.3409221691805192, "grad_norm": 1.0554640293121338, "learning_rate": 7.421785414405435e-05, "loss": 11.290971755981445, "step": 2629 }, { "epoch": 0.34105184668876587, "grad_norm": 0.6443371772766113, "learning_rate": 7.419998624829661e-05, "loss": 9.016507148742676, "step": 2630 }, { "epoch": 0.34118152419701253, "grad_norm": 0.5423691868782043, "learning_rate": 7.418211431574749e-05, "loss": 6.82003927230835, "step": 2631 }, { "epoch": 0.34131120170525925, "grad_norm": 0.8537275195121765, "learning_rate": 7.416423834938818e-05, "loss": 11.92814826965332, "step": 2632 }, { "epoch": 0.3414408792135059, "grad_norm": 0.9020258188247681, "learning_rate": 7.414635835220055e-05, "loss": 11.661657333374023, "step": 2633 }, { "epoch": 0.34157055672175257, "grad_norm": 0.6492094993591309, "learning_rate": 7.412847432716719e-05, "loss": 8.495733261108398, "step": 2634 }, { "epoch": 0.3417002342299993, "grad_norm": 0.6793560981750488, "learning_rate": 7.411058627727131e-05, "loss": 9.41590404510498, "step": 2635 }, { "epoch": 0.34182991173824595, "grad_norm": 0.7677463889122009, "learning_rate": 7.409269420549682e-05, "loss": 12.662871360778809, "step": 2636 }, { "epoch": 0.3419595892464926, "grad_norm": 0.7095454335212708, "learning_rate": 7.407479811482828e-05, "loss": 10.634872436523438, "step": 2637 }, { "epoch": 0.34208926675473933, "grad_norm": 0.6447287201881409, "learning_rate": 7.405689800825093e-05, "loss": 9.473072052001953, "step": 2638 }, { "epoch": 0.342218944262986, "grad_norm": 0.9886207580566406, "learning_rate": 7.403899388875069e-05, "loss": 14.46649169921875, "step": 2639 }, { "epoch": 0.34234862177123265, "grad_norm": 0.6070206761360168, "learning_rate": 7.402108575931413e-05, "loss": 8.41575813293457, "step": 2640 }, { "epoch": 0.34247829927947937, "grad_norm": 0.6605907678604126, "learning_rate": 7.400317362292852e-05, "loss": 7.182644367218018, "step": 2641 }, { "epoch": 0.34260797678772603, "grad_norm": 0.8148466348648071, "learning_rate": 7.398525748258176e-05, "loss": 15.399951934814453, "step": 2642 }, { "epoch": 0.3427376542959727, "grad_norm": 0.8399850130081177, "learning_rate": 7.396733734126242e-05, "loss": 14.276060104370117, "step": 2643 }, { "epoch": 0.34286733180421936, "grad_norm": 0.8982509970664978, "learning_rate": 7.394941320195977e-05, "loss": 14.066596031188965, "step": 2644 }, { "epoch": 0.3429970093124661, "grad_norm": 0.7375142574310303, "learning_rate": 7.393148506766374e-05, "loss": 10.684171676635742, "step": 2645 }, { "epoch": 0.34312668682071273, "grad_norm": 0.63929682970047, "learning_rate": 7.391355294136488e-05, "loss": 11.203692436218262, "step": 2646 }, { "epoch": 0.3432563643289594, "grad_norm": 1.0488204956054688, "learning_rate": 7.389561682605449e-05, "loss": 13.797103881835938, "step": 2647 }, { "epoch": 0.3433860418372061, "grad_norm": 0.9900559186935425, "learning_rate": 7.387767672472446e-05, "loss": 11.884657859802246, "step": 2648 }, { "epoch": 0.3435157193454528, "grad_norm": 0.5755271315574646, "learning_rate": 7.385973264036739e-05, "loss": 7.826188087463379, "step": 2649 }, { "epoch": 0.34364539685369944, "grad_norm": 0.8513473868370056, "learning_rate": 7.384178457597649e-05, "loss": 12.109161376953125, "step": 2650 }, { "epoch": 0.34377507436194615, "grad_norm": 0.7756170034408569, "learning_rate": 7.38238325345457e-05, "loss": 13.189712524414062, "step": 2651 }, { "epoch": 0.3439047518701928, "grad_norm": 0.6360487937927246, "learning_rate": 7.38058765190696e-05, "loss": 11.027892112731934, "step": 2652 }, { "epoch": 0.3440344293784395, "grad_norm": 0.7500995993614197, "learning_rate": 7.37879165325434e-05, "loss": 10.386556625366211, "step": 2653 }, { "epoch": 0.3441641068866862, "grad_norm": 0.938990592956543, "learning_rate": 7.3769952577963e-05, "loss": 16.018787384033203, "step": 2654 }, { "epoch": 0.34429378439493286, "grad_norm": 0.7756077647209167, "learning_rate": 7.375198465832502e-05, "loss": 11.238126754760742, "step": 2655 }, { "epoch": 0.3444234619031795, "grad_norm": 0.8626571297645569, "learning_rate": 7.37340127766266e-05, "loss": 14.939850807189941, "step": 2656 }, { "epoch": 0.34455313941142623, "grad_norm": 0.6055500507354736, "learning_rate": 7.371603693586566e-05, "loss": 7.921290397644043, "step": 2657 }, { "epoch": 0.3446828169196729, "grad_norm": 0.9543087482452393, "learning_rate": 7.369805713904074e-05, "loss": 13.545869827270508, "step": 2658 }, { "epoch": 0.34481249442791956, "grad_norm": 1.036997675895691, "learning_rate": 7.368007338915107e-05, "loss": 10.968767166137695, "step": 2659 }, { "epoch": 0.3449421719361662, "grad_norm": 0.616897702217102, "learning_rate": 7.366208568919648e-05, "loss": 9.621590614318848, "step": 2660 }, { "epoch": 0.34507184944441294, "grad_norm": 0.7083175778388977, "learning_rate": 7.364409404217749e-05, "loss": 9.869694709777832, "step": 2661 }, { "epoch": 0.3452015269526596, "grad_norm": 0.7335613965988159, "learning_rate": 7.36260984510953e-05, "loss": 11.886717796325684, "step": 2662 }, { "epoch": 0.34533120446090626, "grad_norm": 0.8035231232643127, "learning_rate": 7.360809891895176e-05, "loss": 10.461797714233398, "step": 2663 }, { "epoch": 0.345460881969153, "grad_norm": 1.1368257999420166, "learning_rate": 7.359009544874932e-05, "loss": 15.517492294311523, "step": 2664 }, { "epoch": 0.34559055947739964, "grad_norm": 0.9420771598815918, "learning_rate": 7.357208804349118e-05, "loss": 14.039385795593262, "step": 2665 }, { "epoch": 0.3457202369856463, "grad_norm": 0.7876234650611877, "learning_rate": 7.35540767061811e-05, "loss": 11.880126953125, "step": 2666 }, { "epoch": 0.345849914493893, "grad_norm": 0.8360580801963806, "learning_rate": 7.353606143982359e-05, "loss": 14.157498359680176, "step": 2667 }, { "epoch": 0.3459795920021397, "grad_norm": 0.830109715461731, "learning_rate": 7.351804224742376e-05, "loss": 12.689550399780273, "step": 2668 }, { "epoch": 0.34610926951038634, "grad_norm": 0.7177560925483704, "learning_rate": 7.350001913198739e-05, "loss": 9.076005935668945, "step": 2669 }, { "epoch": 0.34623894701863306, "grad_norm": 0.6554062962532043, "learning_rate": 7.348199209652089e-05, "loss": 7.969069004058838, "step": 2670 }, { "epoch": 0.3463686245268797, "grad_norm": 0.5352786779403687, "learning_rate": 7.346396114403133e-05, "loss": 7.8482818603515625, "step": 2671 }, { "epoch": 0.3464983020351264, "grad_norm": 0.6821584105491638, "learning_rate": 7.34459262775265e-05, "loss": 12.808656692504883, "step": 2672 }, { "epoch": 0.3466279795433731, "grad_norm": 1.099166750907898, "learning_rate": 7.342788750001475e-05, "loss": 12.020392417907715, "step": 2673 }, { "epoch": 0.34675765705161976, "grad_norm": 0.8646422028541565, "learning_rate": 7.340984481450512e-05, "loss": 12.643850326538086, "step": 2674 }, { "epoch": 0.3468873345598664, "grad_norm": 0.6231749653816223, "learning_rate": 7.339179822400733e-05, "loss": 9.76323413848877, "step": 2675 }, { "epoch": 0.3470170120681131, "grad_norm": 0.7881715297698975, "learning_rate": 7.337374773153171e-05, "loss": 14.136372566223145, "step": 2676 }, { "epoch": 0.3471466895763598, "grad_norm": 0.7820488214492798, "learning_rate": 7.335569334008927e-05, "loss": 7.339773654937744, "step": 2677 }, { "epoch": 0.34727636708460646, "grad_norm": 0.6593238711357117, "learning_rate": 7.333763505269162e-05, "loss": 9.632522583007812, "step": 2678 }, { "epoch": 0.3474060445928531, "grad_norm": 0.6780252456665039, "learning_rate": 7.33195728723511e-05, "loss": 9.811905860900879, "step": 2679 }, { "epoch": 0.34753572210109984, "grad_norm": 0.7220401763916016, "learning_rate": 7.330150680208063e-05, "loss": 10.184457778930664, "step": 2680 }, { "epoch": 0.3476653996093465, "grad_norm": 0.6476408243179321, "learning_rate": 7.328343684489383e-05, "loss": 11.873295783996582, "step": 2681 }, { "epoch": 0.34779507711759317, "grad_norm": 0.6068124175071716, "learning_rate": 7.326536300380491e-05, "loss": 10.442267417907715, "step": 2682 }, { "epoch": 0.3479247546258399, "grad_norm": 0.9841580986976624, "learning_rate": 7.324728528182877e-05, "loss": 13.633161544799805, "step": 2683 }, { "epoch": 0.34805443213408654, "grad_norm": 0.856307327747345, "learning_rate": 7.322920368198096e-05, "loss": 11.589902877807617, "step": 2684 }, { "epoch": 0.3481841096423332, "grad_norm": 0.8960317373275757, "learning_rate": 7.321111820727768e-05, "loss": 12.46220874786377, "step": 2685 }, { "epoch": 0.3483137871505799, "grad_norm": 0.6826012134552002, "learning_rate": 7.319302886073573e-05, "loss": 10.889570236206055, "step": 2686 }, { "epoch": 0.3484434646588266, "grad_norm": 0.7251795530319214, "learning_rate": 7.31749356453726e-05, "loss": 15.125164985656738, "step": 2687 }, { "epoch": 0.34857314216707325, "grad_norm": 0.954037070274353, "learning_rate": 7.315683856420641e-05, "loss": 7.616235256195068, "step": 2688 }, { "epoch": 0.34870281967531996, "grad_norm": 0.6764266490936279, "learning_rate": 7.313873762025593e-05, "loss": 10.427736282348633, "step": 2689 }, { "epoch": 0.3488324971835666, "grad_norm": 0.743587076663971, "learning_rate": 7.312063281654059e-05, "loss": 12.191956520080566, "step": 2690 }, { "epoch": 0.3489621746918133, "grad_norm": 0.6383064985275269, "learning_rate": 7.310252415608042e-05, "loss": 10.871944427490234, "step": 2691 }, { "epoch": 0.34909185220005995, "grad_norm": 0.6415284276008606, "learning_rate": 7.308441164189613e-05, "loss": 8.822850227355957, "step": 2692 }, { "epoch": 0.34922152970830667, "grad_norm": 0.5392876863479614, "learning_rate": 7.306629527700907e-05, "loss": 8.906573295593262, "step": 2693 }, { "epoch": 0.34935120721655333, "grad_norm": 0.5678917169570923, "learning_rate": 7.304817506444121e-05, "loss": 7.535055637359619, "step": 2694 }, { "epoch": 0.3494808847248, "grad_norm": 1.0388572216033936, "learning_rate": 7.303005100721519e-05, "loss": 10.32780933380127, "step": 2695 }, { "epoch": 0.3496105622330467, "grad_norm": 0.7856326103210449, "learning_rate": 7.301192310835425e-05, "loss": 9.49203109741211, "step": 2696 }, { "epoch": 0.34974023974129337, "grad_norm": 0.6788496971130371, "learning_rate": 7.299379137088234e-05, "loss": 11.471840858459473, "step": 2697 }, { "epoch": 0.34986991724954003, "grad_norm": 0.8147908449172974, "learning_rate": 7.297565579782399e-05, "loss": 8.11412525177002, "step": 2698 }, { "epoch": 0.34999959475778675, "grad_norm": 0.9594548940658569, "learning_rate": 7.295751639220437e-05, "loss": 13.766480445861816, "step": 2699 }, { "epoch": 0.3501292722660334, "grad_norm": 0.7222285270690918, "learning_rate": 7.293937315704934e-05, "loss": 13.47679328918457, "step": 2700 }, { "epoch": 0.35025894977428007, "grad_norm": 1.0643240213394165, "learning_rate": 7.292122609538534e-05, "loss": 10.322484970092773, "step": 2701 }, { "epoch": 0.3503886272825268, "grad_norm": 0.6215446591377258, "learning_rate": 7.290307521023949e-05, "loss": 10.832233428955078, "step": 2702 }, { "epoch": 0.35051830479077345, "grad_norm": 0.608325719833374, "learning_rate": 7.288492050463953e-05, "loss": 10.1461820602417, "step": 2703 }, { "epoch": 0.3506479822990201, "grad_norm": 0.7859072089195251, "learning_rate": 7.286676198161384e-05, "loss": 11.641207695007324, "step": 2704 }, { "epoch": 0.35077765980726683, "grad_norm": 0.8119010329246521, "learning_rate": 7.284859964419144e-05, "loss": 9.607178688049316, "step": 2705 }, { "epoch": 0.3509073373155135, "grad_norm": 0.8026895523071289, "learning_rate": 7.283043349540197e-05, "loss": 9.215991973876953, "step": 2706 }, { "epoch": 0.35103701482376015, "grad_norm": 0.685160756111145, "learning_rate": 7.281226353827575e-05, "loss": 10.205370903015137, "step": 2707 }, { "epoch": 0.3511666923320068, "grad_norm": 0.6959421038627625, "learning_rate": 7.279408977584368e-05, "loss": 9.693917274475098, "step": 2708 }, { "epoch": 0.35129636984025353, "grad_norm": 0.6826078295707703, "learning_rate": 7.277591221113731e-05, "loss": 10.837173461914062, "step": 2709 }, { "epoch": 0.3514260473485002, "grad_norm": 0.6419442296028137, "learning_rate": 7.275773084718886e-05, "loss": 8.646568298339844, "step": 2710 }, { "epoch": 0.35155572485674685, "grad_norm": 0.8839897513389587, "learning_rate": 7.273954568703113e-05, "loss": 11.640585899353027, "step": 2711 }, { "epoch": 0.35168540236499357, "grad_norm": 1.165151834487915, "learning_rate": 7.272135673369759e-05, "loss": 11.347970008850098, "step": 2712 }, { "epoch": 0.35181507987324023, "grad_norm": 0.7488321661949158, "learning_rate": 7.270316399022236e-05, "loss": 13.064139366149902, "step": 2713 }, { "epoch": 0.3519447573814869, "grad_norm": 0.6473931074142456, "learning_rate": 7.268496745964013e-05, "loss": 12.18433666229248, "step": 2714 }, { "epoch": 0.3520744348897336, "grad_norm": 0.5547687411308289, "learning_rate": 7.266676714498626e-05, "loss": 8.541383743286133, "step": 2715 }, { "epoch": 0.3522041123979803, "grad_norm": 0.8568704128265381, "learning_rate": 7.264856304929676e-05, "loss": 12.602248191833496, "step": 2716 }, { "epoch": 0.35233378990622694, "grad_norm": 0.59107905626297, "learning_rate": 7.263035517560822e-05, "loss": 8.295477867126465, "step": 2717 }, { "epoch": 0.35246346741447365, "grad_norm": 0.5037662982940674, "learning_rate": 7.261214352695793e-05, "loss": 8.40173053741455, "step": 2718 }, { "epoch": 0.3525931449227203, "grad_norm": 0.8091995120048523, "learning_rate": 7.259392810638371e-05, "loss": 11.342225074768066, "step": 2719 }, { "epoch": 0.352722822430967, "grad_norm": 0.6558278203010559, "learning_rate": 7.257570891692412e-05, "loss": 8.568115234375, "step": 2720 }, { "epoch": 0.3528524999392137, "grad_norm": 0.782324492931366, "learning_rate": 7.25574859616183e-05, "loss": 10.001799583435059, "step": 2721 }, { "epoch": 0.35298217744746035, "grad_norm": 0.7408369183540344, "learning_rate": 7.253925924350595e-05, "loss": 11.790135383605957, "step": 2722 }, { "epoch": 0.353111854955707, "grad_norm": 0.5898226499557495, "learning_rate": 7.252102876562753e-05, "loss": 8.364840507507324, "step": 2723 }, { "epoch": 0.3532415324639537, "grad_norm": 0.7296309471130371, "learning_rate": 7.250279453102403e-05, "loss": 10.801774978637695, "step": 2724 }, { "epoch": 0.3533712099722004, "grad_norm": 0.9528898596763611, "learning_rate": 7.248455654273711e-05, "loss": 15.515445709228516, "step": 2725 }, { "epoch": 0.35350088748044706, "grad_norm": 0.7764421701431274, "learning_rate": 7.246631480380903e-05, "loss": 13.063753128051758, "step": 2726 }, { "epoch": 0.3536305649886937, "grad_norm": 0.6956939697265625, "learning_rate": 7.24480693172827e-05, "loss": 10.624245643615723, "step": 2727 }, { "epoch": 0.35376024249694044, "grad_norm": 0.9003658294677734, "learning_rate": 7.242982008620163e-05, "loss": 13.716265678405762, "step": 2728 }, { "epoch": 0.3538899200051871, "grad_norm": 0.7941508889198303, "learning_rate": 7.241156711360997e-05, "loss": 11.1708984375, "step": 2729 }, { "epoch": 0.35401959751343376, "grad_norm": 0.551146924495697, "learning_rate": 7.239331040255252e-05, "loss": 8.371156692504883, "step": 2730 }, { "epoch": 0.3541492750216805, "grad_norm": 0.7422481179237366, "learning_rate": 7.237504995607463e-05, "loss": 12.420886993408203, "step": 2731 }, { "epoch": 0.35427895252992714, "grad_norm": 0.601823627948761, "learning_rate": 7.235678577722235e-05, "loss": 6.767277717590332, "step": 2732 }, { "epoch": 0.3544086300381738, "grad_norm": 0.9725791811943054, "learning_rate": 7.233851786904231e-05, "loss": 11.523242950439453, "step": 2733 }, { "epoch": 0.3545383075464205, "grad_norm": 0.76395583152771, "learning_rate": 7.232024623458178e-05, "loss": 8.945981979370117, "step": 2734 }, { "epoch": 0.3546679850546672, "grad_norm": 0.6649267673492432, "learning_rate": 7.230197087688863e-05, "loss": 10.901212692260742, "step": 2735 }, { "epoch": 0.35479766256291384, "grad_norm": 0.8120803833007812, "learning_rate": 7.228369179901139e-05, "loss": 11.478930473327637, "step": 2736 }, { "epoch": 0.35492734007116056, "grad_norm": 0.6571901440620422, "learning_rate": 7.226540900399918e-05, "loss": 9.147964477539062, "step": 2737 }, { "epoch": 0.3550570175794072, "grad_norm": 0.7933105230331421, "learning_rate": 7.224712249490172e-05, "loss": 9.739238739013672, "step": 2738 }, { "epoch": 0.3551866950876539, "grad_norm": 0.5791335701942444, "learning_rate": 7.222883227476941e-05, "loss": 8.660918235778809, "step": 2739 }, { "epoch": 0.35531637259590054, "grad_norm": 0.6067638993263245, "learning_rate": 7.221053834665321e-05, "loss": 7.465194225311279, "step": 2740 }, { "epoch": 0.35544605010414726, "grad_norm": 0.6406699419021606, "learning_rate": 7.219224071360476e-05, "loss": 6.864714622497559, "step": 2741 }, { "epoch": 0.3555757276123939, "grad_norm": 0.9337261319160461, "learning_rate": 7.217393937867625e-05, "loss": 11.995185852050781, "step": 2742 }, { "epoch": 0.3557054051206406, "grad_norm": 0.940757691860199, "learning_rate": 7.215563434492051e-05, "loss": 15.91276741027832, "step": 2743 }, { "epoch": 0.3558350826288873, "grad_norm": 0.7237588763237, "learning_rate": 7.213732561539102e-05, "loss": 9.012596130371094, "step": 2744 }, { "epoch": 0.35596476013713396, "grad_norm": 0.6259617209434509, "learning_rate": 7.211901319314184e-05, "loss": 8.061433792114258, "step": 2745 }, { "epoch": 0.3560944376453806, "grad_norm": 0.8356926441192627, "learning_rate": 7.210069708122769e-05, "loss": 9.039200782775879, "step": 2746 }, { "epoch": 0.35622411515362734, "grad_norm": 0.8286046981811523, "learning_rate": 7.208237728270384e-05, "loss": 9.854391098022461, "step": 2747 }, { "epoch": 0.356353792661874, "grad_norm": 0.9346978068351746, "learning_rate": 7.206405380062622e-05, "loss": 12.42306900024414, "step": 2748 }, { "epoch": 0.35648347017012066, "grad_norm": 0.9848844408988953, "learning_rate": 7.204572663805136e-05, "loss": 14.662538528442383, "step": 2749 }, { "epoch": 0.3566131476783674, "grad_norm": 0.5152261257171631, "learning_rate": 7.202739579803642e-05, "loss": 9.761371612548828, "step": 2750 }, { "epoch": 0.35674282518661404, "grad_norm": 1.0455721616744995, "learning_rate": 7.200906128363913e-05, "loss": 16.453277587890625, "step": 2751 }, { "epoch": 0.3568725026948607, "grad_norm": 0.6232399940490723, "learning_rate": 7.19907230979179e-05, "loss": 9.299333572387695, "step": 2752 }, { "epoch": 0.3570021802031074, "grad_norm": 0.7533464431762695, "learning_rate": 7.19723812439317e-05, "loss": 8.424259185791016, "step": 2753 }, { "epoch": 0.3571318577113541, "grad_norm": 0.564342737197876, "learning_rate": 7.195403572474014e-05, "loss": 6.736146926879883, "step": 2754 }, { "epoch": 0.35726153521960075, "grad_norm": 0.7933052182197571, "learning_rate": 7.193568654340341e-05, "loss": 11.502392768859863, "step": 2755 }, { "epoch": 0.3573912127278474, "grad_norm": 0.6118322014808655, "learning_rate": 7.191733370298234e-05, "loss": 8.04394245147705, "step": 2756 }, { "epoch": 0.3575208902360941, "grad_norm": 0.5953173637390137, "learning_rate": 7.189897720653837e-05, "loss": 11.152259826660156, "step": 2757 }, { "epoch": 0.3576505677443408, "grad_norm": 0.7254968285560608, "learning_rate": 7.188061705713351e-05, "loss": 10.581278800964355, "step": 2758 }, { "epoch": 0.35778024525258745, "grad_norm": 0.81828773021698, "learning_rate": 7.186225325783048e-05, "loss": 10.63817310333252, "step": 2759 }, { "epoch": 0.35790992276083416, "grad_norm": 0.5399907231330872, "learning_rate": 7.184388581169244e-05, "loss": 8.9617338180542, "step": 2760 }, { "epoch": 0.3580396002690808, "grad_norm": 0.9137894511222839, "learning_rate": 7.182551472178334e-05, "loss": 14.142471313476562, "step": 2761 }, { "epoch": 0.3581692777773275, "grad_norm": 0.7438995838165283, "learning_rate": 7.180713999116761e-05, "loss": 12.188085556030273, "step": 2762 }, { "epoch": 0.3582989552855742, "grad_norm": 0.6238980293273926, "learning_rate": 7.178876162291035e-05, "loss": 10.375642776489258, "step": 2763 }, { "epoch": 0.35842863279382087, "grad_norm": 0.7615858316421509, "learning_rate": 7.177037962007726e-05, "loss": 11.853836059570312, "step": 2764 }, { "epoch": 0.35855831030206753, "grad_norm": 0.7483505606651306, "learning_rate": 7.175199398573462e-05, "loss": 11.616240501403809, "step": 2765 }, { "epoch": 0.35868798781031425, "grad_norm": 0.7572104930877686, "learning_rate": 7.173360472294933e-05, "loss": 8.900519371032715, "step": 2766 }, { "epoch": 0.3588176653185609, "grad_norm": 0.6754804849624634, "learning_rate": 7.171521183478891e-05, "loss": 11.048064231872559, "step": 2767 }, { "epoch": 0.35894734282680757, "grad_norm": 1.085053563117981, "learning_rate": 7.169681532432144e-05, "loss": 12.70222282409668, "step": 2768 }, { "epoch": 0.3590770203350543, "grad_norm": 0.5756562352180481, "learning_rate": 7.167841519461567e-05, "loss": 7.460285663604736, "step": 2769 }, { "epoch": 0.35920669784330095, "grad_norm": 0.7360395193099976, "learning_rate": 7.166001144874091e-05, "loss": 11.7794771194458, "step": 2770 }, { "epoch": 0.3593363753515476, "grad_norm": 0.7307716012001038, "learning_rate": 7.164160408976706e-05, "loss": 11.309218406677246, "step": 2771 }, { "epoch": 0.35946605285979427, "grad_norm": 0.775018036365509, "learning_rate": 7.162319312076469e-05, "loss": 8.203238487243652, "step": 2772 }, { "epoch": 0.359595730368041, "grad_norm": 0.6763973832130432, "learning_rate": 7.160477854480488e-05, "loss": 10.984196662902832, "step": 2773 }, { "epoch": 0.35972540787628765, "grad_norm": 0.5933905839920044, "learning_rate": 7.158636036495939e-05, "loss": 7.741728782653809, "step": 2774 }, { "epoch": 0.3598550853845343, "grad_norm": 0.79578697681427, "learning_rate": 7.156793858430056e-05, "loss": 8.889116287231445, "step": 2775 }, { "epoch": 0.35998476289278103, "grad_norm": 0.7705802917480469, "learning_rate": 7.154951320590128e-05, "loss": 12.269720077514648, "step": 2776 }, { "epoch": 0.3601144404010277, "grad_norm": 0.6935040354728699, "learning_rate": 7.153108423283509e-05, "loss": 7.997677326202393, "step": 2777 }, { "epoch": 0.36024411790927435, "grad_norm": 0.6913668513298035, "learning_rate": 7.151265166817616e-05, "loss": 11.020100593566895, "step": 2778 }, { "epoch": 0.36037379541752107, "grad_norm": 0.8322657346725464, "learning_rate": 7.149421551499917e-05, "loss": 9.404045104980469, "step": 2779 }, { "epoch": 0.36050347292576773, "grad_norm": 0.6939737200737, "learning_rate": 7.14757757763795e-05, "loss": 10.400835037231445, "step": 2780 }, { "epoch": 0.3606331504340144, "grad_norm": 1.1449872255325317, "learning_rate": 7.145733245539303e-05, "loss": 13.500740051269531, "step": 2781 }, { "epoch": 0.3607628279422611, "grad_norm": 0.6729664206504822, "learning_rate": 7.14388855551163e-05, "loss": 9.448118209838867, "step": 2782 }, { "epoch": 0.3608925054505078, "grad_norm": 0.4972444772720337, "learning_rate": 7.142043507862644e-05, "loss": 5.866791725158691, "step": 2783 }, { "epoch": 0.36102218295875443, "grad_norm": 0.7759475708007812, "learning_rate": 7.140198102900115e-05, "loss": 8.777994155883789, "step": 2784 }, { "epoch": 0.36115186046700115, "grad_norm": 0.44941383600234985, "learning_rate": 7.138352340931877e-05, "loss": 5.49329137802124, "step": 2785 }, { "epoch": 0.3612815379752478, "grad_norm": 0.865705668926239, "learning_rate": 7.136506222265818e-05, "loss": 12.235732078552246, "step": 2786 }, { "epoch": 0.3614112154834945, "grad_norm": 0.9896803498268127, "learning_rate": 7.134659747209891e-05, "loss": 13.671507835388184, "step": 2787 }, { "epoch": 0.36154089299174114, "grad_norm": 0.7750702500343323, "learning_rate": 7.132812916072105e-05, "loss": 9.431584358215332, "step": 2788 }, { "epoch": 0.36167057049998785, "grad_norm": 0.6821528077125549, "learning_rate": 7.130965729160528e-05, "loss": 10.379636764526367, "step": 2789 }, { "epoch": 0.3618002480082345, "grad_norm": 1.1755735874176025, "learning_rate": 7.12911818678329e-05, "loss": 15.30954360961914, "step": 2790 }, { "epoch": 0.3619299255164812, "grad_norm": 0.6720901131629944, "learning_rate": 7.127270289248577e-05, "loss": 12.515280723571777, "step": 2791 }, { "epoch": 0.3620596030247279, "grad_norm": 0.72552490234375, "learning_rate": 7.125422036864639e-05, "loss": 11.48412799835205, "step": 2792 }, { "epoch": 0.36218928053297456, "grad_norm": 1.2129337787628174, "learning_rate": 7.123573429939782e-05, "loss": 12.461658477783203, "step": 2793 }, { "epoch": 0.3623189580412212, "grad_norm": 0.5356932878494263, "learning_rate": 7.121724468782369e-05, "loss": 6.886512756347656, "step": 2794 }, { "epoch": 0.36244863554946793, "grad_norm": 0.8955503702163696, "learning_rate": 7.119875153700826e-05, "loss": 11.8673734664917, "step": 2795 }, { "epoch": 0.3625783130577146, "grad_norm": 0.7768462896347046, "learning_rate": 7.118025485003638e-05, "loss": 9.507987022399902, "step": 2796 }, { "epoch": 0.36270799056596126, "grad_norm": 0.7524216175079346, "learning_rate": 7.116175462999345e-05, "loss": 10.37950325012207, "step": 2797 }, { "epoch": 0.362837668074208, "grad_norm": 0.8217728734016418, "learning_rate": 7.114325087996552e-05, "loss": 12.67301082611084, "step": 2798 }, { "epoch": 0.36296734558245464, "grad_norm": 0.9311463832855225, "learning_rate": 7.112474360303915e-05, "loss": 10.469128608703613, "step": 2799 }, { "epoch": 0.3630970230907013, "grad_norm": 0.7041566371917725, "learning_rate": 7.110623280230156e-05, "loss": 12.137079238891602, "step": 2800 }, { "epoch": 0.363226700598948, "grad_norm": 0.8672946691513062, "learning_rate": 7.10877184808405e-05, "loss": 11.228148460388184, "step": 2801 }, { "epoch": 0.3633563781071947, "grad_norm": 0.6171591281890869, "learning_rate": 7.10692006417444e-05, "loss": 10.823537826538086, "step": 2802 }, { "epoch": 0.36348605561544134, "grad_norm": 0.7491335868835449, "learning_rate": 7.105067928810216e-05, "loss": 10.754740715026855, "step": 2803 }, { "epoch": 0.363615733123688, "grad_norm": 0.5720590949058533, "learning_rate": 7.103215442300334e-05, "loss": 8.53775691986084, "step": 2804 }, { "epoch": 0.3637454106319347, "grad_norm": 0.573567807674408, "learning_rate": 7.101362604953806e-05, "loss": 7.830066680908203, "step": 2805 }, { "epoch": 0.3638750881401814, "grad_norm": 0.8256746530532837, "learning_rate": 7.099509417079704e-05, "loss": 12.399313926696777, "step": 2806 }, { "epoch": 0.36400476564842804, "grad_norm": 0.8945457935333252, "learning_rate": 7.097655878987157e-05, "loss": 14.607718467712402, "step": 2807 }, { "epoch": 0.36413444315667476, "grad_norm": 0.6605450510978699, "learning_rate": 7.095801990985353e-05, "loss": 11.6385498046875, "step": 2808 }, { "epoch": 0.3642641206649214, "grad_norm": 0.6373308897018433, "learning_rate": 7.09394775338354e-05, "loss": 11.557093620300293, "step": 2809 }, { "epoch": 0.3643937981731681, "grad_norm": 0.8138818740844727, "learning_rate": 7.09209316649102e-05, "loss": 12.071134567260742, "step": 2810 }, { "epoch": 0.3645234756814148, "grad_norm": 0.6211045384407043, "learning_rate": 7.090238230617158e-05, "loss": 10.542415618896484, "step": 2811 }, { "epoch": 0.36465315318966146, "grad_norm": 0.7246427536010742, "learning_rate": 7.088382946071373e-05, "loss": 11.54391860961914, "step": 2812 }, { "epoch": 0.3647828306979081, "grad_norm": 0.6083588600158691, "learning_rate": 7.086527313163148e-05, "loss": 9.706727981567383, "step": 2813 }, { "epoch": 0.36491250820615484, "grad_norm": 0.7860652208328247, "learning_rate": 7.084671332202018e-05, "loss": 11.114635467529297, "step": 2814 }, { "epoch": 0.3650421857144015, "grad_norm": 0.8109635710716248, "learning_rate": 7.082815003497578e-05, "loss": 8.467988967895508, "step": 2815 }, { "epoch": 0.36517186322264816, "grad_norm": 0.6971026062965393, "learning_rate": 7.080958327359483e-05, "loss": 9.434592247009277, "step": 2816 }, { "epoch": 0.3653015407308949, "grad_norm": 0.6672459244728088, "learning_rate": 7.079101304097446e-05, "loss": 10.813413619995117, "step": 2817 }, { "epoch": 0.36543121823914154, "grad_norm": 0.6626402735710144, "learning_rate": 7.077243934021233e-05, "loss": 9.230188369750977, "step": 2818 }, { "epoch": 0.3655608957473882, "grad_norm": 0.5751288533210754, "learning_rate": 7.075386217440673e-05, "loss": 9.337651252746582, "step": 2819 }, { "epoch": 0.36569057325563487, "grad_norm": 0.7434353232383728, "learning_rate": 7.073528154665649e-05, "loss": 9.569348335266113, "step": 2820 }, { "epoch": 0.3658202507638816, "grad_norm": 0.7532798051834106, "learning_rate": 7.071669746006109e-05, "loss": 12.175684928894043, "step": 2821 }, { "epoch": 0.36594992827212824, "grad_norm": 0.798704981803894, "learning_rate": 7.069810991772048e-05, "loss": 12.9767484664917, "step": 2822 }, { "epoch": 0.3660796057803749, "grad_norm": 0.8556485772132874, "learning_rate": 7.067951892273527e-05, "loss": 12.952933311462402, "step": 2823 }, { "epoch": 0.3662092832886216, "grad_norm": 0.8087399005889893, "learning_rate": 7.06609244782066e-05, "loss": 10.852192878723145, "step": 2824 }, { "epoch": 0.3663389607968683, "grad_norm": 0.7061428427696228, "learning_rate": 7.064232658723622e-05, "loss": 11.212859153747559, "step": 2825 }, { "epoch": 0.36646863830511495, "grad_norm": 0.8347482681274414, "learning_rate": 7.062372525292644e-05, "loss": 11.971447944641113, "step": 2826 }, { "epoch": 0.36659831581336166, "grad_norm": 0.8160087466239929, "learning_rate": 7.060512047838013e-05, "loss": 14.090941429138184, "step": 2827 }, { "epoch": 0.3667279933216083, "grad_norm": 0.7371589541435242, "learning_rate": 7.058651226670074e-05, "loss": 10.626033782958984, "step": 2828 }, { "epoch": 0.366857670829855, "grad_norm": 0.7492855191230774, "learning_rate": 7.056790062099231e-05, "loss": 11.436408996582031, "step": 2829 }, { "epoch": 0.3669873483381017, "grad_norm": 0.5964459180831909, "learning_rate": 7.054928554435944e-05, "loss": 8.363863945007324, "step": 2830 }, { "epoch": 0.36711702584634837, "grad_norm": 0.9207035899162292, "learning_rate": 7.053066703990732e-05, "loss": 10.66772747039795, "step": 2831 }, { "epoch": 0.367246703354595, "grad_norm": 0.8776440024375916, "learning_rate": 7.051204511074167e-05, "loss": 10.53244686126709, "step": 2832 }, { "epoch": 0.36737638086284174, "grad_norm": 0.6990785002708435, "learning_rate": 7.049341975996883e-05, "loss": 10.412052154541016, "step": 2833 }, { "epoch": 0.3675060583710884, "grad_norm": 0.7648468613624573, "learning_rate": 7.047479099069566e-05, "loss": 14.066627502441406, "step": 2834 }, { "epoch": 0.36763573587933507, "grad_norm": 0.8492885828018188, "learning_rate": 7.045615880602965e-05, "loss": 14.290696144104004, "step": 2835 }, { "epoch": 0.36776541338758173, "grad_norm": 0.6928899884223938, "learning_rate": 7.043752320907882e-05, "loss": 8.136892318725586, "step": 2836 }, { "epoch": 0.36789509089582845, "grad_norm": 0.8575878143310547, "learning_rate": 7.041888420295176e-05, "loss": 15.218218803405762, "step": 2837 }, { "epoch": 0.3680247684040751, "grad_norm": 0.8503538966178894, "learning_rate": 7.040024179075763e-05, "loss": 10.730430603027344, "step": 2838 }, { "epoch": 0.36815444591232177, "grad_norm": 0.5964133739471436, "learning_rate": 7.038159597560616e-05, "loss": 10.525093078613281, "step": 2839 }, { "epoch": 0.3682841234205685, "grad_norm": 0.8415565490722656, "learning_rate": 7.036294676060768e-05, "loss": 12.883569717407227, "step": 2840 }, { "epoch": 0.36841380092881515, "grad_norm": 0.700675368309021, "learning_rate": 7.034429414887304e-05, "loss": 11.735858917236328, "step": 2841 }, { "epoch": 0.3685434784370618, "grad_norm": 0.7910969257354736, "learning_rate": 7.03256381435137e-05, "loss": 9.618213653564453, "step": 2842 }, { "epoch": 0.36867315594530853, "grad_norm": 0.6801586747169495, "learning_rate": 7.030697874764162e-05, "loss": 10.818593978881836, "step": 2843 }, { "epoch": 0.3688028334535552, "grad_norm": 0.6131163239479065, "learning_rate": 7.02883159643694e-05, "loss": 12.606987953186035, "step": 2844 }, { "epoch": 0.36893251096180185, "grad_norm": 0.6100372672080994, "learning_rate": 7.026964979681015e-05, "loss": 11.635223388671875, "step": 2845 }, { "epoch": 0.36906218847004857, "grad_norm": 0.7068193554878235, "learning_rate": 7.025098024807759e-05, "loss": 8.487587928771973, "step": 2846 }, { "epoch": 0.36919186597829523, "grad_norm": 0.8396742343902588, "learning_rate": 7.023230732128596e-05, "loss": 11.01218032836914, "step": 2847 }, { "epoch": 0.3693215434865419, "grad_norm": 0.5972118377685547, "learning_rate": 7.02136310195501e-05, "loss": 8.811786651611328, "step": 2848 }, { "epoch": 0.3694512209947886, "grad_norm": 0.9647285342216492, "learning_rate": 7.019495134598539e-05, "loss": 14.806558609008789, "step": 2849 }, { "epoch": 0.36958089850303527, "grad_norm": 0.6939088702201843, "learning_rate": 7.01762683037078e-05, "loss": 11.545431137084961, "step": 2850 }, { "epoch": 0.36971057601128193, "grad_norm": 0.6453000903129578, "learning_rate": 7.015758189583382e-05, "loss": 8.555660247802734, "step": 2851 }, { "epoch": 0.3698402535195286, "grad_norm": 0.6726697683334351, "learning_rate": 7.01388921254805e-05, "loss": 8.915407180786133, "step": 2852 }, { "epoch": 0.3699699310277753, "grad_norm": 0.6276054978370667, "learning_rate": 7.012019899576552e-05, "loss": 10.520530700683594, "step": 2853 }, { "epoch": 0.370099608536022, "grad_norm": 0.6783474683761597, "learning_rate": 7.010150250980705e-05, "loss": 10.764137268066406, "step": 2854 }, { "epoch": 0.37022928604426864, "grad_norm": 0.7424052953720093, "learning_rate": 7.008280267072386e-05, "loss": 11.381355285644531, "step": 2855 }, { "epoch": 0.37035896355251535, "grad_norm": 0.7526618838310242, "learning_rate": 7.006409948163524e-05, "loss": 9.0833101272583, "step": 2856 }, { "epoch": 0.370488641060762, "grad_norm": 0.7055870890617371, "learning_rate": 7.004539294566109e-05, "loss": 10.266679763793945, "step": 2857 }, { "epoch": 0.3706183185690087, "grad_norm": 0.7300361394882202, "learning_rate": 7.002668306592179e-05, "loss": 13.540613174438477, "step": 2858 }, { "epoch": 0.3707479960772554, "grad_norm": 0.9538905620574951, "learning_rate": 7.00079698455384e-05, "loss": 14.017985343933105, "step": 2859 }, { "epoch": 0.37087767358550205, "grad_norm": 0.6895122528076172, "learning_rate": 6.998925328763241e-05, "loss": 12.68897819519043, "step": 2860 }, { "epoch": 0.3710073510937487, "grad_norm": 0.8701851963996887, "learning_rate": 6.997053339532598e-05, "loss": 9.364479064941406, "step": 2861 }, { "epoch": 0.37113702860199543, "grad_norm": 1.0410500764846802, "learning_rate": 6.99518101717417e-05, "loss": 13.425204277038574, "step": 2862 }, { "epoch": 0.3712667061102421, "grad_norm": 0.748382568359375, "learning_rate": 6.993308362000283e-05, "loss": 12.297069549560547, "step": 2863 }, { "epoch": 0.37139638361848876, "grad_norm": 0.7646349668502808, "learning_rate": 6.991435374323313e-05, "loss": 14.675407409667969, "step": 2864 }, { "epoch": 0.3715260611267355, "grad_norm": 0.6251187324523926, "learning_rate": 6.989562054455692e-05, "loss": 12.505699157714844, "step": 2865 }, { "epoch": 0.37165573863498214, "grad_norm": 0.8692383766174316, "learning_rate": 6.987688402709908e-05, "loss": 11.084147453308105, "step": 2866 }, { "epoch": 0.3717854161432288, "grad_norm": 0.8283092975616455, "learning_rate": 6.985814419398503e-05, "loss": 12.875550270080566, "step": 2867 }, { "epoch": 0.3719150936514755, "grad_norm": 0.6229200959205627, "learning_rate": 6.983940104834078e-05, "loss": 10.170984268188477, "step": 2868 }, { "epoch": 0.3720447711597222, "grad_norm": 0.9963784217834473, "learning_rate": 6.982065459329285e-05, "loss": 14.070496559143066, "step": 2869 }, { "epoch": 0.37217444866796884, "grad_norm": 1.0524680614471436, "learning_rate": 6.980190483196834e-05, "loss": 14.815540313720703, "step": 2870 }, { "epoch": 0.3723041261762155, "grad_norm": 0.8140510320663452, "learning_rate": 6.978315176749489e-05, "loss": 11.5103120803833, "step": 2871 }, { "epoch": 0.3724338036844622, "grad_norm": 0.9395405650138855, "learning_rate": 6.976439540300066e-05, "loss": 11.334636688232422, "step": 2872 }, { "epoch": 0.3725634811927089, "grad_norm": 0.6763862371444702, "learning_rate": 6.974563574161444e-05, "loss": 9.700637817382812, "step": 2873 }, { "epoch": 0.37269315870095554, "grad_norm": 0.8058580160140991, "learning_rate": 6.97268727864655e-05, "loss": 8.693963050842285, "step": 2874 }, { "epoch": 0.37282283620920226, "grad_norm": 0.8607035279273987, "learning_rate": 6.97081065406837e-05, "loss": 9.664449691772461, "step": 2875 }, { "epoch": 0.3729525137174489, "grad_norm": 1.0389434099197388, "learning_rate": 6.968933700739941e-05, "loss": 10.2711763381958, "step": 2876 }, { "epoch": 0.3730821912256956, "grad_norm": 0.7639604806900024, "learning_rate": 6.967056418974356e-05, "loss": 12.526444435119629, "step": 2877 }, { "epoch": 0.3732118687339423, "grad_norm": 0.6286901831626892, "learning_rate": 6.965178809084767e-05, "loss": 8.810728073120117, "step": 2878 }, { "epoch": 0.37334154624218896, "grad_norm": 0.6515505313873291, "learning_rate": 6.963300871384375e-05, "loss": 12.38874626159668, "step": 2879 }, { "epoch": 0.3734712237504356, "grad_norm": 0.7313756942749023, "learning_rate": 6.961422606186438e-05, "loss": 10.513954162597656, "step": 2880 }, { "epoch": 0.37360090125868234, "grad_norm": 0.756149172782898, "learning_rate": 6.959544013804271e-05, "loss": 12.666330337524414, "step": 2881 }, { "epoch": 0.373730578766929, "grad_norm": 0.6217619180679321, "learning_rate": 6.95766509455124e-05, "loss": 10.832328796386719, "step": 2882 }, { "epoch": 0.37386025627517566, "grad_norm": 0.6675135493278503, "learning_rate": 6.955785848740765e-05, "loss": 8.568426132202148, "step": 2883 }, { "epoch": 0.3739899337834224, "grad_norm": 0.815807580947876, "learning_rate": 6.953906276686327e-05, "loss": 11.59045696258545, "step": 2884 }, { "epoch": 0.37411961129166904, "grad_norm": 0.8394123911857605, "learning_rate": 6.952026378701453e-05, "loss": 13.67747688293457, "step": 2885 }, { "epoch": 0.3742492887999157, "grad_norm": 0.704079270362854, "learning_rate": 6.950146155099729e-05, "loss": 13.578773498535156, "step": 2886 }, { "epoch": 0.37437896630816236, "grad_norm": 0.8418577909469604, "learning_rate": 6.948265606194794e-05, "loss": 13.056282043457031, "step": 2887 }, { "epoch": 0.3745086438164091, "grad_norm": 0.696191668510437, "learning_rate": 6.946384732300344e-05, "loss": 10.60184097290039, "step": 2888 }, { "epoch": 0.37463832132465574, "grad_norm": 0.6809961795806885, "learning_rate": 6.944503533730125e-05, "loss": 11.378161430358887, "step": 2889 }, { "epoch": 0.3747679988329024, "grad_norm": 0.8096187114715576, "learning_rate": 6.942622010797938e-05, "loss": 8.53195858001709, "step": 2890 }, { "epoch": 0.3748976763411491, "grad_norm": 0.812961220741272, "learning_rate": 6.94074016381764e-05, "loss": 11.855297088623047, "step": 2891 }, { "epoch": 0.3750273538493958, "grad_norm": 0.6546033620834351, "learning_rate": 6.938857993103144e-05, "loss": 9.649539947509766, "step": 2892 }, { "epoch": 0.37515703135764245, "grad_norm": 1.0230047702789307, "learning_rate": 6.936975498968411e-05, "loss": 12.927359580993652, "step": 2893 }, { "epoch": 0.37528670886588916, "grad_norm": 0.7337154746055603, "learning_rate": 6.93509268172746e-05, "loss": 8.230866432189941, "step": 2894 }, { "epoch": 0.3754163863741358, "grad_norm": 0.7039088010787964, "learning_rate": 6.933209541694363e-05, "loss": 11.598104476928711, "step": 2895 }, { "epoch": 0.3755460638823825, "grad_norm": 0.7834462523460388, "learning_rate": 6.931326079183247e-05, "loss": 10.303095817565918, "step": 2896 }, { "epoch": 0.3756757413906292, "grad_norm": 0.6288527846336365, "learning_rate": 6.92944229450829e-05, "loss": 11.588351249694824, "step": 2897 }, { "epoch": 0.37580541889887586, "grad_norm": 0.5912624001502991, "learning_rate": 6.927558187983729e-05, "loss": 11.599726676940918, "step": 2898 }, { "epoch": 0.3759350964071225, "grad_norm": 0.7846640348434448, "learning_rate": 6.925673759923846e-05, "loss": 9.774703979492188, "step": 2899 }, { "epoch": 0.37606477391536924, "grad_norm": 0.6257275342941284, "learning_rate": 6.923789010642985e-05, "loss": 11.067400932312012, "step": 2900 }, { "epoch": 0.3761944514236159, "grad_norm": 0.639068067073822, "learning_rate": 6.921903940455541e-05, "loss": 10.198626518249512, "step": 2901 }, { "epoch": 0.37632412893186257, "grad_norm": 0.9908628463745117, "learning_rate": 6.92001854967596e-05, "loss": 13.125869750976562, "step": 2902 }, { "epoch": 0.37645380644010923, "grad_norm": 0.6996057033538818, "learning_rate": 6.918132838618744e-05, "loss": 8.903975486755371, "step": 2903 }, { "epoch": 0.37658348394835595, "grad_norm": 0.7288227081298828, "learning_rate": 6.916246807598448e-05, "loss": 9.956119537353516, "step": 2904 }, { "epoch": 0.3767131614566026, "grad_norm": 0.9300910234451294, "learning_rate": 6.91436045692968e-05, "loss": 13.469067573547363, "step": 2905 }, { "epoch": 0.37684283896484927, "grad_norm": 0.8729309439659119, "learning_rate": 6.912473786927103e-05, "loss": 13.021650314331055, "step": 2906 }, { "epoch": 0.376972516473096, "grad_norm": 0.6854978799819946, "learning_rate": 6.91058679790543e-05, "loss": 7.82349157333374, "step": 2907 }, { "epoch": 0.37710219398134265, "grad_norm": 0.8211666941642761, "learning_rate": 6.908699490179427e-05, "loss": 14.291763305664062, "step": 2908 }, { "epoch": 0.3772318714895893, "grad_norm": 0.7196105122566223, "learning_rate": 6.906811864063921e-05, "loss": 10.968544006347656, "step": 2909 }, { "epoch": 0.377361548997836, "grad_norm": 0.7888873815536499, "learning_rate": 6.90492391987378e-05, "loss": 11.548763275146484, "step": 2910 }, { "epoch": 0.3774912265060827, "grad_norm": 0.6925691366195679, "learning_rate": 6.903035657923936e-05, "loss": 13.633651733398438, "step": 2911 }, { "epoch": 0.37762090401432935, "grad_norm": 0.803904116153717, "learning_rate": 6.901147078529366e-05, "loss": 8.27139663696289, "step": 2912 }, { "epoch": 0.37775058152257607, "grad_norm": 0.6925514936447144, "learning_rate": 6.899258182005106e-05, "loss": 9.294119834899902, "step": 2913 }, { "epoch": 0.37788025903082273, "grad_norm": 0.6525021195411682, "learning_rate": 6.89736896866624e-05, "loss": 8.686664581298828, "step": 2914 }, { "epoch": 0.3780099365390694, "grad_norm": 0.7644597887992859, "learning_rate": 6.895479438827908e-05, "loss": 11.194620132446289, "step": 2915 }, { "epoch": 0.3781396140473161, "grad_norm": 0.5604313015937805, "learning_rate": 6.893589592805301e-05, "loss": 9.950658798217773, "step": 2916 }, { "epoch": 0.37826929155556277, "grad_norm": 0.9214765429496765, "learning_rate": 6.891699430913666e-05, "loss": 12.954383850097656, "step": 2917 }, { "epoch": 0.37839896906380943, "grad_norm": 0.8230619430541992, "learning_rate": 6.889808953468298e-05, "loss": 11.269064903259277, "step": 2918 }, { "epoch": 0.3785286465720561, "grad_norm": 0.6836865544319153, "learning_rate": 6.887918160784548e-05, "loss": 10.42253589630127, "step": 2919 }, { "epoch": 0.3786583240803028, "grad_norm": 0.755220353603363, "learning_rate": 6.886027053177817e-05, "loss": 12.087041854858398, "step": 2920 }, { "epoch": 0.37878800158854947, "grad_norm": 0.758829653263092, "learning_rate": 6.884135630963562e-05, "loss": 10.828533172607422, "step": 2921 }, { "epoch": 0.37891767909679613, "grad_norm": 0.7651969194412231, "learning_rate": 6.882243894457288e-05, "loss": 10.26063346862793, "step": 2922 }, { "epoch": 0.37904735660504285, "grad_norm": 0.5808061361312866, "learning_rate": 6.880351843974557e-05, "loss": 8.553289413452148, "step": 2923 }, { "epoch": 0.3791770341132895, "grad_norm": 0.5609932541847229, "learning_rate": 6.878459479830982e-05, "loss": 7.198626518249512, "step": 2924 }, { "epoch": 0.3793067116215362, "grad_norm": 0.6880869269371033, "learning_rate": 6.876566802342226e-05, "loss": 10.590700149536133, "step": 2925 }, { "epoch": 0.3794363891297829, "grad_norm": 0.6473469734191895, "learning_rate": 6.874673811824005e-05, "loss": 9.520959854125977, "step": 2926 }, { "epoch": 0.37956606663802955, "grad_norm": 0.7051544785499573, "learning_rate": 6.87278050859209e-05, "loss": 11.013882637023926, "step": 2927 }, { "epoch": 0.3796957441462762, "grad_norm": 0.6638771891593933, "learning_rate": 6.8708868929623e-05, "loss": 9.679956436157227, "step": 2928 }, { "epoch": 0.37982542165452293, "grad_norm": 0.6656695604324341, "learning_rate": 6.868992965250513e-05, "loss": 10.689361572265625, "step": 2929 }, { "epoch": 0.3799550991627696, "grad_norm": 0.908920168876648, "learning_rate": 6.867098725772649e-05, "loss": 9.558314323425293, "step": 2930 }, { "epoch": 0.38008477667101626, "grad_norm": 0.6333568096160889, "learning_rate": 6.865204174844688e-05, "loss": 9.164234161376953, "step": 2931 }, { "epoch": 0.380214454179263, "grad_norm": 0.7125003337860107, "learning_rate": 6.86330931278266e-05, "loss": 11.943742752075195, "step": 2932 }, { "epoch": 0.38034413168750963, "grad_norm": 0.658749520778656, "learning_rate": 6.861414139902644e-05, "loss": 16.334426879882812, "step": 2933 }, { "epoch": 0.3804738091957563, "grad_norm": 0.6597110033035278, "learning_rate": 6.859518656520776e-05, "loss": 8.141891479492188, "step": 2934 }, { "epoch": 0.38060348670400296, "grad_norm": 0.7045281529426575, "learning_rate": 6.85762286295324e-05, "loss": 11.180192947387695, "step": 2935 }, { "epoch": 0.3807331642122497, "grad_norm": 0.8449962735176086, "learning_rate": 6.85572675951627e-05, "loss": 9.180602073669434, "step": 2936 }, { "epoch": 0.38086284172049634, "grad_norm": 0.642639696598053, "learning_rate": 6.853830346526159e-05, "loss": 9.34676456451416, "step": 2937 }, { "epoch": 0.380992519228743, "grad_norm": 1.1067675352096558, "learning_rate": 6.851933624299243e-05, "loss": 13.586088180541992, "step": 2938 }, { "epoch": 0.3811221967369897, "grad_norm": 1.1354129314422607, "learning_rate": 6.850036593151916e-05, "loss": 17.374038696289062, "step": 2939 }, { "epoch": 0.3812518742452364, "grad_norm": 0.849974513053894, "learning_rate": 6.84813925340062e-05, "loss": 10.405153274536133, "step": 2940 }, { "epoch": 0.38138155175348304, "grad_norm": 0.7898096442222595, "learning_rate": 6.84624160536185e-05, "loss": 11.121679306030273, "step": 2941 }, { "epoch": 0.38151122926172976, "grad_norm": 0.7138206958770752, "learning_rate": 6.844343649352151e-05, "loss": 9.337303161621094, "step": 2942 }, { "epoch": 0.3816409067699764, "grad_norm": 0.7519306540489197, "learning_rate": 6.842445385688122e-05, "loss": 11.917549133300781, "step": 2943 }, { "epoch": 0.3817705842782231, "grad_norm": 0.7815276384353638, "learning_rate": 6.840546814686411e-05, "loss": 10.801671981811523, "step": 2944 }, { "epoch": 0.3819002617864698, "grad_norm": 0.6345651149749756, "learning_rate": 6.838647936663718e-05, "loss": 10.377814292907715, "step": 2945 }, { "epoch": 0.38202993929471646, "grad_norm": 0.797238290309906, "learning_rate": 6.836748751936794e-05, "loss": 13.85433578491211, "step": 2946 }, { "epoch": 0.3821596168029631, "grad_norm": 0.650750458240509, "learning_rate": 6.834849260822441e-05, "loss": 7.9126105308532715, "step": 2947 }, { "epoch": 0.38228929431120984, "grad_norm": 0.6736665368080139, "learning_rate": 6.832949463637513e-05, "loss": 8.154890060424805, "step": 2948 }, { "epoch": 0.3824189718194565, "grad_norm": 0.7035326361656189, "learning_rate": 6.831049360698916e-05, "loss": 9.412213325500488, "step": 2949 }, { "epoch": 0.38254864932770316, "grad_norm": 0.7061949968338013, "learning_rate": 6.829148952323603e-05, "loss": 9.515575408935547, "step": 2950 }, { "epoch": 0.3826783268359498, "grad_norm": 0.6414670944213867, "learning_rate": 6.827248238828581e-05, "loss": 8.895999908447266, "step": 2951 }, { "epoch": 0.38280800434419654, "grad_norm": 0.6748709082603455, "learning_rate": 6.825347220530907e-05, "loss": 10.754291534423828, "step": 2952 }, { "epoch": 0.3829376818524432, "grad_norm": 0.7192788124084473, "learning_rate": 6.823445897747693e-05, "loss": 9.463212966918945, "step": 2953 }, { "epoch": 0.38306735936068986, "grad_norm": 0.7704136967658997, "learning_rate": 6.82154427079609e-05, "loss": 9.088220596313477, "step": 2954 }, { "epoch": 0.3831970368689366, "grad_norm": 0.6819847822189331, "learning_rate": 6.819642339993318e-05, "loss": 9.780767440795898, "step": 2955 }, { "epoch": 0.38332671437718324, "grad_norm": 0.5618830323219299, "learning_rate": 6.817740105656629e-05, "loss": 8.048895835876465, "step": 2956 }, { "epoch": 0.3834563918854299, "grad_norm": 0.7083235383033752, "learning_rate": 6.815837568103338e-05, "loss": 9.105749130249023, "step": 2957 }, { "epoch": 0.3835860693936766, "grad_norm": 0.631717324256897, "learning_rate": 6.813934727650807e-05, "loss": 8.168251991271973, "step": 2958 }, { "epoch": 0.3837157469019233, "grad_norm": 0.8004878759384155, "learning_rate": 6.812031584616447e-05, "loss": 7.324965476989746, "step": 2959 }, { "epoch": 0.38384542441016994, "grad_norm": 1.1171766519546509, "learning_rate": 6.81012813931772e-05, "loss": 9.02457332611084, "step": 2960 }, { "epoch": 0.38397510191841666, "grad_norm": 0.615482747554779, "learning_rate": 6.80822439207214e-05, "loss": 6.600667953491211, "step": 2961 }, { "epoch": 0.3841047794266633, "grad_norm": 0.5235456824302673, "learning_rate": 6.80632034319727e-05, "loss": 7.159496307373047, "step": 2962 }, { "epoch": 0.38423445693491, "grad_norm": 0.6006951928138733, "learning_rate": 6.804415993010724e-05, "loss": 8.921723365783691, "step": 2963 }, { "epoch": 0.3843641344431567, "grad_norm": 0.7429704070091248, "learning_rate": 6.802511341830166e-05, "loss": 9.581146240234375, "step": 2964 }, { "epoch": 0.38449381195140336, "grad_norm": 0.977151095867157, "learning_rate": 6.800606389973309e-05, "loss": 12.082429885864258, "step": 2965 }, { "epoch": 0.38462348945965, "grad_norm": 0.6965535283088684, "learning_rate": 6.79870113775792e-05, "loss": 10.391311645507812, "step": 2966 }, { "epoch": 0.3847531669678967, "grad_norm": 0.8639399409294128, "learning_rate": 6.796795585501811e-05, "loss": 10.50659465789795, "step": 2967 }, { "epoch": 0.3848828444761434, "grad_norm": 0.7982960343360901, "learning_rate": 6.794889733522848e-05, "loss": 10.213272094726562, "step": 2968 }, { "epoch": 0.38501252198439007, "grad_norm": 0.8742769956588745, "learning_rate": 6.792983582138944e-05, "loss": 8.748634338378906, "step": 2969 }, { "epoch": 0.3851421994926367, "grad_norm": 0.9039188027381897, "learning_rate": 6.791077131668066e-05, "loss": 12.352726936340332, "step": 2970 }, { "epoch": 0.38527187700088344, "grad_norm": 0.8265738487243652, "learning_rate": 6.789170382428227e-05, "loss": 11.419976234436035, "step": 2971 }, { "epoch": 0.3854015545091301, "grad_norm": 0.7076639533042908, "learning_rate": 6.787263334737487e-05, "loss": 8.676959991455078, "step": 2972 }, { "epoch": 0.38553123201737677, "grad_norm": 0.996962308883667, "learning_rate": 6.785355988913968e-05, "loss": 11.128891944885254, "step": 2973 }, { "epoch": 0.3856609095256235, "grad_norm": 0.8409445285797119, "learning_rate": 6.783448345275828e-05, "loss": 9.840518951416016, "step": 2974 }, { "epoch": 0.38579058703387015, "grad_norm": 0.5440940856933594, "learning_rate": 6.781540404141282e-05, "loss": 8.204727172851562, "step": 2975 }, { "epoch": 0.3859202645421168, "grad_norm": 0.9612694978713989, "learning_rate": 6.779632165828593e-05, "loss": 15.429152488708496, "step": 2976 }, { "epoch": 0.3860499420503635, "grad_norm": 0.7894347310066223, "learning_rate": 6.777723630656076e-05, "loss": 11.024738311767578, "step": 2977 }, { "epoch": 0.3861796195586102, "grad_norm": 0.8067209124565125, "learning_rate": 6.775814798942089e-05, "loss": 11.462570190429688, "step": 2978 }, { "epoch": 0.38630929706685685, "grad_norm": 0.8786903619766235, "learning_rate": 6.773905671005044e-05, "loss": 11.014276504516602, "step": 2979 }, { "epoch": 0.38643897457510357, "grad_norm": 0.6111000776290894, "learning_rate": 6.771996247163404e-05, "loss": 12.586203575134277, "step": 2980 }, { "epoch": 0.38656865208335023, "grad_norm": 0.7572482824325562, "learning_rate": 6.77008652773568e-05, "loss": 11.890535354614258, "step": 2981 }, { "epoch": 0.3866983295915969, "grad_norm": 0.5360522866249084, "learning_rate": 6.768176513040428e-05, "loss": 9.704151153564453, "step": 2982 }, { "epoch": 0.38682800709984355, "grad_norm": 0.926580548286438, "learning_rate": 6.766266203396261e-05, "loss": 14.83159351348877, "step": 2983 }, { "epoch": 0.38695768460809027, "grad_norm": 0.6428030729293823, "learning_rate": 6.764355599121835e-05, "loss": 12.735265731811523, "step": 2984 }, { "epoch": 0.38708736211633693, "grad_norm": 0.5218062400817871, "learning_rate": 6.762444700535857e-05, "loss": 7.565214157104492, "step": 2985 }, { "epoch": 0.3872170396245836, "grad_norm": 0.5271806716918945, "learning_rate": 6.760533507957083e-05, "loss": 6.876743793487549, "step": 2986 }, { "epoch": 0.3873467171328303, "grad_norm": 0.926038920879364, "learning_rate": 6.75862202170432e-05, "loss": 14.37401294708252, "step": 2987 }, { "epoch": 0.38747639464107697, "grad_norm": 0.5987998247146606, "learning_rate": 6.756710242096422e-05, "loss": 9.150432586669922, "step": 2988 }, { "epoch": 0.38760607214932363, "grad_norm": 0.7697097659111023, "learning_rate": 6.75479816945229e-05, "loss": 9.646675109863281, "step": 2989 }, { "epoch": 0.38773574965757035, "grad_norm": 0.7636705636978149, "learning_rate": 6.75288580409088e-05, "loss": 11.234628677368164, "step": 2990 }, { "epoch": 0.387865427165817, "grad_norm": 0.5569595694541931, "learning_rate": 6.75097314633119e-05, "loss": 8.987811088562012, "step": 2991 }, { "epoch": 0.3879951046740637, "grad_norm": 0.6778380274772644, "learning_rate": 6.749060196492271e-05, "loss": 9.234562873840332, "step": 2992 }, { "epoch": 0.3881247821823104, "grad_norm": 0.8378667235374451, "learning_rate": 6.747146954893221e-05, "loss": 12.757028579711914, "step": 2993 }, { "epoch": 0.38825445969055705, "grad_norm": 1.01231050491333, "learning_rate": 6.745233421853188e-05, "loss": 16.33617401123047, "step": 2994 }, { "epoch": 0.3883841371988037, "grad_norm": 0.6352622509002686, "learning_rate": 6.743319597691368e-05, "loss": 11.415637969970703, "step": 2995 }, { "epoch": 0.38851381470705043, "grad_norm": 0.5135257840156555, "learning_rate": 6.741405482727003e-05, "loss": 8.395702362060547, "step": 2996 }, { "epoch": 0.3886434922152971, "grad_norm": 0.6812878847122192, "learning_rate": 6.739491077279388e-05, "loss": 9.879047393798828, "step": 2997 }, { "epoch": 0.38877316972354375, "grad_norm": 0.7439034581184387, "learning_rate": 6.737576381667865e-05, "loss": 13.734488487243652, "step": 2998 }, { "epoch": 0.3889028472317904, "grad_norm": 0.7927636504173279, "learning_rate": 6.73566139621182e-05, "loss": 9.52289867401123, "step": 2999 }, { "epoch": 0.38903252474003713, "grad_norm": 0.8440930247306824, "learning_rate": 6.733746121230696e-05, "loss": 9.674880027770996, "step": 3000 }, { "epoch": 0.3891622022482838, "grad_norm": 0.7440526485443115, "learning_rate": 6.731830557043977e-05, "loss": 11.369386672973633, "step": 3001 }, { "epoch": 0.38929187975653046, "grad_norm": 0.8555552959442139, "learning_rate": 6.729914703971196e-05, "loss": 14.28339672088623, "step": 3002 }, { "epoch": 0.3894215572647772, "grad_norm": 0.8583316206932068, "learning_rate": 6.727998562331937e-05, "loss": 9.135601997375488, "step": 3003 }, { "epoch": 0.38955123477302384, "grad_norm": 0.8992339372634888, "learning_rate": 6.726082132445833e-05, "loss": 10.720100402832031, "step": 3004 }, { "epoch": 0.3896809122812705, "grad_norm": 0.6556758880615234, "learning_rate": 6.724165414632561e-05, "loss": 11.758584976196289, "step": 3005 }, { "epoch": 0.3898105897895172, "grad_norm": 0.7932969927787781, "learning_rate": 6.722248409211847e-05, "loss": 9.973284721374512, "step": 3006 }, { "epoch": 0.3899402672977639, "grad_norm": 0.7111232876777649, "learning_rate": 6.720331116503468e-05, "loss": 10.127897262573242, "step": 3007 }, { "epoch": 0.39006994480601054, "grad_norm": 0.9136183857917786, "learning_rate": 6.718413536827247e-05, "loss": 9.8139066696167, "step": 3008 }, { "epoch": 0.39019962231425726, "grad_norm": 0.5463285446166992, "learning_rate": 6.716495670503054e-05, "loss": 7.9859619140625, "step": 3009 }, { "epoch": 0.3903292998225039, "grad_norm": 0.7167007327079773, "learning_rate": 6.714577517850808e-05, "loss": 8.398996353149414, "step": 3010 }, { "epoch": 0.3904589773307506, "grad_norm": 0.6993705630302429, "learning_rate": 6.712659079190473e-05, "loss": 10.703292846679688, "step": 3011 }, { "epoch": 0.3905886548389973, "grad_norm": 1.1952579021453857, "learning_rate": 6.710740354842067e-05, "loss": 14.008734703063965, "step": 3012 }, { "epoch": 0.39071833234724396, "grad_norm": 0.8154519200325012, "learning_rate": 6.708821345125649e-05, "loss": 11.588841438293457, "step": 3013 }, { "epoch": 0.3908480098554906, "grad_norm": 1.1489801406860352, "learning_rate": 6.706902050361328e-05, "loss": 15.120687484741211, "step": 3014 }, { "epoch": 0.3909776873637373, "grad_norm": 0.6466594338417053, "learning_rate": 6.704982470869266e-05, "loss": 9.964052200317383, "step": 3015 }, { "epoch": 0.391107364871984, "grad_norm": 0.8208151459693909, "learning_rate": 6.703062606969663e-05, "loss": 12.054064750671387, "step": 3016 }, { "epoch": 0.39123704238023066, "grad_norm": 0.7463478446006775, "learning_rate": 6.701142458982769e-05, "loss": 11.093038558959961, "step": 3017 }, { "epoch": 0.3913667198884773, "grad_norm": 0.9179416298866272, "learning_rate": 6.699222027228886e-05, "loss": 13.008764266967773, "step": 3018 }, { "epoch": 0.39149639739672404, "grad_norm": 0.91579669713974, "learning_rate": 6.697301312028361e-05, "loss": 12.36307144165039, "step": 3019 }, { "epoch": 0.3916260749049707, "grad_norm": 0.7630777359008789, "learning_rate": 6.695380313701587e-05, "loss": 11.515260696411133, "step": 3020 }, { "epoch": 0.39175575241321736, "grad_norm": 0.9133921265602112, "learning_rate": 6.693459032569004e-05, "loss": 12.41325855255127, "step": 3021 }, { "epoch": 0.3918854299214641, "grad_norm": 0.7579723596572876, "learning_rate": 6.691537468951103e-05, "loss": 13.900659561157227, "step": 3022 }, { "epoch": 0.39201510742971074, "grad_norm": 0.6987335085868835, "learning_rate": 6.689615623168417e-05, "loss": 8.303914070129395, "step": 3023 }, { "epoch": 0.3921447849379574, "grad_norm": 1.0527294874191284, "learning_rate": 6.68769349554153e-05, "loss": 13.88958740234375, "step": 3024 }, { "epoch": 0.3922744624462041, "grad_norm": 0.7337396144866943, "learning_rate": 6.68577108639107e-05, "loss": 9.739577293395996, "step": 3025 }, { "epoch": 0.3924041399544508, "grad_norm": 0.7109223008155823, "learning_rate": 6.683848396037714e-05, "loss": 9.6541109085083, "step": 3026 }, { "epoch": 0.39253381746269744, "grad_norm": 0.8521087765693665, "learning_rate": 6.681925424802184e-05, "loss": 13.60766887664795, "step": 3027 }, { "epoch": 0.39266349497094416, "grad_norm": 0.8634320497512817, "learning_rate": 6.680002173005254e-05, "loss": 13.860970497131348, "step": 3028 }, { "epoch": 0.3927931724791908, "grad_norm": 0.8124500513076782, "learning_rate": 6.678078640967738e-05, "loss": 8.241302490234375, "step": 3029 }, { "epoch": 0.3929228499874375, "grad_norm": 0.8623190522193909, "learning_rate": 6.676154829010499e-05, "loss": 11.437066078186035, "step": 3030 }, { "epoch": 0.39305252749568415, "grad_norm": 0.9549577236175537, "learning_rate": 6.674230737454448e-05, "loss": 13.57133960723877, "step": 3031 }, { "epoch": 0.39318220500393086, "grad_norm": 0.7760308980941772, "learning_rate": 6.672306366620544e-05, "loss": 11.451393127441406, "step": 3032 }, { "epoch": 0.3933118825121775, "grad_norm": 0.8466333746910095, "learning_rate": 6.670381716829789e-05, "loss": 8.920284271240234, "step": 3033 }, { "epoch": 0.3934415600204242, "grad_norm": 0.7593802809715271, "learning_rate": 6.668456788403233e-05, "loss": 11.375076293945312, "step": 3034 }, { "epoch": 0.3935712375286709, "grad_norm": 0.5491423606872559, "learning_rate": 6.666531581661975e-05, "loss": 8.045186996459961, "step": 3035 }, { "epoch": 0.39370091503691756, "grad_norm": 0.7858401536941528, "learning_rate": 6.664606096927154e-05, "loss": 11.134407043457031, "step": 3036 }, { "epoch": 0.3938305925451642, "grad_norm": 0.6075623631477356, "learning_rate": 6.662680334519963e-05, "loss": 9.570114135742188, "step": 3037 }, { "epoch": 0.39396027005341094, "grad_norm": 1.1006962060928345, "learning_rate": 6.660754294761636e-05, "loss": 12.735444068908691, "step": 3038 }, { "epoch": 0.3940899475616576, "grad_norm": 0.7547109723091125, "learning_rate": 6.658827977973456e-05, "loss": 8.765857696533203, "step": 3039 }, { "epoch": 0.39421962506990427, "grad_norm": 0.8130173087120056, "learning_rate": 6.656901384476749e-05, "loss": 10.65733814239502, "step": 3040 }, { "epoch": 0.394349302578151, "grad_norm": 0.7272369265556335, "learning_rate": 6.654974514592893e-05, "loss": 7.23221492767334, "step": 3041 }, { "epoch": 0.39447898008639765, "grad_norm": 0.7248726487159729, "learning_rate": 6.653047368643306e-05, "loss": 11.650629043579102, "step": 3042 }, { "epoch": 0.3946086575946443, "grad_norm": 0.88895183801651, "learning_rate": 6.651119946949454e-05, "loss": 10.650168418884277, "step": 3043 }, { "epoch": 0.394738335102891, "grad_norm": 0.6730843186378479, "learning_rate": 6.649192249832852e-05, "loss": 9.073282241821289, "step": 3044 }, { "epoch": 0.3948680126111377, "grad_norm": 0.832446813583374, "learning_rate": 6.647264277615057e-05, "loss": 11.159862518310547, "step": 3045 }, { "epoch": 0.39499769011938435, "grad_norm": 0.9290329813957214, "learning_rate": 6.645336030617674e-05, "loss": 11.136480331420898, "step": 3046 }, { "epoch": 0.395127367627631, "grad_norm": 0.8051220774650574, "learning_rate": 6.643407509162353e-05, "loss": 12.221871376037598, "step": 3047 }, { "epoch": 0.3952570451358777, "grad_norm": 0.8427198529243469, "learning_rate": 6.64147871357079e-05, "loss": 12.906744956970215, "step": 3048 }, { "epoch": 0.3953867226441244, "grad_norm": 0.8031232953071594, "learning_rate": 6.639549644164725e-05, "loss": 11.595691680908203, "step": 3049 }, { "epoch": 0.39551640015237105, "grad_norm": 0.5911054015159607, "learning_rate": 6.637620301265946e-05, "loss": 9.440291404724121, "step": 3050 }, { "epoch": 0.39564607766061777, "grad_norm": 0.7057468891143799, "learning_rate": 6.635690685196288e-05, "loss": 10.903176307678223, "step": 3051 }, { "epoch": 0.39577575516886443, "grad_norm": 0.7756178379058838, "learning_rate": 6.63376079627763e-05, "loss": 12.456409454345703, "step": 3052 }, { "epoch": 0.3959054326771111, "grad_norm": 0.9042341709136963, "learning_rate": 6.631830634831894e-05, "loss": 8.911574363708496, "step": 3053 }, { "epoch": 0.3960351101853578, "grad_norm": 0.7758477330207825, "learning_rate": 6.62990020118105e-05, "loss": 14.616902351379395, "step": 3054 }, { "epoch": 0.39616478769360447, "grad_norm": 1.2806016206741333, "learning_rate": 6.627969495647113e-05, "loss": 9.952463150024414, "step": 3055 }, { "epoch": 0.39629446520185113, "grad_norm": 0.9063150882720947, "learning_rate": 6.626038518552144e-05, "loss": 11.064009666442871, "step": 3056 }, { "epoch": 0.39642414271009785, "grad_norm": 0.7843353748321533, "learning_rate": 6.624107270218247e-05, "loss": 11.871224403381348, "step": 3057 }, { "epoch": 0.3965538202183445, "grad_norm": 0.9419393539428711, "learning_rate": 6.622175750967576e-05, "loss": 10.59317684173584, "step": 3058 }, { "epoch": 0.39668349772659117, "grad_norm": 0.9931625723838806, "learning_rate": 6.620243961122324e-05, "loss": 14.02161979675293, "step": 3059 }, { "epoch": 0.3968131752348379, "grad_norm": 0.5303059220314026, "learning_rate": 6.618311901004733e-05, "loss": 7.421056747436523, "step": 3060 }, { "epoch": 0.39694285274308455, "grad_norm": 0.8918815851211548, "learning_rate": 6.61637957093709e-05, "loss": 12.438051223754883, "step": 3061 }, { "epoch": 0.3970725302513312, "grad_norm": 1.0160704851150513, "learning_rate": 6.614446971241725e-05, "loss": 12.800838470458984, "step": 3062 }, { "epoch": 0.3972022077595779, "grad_norm": 0.6680283546447754, "learning_rate": 6.612514102241015e-05, "loss": 11.667803764343262, "step": 3063 }, { "epoch": 0.3973318852678246, "grad_norm": 0.7099065780639648, "learning_rate": 6.610580964257383e-05, "loss": 8.961532592773438, "step": 3064 }, { "epoch": 0.39746156277607125, "grad_norm": 0.6798382997512817, "learning_rate": 6.608647557613292e-05, "loss": 10.478689193725586, "step": 3065 }, { "epoch": 0.3975912402843179, "grad_norm": 0.8034135699272156, "learning_rate": 6.606713882631256e-05, "loss": 11.680736541748047, "step": 3066 }, { "epoch": 0.39772091779256463, "grad_norm": 0.6282764673233032, "learning_rate": 6.604779939633828e-05, "loss": 10.350238800048828, "step": 3067 }, { "epoch": 0.3978505953008113, "grad_norm": 0.9213512539863586, "learning_rate": 6.60284572894361e-05, "loss": 12.304323196411133, "step": 3068 }, { "epoch": 0.39798027280905796, "grad_norm": 0.956143319606781, "learning_rate": 6.600911250883245e-05, "loss": 10.608994483947754, "step": 3069 }, { "epoch": 0.3981099503173047, "grad_norm": 0.773455023765564, "learning_rate": 6.598976505775424e-05, "loss": 12.632147789001465, "step": 3070 }, { "epoch": 0.39823962782555133, "grad_norm": 0.7795429825782776, "learning_rate": 6.597041493942884e-05, "loss": 10.963876724243164, "step": 3071 }, { "epoch": 0.398369305333798, "grad_norm": 0.7328673601150513, "learning_rate": 6.5951062157084e-05, "loss": 11.422181129455566, "step": 3072 }, { "epoch": 0.3984989828420447, "grad_norm": 0.6254801750183105, "learning_rate": 6.593170671394794e-05, "loss": 7.921092987060547, "step": 3073 }, { "epoch": 0.3986286603502914, "grad_norm": 0.7990678548812866, "learning_rate": 6.591234861324938e-05, "loss": 11.754814147949219, "step": 3074 }, { "epoch": 0.39875833785853804, "grad_norm": 0.7063722610473633, "learning_rate": 6.589298785821739e-05, "loss": 8.333081245422363, "step": 3075 }, { "epoch": 0.39888801536678475, "grad_norm": 0.8530819416046143, "learning_rate": 6.587362445208157e-05, "loss": 10.72077751159668, "step": 3076 }, { "epoch": 0.3990176928750314, "grad_norm": 0.6605314016342163, "learning_rate": 6.585425839807188e-05, "loss": 11.342774391174316, "step": 3077 }, { "epoch": 0.3991473703832781, "grad_norm": 0.7962661981582642, "learning_rate": 6.583488969941883e-05, "loss": 9.192456245422363, "step": 3078 }, { "epoch": 0.39927704789152474, "grad_norm": 0.6769970059394836, "learning_rate": 6.581551835935325e-05, "loss": 9.343868255615234, "step": 3079 }, { "epoch": 0.39940672539977146, "grad_norm": 0.7607387900352478, "learning_rate": 6.579614438110647e-05, "loss": 8.360976219177246, "step": 3080 }, { "epoch": 0.3995364029080181, "grad_norm": 0.5960583090782166, "learning_rate": 6.577676776791028e-05, "loss": 9.34969425201416, "step": 3081 }, { "epoch": 0.3996660804162648, "grad_norm": 0.7883150577545166, "learning_rate": 6.575738852299688e-05, "loss": 9.358034133911133, "step": 3082 }, { "epoch": 0.3997957579245115, "grad_norm": 0.704620897769928, "learning_rate": 6.573800664959892e-05, "loss": 10.472949981689453, "step": 3083 }, { "epoch": 0.39992543543275816, "grad_norm": 0.6993387937545776, "learning_rate": 6.571862215094946e-05, "loss": 9.929455757141113, "step": 3084 }, { "epoch": 0.4000551129410048, "grad_norm": 0.6418335437774658, "learning_rate": 6.569923503028204e-05, "loss": 8.239411354064941, "step": 3085 }, { "epoch": 0.40018479044925154, "grad_norm": 0.9045808911323547, "learning_rate": 6.567984529083064e-05, "loss": 11.10903549194336, "step": 3086 }, { "epoch": 0.4003144679574982, "grad_norm": 0.9034862518310547, "learning_rate": 6.566045293582961e-05, "loss": 13.079612731933594, "step": 3087 }, { "epoch": 0.40044414546574486, "grad_norm": 0.6672282814979553, "learning_rate": 6.56410579685138e-05, "loss": 7.9198384284973145, "step": 3088 }, { "epoch": 0.4005738229739916, "grad_norm": 0.7600343227386475, "learning_rate": 6.562166039211847e-05, "loss": 10.105268478393555, "step": 3089 }, { "epoch": 0.40070350048223824, "grad_norm": 0.6520967483520508, "learning_rate": 6.560226020987936e-05, "loss": 10.143211364746094, "step": 3090 }, { "epoch": 0.4008331779904849, "grad_norm": 0.6853710412979126, "learning_rate": 6.558285742503258e-05, "loss": 9.91738510131836, "step": 3091 }, { "epoch": 0.4009628554987316, "grad_norm": 0.9500579237937927, "learning_rate": 6.55634520408147e-05, "loss": 13.359224319458008, "step": 3092 }, { "epoch": 0.4010925330069783, "grad_norm": 0.8721070289611816, "learning_rate": 6.554404406046272e-05, "loss": 11.445178985595703, "step": 3093 }, { "epoch": 0.40122221051522494, "grad_norm": 0.7522755265235901, "learning_rate": 6.552463348721412e-05, "loss": 10.692350387573242, "step": 3094 }, { "epoch": 0.4013518880234716, "grad_norm": 0.6517635583877563, "learning_rate": 6.550522032430671e-05, "loss": 7.952104091644287, "step": 3095 }, { "epoch": 0.4014815655317183, "grad_norm": 0.9509298801422119, "learning_rate": 6.548580457497884e-05, "loss": 11.400826454162598, "step": 3096 }, { "epoch": 0.401611243039965, "grad_norm": 0.8136300444602966, "learning_rate": 6.546638624246921e-05, "loss": 9.410991668701172, "step": 3097 }, { "epoch": 0.40174092054821164, "grad_norm": 0.6287317872047424, "learning_rate": 6.544696533001702e-05, "loss": 11.23920726776123, "step": 3098 }, { "epoch": 0.40187059805645836, "grad_norm": 0.7626967430114746, "learning_rate": 6.542754184086183e-05, "loss": 13.480491638183594, "step": 3099 }, { "epoch": 0.402000275564705, "grad_norm": 0.7371850609779358, "learning_rate": 6.54081157782437e-05, "loss": 9.547687530517578, "step": 3100 }, { "epoch": 0.4021299530729517, "grad_norm": 1.0264722108840942, "learning_rate": 6.538868714540307e-05, "loss": 13.485645294189453, "step": 3101 }, { "epoch": 0.4022596305811984, "grad_norm": 1.0244648456573486, "learning_rate": 6.536925594558083e-05, "loss": 8.643465042114258, "step": 3102 }, { "epoch": 0.40238930808944506, "grad_norm": 0.8692715764045715, "learning_rate": 6.534982218201826e-05, "loss": 11.109664916992188, "step": 3103 }, { "epoch": 0.4025189855976917, "grad_norm": 0.7592151165008545, "learning_rate": 6.533038585795714e-05, "loss": 8.304685592651367, "step": 3104 }, { "epoch": 0.40264866310593844, "grad_norm": 0.7126330733299255, "learning_rate": 6.531094697663962e-05, "loss": 9.806086540222168, "step": 3105 }, { "epoch": 0.4027783406141851, "grad_norm": 0.9912644624710083, "learning_rate": 6.529150554130829e-05, "loss": 11.665633201599121, "step": 3106 }, { "epoch": 0.40290801812243177, "grad_norm": 0.7155874371528625, "learning_rate": 6.527206155520616e-05, "loss": 11.47913646697998, "step": 3107 }, { "epoch": 0.4030376956306785, "grad_norm": 0.811689019203186, "learning_rate": 6.52526150215767e-05, "loss": 10.594050407409668, "step": 3108 }, { "epoch": 0.40316737313892514, "grad_norm": 0.8536486625671387, "learning_rate": 6.523316594366375e-05, "loss": 13.77469539642334, "step": 3109 }, { "epoch": 0.4032970506471718, "grad_norm": 0.6404903531074524, "learning_rate": 6.521371432471163e-05, "loss": 9.59901237487793, "step": 3110 }, { "epoch": 0.40342672815541847, "grad_norm": 0.7493064403533936, "learning_rate": 6.519426016796505e-05, "loss": 8.461840629577637, "step": 3111 }, { "epoch": 0.4035564056636652, "grad_norm": 0.8151461482048035, "learning_rate": 6.517480347666916e-05, "loss": 14.304524421691895, "step": 3112 }, { "epoch": 0.40368608317191185, "grad_norm": 0.8048996329307556, "learning_rate": 6.51553442540695e-05, "loss": 13.430096626281738, "step": 3113 }, { "epoch": 0.4038157606801585, "grad_norm": 0.7139055132865906, "learning_rate": 6.513588250341207e-05, "loss": 8.018774032592773, "step": 3114 }, { "epoch": 0.4039454381884052, "grad_norm": 0.6325384378433228, "learning_rate": 6.511641822794328e-05, "loss": 8.801737785339355, "step": 3115 }, { "epoch": 0.4040751156966519, "grad_norm": 0.7290205955505371, "learning_rate": 6.509695143090996e-05, "loss": 8.110897064208984, "step": 3116 }, { "epoch": 0.40420479320489855, "grad_norm": 0.7123152613639832, "learning_rate": 6.507748211555935e-05, "loss": 11.854294776916504, "step": 3117 }, { "epoch": 0.40433447071314527, "grad_norm": 0.6662838459014893, "learning_rate": 6.505801028513914e-05, "loss": 10.330902099609375, "step": 3118 }, { "epoch": 0.40446414822139193, "grad_norm": 0.6698424816131592, "learning_rate": 6.50385359428974e-05, "loss": 11.260282516479492, "step": 3119 }, { "epoch": 0.4045938257296386, "grad_norm": 1.0240261554718018, "learning_rate": 6.501905909208264e-05, "loss": 12.348527908325195, "step": 3120 }, { "epoch": 0.4047235032378853, "grad_norm": 0.6133765578269958, "learning_rate": 6.49995797359438e-05, "loss": 9.299867630004883, "step": 3121 }, { "epoch": 0.40485318074613197, "grad_norm": 0.9239206910133362, "learning_rate": 6.49800978777302e-05, "loss": 10.119283676147461, "step": 3122 }, { "epoch": 0.40498285825437863, "grad_norm": 0.8035175204277039, "learning_rate": 6.496061352069162e-05, "loss": 11.95682430267334, "step": 3123 }, { "epoch": 0.40511253576262535, "grad_norm": 0.9555312395095825, "learning_rate": 6.494112666807826e-05, "loss": 14.294559478759766, "step": 3124 }, { "epoch": 0.405242213270872, "grad_norm": 0.6583715677261353, "learning_rate": 6.492163732314066e-05, "loss": 9.638091087341309, "step": 3125 }, { "epoch": 0.40537189077911867, "grad_norm": 1.0400110483169556, "learning_rate": 6.490214548912988e-05, "loss": 12.957262992858887, "step": 3126 }, { "epoch": 0.40550156828736533, "grad_norm": 0.6679948568344116, "learning_rate": 6.488265116929732e-05, "loss": 8.118606567382812, "step": 3127 }, { "epoch": 0.40563124579561205, "grad_norm": 0.7332290410995483, "learning_rate": 6.486315436689484e-05, "loss": 8.941094398498535, "step": 3128 }, { "epoch": 0.4057609233038587, "grad_norm": 0.8357635140419006, "learning_rate": 6.484365508517468e-05, "loss": 7.553316593170166, "step": 3129 }, { "epoch": 0.4058906008121054, "grad_norm": 0.6596196293830872, "learning_rate": 6.482415332738951e-05, "loss": 10.417976379394531, "step": 3130 }, { "epoch": 0.4060202783203521, "grad_norm": 0.8671808838844299, "learning_rate": 6.480464909679243e-05, "loss": 9.447248458862305, "step": 3131 }, { "epoch": 0.40614995582859875, "grad_norm": 0.5904194712638855, "learning_rate": 6.47851423966369e-05, "loss": 8.69963550567627, "step": 3132 }, { "epoch": 0.4062796333368454, "grad_norm": 0.6891331076622009, "learning_rate": 6.476563323017685e-05, "loss": 10.624212265014648, "step": 3133 }, { "epoch": 0.40640931084509213, "grad_norm": 0.7207460403442383, "learning_rate": 6.47461216006666e-05, "loss": 11.922983169555664, "step": 3134 }, { "epoch": 0.4065389883533388, "grad_norm": 0.8574826121330261, "learning_rate": 6.472660751136085e-05, "loss": 9.460291862487793, "step": 3135 }, { "epoch": 0.40666866586158545, "grad_norm": 0.5304431915283203, "learning_rate": 6.470709096551479e-05, "loss": 6.97655725479126, "step": 3136 }, { "epoch": 0.40679834336983217, "grad_norm": 0.8261632919311523, "learning_rate": 6.468757196638391e-05, "loss": 10.31155776977539, "step": 3137 }, { "epoch": 0.40692802087807883, "grad_norm": 0.6467838287353516, "learning_rate": 6.46680505172242e-05, "loss": 11.031233787536621, "step": 3138 }, { "epoch": 0.4070576983863255, "grad_norm": 0.7870991230010986, "learning_rate": 6.464852662129203e-05, "loss": 14.154611587524414, "step": 3139 }, { "epoch": 0.4071873758945722, "grad_norm": 0.8629051446914673, "learning_rate": 6.462900028184415e-05, "loss": 10.183945655822754, "step": 3140 }, { "epoch": 0.4073170534028189, "grad_norm": 0.8034111857414246, "learning_rate": 6.460947150213778e-05, "loss": 9.952167510986328, "step": 3141 }, { "epoch": 0.40744673091106554, "grad_norm": 0.606143057346344, "learning_rate": 6.458994028543046e-05, "loss": 7.809364318847656, "step": 3142 }, { "epoch": 0.4075764084193122, "grad_norm": 0.9754874110221863, "learning_rate": 6.457040663498023e-05, "loss": 15.420320510864258, "step": 3143 }, { "epoch": 0.4077060859275589, "grad_norm": 0.6977838277816772, "learning_rate": 6.455087055404547e-05, "loss": 8.403193473815918, "step": 3144 }, { "epoch": 0.4078357634358056, "grad_norm": 0.9259147047996521, "learning_rate": 6.453133204588498e-05, "loss": 10.083778381347656, "step": 3145 }, { "epoch": 0.40796544094405224, "grad_norm": 0.7455813884735107, "learning_rate": 6.451179111375798e-05, "loss": 8.302544593811035, "step": 3146 }, { "epoch": 0.40809511845229895, "grad_norm": 0.8809233903884888, "learning_rate": 6.44922477609241e-05, "loss": 9.63011646270752, "step": 3147 }, { "epoch": 0.4082247959605456, "grad_norm": 1.1746959686279297, "learning_rate": 6.447270199064335e-05, "loss": 12.716059684753418, "step": 3148 }, { "epoch": 0.4083544734687923, "grad_norm": 0.715258777141571, "learning_rate": 6.445315380617614e-05, "loss": 7.803625106811523, "step": 3149 }, { "epoch": 0.408484150977039, "grad_norm": 0.7558518648147583, "learning_rate": 6.443360321078332e-05, "loss": 7.825687408447266, "step": 3150 }, { "epoch": 0.40861382848528566, "grad_norm": 0.5321434140205383, "learning_rate": 6.441405020772609e-05, "loss": 7.014153480529785, "step": 3151 }, { "epoch": 0.4087435059935323, "grad_norm": 1.0079617500305176, "learning_rate": 6.439449480026612e-05, "loss": 11.972256660461426, "step": 3152 }, { "epoch": 0.40887318350177904, "grad_norm": 0.750131368637085, "learning_rate": 6.437493699166539e-05, "loss": 9.350698471069336, "step": 3153 }, { "epoch": 0.4090028610100257, "grad_norm": 0.7282060980796814, "learning_rate": 6.435537678518637e-05, "loss": 8.428596496582031, "step": 3154 }, { "epoch": 0.40913253851827236, "grad_norm": 0.8705008029937744, "learning_rate": 6.433581418409188e-05, "loss": 10.549726486206055, "step": 3155 }, { "epoch": 0.4092622160265191, "grad_norm": 0.9006875157356262, "learning_rate": 6.431624919164514e-05, "loss": 12.89076042175293, "step": 3156 }, { "epoch": 0.40939189353476574, "grad_norm": 0.8241369724273682, "learning_rate": 6.429668181110982e-05, "loss": 12.534977912902832, "step": 3157 }, { "epoch": 0.4095215710430124, "grad_norm": 0.7284458875656128, "learning_rate": 6.427711204574988e-05, "loss": 11.045754432678223, "step": 3158 }, { "epoch": 0.40965124855125906, "grad_norm": 0.6160198450088501, "learning_rate": 6.42575398988298e-05, "loss": 8.402873992919922, "step": 3159 }, { "epoch": 0.4097809260595058, "grad_norm": 0.9428833723068237, "learning_rate": 6.423796537361438e-05, "loss": 12.198929786682129, "step": 3160 }, { "epoch": 0.40991060356775244, "grad_norm": 0.7869759798049927, "learning_rate": 6.421838847336886e-05, "loss": 8.959476470947266, "step": 3161 }, { "epoch": 0.4100402810759991, "grad_norm": 0.9846064448356628, "learning_rate": 6.419880920135882e-05, "loss": 15.113105773925781, "step": 3162 }, { "epoch": 0.4101699585842458, "grad_norm": 0.7199384570121765, "learning_rate": 6.41792275608503e-05, "loss": 10.610806465148926, "step": 3163 }, { "epoch": 0.4102996360924925, "grad_norm": 0.7654632329940796, "learning_rate": 6.415964355510971e-05, "loss": 9.729888916015625, "step": 3164 }, { "epoch": 0.41042931360073914, "grad_norm": 0.7643359303474426, "learning_rate": 6.414005718740383e-05, "loss": 12.052977561950684, "step": 3165 }, { "epoch": 0.41055899110898586, "grad_norm": 0.6948076486587524, "learning_rate": 6.412046846099987e-05, "loss": 10.319972038269043, "step": 3166 }, { "epoch": 0.4106886686172325, "grad_norm": 0.9263554215431213, "learning_rate": 6.41008773791654e-05, "loss": 14.260208129882812, "step": 3167 }, { "epoch": 0.4108183461254792, "grad_norm": 0.6483742594718933, "learning_rate": 6.408128394516843e-05, "loss": 9.929290771484375, "step": 3168 }, { "epoch": 0.4109480236337259, "grad_norm": 0.6931551694869995, "learning_rate": 6.406168816227733e-05, "loss": 11.233810424804688, "step": 3169 }, { "epoch": 0.41107770114197256, "grad_norm": 0.9169944524765015, "learning_rate": 6.404209003376085e-05, "loss": 13.507524490356445, "step": 3170 }, { "epoch": 0.4112073786502192, "grad_norm": 0.9120716452598572, "learning_rate": 6.402248956288816e-05, "loss": 11.771949768066406, "step": 3171 }, { "epoch": 0.41133705615846594, "grad_norm": 0.7419218420982361, "learning_rate": 6.400288675292879e-05, "loss": 11.487380981445312, "step": 3172 }, { "epoch": 0.4114667336667126, "grad_norm": 0.7106005549430847, "learning_rate": 6.39832816071527e-05, "loss": 14.122764587402344, "step": 3173 }, { "epoch": 0.41159641117495926, "grad_norm": 0.7795200347900391, "learning_rate": 6.39636741288302e-05, "loss": 10.197367668151855, "step": 3174 }, { "epoch": 0.4117260886832059, "grad_norm": 0.6976615786552429, "learning_rate": 6.394406432123203e-05, "loss": 11.007966995239258, "step": 3175 }, { "epoch": 0.41185576619145264, "grad_norm": 0.6266179084777832, "learning_rate": 6.392445218762925e-05, "loss": 9.715078353881836, "step": 3176 }, { "epoch": 0.4119854436996993, "grad_norm": 0.6899446845054626, "learning_rate": 6.390483773129341e-05, "loss": 10.081086158752441, "step": 3177 }, { "epoch": 0.41211512120794597, "grad_norm": 0.8249636292457581, "learning_rate": 6.388522095549636e-05, "loss": 9.499661445617676, "step": 3178 }, { "epoch": 0.4122447987161927, "grad_norm": 0.9359137415885925, "learning_rate": 6.386560186351036e-05, "loss": 10.943408966064453, "step": 3179 }, { "epoch": 0.41237447622443935, "grad_norm": 0.5927610397338867, "learning_rate": 6.384598045860808e-05, "loss": 6.399498462677002, "step": 3180 }, { "epoch": 0.412504153732686, "grad_norm": 0.713714063167572, "learning_rate": 6.382635674406255e-05, "loss": 11.02127742767334, "step": 3181 }, { "epoch": 0.4126338312409327, "grad_norm": 0.70499587059021, "learning_rate": 6.380673072314721e-05, "loss": 10.717516899108887, "step": 3182 }, { "epoch": 0.4127635087491794, "grad_norm": 0.7116789221763611, "learning_rate": 6.378710239913586e-05, "loss": 7.676577091217041, "step": 3183 }, { "epoch": 0.41289318625742605, "grad_norm": 1.0842982530593872, "learning_rate": 6.376747177530269e-05, "loss": 9.064983367919922, "step": 3184 }, { "epoch": 0.41302286376567277, "grad_norm": 0.7248928546905518, "learning_rate": 6.374783885492228e-05, "loss": 9.58212661743164, "step": 3185 }, { "epoch": 0.4131525412739194, "grad_norm": 0.573384165763855, "learning_rate": 6.372820364126959e-05, "loss": 7.4168901443481445, "step": 3186 }, { "epoch": 0.4132822187821661, "grad_norm": 1.006961703300476, "learning_rate": 6.370856613761996e-05, "loss": 9.041524887084961, "step": 3187 }, { "epoch": 0.4134118962904128, "grad_norm": 0.6480093002319336, "learning_rate": 6.368892634724913e-05, "loss": 9.519338607788086, "step": 3188 }, { "epoch": 0.41354157379865947, "grad_norm": 0.5960317254066467, "learning_rate": 6.366928427343319e-05, "loss": 11.731776237487793, "step": 3189 }, { "epoch": 0.41367125130690613, "grad_norm": 0.7694414258003235, "learning_rate": 6.364963991944866e-05, "loss": 10.481508255004883, "step": 3190 }, { "epoch": 0.4138009288151528, "grad_norm": 0.8356024622917175, "learning_rate": 6.362999328857235e-05, "loss": 12.59317684173584, "step": 3191 }, { "epoch": 0.4139306063233995, "grad_norm": 0.8621148467063904, "learning_rate": 6.361034438408155e-05, "loss": 12.717649459838867, "step": 3192 }, { "epoch": 0.41406028383164617, "grad_norm": 0.6156476140022278, "learning_rate": 6.359069320925388e-05, "loss": 9.46971321105957, "step": 3193 }, { "epoch": 0.41418996133989283, "grad_norm": 0.7169080376625061, "learning_rate": 6.357103976736736e-05, "loss": 10.672969818115234, "step": 3194 }, { "epoch": 0.41431963884813955, "grad_norm": 0.6919263601303101, "learning_rate": 6.355138406170033e-05, "loss": 6.911924839019775, "step": 3195 }, { "epoch": 0.4144493163563862, "grad_norm": 0.903200089931488, "learning_rate": 6.35317260955316e-05, "loss": 12.297198295593262, "step": 3196 }, { "epoch": 0.41457899386463287, "grad_norm": 0.8636282086372375, "learning_rate": 6.351206587214027e-05, "loss": 14.317147254943848, "step": 3197 }, { "epoch": 0.4147086713728796, "grad_norm": 0.8985584378242493, "learning_rate": 6.349240339480589e-05, "loss": 10.234604835510254, "step": 3198 }, { "epoch": 0.41483834888112625, "grad_norm": 0.6380428075790405, "learning_rate": 6.347273866680832e-05, "loss": 9.682433128356934, "step": 3199 }, { "epoch": 0.4149680263893729, "grad_norm": 0.7937518954277039, "learning_rate": 6.345307169142785e-05, "loss": 10.700576782226562, "step": 3200 }, { "epoch": 0.41509770389761963, "grad_norm": 0.7716608643531799, "learning_rate": 6.34334024719451e-05, "loss": 9.253802299499512, "step": 3201 }, { "epoch": 0.4152273814058663, "grad_norm": 0.7602969408035278, "learning_rate": 6.34137310116411e-05, "loss": 10.755529403686523, "step": 3202 }, { "epoch": 0.41535705891411295, "grad_norm": 0.8889927268028259, "learning_rate": 6.339405731379725e-05, "loss": 13.991929054260254, "step": 3203 }, { "epoch": 0.41548673642235967, "grad_norm": 0.7577676773071289, "learning_rate": 6.337438138169527e-05, "loss": 7.263860702514648, "step": 3204 }, { "epoch": 0.41561641393060633, "grad_norm": 0.583358883857727, "learning_rate": 6.335470321861733e-05, "loss": 8.310270309448242, "step": 3205 }, { "epoch": 0.415746091438853, "grad_norm": 0.8506077527999878, "learning_rate": 6.333502282784595e-05, "loss": 15.811015129089355, "step": 3206 }, { "epoch": 0.41587576894709966, "grad_norm": 0.7021073698997498, "learning_rate": 6.331534021266399e-05, "loss": 10.600139617919922, "step": 3207 }, { "epoch": 0.4160054464553464, "grad_norm": 0.5724135041236877, "learning_rate": 6.32956553763547e-05, "loss": 7.242289066314697, "step": 3208 }, { "epoch": 0.41613512396359303, "grad_norm": 0.6553635001182556, "learning_rate": 6.327596832220171e-05, "loss": 10.66320514678955, "step": 3209 }, { "epoch": 0.4162648014718397, "grad_norm": 0.5846846699714661, "learning_rate": 6.325627905348898e-05, "loss": 8.675317764282227, "step": 3210 }, { "epoch": 0.4163944789800864, "grad_norm": 0.689269483089447, "learning_rate": 6.323658757350091e-05, "loss": 8.250762939453125, "step": 3211 }, { "epoch": 0.4165241564883331, "grad_norm": 0.9634190201759338, "learning_rate": 6.321689388552223e-05, "loss": 12.972724914550781, "step": 3212 }, { "epoch": 0.41665383399657974, "grad_norm": 0.9783684015274048, "learning_rate": 6.319719799283801e-05, "loss": 13.732834815979004, "step": 3213 }, { "epoch": 0.41678351150482645, "grad_norm": 0.7738972902297974, "learning_rate": 6.317749989873372e-05, "loss": 12.328256607055664, "step": 3214 }, { "epoch": 0.4169131890130731, "grad_norm": 0.6055720448493958, "learning_rate": 6.315779960649518e-05, "loss": 10.391374588012695, "step": 3215 }, { "epoch": 0.4170428665213198, "grad_norm": 0.697049081325531, "learning_rate": 6.313809711940863e-05, "loss": 8.412880897521973, "step": 3216 }, { "epoch": 0.4171725440295665, "grad_norm": 0.8365723490715027, "learning_rate": 6.31183924407606e-05, "loss": 14.33879566192627, "step": 3217 }, { "epoch": 0.41730222153781316, "grad_norm": 0.7696112990379333, "learning_rate": 6.309868557383804e-05, "loss": 11.873804092407227, "step": 3218 }, { "epoch": 0.4174318990460598, "grad_norm": 0.5887565016746521, "learning_rate": 6.307897652192823e-05, "loss": 7.350179672241211, "step": 3219 }, { "epoch": 0.41756157655430653, "grad_norm": 0.5872434973716736, "learning_rate": 6.305926528831885e-05, "loss": 8.320778846740723, "step": 3220 }, { "epoch": 0.4176912540625532, "grad_norm": 0.6409257054328918, "learning_rate": 6.303955187629789e-05, "loss": 12.385659217834473, "step": 3221 }, { "epoch": 0.41782093157079986, "grad_norm": 0.9770721793174744, "learning_rate": 6.301983628915377e-05, "loss": 8.645600318908691, "step": 3222 }, { "epoch": 0.4179506090790465, "grad_norm": 0.7673680186271667, "learning_rate": 6.300011853017523e-05, "loss": 8.663324356079102, "step": 3223 }, { "epoch": 0.41808028658729324, "grad_norm": 0.9339295029640198, "learning_rate": 6.29803986026514e-05, "loss": 13.198357582092285, "step": 3224 }, { "epoch": 0.4182099640955399, "grad_norm": 0.7438153028488159, "learning_rate": 6.296067650987172e-05, "loss": 12.609891891479492, "step": 3225 }, { "epoch": 0.41833964160378656, "grad_norm": 0.7623740434646606, "learning_rate": 6.294095225512603e-05, "loss": 11.191980361938477, "step": 3226 }, { "epoch": 0.4184693191120333, "grad_norm": 0.7798686027526855, "learning_rate": 6.292122584170457e-05, "loss": 10.621662139892578, "step": 3227 }, { "epoch": 0.41859899662027994, "grad_norm": 0.835144579410553, "learning_rate": 6.290149727289785e-05, "loss": 12.923022270202637, "step": 3228 }, { "epoch": 0.4187286741285266, "grad_norm": 0.6054161787033081, "learning_rate": 6.28817665519968e-05, "loss": 7.712199687957764, "step": 3229 }, { "epoch": 0.4188583516367733, "grad_norm": 0.6897125840187073, "learning_rate": 6.286203368229268e-05, "loss": 10.701750755310059, "step": 3230 }, { "epoch": 0.41898802914502, "grad_norm": 0.7604148983955383, "learning_rate": 6.284229866707716e-05, "loss": 10.567120552062988, "step": 3231 }, { "epoch": 0.41911770665326664, "grad_norm": 0.7215262055397034, "learning_rate": 6.282256150964221e-05, "loss": 7.693919658660889, "step": 3232 }, { "epoch": 0.41924738416151336, "grad_norm": 0.776006817817688, "learning_rate": 6.280282221328018e-05, "loss": 10.836529731750488, "step": 3233 }, { "epoch": 0.41937706166976, "grad_norm": 0.6992603540420532, "learning_rate": 6.278308078128376e-05, "loss": 8.656095504760742, "step": 3234 }, { "epoch": 0.4195067391780067, "grad_norm": 0.8401609063148499, "learning_rate": 6.276333721694603e-05, "loss": 13.425615310668945, "step": 3235 }, { "epoch": 0.4196364166862534, "grad_norm": 0.7429764270782471, "learning_rate": 6.27435915235604e-05, "loss": 12.577837944030762, "step": 3236 }, { "epoch": 0.41976609419450006, "grad_norm": 0.6945944428443909, "learning_rate": 6.272384370442065e-05, "loss": 9.968259811401367, "step": 3237 }, { "epoch": 0.4198957717027467, "grad_norm": 0.9198126196861267, "learning_rate": 6.27040937628209e-05, "loss": 9.88138198852539, "step": 3238 }, { "epoch": 0.4200254492109934, "grad_norm": 0.9691866040229797, "learning_rate": 6.268434170205563e-05, "loss": 12.159629821777344, "step": 3239 }, { "epoch": 0.4201551267192401, "grad_norm": 0.8546249270439148, "learning_rate": 6.266458752541968e-05, "loss": 11.041265487670898, "step": 3240 }, { "epoch": 0.42028480422748676, "grad_norm": 0.8118463158607483, "learning_rate": 6.264483123620823e-05, "loss": 10.69084358215332, "step": 3241 }, { "epoch": 0.4204144817357334, "grad_norm": 0.6847507953643799, "learning_rate": 6.262507283771683e-05, "loss": 10.492216110229492, "step": 3242 }, { "epoch": 0.42054415924398014, "grad_norm": 0.5387752652168274, "learning_rate": 6.260531233324136e-05, "loss": 7.206225872039795, "step": 3243 }, { "epoch": 0.4206738367522268, "grad_norm": 0.491098552942276, "learning_rate": 6.258554972607808e-05, "loss": 8.534130096435547, "step": 3244 }, { "epoch": 0.42080351426047347, "grad_norm": 0.65662682056427, "learning_rate": 6.256578501952356e-05, "loss": 11.424870491027832, "step": 3245 }, { "epoch": 0.4209331917687202, "grad_norm": 0.709149956703186, "learning_rate": 6.254601821687475e-05, "loss": 11.783710479736328, "step": 3246 }, { "epoch": 0.42106286927696684, "grad_norm": 0.6895809769630432, "learning_rate": 6.252624932142897e-05, "loss": 12.366473197937012, "step": 3247 }, { "epoch": 0.4211925467852135, "grad_norm": 0.6895943284034729, "learning_rate": 6.250647833648384e-05, "loss": 13.774006843566895, "step": 3248 }, { "epoch": 0.4213222242934602, "grad_norm": 1.0897302627563477, "learning_rate": 6.248670526533734e-05, "loss": 11.794445037841797, "step": 3249 }, { "epoch": 0.4214519018017069, "grad_norm": 0.7332729697227478, "learning_rate": 6.246693011128784e-05, "loss": 11.66162395477295, "step": 3250 }, { "epoch": 0.42158157930995355, "grad_norm": 0.8394380807876587, "learning_rate": 6.2447152877634e-05, "loss": 12.743148803710938, "step": 3251 }, { "epoch": 0.42171125681820026, "grad_norm": 0.7831345796585083, "learning_rate": 6.242737356767486e-05, "loss": 12.012946128845215, "step": 3252 }, { "epoch": 0.4218409343264469, "grad_norm": 0.916869044303894, "learning_rate": 6.240759218470981e-05, "loss": 14.261982917785645, "step": 3253 }, { "epoch": 0.4219706118346936, "grad_norm": 0.6469659209251404, "learning_rate": 6.238780873203857e-05, "loss": 9.551667213439941, "step": 3254 }, { "epoch": 0.42210028934294025, "grad_norm": 0.6628855466842651, "learning_rate": 6.23680232129612e-05, "loss": 10.223057746887207, "step": 3255 }, { "epoch": 0.42222996685118697, "grad_norm": 0.7107856869697571, "learning_rate": 6.234823563077813e-05, "loss": 13.00194263458252, "step": 3256 }, { "epoch": 0.42235964435943363, "grad_norm": 0.7525744438171387, "learning_rate": 6.232844598879012e-05, "loss": 9.35212516784668, "step": 3257 }, { "epoch": 0.4224893218676803, "grad_norm": 0.8128088116645813, "learning_rate": 6.230865429029827e-05, "loss": 9.301453590393066, "step": 3258 }, { "epoch": 0.422618999375927, "grad_norm": 0.7392539381980896, "learning_rate": 6.228886053860401e-05, "loss": 8.430891990661621, "step": 3259 }, { "epoch": 0.42274867688417367, "grad_norm": 0.882063090801239, "learning_rate": 6.226906473700915e-05, "loss": 11.627729415893555, "step": 3260 }, { "epoch": 0.42287835439242033, "grad_norm": 0.8123559951782227, "learning_rate": 6.224926688881582e-05, "loss": 12.06723690032959, "step": 3261 }, { "epoch": 0.42300803190066705, "grad_norm": 0.7388857007026672, "learning_rate": 6.222946699732647e-05, "loss": 11.698853492736816, "step": 3262 }, { "epoch": 0.4231377094089137, "grad_norm": 0.7808411717414856, "learning_rate": 6.220966506584396e-05, "loss": 11.797948837280273, "step": 3263 }, { "epoch": 0.42326738691716037, "grad_norm": 0.8269119262695312, "learning_rate": 6.21898610976714e-05, "loss": 11.2359619140625, "step": 3264 }, { "epoch": 0.4233970644254071, "grad_norm": 0.9991507530212402, "learning_rate": 6.217005509611229e-05, "loss": 12.98321533203125, "step": 3265 }, { "epoch": 0.42352674193365375, "grad_norm": 1.0975133180618286, "learning_rate": 6.215024706447046e-05, "loss": 15.270132064819336, "step": 3266 }, { "epoch": 0.4236564194419004, "grad_norm": 0.6297140717506409, "learning_rate": 6.213043700605012e-05, "loss": 8.015201568603516, "step": 3267 }, { "epoch": 0.42378609695014713, "grad_norm": 0.5915248990058899, "learning_rate": 6.211062492415574e-05, "loss": 8.501501083374023, "step": 3268 }, { "epoch": 0.4239157744583938, "grad_norm": 1.2833607196807861, "learning_rate": 6.209081082209218e-05, "loss": 14.299214363098145, "step": 3269 }, { "epoch": 0.42404545196664045, "grad_norm": 0.9398568868637085, "learning_rate": 6.20709947031646e-05, "loss": 9.531266212463379, "step": 3270 }, { "epoch": 0.4241751294748871, "grad_norm": 0.7327706813812256, "learning_rate": 6.205117657067855e-05, "loss": 9.462996482849121, "step": 3271 }, { "epoch": 0.42430480698313383, "grad_norm": 0.8910762667655945, "learning_rate": 6.203135642793987e-05, "loss": 13.940439224243164, "step": 3272 }, { "epoch": 0.4244344844913805, "grad_norm": 0.8587002754211426, "learning_rate": 6.201153427825476e-05, "loss": 14.221162796020508, "step": 3273 }, { "epoch": 0.42456416199962715, "grad_norm": 0.6819692254066467, "learning_rate": 6.199171012492975e-05, "loss": 11.775638580322266, "step": 3274 }, { "epoch": 0.42469383950787387, "grad_norm": 0.737880527973175, "learning_rate": 6.197188397127168e-05, "loss": 9.947131156921387, "step": 3275 }, { "epoch": 0.42482351701612053, "grad_norm": 0.9323819875717163, "learning_rate": 6.195205582058777e-05, "loss": 9.598050117492676, "step": 3276 }, { "epoch": 0.4249531945243672, "grad_norm": 0.8146330118179321, "learning_rate": 6.19322256761855e-05, "loss": 10.949261665344238, "step": 3277 }, { "epoch": 0.4250828720326139, "grad_norm": 1.325925588607788, "learning_rate": 6.191239354137279e-05, "loss": 12.501754760742188, "step": 3278 }, { "epoch": 0.4252125495408606, "grad_norm": 0.8297793865203857, "learning_rate": 6.18925594194578e-05, "loss": 9.472015380859375, "step": 3279 }, { "epoch": 0.42534222704910724, "grad_norm": 0.6661406755447388, "learning_rate": 6.187272331374907e-05, "loss": 11.447114944458008, "step": 3280 }, { "epoch": 0.42547190455735395, "grad_norm": 0.7217709422111511, "learning_rate": 6.185288522755542e-05, "loss": 9.425196647644043, "step": 3281 }, { "epoch": 0.4256015820656006, "grad_norm": 0.739522397518158, "learning_rate": 6.183304516418606e-05, "loss": 10.650177955627441, "step": 3282 }, { "epoch": 0.4257312595738473, "grad_norm": 0.8457368612289429, "learning_rate": 6.181320312695051e-05, "loss": 11.681140899658203, "step": 3283 }, { "epoch": 0.425860937082094, "grad_norm": 0.6346796154975891, "learning_rate": 6.17933591191586e-05, "loss": 9.514771461486816, "step": 3284 }, { "epoch": 0.42599061459034065, "grad_norm": 1.1046143770217896, "learning_rate": 6.17735131441205e-05, "loss": 6.671060085296631, "step": 3285 }, { "epoch": 0.4261202920985873, "grad_norm": 0.7271028161048889, "learning_rate": 6.175366520514672e-05, "loss": 9.3328275680542, "step": 3286 }, { "epoch": 0.426249969606834, "grad_norm": 0.9611547589302063, "learning_rate": 6.173381530554809e-05, "loss": 10.366830825805664, "step": 3287 }, { "epoch": 0.4263796471150807, "grad_norm": 0.8015246391296387, "learning_rate": 6.171396344863576e-05, "loss": 10.907817840576172, "step": 3288 }, { "epoch": 0.42650932462332736, "grad_norm": 1.0204466581344604, "learning_rate": 6.169410963772119e-05, "loss": 9.013962745666504, "step": 3289 }, { "epoch": 0.426639002131574, "grad_norm": 0.8547239303588867, "learning_rate": 6.167425387611622e-05, "loss": 9.205618858337402, "step": 3290 }, { "epoch": 0.42676867963982074, "grad_norm": 0.9586637020111084, "learning_rate": 6.165439616713298e-05, "loss": 12.565213203430176, "step": 3291 }, { "epoch": 0.4268983571480674, "grad_norm": 1.0154246091842651, "learning_rate": 6.16345365140839e-05, "loss": 12.778653144836426, "step": 3292 }, { "epoch": 0.42702803465631406, "grad_norm": 0.9073339104652405, "learning_rate": 6.16146749202818e-05, "loss": 10.755518913269043, "step": 3293 }, { "epoch": 0.4271577121645608, "grad_norm": 0.8055518269538879, "learning_rate": 6.159481138903976e-05, "loss": 13.071925163269043, "step": 3294 }, { "epoch": 0.42728738967280744, "grad_norm": 0.652697741985321, "learning_rate": 6.15749459236712e-05, "loss": 8.838753700256348, "step": 3295 }, { "epoch": 0.4274170671810541, "grad_norm": 0.6506456732749939, "learning_rate": 6.155507852748989e-05, "loss": 8.38820743560791, "step": 3296 }, { "epoch": 0.4275467446893008, "grad_norm": 0.9426637887954712, "learning_rate": 6.15352092038099e-05, "loss": 11.445730209350586, "step": 3297 }, { "epoch": 0.4276764221975475, "grad_norm": 0.5887378454208374, "learning_rate": 6.151533795594564e-05, "loss": 6.085393905639648, "step": 3298 }, { "epoch": 0.42780609970579414, "grad_norm": 0.48349642753601074, "learning_rate": 6.14954647872118e-05, "loss": 7.742348670959473, "step": 3299 }, { "epoch": 0.42793577721404086, "grad_norm": 0.7783871293067932, "learning_rate": 6.147558970092342e-05, "loss": 10.296802520751953, "step": 3300 }, { "epoch": 0.4280654547222875, "grad_norm": 0.7564448118209839, "learning_rate": 6.145571270039588e-05, "loss": 9.244500160217285, "step": 3301 }, { "epoch": 0.4281951322305342, "grad_norm": 0.6833263635635376, "learning_rate": 6.143583378894481e-05, "loss": 11.47228717803955, "step": 3302 }, { "epoch": 0.42832480973878084, "grad_norm": 0.6453641057014465, "learning_rate": 6.141595296988627e-05, "loss": 9.300533294677734, "step": 3303 }, { "epoch": 0.42845448724702756, "grad_norm": 0.78568035364151, "learning_rate": 6.13960702465365e-05, "loss": 10.123868942260742, "step": 3304 }, { "epoch": 0.4285841647552742, "grad_norm": 0.7894951105117798, "learning_rate": 6.137618562221218e-05, "loss": 10.165765762329102, "step": 3305 }, { "epoch": 0.4287138422635209, "grad_norm": 1.0038269758224487, "learning_rate": 6.135629910023025e-05, "loss": 10.299635887145996, "step": 3306 }, { "epoch": 0.4288435197717676, "grad_norm": 1.0328859090805054, "learning_rate": 6.133641068390797e-05, "loss": 12.834074020385742, "step": 3307 }, { "epoch": 0.42897319728001426, "grad_norm": 0.50836181640625, "learning_rate": 6.13165203765629e-05, "loss": 6.754205226898193, "step": 3308 }, { "epoch": 0.4291028747882609, "grad_norm": 0.761393129825592, "learning_rate": 6.129662818151296e-05, "loss": 12.547953605651855, "step": 3309 }, { "epoch": 0.42923255229650764, "grad_norm": 0.993201732635498, "learning_rate": 6.127673410207636e-05, "loss": 12.57127857208252, "step": 3310 }, { "epoch": 0.4293622298047543, "grad_norm": 0.78338623046875, "learning_rate": 6.12568381415716e-05, "loss": 12.010974884033203, "step": 3311 }, { "epoch": 0.42949190731300096, "grad_norm": 0.6911954879760742, "learning_rate": 6.123694030331756e-05, "loss": 13.708977699279785, "step": 3312 }, { "epoch": 0.4296215848212477, "grad_norm": 0.753911018371582, "learning_rate": 6.121704059063335e-05, "loss": 8.555333137512207, "step": 3313 }, { "epoch": 0.42975126232949434, "grad_norm": 0.7289760112762451, "learning_rate": 6.119713900683846e-05, "loss": 8.06216049194336, "step": 3314 }, { "epoch": 0.429880939837741, "grad_norm": 1.0982890129089355, "learning_rate": 6.117723555525265e-05, "loss": 14.018980979919434, "step": 3315 }, { "epoch": 0.4300106173459877, "grad_norm": 0.5888426303863525, "learning_rate": 6.115733023919603e-05, "loss": 6.775753021240234, "step": 3316 }, { "epoch": 0.4301402948542344, "grad_norm": 0.7393051385879517, "learning_rate": 6.113742306198896e-05, "loss": 10.250325202941895, "step": 3317 }, { "epoch": 0.43026997236248105, "grad_norm": 0.8192257285118103, "learning_rate": 6.111751402695219e-05, "loss": 10.365365028381348, "step": 3318 }, { "epoch": 0.4303996498707277, "grad_norm": 1.0404459238052368, "learning_rate": 6.10976031374067e-05, "loss": 14.006077766418457, "step": 3319 }, { "epoch": 0.4305293273789744, "grad_norm": 0.697630763053894, "learning_rate": 6.107769039667387e-05, "loss": 10.589712142944336, "step": 3320 }, { "epoch": 0.4306590048872211, "grad_norm": 0.7798983454704285, "learning_rate": 6.10577758080753e-05, "loss": 11.690973281860352, "step": 3321 }, { "epoch": 0.43078868239546775, "grad_norm": 0.7847676873207092, "learning_rate": 6.103785937493296e-05, "loss": 10.243595123291016, "step": 3322 }, { "epoch": 0.43091835990371447, "grad_norm": 0.5721989870071411, "learning_rate": 6.101794110056908e-05, "loss": 8.222981452941895, "step": 3323 }, { "epoch": 0.4310480374119611, "grad_norm": 0.7458352446556091, "learning_rate": 6.099802098830623e-05, "loss": 8.729347229003906, "step": 3324 }, { "epoch": 0.4311777149202078, "grad_norm": 1.0315765142440796, "learning_rate": 6.0978099041467266e-05, "loss": 9.946529388427734, "step": 3325 }, { "epoch": 0.4313073924284545, "grad_norm": 0.8312174677848816, "learning_rate": 6.0958175263375374e-05, "loss": 8.463665008544922, "step": 3326 }, { "epoch": 0.43143706993670117, "grad_norm": 0.8713139295578003, "learning_rate": 6.093824965735404e-05, "loss": 10.833541870117188, "step": 3327 }, { "epoch": 0.43156674744494783, "grad_norm": 0.925735592842102, "learning_rate": 6.091832222672702e-05, "loss": 15.195579528808594, "step": 3328 }, { "epoch": 0.43169642495319455, "grad_norm": 0.6960967779159546, "learning_rate": 6.089839297481843e-05, "loss": 7.436763763427734, "step": 3329 }, { "epoch": 0.4318261024614412, "grad_norm": 0.63539719581604, "learning_rate": 6.087846190495266e-05, "loss": 8.76417350769043, "step": 3330 }, { "epoch": 0.43195577996968787, "grad_norm": 0.9571444988250732, "learning_rate": 6.085852902045438e-05, "loss": 11.102945327758789, "step": 3331 }, { "epoch": 0.4320854574779346, "grad_norm": 0.7209802865982056, "learning_rate": 6.08385943246486e-05, "loss": 10.548912048339844, "step": 3332 }, { "epoch": 0.43221513498618125, "grad_norm": 0.8943246603012085, "learning_rate": 6.0818657820860635e-05, "loss": 10.354134559631348, "step": 3333 }, { "epoch": 0.4323448124944279, "grad_norm": 0.8119673728942871, "learning_rate": 6.079871951241607e-05, "loss": 9.582561492919922, "step": 3334 }, { "epoch": 0.43247449000267457, "grad_norm": 0.6532847285270691, "learning_rate": 6.0778779402640815e-05, "loss": 7.828824996948242, "step": 3335 }, { "epoch": 0.4326041675109213, "grad_norm": 0.793916642665863, "learning_rate": 6.075883749486106e-05, "loss": 8.097434997558594, "step": 3336 }, { "epoch": 0.43273384501916795, "grad_norm": 0.8761628270149231, "learning_rate": 6.073889379240333e-05, "loss": 14.23385238647461, "step": 3337 }, { "epoch": 0.4328635225274146, "grad_norm": 0.8517217636108398, "learning_rate": 6.071894829859441e-05, "loss": 7.631187438964844, "step": 3338 }, { "epoch": 0.43299320003566133, "grad_norm": 0.6379651427268982, "learning_rate": 6.069900101676139e-05, "loss": 7.406857013702393, "step": 3339 }, { "epoch": 0.433122877543908, "grad_norm": 0.792052686214447, "learning_rate": 6.06790519502317e-05, "loss": 14.70301628112793, "step": 3340 }, { "epoch": 0.43325255505215465, "grad_norm": 0.7889311909675598, "learning_rate": 6.0659101102333025e-05, "loss": 11.061797142028809, "step": 3341 }, { "epoch": 0.43338223256040137, "grad_norm": 1.0911073684692383, "learning_rate": 6.063914847639335e-05, "loss": 11.263449668884277, "step": 3342 }, { "epoch": 0.43351191006864803, "grad_norm": 0.7487554550170898, "learning_rate": 6.061919407574097e-05, "loss": 10.25610637664795, "step": 3343 }, { "epoch": 0.4336415875768947, "grad_norm": 1.018526554107666, "learning_rate": 6.0599237903704475e-05, "loss": 12.03724193572998, "step": 3344 }, { "epoch": 0.4337712650851414, "grad_norm": 1.0595881938934326, "learning_rate": 6.0579279963612755e-05, "loss": 8.705245018005371, "step": 3345 }, { "epoch": 0.4339009425933881, "grad_norm": 0.7663394212722778, "learning_rate": 6.055932025879496e-05, "loss": 11.717255592346191, "step": 3346 }, { "epoch": 0.43403062010163473, "grad_norm": 1.0583710670471191, "learning_rate": 6.053935879258059e-05, "loss": 11.396625518798828, "step": 3347 }, { "epoch": 0.43416029760988145, "grad_norm": 0.7888442873954773, "learning_rate": 6.05193955682994e-05, "loss": 10.069742202758789, "step": 3348 }, { "epoch": 0.4342899751181281, "grad_norm": 0.8589977622032166, "learning_rate": 6.0499430589281446e-05, "loss": 11.614335060119629, "step": 3349 }, { "epoch": 0.4344196526263748, "grad_norm": 1.0637431144714355, "learning_rate": 6.047946385885709e-05, "loss": 15.076358795166016, "step": 3350 }, { "epoch": 0.43454933013462144, "grad_norm": 0.8586578965187073, "learning_rate": 6.045949538035696e-05, "loss": 12.11964225769043, "step": 3351 }, { "epoch": 0.43467900764286815, "grad_norm": 0.8755229115486145, "learning_rate": 6.0439525157111985e-05, "loss": 8.45337963104248, "step": 3352 }, { "epoch": 0.4348086851511148, "grad_norm": 1.0646103620529175, "learning_rate": 6.041955319245341e-05, "loss": 10.606902122497559, "step": 3353 }, { "epoch": 0.4349383626593615, "grad_norm": 0.7917141914367676, "learning_rate": 6.039957948971277e-05, "loss": 7.7166242599487305, "step": 3354 }, { "epoch": 0.4350680401676082, "grad_norm": 0.8283325433731079, "learning_rate": 6.037960405222183e-05, "loss": 7.949502944946289, "step": 3355 }, { "epoch": 0.43519771767585486, "grad_norm": 0.6502971649169922, "learning_rate": 6.035962688331269e-05, "loss": 9.105552673339844, "step": 3356 }, { "epoch": 0.4353273951841015, "grad_norm": 0.7588916420936584, "learning_rate": 6.033964798631776e-05, "loss": 11.228532791137695, "step": 3357 }, { "epoch": 0.43545707269234823, "grad_norm": 0.8689054250717163, "learning_rate": 6.031966736456969e-05, "loss": 9.936870574951172, "step": 3358 }, { "epoch": 0.4355867502005949, "grad_norm": 0.8319447636604309, "learning_rate": 6.029968502140147e-05, "loss": 13.999053955078125, "step": 3359 }, { "epoch": 0.43571642770884156, "grad_norm": 0.9126978516578674, "learning_rate": 6.02797009601463e-05, "loss": 8.651729583740234, "step": 3360 }, { "epoch": 0.4358461052170883, "grad_norm": 1.1004056930541992, "learning_rate": 6.025971518413777e-05, "loss": 11.15443229675293, "step": 3361 }, { "epoch": 0.43597578272533494, "grad_norm": 0.7034338712692261, "learning_rate": 6.023972769670966e-05, "loss": 9.043030738830566, "step": 3362 }, { "epoch": 0.4361054602335816, "grad_norm": 0.8734164237976074, "learning_rate": 6.0219738501196096e-05, "loss": 9.767477035522461, "step": 3363 }, { "epoch": 0.4362351377418283, "grad_norm": 0.5764414072036743, "learning_rate": 6.0199747600931456e-05, "loss": 9.529647827148438, "step": 3364 }, { "epoch": 0.436364815250075, "grad_norm": 0.6133989095687866, "learning_rate": 6.017975499925042e-05, "loss": 9.367201805114746, "step": 3365 }, { "epoch": 0.43649449275832164, "grad_norm": 0.9378644824028015, "learning_rate": 6.015976069948796e-05, "loss": 14.446555137634277, "step": 3366 }, { "epoch": 0.4366241702665683, "grad_norm": 0.7096846103668213, "learning_rate": 6.0139764704979306e-05, "loss": 10.173103332519531, "step": 3367 }, { "epoch": 0.436753847774815, "grad_norm": 0.6402127742767334, "learning_rate": 6.011976701905998e-05, "loss": 10.415277481079102, "step": 3368 }, { "epoch": 0.4368835252830617, "grad_norm": 1.036884069442749, "learning_rate": 6.0099767645065795e-05, "loss": 15.336949348449707, "step": 3369 }, { "epoch": 0.43701320279130834, "grad_norm": 0.8441838026046753, "learning_rate": 6.007976658633283e-05, "loss": 12.004881858825684, "step": 3370 }, { "epoch": 0.43714288029955506, "grad_norm": 0.6527126431465149, "learning_rate": 6.005976384619747e-05, "loss": 10.23834228515625, "step": 3371 }, { "epoch": 0.4372725578078017, "grad_norm": 0.54100501537323, "learning_rate": 6.003975942799636e-05, "loss": 10.159747123718262, "step": 3372 }, { "epoch": 0.4374022353160484, "grad_norm": 0.7727686166763306, "learning_rate": 6.001975333506642e-05, "loss": 12.442867279052734, "step": 3373 }, { "epoch": 0.4375319128242951, "grad_norm": 0.5562875270843506, "learning_rate": 5.999974557074487e-05, "loss": 7.797626495361328, "step": 3374 }, { "epoch": 0.43766159033254176, "grad_norm": 0.6416633725166321, "learning_rate": 5.997973613836919e-05, "loss": 9.798311233520508, "step": 3375 }, { "epoch": 0.4377912678407884, "grad_norm": 0.6577466130256653, "learning_rate": 5.995972504127716e-05, "loss": 7.477596759796143, "step": 3376 }, { "epoch": 0.43792094534903514, "grad_norm": 0.6400067210197449, "learning_rate": 5.9939712282806816e-05, "loss": 8.474573135375977, "step": 3377 }, { "epoch": 0.4380506228572818, "grad_norm": 0.8103587031364441, "learning_rate": 5.9919697866296476e-05, "loss": 12.036026954650879, "step": 3378 }, { "epoch": 0.43818030036552846, "grad_norm": 0.7728745937347412, "learning_rate": 5.989968179508475e-05, "loss": 7.9220290184021, "step": 3379 }, { "epoch": 0.4383099778737752, "grad_norm": 0.7812963128089905, "learning_rate": 5.98796640725105e-05, "loss": 14.547966003417969, "step": 3380 }, { "epoch": 0.43843965538202184, "grad_norm": 0.6425616145133972, "learning_rate": 5.985964470191288e-05, "loss": 9.086616516113281, "step": 3381 }, { "epoch": 0.4385693328902685, "grad_norm": 0.8349751234054565, "learning_rate": 5.9839623686631306e-05, "loss": 12.593676567077637, "step": 3382 }, { "epoch": 0.43869901039851517, "grad_norm": 0.6650148034095764, "learning_rate": 5.981960103000549e-05, "loss": 10.174250602722168, "step": 3383 }, { "epoch": 0.4388286879067619, "grad_norm": 1.1071271896362305, "learning_rate": 5.9799576735375394e-05, "loss": 11.465903282165527, "step": 3384 }, { "epoch": 0.43895836541500854, "grad_norm": 0.8468512892723083, "learning_rate": 5.977955080608127e-05, "loss": 9.736372947692871, "step": 3385 }, { "epoch": 0.4390880429232552, "grad_norm": 0.8249483704566956, "learning_rate": 5.9759523245463635e-05, "loss": 10.17736530303955, "step": 3386 }, { "epoch": 0.4392177204315019, "grad_norm": 0.6559199094772339, "learning_rate": 5.9739494056863276e-05, "loss": 10.056620597839355, "step": 3387 }, { "epoch": 0.4393473979397486, "grad_norm": 0.6988840699195862, "learning_rate": 5.971946324362127e-05, "loss": 11.90302562713623, "step": 3388 }, { "epoch": 0.43947707544799525, "grad_norm": 0.7731924653053284, "learning_rate": 5.969943080907893e-05, "loss": 10.585250854492188, "step": 3389 }, { "epoch": 0.43960675295624196, "grad_norm": 0.8796398639678955, "learning_rate": 5.9679396756577864e-05, "loss": 11.032754898071289, "step": 3390 }, { "epoch": 0.4397364304644886, "grad_norm": 0.9281768798828125, "learning_rate": 5.9659361089459954e-05, "loss": 10.701811790466309, "step": 3391 }, { "epoch": 0.4398661079727353, "grad_norm": 0.717452883720398, "learning_rate": 5.963932381106734e-05, "loss": 10.486282348632812, "step": 3392 }, { "epoch": 0.439995785480982, "grad_norm": 0.7487065196037292, "learning_rate": 5.961928492474242e-05, "loss": 10.950199127197266, "step": 3393 }, { "epoch": 0.44012546298922867, "grad_norm": 0.6856966018676758, "learning_rate": 5.9599244433827904e-05, "loss": 7.539325714111328, "step": 3394 }, { "epoch": 0.44025514049747533, "grad_norm": 0.7285069823265076, "learning_rate": 5.957920234166672e-05, "loss": 8.30312442779541, "step": 3395 }, { "epoch": 0.44038481800572205, "grad_norm": 0.5629587769508362, "learning_rate": 5.955915865160205e-05, "loss": 7.335002899169922, "step": 3396 }, { "epoch": 0.4405144955139687, "grad_norm": 0.7937312722206116, "learning_rate": 5.953911336697744e-05, "loss": 12.353517532348633, "step": 3397 }, { "epoch": 0.44064417302221537, "grad_norm": 0.600894033908844, "learning_rate": 5.95190664911366e-05, "loss": 7.140430927276611, "step": 3398 }, { "epoch": 0.44077385053046203, "grad_norm": 0.6577214002609253, "learning_rate": 5.949901802742355e-05, "loss": 8.37775993347168, "step": 3399 }, { "epoch": 0.44090352803870875, "grad_norm": 0.8525835275650024, "learning_rate": 5.9478967979182556e-05, "loss": 10.606145858764648, "step": 3400 }, { "epoch": 0.4410332055469554, "grad_norm": 0.9025457501411438, "learning_rate": 5.945891634975818e-05, "loss": 11.969980239868164, "step": 3401 }, { "epoch": 0.44116288305520207, "grad_norm": 0.9450047612190247, "learning_rate": 5.9438863142495204e-05, "loss": 11.795232772827148, "step": 3402 }, { "epoch": 0.4412925605634488, "grad_norm": 0.699377715587616, "learning_rate": 5.941880836073871e-05, "loss": 8.841362953186035, "step": 3403 }, { "epoch": 0.44142223807169545, "grad_norm": 0.6548464298248291, "learning_rate": 5.939875200783404e-05, "loss": 7.642826080322266, "step": 3404 }, { "epoch": 0.4415519155799421, "grad_norm": 0.7157819867134094, "learning_rate": 5.937869408712676e-05, "loss": 9.302490234375, "step": 3405 }, { "epoch": 0.44168159308818883, "grad_norm": 0.7261216044425964, "learning_rate": 5.9358634601962745e-05, "loss": 9.725724220275879, "step": 3406 }, { "epoch": 0.4418112705964355, "grad_norm": 0.8393431901931763, "learning_rate": 5.933857355568811e-05, "loss": 14.264314651489258, "step": 3407 }, { "epoch": 0.44194094810468215, "grad_norm": 0.9469763040542603, "learning_rate": 5.931851095164924e-05, "loss": 13.263031005859375, "step": 3408 }, { "epoch": 0.44207062561292887, "grad_norm": 0.8314464688301086, "learning_rate": 5.9298446793192755e-05, "loss": 12.832717895507812, "step": 3409 }, { "epoch": 0.44220030312117553, "grad_norm": 0.6332257390022278, "learning_rate": 5.927838108366554e-05, "loss": 9.115666389465332, "step": 3410 }, { "epoch": 0.4423299806294222, "grad_norm": 0.7902301549911499, "learning_rate": 5.9258313826414766e-05, "loss": 13.310551643371582, "step": 3411 }, { "epoch": 0.4424596581376689, "grad_norm": 0.5346226096153259, "learning_rate": 5.9238245024787844e-05, "loss": 12.477167129516602, "step": 3412 }, { "epoch": 0.44258933564591557, "grad_norm": 0.9357972741127014, "learning_rate": 5.921817468213244e-05, "loss": 9.05359935760498, "step": 3413 }, { "epoch": 0.44271901315416223, "grad_norm": 0.6935725212097168, "learning_rate": 5.919810280179649e-05, "loss": 9.173410415649414, "step": 3414 }, { "epoch": 0.4428486906624089, "grad_norm": 0.6858792304992676, "learning_rate": 5.917802938712816e-05, "loss": 9.106578826904297, "step": 3415 }, { "epoch": 0.4429783681706556, "grad_norm": 0.8923718333244324, "learning_rate": 5.9157954441475896e-05, "loss": 12.90909194946289, "step": 3416 }, { "epoch": 0.4431080456789023, "grad_norm": 0.8343174457550049, "learning_rate": 5.913787796818841e-05, "loss": 10.230423927307129, "step": 3417 }, { "epoch": 0.44323772318714894, "grad_norm": 0.6714288592338562, "learning_rate": 5.911779997061464e-05, "loss": 8.512080192565918, "step": 3418 }, { "epoch": 0.44336740069539565, "grad_norm": 0.5839366316795349, "learning_rate": 5.909772045210378e-05, "loss": 6.027339458465576, "step": 3419 }, { "epoch": 0.4434970782036423, "grad_norm": 1.0680216550827026, "learning_rate": 5.9077639416005295e-05, "loss": 13.529776573181152, "step": 3420 }, { "epoch": 0.443626755711889, "grad_norm": 0.9546318054199219, "learning_rate": 5.905755686566891e-05, "loss": 14.286480903625488, "step": 3421 }, { "epoch": 0.4437564332201357, "grad_norm": 0.6398417949676514, "learning_rate": 5.903747280444456e-05, "loss": 8.831778526306152, "step": 3422 }, { "epoch": 0.44388611072838235, "grad_norm": 0.7514573335647583, "learning_rate": 5.901738723568248e-05, "loss": 11.47745418548584, "step": 3423 }, { "epoch": 0.444015788236629, "grad_norm": 0.583652138710022, "learning_rate": 5.899730016273313e-05, "loss": 8.788034439086914, "step": 3424 }, { "epoch": 0.44414546574487573, "grad_norm": 0.7487939596176147, "learning_rate": 5.897721158894723e-05, "loss": 10.26719856262207, "step": 3425 }, { "epoch": 0.4442751432531224, "grad_norm": 0.6625804901123047, "learning_rate": 5.895712151767575e-05, "loss": 7.6800055503845215, "step": 3426 }, { "epoch": 0.44440482076136906, "grad_norm": 0.8347891569137573, "learning_rate": 5.893702995226991e-05, "loss": 10.789421081542969, "step": 3427 }, { "epoch": 0.4445344982696158, "grad_norm": 0.6624383926391602, "learning_rate": 5.8916936896081166e-05, "loss": 8.627817153930664, "step": 3428 }, { "epoch": 0.44466417577786244, "grad_norm": 1.115167260169983, "learning_rate": 5.889684235246125e-05, "loss": 12.372110366821289, "step": 3429 }, { "epoch": 0.4447938532861091, "grad_norm": 0.9213653206825256, "learning_rate": 5.887674632476211e-05, "loss": 12.016037940979004, "step": 3430 }, { "epoch": 0.44492353079435576, "grad_norm": 0.7201657891273499, "learning_rate": 5.885664881633597e-05, "loss": 7.404200077056885, "step": 3431 }, { "epoch": 0.4450532083026025, "grad_norm": 0.967479407787323, "learning_rate": 5.883654983053527e-05, "loss": 14.052206039428711, "step": 3432 }, { "epoch": 0.44518288581084914, "grad_norm": 0.8772505521774292, "learning_rate": 5.881644937071273e-05, "loss": 13.114954948425293, "step": 3433 }, { "epoch": 0.4453125633190958, "grad_norm": 0.932594895362854, "learning_rate": 5.879634744022131e-05, "loss": 13.512072563171387, "step": 3434 }, { "epoch": 0.4454422408273425, "grad_norm": 0.9210644960403442, "learning_rate": 5.877624404241419e-05, "loss": 12.5166597366333, "step": 3435 }, { "epoch": 0.4455719183355892, "grad_norm": 0.8909719586372375, "learning_rate": 5.875613918064481e-05, "loss": 11.775370597839355, "step": 3436 }, { "epoch": 0.44570159584383584, "grad_norm": 0.8511305451393127, "learning_rate": 5.873603285826686e-05, "loss": 12.222582817077637, "step": 3437 }, { "epoch": 0.44583127335208256, "grad_norm": 0.6453792452812195, "learning_rate": 5.871592507863428e-05, "loss": 11.053874969482422, "step": 3438 }, { "epoch": 0.4459609508603292, "grad_norm": 0.7617144584655762, "learning_rate": 5.869581584510122e-05, "loss": 9.554119110107422, "step": 3439 }, { "epoch": 0.4460906283685759, "grad_norm": 0.7678163051605225, "learning_rate": 5.867570516102211e-05, "loss": 10.603363990783691, "step": 3440 }, { "epoch": 0.4462203058768226, "grad_norm": 0.7619647979736328, "learning_rate": 5.86555930297516e-05, "loss": 11.758309364318848, "step": 3441 }, { "epoch": 0.44634998338506926, "grad_norm": 0.8008137941360474, "learning_rate": 5.8635479454644596e-05, "loss": 10.596028327941895, "step": 3442 }, { "epoch": 0.4464796608933159, "grad_norm": 0.7374841570854187, "learning_rate": 5.861536443905622e-05, "loss": 7.185418128967285, "step": 3443 }, { "epoch": 0.44660933840156264, "grad_norm": 0.9004619121551514, "learning_rate": 5.859524798634186e-05, "loss": 12.268406867980957, "step": 3444 }, { "epoch": 0.4467390159098093, "grad_norm": 0.6861486434936523, "learning_rate": 5.8575130099857144e-05, "loss": 9.063365936279297, "step": 3445 }, { "epoch": 0.44686869341805596, "grad_norm": 0.6858518123626709, "learning_rate": 5.855501078295792e-05, "loss": 10.247349739074707, "step": 3446 }, { "epoch": 0.4469983709263026, "grad_norm": 0.8660739660263062, "learning_rate": 5.853489003900028e-05, "loss": 11.591680526733398, "step": 3447 }, { "epoch": 0.44712804843454934, "grad_norm": 0.8797001242637634, "learning_rate": 5.851476787134056e-05, "loss": 10.900524139404297, "step": 3448 }, { "epoch": 0.447257725942796, "grad_norm": 0.7218366265296936, "learning_rate": 5.849464428333534e-05, "loss": 8.544886589050293, "step": 3449 }, { "epoch": 0.44738740345104266, "grad_norm": 0.645572304725647, "learning_rate": 5.847451927834143e-05, "loss": 8.010234832763672, "step": 3450 }, { "epoch": 0.4475170809592894, "grad_norm": 0.940219521522522, "learning_rate": 5.845439285971587e-05, "loss": 10.031496047973633, "step": 3451 }, { "epoch": 0.44764675846753604, "grad_norm": 0.7245369553565979, "learning_rate": 5.843426503081594e-05, "loss": 10.243766784667969, "step": 3452 }, { "epoch": 0.4477764359757827, "grad_norm": 0.8124321103096008, "learning_rate": 5.841413579499915e-05, "loss": 8.796747207641602, "step": 3453 }, { "epoch": 0.4479061134840294, "grad_norm": 0.8300172686576843, "learning_rate": 5.8394005155623246e-05, "loss": 11.975339889526367, "step": 3454 }, { "epoch": 0.4480357909922761, "grad_norm": 0.7839025259017944, "learning_rate": 5.837387311604624e-05, "loss": 12.804245948791504, "step": 3455 }, { "epoch": 0.44816546850052275, "grad_norm": 0.8392417430877686, "learning_rate": 5.835373967962634e-05, "loss": 12.366373062133789, "step": 3456 }, { "epoch": 0.44829514600876946, "grad_norm": 0.964555561542511, "learning_rate": 5.833360484972199e-05, "loss": 11.648430824279785, "step": 3457 }, { "epoch": 0.4484248235170161, "grad_norm": 0.9708308577537537, "learning_rate": 5.831346862969187e-05, "loss": 13.290670394897461, "step": 3458 }, { "epoch": 0.4485545010252628, "grad_norm": 0.6994921565055847, "learning_rate": 5.829333102289491e-05, "loss": 8.874733924865723, "step": 3459 }, { "epoch": 0.4486841785335095, "grad_norm": 0.8363504409790039, "learning_rate": 5.827319203269025e-05, "loss": 10.828818321228027, "step": 3460 }, { "epoch": 0.44881385604175617, "grad_norm": 0.6924130320549011, "learning_rate": 5.825305166243726e-05, "loss": 7.537240028381348, "step": 3461 }, { "epoch": 0.4489435335500028, "grad_norm": 0.8919296264648438, "learning_rate": 5.823290991549557e-05, "loss": 14.28830623626709, "step": 3462 }, { "epoch": 0.4490732110582495, "grad_norm": 0.9413667917251587, "learning_rate": 5.8212766795225003e-05, "loss": 11.491077423095703, "step": 3463 }, { "epoch": 0.4492028885664962, "grad_norm": 0.8620169758796692, "learning_rate": 5.819262230498562e-05, "loss": 8.663365364074707, "step": 3464 }, { "epoch": 0.44933256607474287, "grad_norm": 0.771683931350708, "learning_rate": 5.8172476448137736e-05, "loss": 10.128249168395996, "step": 3465 }, { "epoch": 0.44946224358298953, "grad_norm": 0.8755385875701904, "learning_rate": 5.8152329228041844e-05, "loss": 13.837246894836426, "step": 3466 }, { "epoch": 0.44959192109123625, "grad_norm": 0.912610650062561, "learning_rate": 5.813218064805873e-05, "loss": 13.449434280395508, "step": 3467 }, { "epoch": 0.4497215985994829, "grad_norm": 0.6749876737594604, "learning_rate": 5.811203071154935e-05, "loss": 8.960209846496582, "step": 3468 }, { "epoch": 0.44985127610772957, "grad_norm": 0.7323324084281921, "learning_rate": 5.8091879421874905e-05, "loss": 9.224714279174805, "step": 3469 }, { "epoch": 0.4499809536159763, "grad_norm": 0.8973622918128967, "learning_rate": 5.807172678239684e-05, "loss": 9.500466346740723, "step": 3470 }, { "epoch": 0.45011063112422295, "grad_norm": 0.7857462763786316, "learning_rate": 5.805157279647679e-05, "loss": 9.813858032226562, "step": 3471 }, { "epoch": 0.4502403086324696, "grad_norm": 0.729816734790802, "learning_rate": 5.8031417467476665e-05, "loss": 11.143250465393066, "step": 3472 }, { "epoch": 0.4503699861407163, "grad_norm": 0.8653647899627686, "learning_rate": 5.801126079875853e-05, "loss": 12.046531677246094, "step": 3473 }, { "epoch": 0.450499663648963, "grad_norm": 0.8101174831390381, "learning_rate": 5.7991102793684745e-05, "loss": 12.308537483215332, "step": 3474 }, { "epoch": 0.45062934115720965, "grad_norm": 0.84153151512146, "learning_rate": 5.7970943455617845e-05, "loss": 10.636305809020996, "step": 3475 }, { "epoch": 0.45075901866545637, "grad_norm": 0.8185320496559143, "learning_rate": 5.795078278792061e-05, "loss": 8.21574878692627, "step": 3476 }, { "epoch": 0.45088869617370303, "grad_norm": 0.8770683407783508, "learning_rate": 5.7930620793956025e-05, "loss": 11.13638687133789, "step": 3477 }, { "epoch": 0.4510183736819497, "grad_norm": 0.9101407527923584, "learning_rate": 5.791045747708731e-05, "loss": 12.582525253295898, "step": 3478 }, { "epoch": 0.45114805119019635, "grad_norm": 1.1931241750717163, "learning_rate": 5.7890292840677905e-05, "loss": 14.35811996459961, "step": 3479 }, { "epoch": 0.45127772869844307, "grad_norm": 0.7464581727981567, "learning_rate": 5.787012688809146e-05, "loss": 10.413804054260254, "step": 3480 }, { "epoch": 0.45140740620668973, "grad_norm": 0.7069540619850159, "learning_rate": 5.784995962269184e-05, "loss": 7.809738636016846, "step": 3481 }, { "epoch": 0.4515370837149364, "grad_norm": 0.6290309429168701, "learning_rate": 5.782979104784316e-05, "loss": 9.061797142028809, "step": 3482 }, { "epoch": 0.4516667612231831, "grad_norm": 0.7412807941436768, "learning_rate": 5.780962116690972e-05, "loss": 10.236398696899414, "step": 3483 }, { "epoch": 0.4517964387314298, "grad_norm": 0.7267403602600098, "learning_rate": 5.778944998325606e-05, "loss": 9.118897438049316, "step": 3484 }, { "epoch": 0.45192611623967643, "grad_norm": 0.7406579852104187, "learning_rate": 5.776927750024692e-05, "loss": 9.346085548400879, "step": 3485 }, { "epoch": 0.45205579374792315, "grad_norm": 0.80519038438797, "learning_rate": 5.7749103721247275e-05, "loss": 10.54952621459961, "step": 3486 }, { "epoch": 0.4521854712561698, "grad_norm": 0.8187544941902161, "learning_rate": 5.7728928649622295e-05, "loss": 8.958839416503906, "step": 3487 }, { "epoch": 0.4523151487644165, "grad_norm": 0.7068065404891968, "learning_rate": 5.770875228873738e-05, "loss": 9.273432731628418, "step": 3488 }, { "epoch": 0.4524448262726632, "grad_norm": 0.5768935084342957, "learning_rate": 5.768857464195815e-05, "loss": 7.440933704376221, "step": 3489 }, { "epoch": 0.45257450378090985, "grad_norm": 0.8266781568527222, "learning_rate": 5.7668395712650416e-05, "loss": 11.262039184570312, "step": 3490 }, { "epoch": 0.4527041812891565, "grad_norm": 0.8135215044021606, "learning_rate": 5.764821550418022e-05, "loss": 8.92018985748291, "step": 3491 }, { "epoch": 0.45283385879740323, "grad_norm": 0.7273221015930176, "learning_rate": 5.762803401991384e-05, "loss": 9.95621109008789, "step": 3492 }, { "epoch": 0.4529635363056499, "grad_norm": 0.8207893967628479, "learning_rate": 5.760785126321772e-05, "loss": 9.267175674438477, "step": 3493 }, { "epoch": 0.45309321381389656, "grad_norm": 0.9091071486473083, "learning_rate": 5.7587667237458545e-05, "loss": 8.881787300109863, "step": 3494 }, { "epoch": 0.4532228913221432, "grad_norm": 0.6979576349258423, "learning_rate": 5.7567481946003186e-05, "loss": 11.184304237365723, "step": 3495 }, { "epoch": 0.45335256883038993, "grad_norm": 1.0937784910202026, "learning_rate": 5.754729539221878e-05, "loss": 15.138019561767578, "step": 3496 }, { "epoch": 0.4534822463386366, "grad_norm": 0.9376363754272461, "learning_rate": 5.752710757947262e-05, "loss": 10.467537879943848, "step": 3497 }, { "epoch": 0.45361192384688326, "grad_norm": 0.9881048202514648, "learning_rate": 5.7506918511132226e-05, "loss": 12.966289520263672, "step": 3498 }, { "epoch": 0.45374160135513, "grad_norm": 0.6946467161178589, "learning_rate": 5.748672819056533e-05, "loss": 9.029767990112305, "step": 3499 }, { "epoch": 0.45387127886337664, "grad_norm": 0.9368935227394104, "learning_rate": 5.746653662113989e-05, "loss": 12.126402854919434, "step": 3500 }, { "epoch": 0.4540009563716233, "grad_norm": 0.7173465490341187, "learning_rate": 5.744634380622402e-05, "loss": 8.043391227722168, "step": 3501 }, { "epoch": 0.45413063387987, "grad_norm": 0.6603568196296692, "learning_rate": 5.742614974918612e-05, "loss": 9.6345853805542, "step": 3502 }, { "epoch": 0.4542603113881167, "grad_norm": 1.0133371353149414, "learning_rate": 5.7405954453394714e-05, "loss": 12.017910957336426, "step": 3503 }, { "epoch": 0.45438998889636334, "grad_norm": 0.8309103846549988, "learning_rate": 5.73857579222186e-05, "loss": 9.505184173583984, "step": 3504 }, { "epoch": 0.45451966640461006, "grad_norm": 0.6297770738601685, "learning_rate": 5.736556015902673e-05, "loss": 9.438393592834473, "step": 3505 }, { "epoch": 0.4546493439128567, "grad_norm": 0.9003617167472839, "learning_rate": 5.7345361167188307e-05, "loss": 11.962759017944336, "step": 3506 }, { "epoch": 0.4547790214211034, "grad_norm": 0.5977389216423035, "learning_rate": 5.732516095007271e-05, "loss": 9.722484588623047, "step": 3507 }, { "epoch": 0.4549086989293501, "grad_norm": 0.6003726124763489, "learning_rate": 5.7304959511049525e-05, "loss": 6.8603105545043945, "step": 3508 }, { "epoch": 0.45503837643759676, "grad_norm": 1.09577476978302, "learning_rate": 5.728475685348856e-05, "loss": 12.205047607421875, "step": 3509 }, { "epoch": 0.4551680539458434, "grad_norm": 0.950939953327179, "learning_rate": 5.7264552980759815e-05, "loss": 12.30324935913086, "step": 3510 }, { "epoch": 0.4552977314540901, "grad_norm": 0.9285300970077515, "learning_rate": 5.7244347896233465e-05, "loss": 11.16315746307373, "step": 3511 }, { "epoch": 0.4554274089623368, "grad_norm": 0.6702606081962585, "learning_rate": 5.722414160327992e-05, "loss": 9.056694984436035, "step": 3512 }, { "epoch": 0.45555708647058346, "grad_norm": 0.7754701972007751, "learning_rate": 5.720393410526982e-05, "loss": 10.873367309570312, "step": 3513 }, { "epoch": 0.4556867639788301, "grad_norm": 0.6535420417785645, "learning_rate": 5.718372540557394e-05, "loss": 9.859129905700684, "step": 3514 }, { "epoch": 0.45581644148707684, "grad_norm": 0.8470479249954224, "learning_rate": 5.7163515507563294e-05, "loss": 11.552103996276855, "step": 3515 }, { "epoch": 0.4559461189953235, "grad_norm": 0.8901132345199585, "learning_rate": 5.714330441460908e-05, "loss": 8.315420150756836, "step": 3516 }, { "epoch": 0.45607579650357016, "grad_norm": 0.7640055418014526, "learning_rate": 5.712309213008271e-05, "loss": 9.766496658325195, "step": 3517 }, { "epoch": 0.4562054740118169, "grad_norm": 0.7652407288551331, "learning_rate": 5.710287865735581e-05, "loss": 9.148050308227539, "step": 3518 }, { "epoch": 0.45633515152006354, "grad_norm": 0.8545884490013123, "learning_rate": 5.708266399980014e-05, "loss": 11.399481773376465, "step": 3519 }, { "epoch": 0.4564648290283102, "grad_norm": 0.8531696796417236, "learning_rate": 5.706244816078772e-05, "loss": 12.51230525970459, "step": 3520 }, { "epoch": 0.4565945065365569, "grad_norm": 0.9900657534599304, "learning_rate": 5.704223114369075e-05, "loss": 13.81489086151123, "step": 3521 }, { "epoch": 0.4567241840448036, "grad_norm": 0.6537759304046631, "learning_rate": 5.702201295188162e-05, "loss": 9.949853897094727, "step": 3522 }, { "epoch": 0.45685386155305024, "grad_norm": 1.00253164768219, "learning_rate": 5.700179358873292e-05, "loss": 12.823648452758789, "step": 3523 }, { "epoch": 0.45698353906129696, "grad_norm": 0.6564896702766418, "learning_rate": 5.698157305761744e-05, "loss": 9.049612998962402, "step": 3524 }, { "epoch": 0.4571132165695436, "grad_norm": 0.7202627062797546, "learning_rate": 5.6961351361908155e-05, "loss": 12.028717041015625, "step": 3525 }, { "epoch": 0.4572428940777903, "grad_norm": 0.7013002634048462, "learning_rate": 5.6941128504978234e-05, "loss": 10.076411247253418, "step": 3526 }, { "epoch": 0.45737257158603695, "grad_norm": 0.5945501923561096, "learning_rate": 5.6920904490201064e-05, "loss": 9.46145248413086, "step": 3527 }, { "epoch": 0.45750224909428366, "grad_norm": 0.7813075184822083, "learning_rate": 5.690067932095018e-05, "loss": 9.223319053649902, "step": 3528 }, { "epoch": 0.4576319266025303, "grad_norm": 0.8771684169769287, "learning_rate": 5.688045300059936e-05, "loss": 12.81203556060791, "step": 3529 }, { "epoch": 0.457761604110777, "grad_norm": 0.6171428561210632, "learning_rate": 5.686022553252253e-05, "loss": 10.420957565307617, "step": 3530 }, { "epoch": 0.4578912816190237, "grad_norm": 0.8322551250457764, "learning_rate": 5.683999692009382e-05, "loss": 11.03285026550293, "step": 3531 }, { "epoch": 0.45802095912727037, "grad_norm": 0.8881837725639343, "learning_rate": 5.68197671666876e-05, "loss": 12.556344985961914, "step": 3532 }, { "epoch": 0.45815063663551703, "grad_norm": 0.7260288000106812, "learning_rate": 5.679953627567835e-05, "loss": 8.001338005065918, "step": 3533 }, { "epoch": 0.45828031414376375, "grad_norm": 1.2943941354751587, "learning_rate": 5.677930425044078e-05, "loss": 12.38007926940918, "step": 3534 }, { "epoch": 0.4584099916520104, "grad_norm": 1.1856473684310913, "learning_rate": 5.675907109434981e-05, "loss": 10.035699844360352, "step": 3535 }, { "epoch": 0.45853966916025707, "grad_norm": 0.6795852780342102, "learning_rate": 5.6738836810780504e-05, "loss": 8.166363716125488, "step": 3536 }, { "epoch": 0.4586693466685038, "grad_norm": 0.7561383843421936, "learning_rate": 5.671860140310815e-05, "loss": 10.415000915527344, "step": 3537 }, { "epoch": 0.45879902417675045, "grad_norm": 0.6596734523773193, "learning_rate": 5.66983648747082e-05, "loss": 11.37877082824707, "step": 3538 }, { "epoch": 0.4589287016849971, "grad_norm": 1.1147701740264893, "learning_rate": 5.6678127228956315e-05, "loss": 13.046862602233887, "step": 3539 }, { "epoch": 0.4590583791932438, "grad_norm": 0.6878477931022644, "learning_rate": 5.6657888469228306e-05, "loss": 9.75459098815918, "step": 3540 }, { "epoch": 0.4591880567014905, "grad_norm": 0.6482043266296387, "learning_rate": 5.6637648598900215e-05, "loss": 5.945588111877441, "step": 3541 }, { "epoch": 0.45931773420973715, "grad_norm": 0.6088656187057495, "learning_rate": 5.661740762134824e-05, "loss": 10.20290756225586, "step": 3542 }, { "epoch": 0.4594474117179838, "grad_norm": 0.7384693622589111, "learning_rate": 5.6597165539948774e-05, "loss": 8.854823112487793, "step": 3543 }, { "epoch": 0.45957708922623053, "grad_norm": 0.5639956593513489, "learning_rate": 5.6576922358078385e-05, "loss": 6.889931678771973, "step": 3544 }, { "epoch": 0.4597067667344772, "grad_norm": 0.8594397306442261, "learning_rate": 5.6556678079113846e-05, "loss": 9.885307312011719, "step": 3545 }, { "epoch": 0.45983644424272385, "grad_norm": 0.6435467600822449, "learning_rate": 5.6536432706432075e-05, "loss": 8.614508628845215, "step": 3546 }, { "epoch": 0.45996612175097057, "grad_norm": 0.6816118955612183, "learning_rate": 5.651618624341022e-05, "loss": 8.809889793395996, "step": 3547 }, { "epoch": 0.46009579925921723, "grad_norm": 0.8048672676086426, "learning_rate": 5.6495938693425575e-05, "loss": 10.71944808959961, "step": 3548 }, { "epoch": 0.4602254767674639, "grad_norm": 0.7625088691711426, "learning_rate": 5.647569005985562e-05, "loss": 11.792430877685547, "step": 3549 }, { "epoch": 0.4603551542757106, "grad_norm": 0.7103696465492249, "learning_rate": 5.6455440346078036e-05, "loss": 11.353726387023926, "step": 3550 }, { "epoch": 0.46048483178395727, "grad_norm": 0.7354645729064941, "learning_rate": 5.6435189555470656e-05, "loss": 11.855124473571777, "step": 3551 }, { "epoch": 0.46061450929220393, "grad_norm": 0.7213660478591919, "learning_rate": 5.641493769141153e-05, "loss": 12.118963241577148, "step": 3552 }, { "epoch": 0.46074418680045065, "grad_norm": 0.8130019307136536, "learning_rate": 5.639468475727885e-05, "loss": 10.244294166564941, "step": 3553 }, { "epoch": 0.4608738643086973, "grad_norm": 0.707354724407196, "learning_rate": 5.637443075645099e-05, "loss": 9.897127151489258, "step": 3554 }, { "epoch": 0.461003541816944, "grad_norm": 0.7817860245704651, "learning_rate": 5.635417569230653e-05, "loss": 13.628436088562012, "step": 3555 }, { "epoch": 0.4611332193251907, "grad_norm": 0.9186019897460938, "learning_rate": 5.6333919568224204e-05, "loss": 9.211637496948242, "step": 3556 }, { "epoch": 0.46126289683343735, "grad_norm": 0.7520756721496582, "learning_rate": 5.6313662387582936e-05, "loss": 11.417844772338867, "step": 3557 }, { "epoch": 0.461392574341684, "grad_norm": 0.9531618356704712, "learning_rate": 5.6293404153761806e-05, "loss": 11.647513389587402, "step": 3558 }, { "epoch": 0.4615222518499307, "grad_norm": 0.7257635593414307, "learning_rate": 5.6273144870140096e-05, "loss": 9.303156852722168, "step": 3559 }, { "epoch": 0.4616519293581774, "grad_norm": 0.6507522463798523, "learning_rate": 5.625288454009724e-05, "loss": 11.046772003173828, "step": 3560 }, { "epoch": 0.46178160686642405, "grad_norm": 0.9585465788841248, "learning_rate": 5.6232623167012865e-05, "loss": 13.274710655212402, "step": 3561 }, { "epoch": 0.4619112843746707, "grad_norm": 0.826850414276123, "learning_rate": 5.621236075426676e-05, "loss": 9.971090316772461, "step": 3562 }, { "epoch": 0.46204096188291743, "grad_norm": 1.0874645709991455, "learning_rate": 5.619209730523889e-05, "loss": 14.868185997009277, "step": 3563 }, { "epoch": 0.4621706393911641, "grad_norm": 0.918007493019104, "learning_rate": 5.61718328233094e-05, "loss": 10.318909645080566, "step": 3564 }, { "epoch": 0.46230031689941076, "grad_norm": 0.7311211824417114, "learning_rate": 5.615156731185859e-05, "loss": 8.06757926940918, "step": 3565 }, { "epoch": 0.4624299944076575, "grad_norm": 0.8626628518104553, "learning_rate": 5.613130077426695e-05, "loss": 10.977665901184082, "step": 3566 }, { "epoch": 0.46255967191590414, "grad_norm": 0.7688417434692383, "learning_rate": 5.611103321391513e-05, "loss": 8.750039100646973, "step": 3567 }, { "epoch": 0.4626893494241508, "grad_norm": 0.7380220293998718, "learning_rate": 5.6090764634183966e-05, "loss": 11.000442504882812, "step": 3568 }, { "epoch": 0.4628190269323975, "grad_norm": 0.9814056754112244, "learning_rate": 5.6070495038454426e-05, "loss": 12.377979278564453, "step": 3569 }, { "epoch": 0.4629487044406442, "grad_norm": 0.8037732243537903, "learning_rate": 5.605022443010771e-05, "loss": 10.61695671081543, "step": 3570 }, { "epoch": 0.46307838194889084, "grad_norm": 0.9513772130012512, "learning_rate": 5.602995281252514e-05, "loss": 13.531387329101562, "step": 3571 }, { "epoch": 0.46320805945713756, "grad_norm": 0.7697589993476868, "learning_rate": 5.600968018908821e-05, "loss": 8.413020133972168, "step": 3572 }, { "epoch": 0.4633377369653842, "grad_norm": 0.6013213992118835, "learning_rate": 5.598940656317859e-05, "loss": 11.416484832763672, "step": 3573 }, { "epoch": 0.4634674144736309, "grad_norm": 0.6624206304550171, "learning_rate": 5.596913193817812e-05, "loss": 9.935303688049316, "step": 3574 }, { "epoch": 0.4635970919818776, "grad_norm": 0.7058554887771606, "learning_rate": 5.59488563174688e-05, "loss": 10.336986541748047, "step": 3575 }, { "epoch": 0.46372676949012426, "grad_norm": 0.7943611741065979, "learning_rate": 5.592857970443279e-05, "loss": 11.181795120239258, "step": 3576 }, { "epoch": 0.4638564469983709, "grad_norm": 1.1234909296035767, "learning_rate": 5.590830210245245e-05, "loss": 10.907486915588379, "step": 3577 }, { "epoch": 0.4639861245066176, "grad_norm": 0.8474132418632507, "learning_rate": 5.588802351491026e-05, "loss": 10.687207221984863, "step": 3578 }, { "epoch": 0.4641158020148643, "grad_norm": 0.7152726054191589, "learning_rate": 5.586774394518889e-05, "loss": 10.290966987609863, "step": 3579 }, { "epoch": 0.46424547952311096, "grad_norm": 0.9619097113609314, "learning_rate": 5.584746339667116e-05, "loss": 13.279048919677734, "step": 3580 }, { "epoch": 0.4643751570313576, "grad_norm": 0.705520749092102, "learning_rate": 5.582718187274007e-05, "loss": 10.61146068572998, "step": 3581 }, { "epoch": 0.46450483453960434, "grad_norm": 0.6705273985862732, "learning_rate": 5.580689937677879e-05, "loss": 9.453378677368164, "step": 3582 }, { "epoch": 0.464634512047851, "grad_norm": 0.8622300028800964, "learning_rate": 5.57866159121706e-05, "loss": 10.828511238098145, "step": 3583 }, { "epoch": 0.46476418955609766, "grad_norm": 0.7819033861160278, "learning_rate": 5.576633148229901e-05, "loss": 9.27481746673584, "step": 3584 }, { "epoch": 0.4648938670643444, "grad_norm": 1.048972249031067, "learning_rate": 5.574604609054764e-05, "loss": 10.132621765136719, "step": 3585 }, { "epoch": 0.46502354457259104, "grad_norm": 0.5547299981117249, "learning_rate": 5.57257597403003e-05, "loss": 8.508779525756836, "step": 3586 }, { "epoch": 0.4651532220808377, "grad_norm": 0.7867440581321716, "learning_rate": 5.570547243494093e-05, "loss": 8.58857536315918, "step": 3587 }, { "epoch": 0.4652828995890844, "grad_norm": 0.6820545792579651, "learning_rate": 5.568518417785368e-05, "loss": 10.439525604248047, "step": 3588 }, { "epoch": 0.4654125770973311, "grad_norm": 0.6166793704032898, "learning_rate": 5.5664894972422785e-05, "loss": 9.070919036865234, "step": 3589 }, { "epoch": 0.46554225460557774, "grad_norm": 0.7389794588088989, "learning_rate": 5.564460482203273e-05, "loss": 8.546332359313965, "step": 3590 }, { "epoch": 0.46567193211382446, "grad_norm": 0.7887160181999207, "learning_rate": 5.562431373006807e-05, "loss": 12.073269844055176, "step": 3591 }, { "epoch": 0.4658016096220711, "grad_norm": 0.9929417371749878, "learning_rate": 5.560402169991358e-05, "loss": 10.671395301818848, "step": 3592 }, { "epoch": 0.4659312871303178, "grad_norm": 0.8267149329185486, "learning_rate": 5.558372873495414e-05, "loss": 10.176193237304688, "step": 3593 }, { "epoch": 0.46606096463856445, "grad_norm": 0.6928192377090454, "learning_rate": 5.5563434838574846e-05, "loss": 7.313327312469482, "step": 3594 }, { "epoch": 0.46619064214681116, "grad_norm": 0.7774123549461365, "learning_rate": 5.5543140014160876e-05, "loss": 8.886286735534668, "step": 3595 }, { "epoch": 0.4663203196550578, "grad_norm": 0.831956684589386, "learning_rate": 5.552284426509764e-05, "loss": 11.339567184448242, "step": 3596 }, { "epoch": 0.4664499971633045, "grad_norm": 0.8293512463569641, "learning_rate": 5.5502547594770646e-05, "loss": 13.144951820373535, "step": 3597 }, { "epoch": 0.4665796746715512, "grad_norm": 0.8704394102096558, "learning_rate": 5.5482250006565575e-05, "loss": 12.957222938537598, "step": 3598 }, { "epoch": 0.46670935217979787, "grad_norm": 0.9721068739891052, "learning_rate": 5.546195150386826e-05, "loss": 12.401761054992676, "step": 3599 }, { "epoch": 0.4668390296880445, "grad_norm": 0.6635109782218933, "learning_rate": 5.54416520900647e-05, "loss": 10.380803108215332, "step": 3600 }, { "epoch": 0.46696870719629124, "grad_norm": 0.7894169092178345, "learning_rate": 5.5421351768541017e-05, "loss": 8.730056762695312, "step": 3601 }, { "epoch": 0.4670983847045379, "grad_norm": 0.9834372997283936, "learning_rate": 5.540105054268351e-05, "loss": 12.006284713745117, "step": 3602 }, { "epoch": 0.46722806221278457, "grad_norm": 0.6977006196975708, "learning_rate": 5.5380748415878616e-05, "loss": 9.394145965576172, "step": 3603 }, { "epoch": 0.4673577397210313, "grad_norm": 0.9615087509155273, "learning_rate": 5.536044539151293e-05, "loss": 10.3399076461792, "step": 3604 }, { "epoch": 0.46748741722927795, "grad_norm": 0.9760428667068481, "learning_rate": 5.5340141472973184e-05, "loss": 11.161222457885742, "step": 3605 }, { "epoch": 0.4676170947375246, "grad_norm": 0.6368736624717712, "learning_rate": 5.531983666364627e-05, "loss": 8.905730247497559, "step": 3606 }, { "epoch": 0.4677467722457713, "grad_norm": 0.6575909852981567, "learning_rate": 5.529953096691924e-05, "loss": 9.569982528686523, "step": 3607 }, { "epoch": 0.467876449754018, "grad_norm": 0.5798975229263306, "learning_rate": 5.527922438617925e-05, "loss": 6.682979106903076, "step": 3608 }, { "epoch": 0.46800612726226465, "grad_norm": 0.818193256855011, "learning_rate": 5.525891692481366e-05, "loss": 11.922654151916504, "step": 3609 }, { "epoch": 0.4681358047705113, "grad_norm": 0.799720287322998, "learning_rate": 5.523860858620996e-05, "loss": 8.805578231811523, "step": 3610 }, { "epoch": 0.468265482278758, "grad_norm": 0.8790558576583862, "learning_rate": 5.5218299373755744e-05, "loss": 8.65413761138916, "step": 3611 }, { "epoch": 0.4683951597870047, "grad_norm": 0.8379720449447632, "learning_rate": 5.519798929083882e-05, "loss": 14.290992736816406, "step": 3612 }, { "epoch": 0.46852483729525135, "grad_norm": 0.8225061297416687, "learning_rate": 5.517767834084707e-05, "loss": 11.469623565673828, "step": 3613 }, { "epoch": 0.46865451480349807, "grad_norm": 0.7795998454093933, "learning_rate": 5.5157366527168575e-05, "loss": 9.700008392333984, "step": 3614 }, { "epoch": 0.46878419231174473, "grad_norm": 1.0047482252120972, "learning_rate": 5.513705385319154e-05, "loss": 14.642925262451172, "step": 3615 }, { "epoch": 0.4689138698199914, "grad_norm": 0.7013160586357117, "learning_rate": 5.511674032230433e-05, "loss": 9.749151229858398, "step": 3616 }, { "epoch": 0.4690435473282381, "grad_norm": 0.8174687027931213, "learning_rate": 5.509642593789541e-05, "loss": 11.143963813781738, "step": 3617 }, { "epoch": 0.46917322483648477, "grad_norm": 1.0356215238571167, "learning_rate": 5.5076110703353445e-05, "loss": 9.630105972290039, "step": 3618 }, { "epoch": 0.46930290234473143, "grad_norm": 0.8649573922157288, "learning_rate": 5.505579462206718e-05, "loss": 11.916356086730957, "step": 3619 }, { "epoch": 0.46943257985297815, "grad_norm": 0.75881028175354, "learning_rate": 5.5035477697425566e-05, "loss": 9.198047637939453, "step": 3620 }, { "epoch": 0.4695622573612248, "grad_norm": 0.8659115433692932, "learning_rate": 5.501515993281763e-05, "loss": 13.134739875793457, "step": 3621 }, { "epoch": 0.4696919348694715, "grad_norm": 0.9259697794914246, "learning_rate": 5.49948413316326e-05, "loss": 11.149599075317383, "step": 3622 }, { "epoch": 0.4698216123777182, "grad_norm": 0.6052051782608032, "learning_rate": 5.49745218972598e-05, "loss": 9.201396942138672, "step": 3623 }, { "epoch": 0.46995128988596485, "grad_norm": 0.8874136805534363, "learning_rate": 5.495420163308871e-05, "loss": 9.975958824157715, "step": 3624 }, { "epoch": 0.4700809673942115, "grad_norm": 0.7426517009735107, "learning_rate": 5.493388054250894e-05, "loss": 9.698622703552246, "step": 3625 }, { "epoch": 0.4702106449024582, "grad_norm": 0.6945549249649048, "learning_rate": 5.491355862891027e-05, "loss": 7.877206325531006, "step": 3626 }, { "epoch": 0.4703403224107049, "grad_norm": 1.12090003490448, "learning_rate": 5.4893235895682536e-05, "loss": 10.96865463256836, "step": 3627 }, { "epoch": 0.47046999991895155, "grad_norm": 0.8698898553848267, "learning_rate": 5.487291234621584e-05, "loss": 11.316305160522461, "step": 3628 }, { "epoch": 0.4705996774271982, "grad_norm": 0.9047809839248657, "learning_rate": 5.485258798390031e-05, "loss": 13.750192642211914, "step": 3629 }, { "epoch": 0.47072935493544493, "grad_norm": 0.7712447047233582, "learning_rate": 5.483226281212622e-05, "loss": 11.664778709411621, "step": 3630 }, { "epoch": 0.4708590324436916, "grad_norm": 0.7737686038017273, "learning_rate": 5.481193683428405e-05, "loss": 8.240120887756348, "step": 3631 }, { "epoch": 0.47098870995193826, "grad_norm": 0.7490290999412537, "learning_rate": 5.479161005376433e-05, "loss": 10.92264461517334, "step": 3632 }, { "epoch": 0.471118387460185, "grad_norm": 1.0572714805603027, "learning_rate": 5.47712824739578e-05, "loss": 16.346769332885742, "step": 3633 }, { "epoch": 0.47124806496843163, "grad_norm": 0.7296467423439026, "learning_rate": 5.475095409825527e-05, "loss": 11.227404594421387, "step": 3634 }, { "epoch": 0.4713777424766783, "grad_norm": 0.6847907304763794, "learning_rate": 5.4730624930047726e-05, "loss": 11.79565143585205, "step": 3635 }, { "epoch": 0.471507419984925, "grad_norm": 0.6070372462272644, "learning_rate": 5.471029497272625e-05, "loss": 7.921985149383545, "step": 3636 }, { "epoch": 0.4716370974931717, "grad_norm": 1.3286001682281494, "learning_rate": 5.46899642296821e-05, "loss": 8.19604206085205, "step": 3637 }, { "epoch": 0.47176677500141834, "grad_norm": 0.6237328052520752, "learning_rate": 5.466963270430662e-05, "loss": 8.755191802978516, "step": 3638 }, { "epoch": 0.47189645250966505, "grad_norm": 1.032180666923523, "learning_rate": 5.464930039999131e-05, "loss": 13.399785041809082, "step": 3639 }, { "epoch": 0.4720261300179117, "grad_norm": 0.8759504556655884, "learning_rate": 5.46289673201278e-05, "loss": 13.763470649719238, "step": 3640 }, { "epoch": 0.4721558075261584, "grad_norm": 0.767970085144043, "learning_rate": 5.460863346810784e-05, "loss": 10.970695495605469, "step": 3641 }, { "epoch": 0.47228548503440504, "grad_norm": 0.7699472308158875, "learning_rate": 5.458829884732332e-05, "loss": 12.996832847595215, "step": 3642 }, { "epoch": 0.47241516254265176, "grad_norm": 0.8554308414459229, "learning_rate": 5.4567963461166236e-05, "loss": 11.345561027526855, "step": 3643 }, { "epoch": 0.4725448400508984, "grad_norm": 0.7585598826408386, "learning_rate": 5.454762731302875e-05, "loss": 10.895956993103027, "step": 3644 }, { "epoch": 0.4726745175591451, "grad_norm": 0.7363995313644409, "learning_rate": 5.45272904063031e-05, "loss": 11.656997680664062, "step": 3645 }, { "epoch": 0.4728041950673918, "grad_norm": 0.6180092096328735, "learning_rate": 5.450695274438169e-05, "loss": 6.783509254455566, "step": 3646 }, { "epoch": 0.47293387257563846, "grad_norm": 0.622606098651886, "learning_rate": 5.448661433065705e-05, "loss": 8.509581565856934, "step": 3647 }, { "epoch": 0.4730635500838851, "grad_norm": 0.7324535250663757, "learning_rate": 5.446627516852181e-05, "loss": 10.546215057373047, "step": 3648 }, { "epoch": 0.47319322759213184, "grad_norm": 0.7720884680747986, "learning_rate": 5.444593526136874e-05, "loss": 9.972550392150879, "step": 3649 }, { "epoch": 0.4733229051003785, "grad_norm": 0.7372675538063049, "learning_rate": 5.4425594612590734e-05, "loss": 9.027080535888672, "step": 3650 }, { "epoch": 0.47345258260862516, "grad_norm": 0.9062895774841309, "learning_rate": 5.440525322558082e-05, "loss": 15.753458023071289, "step": 3651 }, { "epoch": 0.4735822601168719, "grad_norm": 0.9221463203430176, "learning_rate": 5.4384911103732125e-05, "loss": 12.715103149414062, "step": 3652 }, { "epoch": 0.47371193762511854, "grad_norm": 0.6371315121650696, "learning_rate": 5.436456825043792e-05, "loss": 9.521711349487305, "step": 3653 }, { "epoch": 0.4738416151333652, "grad_norm": 0.7940596342086792, "learning_rate": 5.434422466909158e-05, "loss": 10.546433448791504, "step": 3654 }, { "epoch": 0.4739712926416119, "grad_norm": 0.9226229786872864, "learning_rate": 5.432388036308661e-05, "loss": 12.192733764648438, "step": 3655 }, { "epoch": 0.4741009701498586, "grad_norm": 0.7037297487258911, "learning_rate": 5.430353533581665e-05, "loss": 8.08088207244873, "step": 3656 }, { "epoch": 0.47423064765810524, "grad_norm": 0.5239605903625488, "learning_rate": 5.428318959067543e-05, "loss": 10.412381172180176, "step": 3657 }, { "epoch": 0.4743603251663519, "grad_norm": 0.7383391261100769, "learning_rate": 5.426284313105683e-05, "loss": 9.968392372131348, "step": 3658 }, { "epoch": 0.4744900026745986, "grad_norm": 0.9747890830039978, "learning_rate": 5.4242495960354837e-05, "loss": 12.76503849029541, "step": 3659 }, { "epoch": 0.4746196801828453, "grad_norm": 0.5041804909706116, "learning_rate": 5.422214808196355e-05, "loss": 10.103590965270996, "step": 3660 }, { "epoch": 0.47474935769109194, "grad_norm": 1.0526442527770996, "learning_rate": 5.4201799499277185e-05, "loss": 9.033174514770508, "step": 3661 }, { "epoch": 0.47487903519933866, "grad_norm": 0.9681350588798523, "learning_rate": 5.418145021569009e-05, "loss": 10.259944915771484, "step": 3662 }, { "epoch": 0.4750087127075853, "grad_norm": 0.9842054843902588, "learning_rate": 5.416110023459672e-05, "loss": 8.252182006835938, "step": 3663 }, { "epoch": 0.475138390215832, "grad_norm": 0.7112556099891663, "learning_rate": 5.414074955939165e-05, "loss": 11.040061950683594, "step": 3664 }, { "epoch": 0.4752680677240787, "grad_norm": 0.6701276302337646, "learning_rate": 5.412039819346957e-05, "loss": 8.461594581604004, "step": 3665 }, { "epoch": 0.47539774523232536, "grad_norm": 0.7272716164588928, "learning_rate": 5.410004614022527e-05, "loss": 8.624960899353027, "step": 3666 }, { "epoch": 0.475527422740572, "grad_norm": 0.7124675512313843, "learning_rate": 5.4079693403053686e-05, "loss": 7.505926132202148, "step": 3667 }, { "epoch": 0.47565710024881874, "grad_norm": 1.01987886428833, "learning_rate": 5.4059339985349856e-05, "loss": 8.526693344116211, "step": 3668 }, { "epoch": 0.4757867777570654, "grad_norm": 0.8100236654281616, "learning_rate": 5.403898589050891e-05, "loss": 8.330857276916504, "step": 3669 }, { "epoch": 0.47591645526531207, "grad_norm": 0.6981241703033447, "learning_rate": 5.401863112192611e-05, "loss": 9.098610877990723, "step": 3670 }, { "epoch": 0.4760461327735588, "grad_norm": 0.7362191677093506, "learning_rate": 5.3998275682996844e-05, "loss": 9.360504150390625, "step": 3671 }, { "epoch": 0.47617581028180545, "grad_norm": 1.044459342956543, "learning_rate": 5.397791957711658e-05, "loss": 8.69532299041748, "step": 3672 }, { "epoch": 0.4763054877900521, "grad_norm": 0.9210850596427917, "learning_rate": 5.395756280768092e-05, "loss": 10.618474006652832, "step": 3673 }, { "epoch": 0.47643516529829877, "grad_norm": 0.8299160599708557, "learning_rate": 5.3937205378085566e-05, "loss": 7.661994457244873, "step": 3674 }, { "epoch": 0.4765648428065455, "grad_norm": 0.7377058863639832, "learning_rate": 5.3916847291726326e-05, "loss": 9.501945495605469, "step": 3675 }, { "epoch": 0.47669452031479215, "grad_norm": 0.7958012819290161, "learning_rate": 5.389648855199915e-05, "loss": 12.062361717224121, "step": 3676 }, { "epoch": 0.4768241978230388, "grad_norm": 0.7503736615180969, "learning_rate": 5.387612916230005e-05, "loss": 10.338903427124023, "step": 3677 }, { "epoch": 0.4769538753312855, "grad_norm": 0.8719453811645508, "learning_rate": 5.385576912602517e-05, "loss": 10.475883483886719, "step": 3678 }, { "epoch": 0.4770835528395322, "grad_norm": 0.7704172730445862, "learning_rate": 5.383540844657077e-05, "loss": 12.104084014892578, "step": 3679 }, { "epoch": 0.47721323034777885, "grad_norm": 1.0094592571258545, "learning_rate": 5.381504712733321e-05, "loss": 7.623289585113525, "step": 3680 }, { "epoch": 0.47734290785602557, "grad_norm": 0.8779214024543762, "learning_rate": 5.3794685171708934e-05, "loss": 10.401779174804688, "step": 3681 }, { "epoch": 0.47747258536427223, "grad_norm": 0.8963243365287781, "learning_rate": 5.377432258309455e-05, "loss": 10.631317138671875, "step": 3682 }, { "epoch": 0.4776022628725189, "grad_norm": 0.943617582321167, "learning_rate": 5.37539593648867e-05, "loss": 9.758893966674805, "step": 3683 }, { "epoch": 0.4777319403807656, "grad_norm": 0.6843152642250061, "learning_rate": 5.373359552048217e-05, "loss": 7.445611000061035, "step": 3684 }, { "epoch": 0.47786161788901227, "grad_norm": 0.8558300137519836, "learning_rate": 5.371323105327787e-05, "loss": 7.8789801597595215, "step": 3685 }, { "epoch": 0.47799129539725893, "grad_norm": 0.7715452909469604, "learning_rate": 5.369286596667076e-05, "loss": 8.876933097839355, "step": 3686 }, { "epoch": 0.47812097290550565, "grad_norm": 0.6677585244178772, "learning_rate": 5.3672500264057945e-05, "loss": 7.569882392883301, "step": 3687 }, { "epoch": 0.4782506504137523, "grad_norm": 0.8737394213676453, "learning_rate": 5.3652133948836636e-05, "loss": 14.306323051452637, "step": 3688 }, { "epoch": 0.47838032792199897, "grad_norm": 0.903285801410675, "learning_rate": 5.363176702440411e-05, "loss": 9.641021728515625, "step": 3689 }, { "epoch": 0.47851000543024563, "grad_norm": 0.746112585067749, "learning_rate": 5.3611399494157765e-05, "loss": 7.968459606170654, "step": 3690 }, { "epoch": 0.47863968293849235, "grad_norm": 0.6898713707923889, "learning_rate": 5.359103136149512e-05, "loss": 11.054281234741211, "step": 3691 }, { "epoch": 0.478769360446739, "grad_norm": 1.052738904953003, "learning_rate": 5.3570662629813775e-05, "loss": 12.879522323608398, "step": 3692 }, { "epoch": 0.4788990379549857, "grad_norm": 0.9135039448738098, "learning_rate": 5.355029330251141e-05, "loss": 12.707530975341797, "step": 3693 }, { "epoch": 0.4790287154632324, "grad_norm": 0.9998471140861511, "learning_rate": 5.352992338298584e-05, "loss": 11.000348091125488, "step": 3694 }, { "epoch": 0.47915839297147905, "grad_norm": 0.6843326687812805, "learning_rate": 5.350955287463496e-05, "loss": 8.117947578430176, "step": 3695 }, { "epoch": 0.4792880704797257, "grad_norm": 0.8722155690193176, "learning_rate": 5.348918178085678e-05, "loss": 9.806377410888672, "step": 3696 }, { "epoch": 0.47941774798797243, "grad_norm": 0.7462936639785767, "learning_rate": 5.3468810105049385e-05, "loss": 9.492280006408691, "step": 3697 }, { "epoch": 0.4795474254962191, "grad_norm": 0.808108925819397, "learning_rate": 5.344843785061095e-05, "loss": 11.415358543395996, "step": 3698 }, { "epoch": 0.47967710300446575, "grad_norm": 1.0119062662124634, "learning_rate": 5.34280650209398e-05, "loss": 10.611006736755371, "step": 3699 }, { "epoch": 0.47980678051271247, "grad_norm": 0.7634989619255066, "learning_rate": 5.3407691619434295e-05, "loss": 10.351283073425293, "step": 3700 }, { "epoch": 0.47993645802095913, "grad_norm": 0.8469387292861938, "learning_rate": 5.338731764949293e-05, "loss": 10.168155670166016, "step": 3701 }, { "epoch": 0.4800661355292058, "grad_norm": 0.8407633900642395, "learning_rate": 5.336694311451427e-05, "loss": 11.659708023071289, "step": 3702 }, { "epoch": 0.4801958130374525, "grad_norm": 0.8985345959663391, "learning_rate": 5.334656801789697e-05, "loss": 9.11600112915039, "step": 3703 }, { "epoch": 0.4803254905456992, "grad_norm": 0.8237338066101074, "learning_rate": 5.332619236303981e-05, "loss": 7.938920974731445, "step": 3704 }, { "epoch": 0.48045516805394584, "grad_norm": 0.7209765911102295, "learning_rate": 5.3305816153341645e-05, "loss": 8.145658493041992, "step": 3705 }, { "epoch": 0.4805848455621925, "grad_norm": 1.0797492265701294, "learning_rate": 5.328543939220143e-05, "loss": 11.793800354003906, "step": 3706 }, { "epoch": 0.4807145230704392, "grad_norm": 0.9894722700119019, "learning_rate": 5.32650620830182e-05, "loss": 12.406567573547363, "step": 3707 }, { "epoch": 0.4808442005786859, "grad_norm": 0.7939886450767517, "learning_rate": 5.324468422919109e-05, "loss": 9.203413963317871, "step": 3708 }, { "epoch": 0.48097387808693254, "grad_norm": 0.9488465785980225, "learning_rate": 5.3224305834119305e-05, "loss": 12.986471176147461, "step": 3709 }, { "epoch": 0.48110355559517926, "grad_norm": 0.757051408290863, "learning_rate": 5.320392690120217e-05, "loss": 8.719409942626953, "step": 3710 }, { "epoch": 0.4812332331034259, "grad_norm": 0.6664899587631226, "learning_rate": 5.31835474338391e-05, "loss": 7.326260089874268, "step": 3711 }, { "epoch": 0.4813629106116726, "grad_norm": 1.029685378074646, "learning_rate": 5.3163167435429575e-05, "loss": 11.662429809570312, "step": 3712 }, { "epoch": 0.4814925881199193, "grad_norm": 0.929939866065979, "learning_rate": 5.3142786909373173e-05, "loss": 8.793509483337402, "step": 3713 }, { "epoch": 0.48162226562816596, "grad_norm": 0.7687063813209534, "learning_rate": 5.312240585906958e-05, "loss": 12.815532684326172, "step": 3714 }, { "epoch": 0.4817519431364126, "grad_norm": 0.5289552807807922, "learning_rate": 5.310202428791852e-05, "loss": 6.710669040679932, "step": 3715 }, { "epoch": 0.48188162064465934, "grad_norm": 0.8422068953514099, "learning_rate": 5.308164219931986e-05, "loss": 9.19527816772461, "step": 3716 }, { "epoch": 0.482011298152906, "grad_norm": 1.0183119773864746, "learning_rate": 5.3061259596673516e-05, "loss": 13.953836441040039, "step": 3717 }, { "epoch": 0.48214097566115266, "grad_norm": 0.9511117935180664, "learning_rate": 5.304087648337952e-05, "loss": 14.530059814453125, "step": 3718 }, { "epoch": 0.4822706531693994, "grad_norm": 1.1088894605636597, "learning_rate": 5.3020492862837955e-05, "loss": 9.362217903137207, "step": 3719 }, { "epoch": 0.48240033067764604, "grad_norm": 0.8617530465126038, "learning_rate": 5.300010873844902e-05, "loss": 10.267196655273438, "step": 3720 }, { "epoch": 0.4825300081858927, "grad_norm": 0.9595996737480164, "learning_rate": 5.297972411361297e-05, "loss": 12.971109390258789, "step": 3721 }, { "epoch": 0.48265968569413936, "grad_norm": 0.559548556804657, "learning_rate": 5.2959338991730166e-05, "loss": 9.356572151184082, "step": 3722 }, { "epoch": 0.4827893632023861, "grad_norm": 0.8382101655006409, "learning_rate": 5.293895337620103e-05, "loss": 10.149212837219238, "step": 3723 }, { "epoch": 0.48291904071063274, "grad_norm": 0.8095917105674744, "learning_rate": 5.29185672704261e-05, "loss": 7.792088985443115, "step": 3724 }, { "epoch": 0.4830487182188794, "grad_norm": 0.7965250015258789, "learning_rate": 5.289818067780596e-05, "loss": 11.774916648864746, "step": 3725 }, { "epoch": 0.4831783957271261, "grad_norm": 0.7745852470397949, "learning_rate": 5.287779360174131e-05, "loss": 8.564059257507324, "step": 3726 }, { "epoch": 0.4833080732353728, "grad_norm": 0.6926886439323425, "learning_rate": 5.285740604563289e-05, "loss": 12.747693061828613, "step": 3727 }, { "epoch": 0.48343775074361944, "grad_norm": 0.8876514434814453, "learning_rate": 5.2837018012881553e-05, "loss": 9.961747169494629, "step": 3728 }, { "epoch": 0.48356742825186616, "grad_norm": 0.6840676665306091, "learning_rate": 5.281662950688822e-05, "loss": 8.655682563781738, "step": 3729 }, { "epoch": 0.4836971057601128, "grad_norm": 0.8273225426673889, "learning_rate": 5.2796240531053876e-05, "loss": 7.343973636627197, "step": 3730 }, { "epoch": 0.4838267832683595, "grad_norm": 0.8001739382743835, "learning_rate": 5.2775851088779614e-05, "loss": 11.512627601623535, "step": 3731 }, { "epoch": 0.4839564607766062, "grad_norm": 1.0273936986923218, "learning_rate": 5.2755461183466584e-05, "loss": 13.582685470581055, "step": 3732 }, { "epoch": 0.48408613828485286, "grad_norm": 0.6422415971755981, "learning_rate": 5.273507081851602e-05, "loss": 8.321382522583008, "step": 3733 }, { "epoch": 0.4842158157930995, "grad_norm": 1.238295555114746, "learning_rate": 5.271467999732923e-05, "loss": 10.466897964477539, "step": 3734 }, { "epoch": 0.48434549330134624, "grad_norm": 0.7961108088493347, "learning_rate": 5.2694288723307605e-05, "loss": 10.53766918182373, "step": 3735 }, { "epoch": 0.4844751708095929, "grad_norm": 0.7448829412460327, "learning_rate": 5.26738969998526e-05, "loss": 8.007026672363281, "step": 3736 }, { "epoch": 0.48460484831783956, "grad_norm": 0.8280330896377563, "learning_rate": 5.265350483036576e-05, "loss": 11.123479843139648, "step": 3737 }, { "epoch": 0.4847345258260862, "grad_norm": 0.7540768980979919, "learning_rate": 5.263311221824869e-05, "loss": 9.283305168151855, "step": 3738 }, { "epoch": 0.48486420333433294, "grad_norm": 0.9566198587417603, "learning_rate": 5.261271916690307e-05, "loss": 9.969151496887207, "step": 3739 }, { "epoch": 0.4849938808425796, "grad_norm": 0.814643144607544, "learning_rate": 5.259232567973067e-05, "loss": 7.886512279510498, "step": 3740 }, { "epoch": 0.48512355835082627, "grad_norm": 0.9805038571357727, "learning_rate": 5.2571931760133305e-05, "loss": 14.848979949951172, "step": 3741 }, { "epoch": 0.485253235859073, "grad_norm": 0.8207270503044128, "learning_rate": 5.25515374115129e-05, "loss": 11.43404483795166, "step": 3742 }, { "epoch": 0.48538291336731965, "grad_norm": 0.7028091549873352, "learning_rate": 5.25311426372714e-05, "loss": 9.846672058105469, "step": 3743 }, { "epoch": 0.4855125908755663, "grad_norm": 0.8446585536003113, "learning_rate": 5.251074744081088e-05, "loss": 9.558196067810059, "step": 3744 }, { "epoch": 0.485642268383813, "grad_norm": 0.719525933265686, "learning_rate": 5.2490351825533436e-05, "loss": 10.138080596923828, "step": 3745 }, { "epoch": 0.4857719458920597, "grad_norm": 0.7308700084686279, "learning_rate": 5.246995579484125e-05, "loss": 11.685954093933105, "step": 3746 }, { "epoch": 0.48590162340030635, "grad_norm": 0.8578081727027893, "learning_rate": 5.24495593521366e-05, "loss": 10.842469215393066, "step": 3747 }, { "epoch": 0.48603130090855307, "grad_norm": 0.8688920140266418, "learning_rate": 5.242916250082178e-05, "loss": 14.578356742858887, "step": 3748 }, { "epoch": 0.4861609784167997, "grad_norm": 0.8597134351730347, "learning_rate": 5.240876524429922e-05, "loss": 9.931057929992676, "step": 3749 }, { "epoch": 0.4862906559250464, "grad_norm": 0.7172528505325317, "learning_rate": 5.2388367585971346e-05, "loss": 8.617024421691895, "step": 3750 }, { "epoch": 0.4864203334332931, "grad_norm": 0.6596654653549194, "learning_rate": 5.236796952924069e-05, "loss": 10.97232723236084, "step": 3751 }, { "epoch": 0.48655001094153977, "grad_norm": 0.5378269553184509, "learning_rate": 5.234757107750985e-05, "loss": 6.553804397583008, "step": 3752 }, { "epoch": 0.48667968844978643, "grad_norm": 1.0868065357208252, "learning_rate": 5.232717223418149e-05, "loss": 9.529635429382324, "step": 3753 }, { "epoch": 0.4868093659580331, "grad_norm": 1.2436615228652954, "learning_rate": 5.2306773002658326e-05, "loss": 13.131832122802734, "step": 3754 }, { "epoch": 0.4869390434662798, "grad_norm": 0.842490017414093, "learning_rate": 5.2286373386343146e-05, "loss": 7.929079055786133, "step": 3755 }, { "epoch": 0.48706872097452647, "grad_norm": 0.6763673424720764, "learning_rate": 5.226597338863881e-05, "loss": 7.707791328430176, "step": 3756 }, { "epoch": 0.48719839848277313, "grad_norm": 0.7803781032562256, "learning_rate": 5.224557301294823e-05, "loss": 11.178589820861816, "step": 3757 }, { "epoch": 0.48732807599101985, "grad_norm": 0.8894637227058411, "learning_rate": 5.2225172262674396e-05, "loss": 11.409568786621094, "step": 3758 }, { "epoch": 0.4874577534992665, "grad_norm": 0.6488630175590515, "learning_rate": 5.2204771141220334e-05, "loss": 6.0640869140625, "step": 3759 }, { "epoch": 0.4875874310075132, "grad_norm": 1.0470407009124756, "learning_rate": 5.218436965198916e-05, "loss": 13.203483581542969, "step": 3760 }, { "epoch": 0.4877171085157599, "grad_norm": 0.7315210700035095, "learning_rate": 5.216396779838403e-05, "loss": 7.435268402099609, "step": 3761 }, { "epoch": 0.48784678602400655, "grad_norm": 0.9043150544166565, "learning_rate": 5.214356558380816e-05, "loss": 10.173572540283203, "step": 3762 }, { "epoch": 0.4879764635322532, "grad_norm": 0.8618450164794922, "learning_rate": 5.2123163011664865e-05, "loss": 8.519871711730957, "step": 3763 }, { "epoch": 0.48810614104049993, "grad_norm": 0.9693130254745483, "learning_rate": 5.2102760085357485e-05, "loss": 10.253555297851562, "step": 3764 }, { "epoch": 0.4882358185487466, "grad_norm": 0.8091184496879578, "learning_rate": 5.208235680828941e-05, "loss": 11.656888961791992, "step": 3765 }, { "epoch": 0.48836549605699325, "grad_norm": 0.8946650624275208, "learning_rate": 5.206195318386411e-05, "loss": 11.935758590698242, "step": 3766 }, { "epoch": 0.48849517356523997, "grad_norm": 0.9667556881904602, "learning_rate": 5.204154921548511e-05, "loss": 9.408910751342773, "step": 3767 }, { "epoch": 0.48862485107348663, "grad_norm": 0.7392779588699341, "learning_rate": 5.202114490655599e-05, "loss": 12.430974006652832, "step": 3768 }, { "epoch": 0.4887545285817333, "grad_norm": 0.47977516055107117, "learning_rate": 5.2000740260480386e-05, "loss": 7.444514274597168, "step": 3769 }, { "epoch": 0.48888420608997996, "grad_norm": 0.6447061896324158, "learning_rate": 5.198033528066197e-05, "loss": 8.148927688598633, "step": 3770 }, { "epoch": 0.4890138835982267, "grad_norm": 0.6582933068275452, "learning_rate": 5.1959929970504516e-05, "loss": 8.907197952270508, "step": 3771 }, { "epoch": 0.48914356110647333, "grad_norm": 0.8085566163063049, "learning_rate": 5.193952433341182e-05, "loss": 10.392303466796875, "step": 3772 }, { "epoch": 0.48927323861472, "grad_norm": 0.9027763605117798, "learning_rate": 5.1919118372787736e-05, "loss": 13.722249984741211, "step": 3773 }, { "epoch": 0.4894029161229667, "grad_norm": 0.7326719760894775, "learning_rate": 5.189871209203616e-05, "loss": 9.486289978027344, "step": 3774 }, { "epoch": 0.4895325936312134, "grad_norm": 0.7005599141120911, "learning_rate": 5.1878305494561086e-05, "loss": 9.523051261901855, "step": 3775 }, { "epoch": 0.48966227113946004, "grad_norm": 0.9413827657699585, "learning_rate": 5.185789858376652e-05, "loss": 13.120628356933594, "step": 3776 }, { "epoch": 0.48979194864770675, "grad_norm": 0.885012686252594, "learning_rate": 5.183749136305651e-05, "loss": 14.65072250366211, "step": 3777 }, { "epoch": 0.4899216261559534, "grad_norm": 0.7914251089096069, "learning_rate": 5.1817083835835215e-05, "loss": 10.329912185668945, "step": 3778 }, { "epoch": 0.4900513036642001, "grad_norm": 0.7481860518455505, "learning_rate": 5.1796676005506784e-05, "loss": 9.521098136901855, "step": 3779 }, { "epoch": 0.4901809811724468, "grad_norm": 0.7841234803199768, "learning_rate": 5.177626787547545e-05, "loss": 7.805773735046387, "step": 3780 }, { "epoch": 0.49031065868069346, "grad_norm": 0.8274315595626831, "learning_rate": 5.175585944914546e-05, "loss": 11.779863357543945, "step": 3781 }, { "epoch": 0.4904403361889401, "grad_norm": 0.5912509560585022, "learning_rate": 5.1735450729921185e-05, "loss": 8.4581880569458, "step": 3782 }, { "epoch": 0.49057001369718684, "grad_norm": 0.720684826374054, "learning_rate": 5.1715041721206956e-05, "loss": 5.626399993896484, "step": 3783 }, { "epoch": 0.4906996912054335, "grad_norm": 0.9269657135009766, "learning_rate": 5.1694632426407206e-05, "loss": 9.56212043762207, "step": 3784 }, { "epoch": 0.49082936871368016, "grad_norm": 0.7448143362998962, "learning_rate": 5.167422284892641e-05, "loss": 7.425023555755615, "step": 3785 }, { "epoch": 0.4909590462219268, "grad_norm": 0.866482675075531, "learning_rate": 5.1653812992169084e-05, "loss": 10.767098426818848, "step": 3786 }, { "epoch": 0.49108872373017354, "grad_norm": 0.8166488409042358, "learning_rate": 5.163340285953977e-05, "loss": 10.030282974243164, "step": 3787 }, { "epoch": 0.4912184012384202, "grad_norm": 0.8437719941139221, "learning_rate": 5.1612992454443076e-05, "loss": 11.533907890319824, "step": 3788 }, { "epoch": 0.49134807874666686, "grad_norm": 0.892415463924408, "learning_rate": 5.159258178028368e-05, "loss": 8.48648738861084, "step": 3789 }, { "epoch": 0.4914777562549136, "grad_norm": 0.8446155786514282, "learning_rate": 5.157217084046626e-05, "loss": 11.057713508605957, "step": 3790 }, { "epoch": 0.49160743376316024, "grad_norm": 0.785169243812561, "learning_rate": 5.155175963839556e-05, "loss": 10.63805103302002, "step": 3791 }, { "epoch": 0.4917371112714069, "grad_norm": 0.8420745134353638, "learning_rate": 5.153134817747637e-05, "loss": 11.091604232788086, "step": 3792 }, { "epoch": 0.4918667887796536, "grad_norm": 0.6941841840744019, "learning_rate": 5.1510936461113515e-05, "loss": 9.058615684509277, "step": 3793 }, { "epoch": 0.4919964662879003, "grad_norm": 0.5655985474586487, "learning_rate": 5.1490524492711866e-05, "loss": 11.566187858581543, "step": 3794 }, { "epoch": 0.49212614379614694, "grad_norm": 0.5649345517158508, "learning_rate": 5.147011227567634e-05, "loss": 6.530677795410156, "step": 3795 }, { "epoch": 0.49225582130439366, "grad_norm": 0.7237451076507568, "learning_rate": 5.144969981341189e-05, "loss": 10.680901527404785, "step": 3796 }, { "epoch": 0.4923854988126403, "grad_norm": 0.7187272906303406, "learning_rate": 5.1429287109323524e-05, "loss": 8.745404243469238, "step": 3797 }, { "epoch": 0.492515176320887, "grad_norm": 0.9277742505073547, "learning_rate": 5.1408874166816245e-05, "loss": 12.716793060302734, "step": 3798 }, { "epoch": 0.4926448538291337, "grad_norm": 0.8624264001846313, "learning_rate": 5.138846098929516e-05, "loss": 12.956221580505371, "step": 3799 }, { "epoch": 0.49277453133738036, "grad_norm": 0.8599368929862976, "learning_rate": 5.136804758016538e-05, "loss": 10.349267959594727, "step": 3800 }, { "epoch": 0.492904208845627, "grad_norm": 0.8071478605270386, "learning_rate": 5.134763394283204e-05, "loss": 9.607876777648926, "step": 3801 }, { "epoch": 0.4930338863538737, "grad_norm": 0.7367340326309204, "learning_rate": 5.132722008070035e-05, "loss": 8.439126014709473, "step": 3802 }, { "epoch": 0.4931635638621204, "grad_norm": 0.9983401298522949, "learning_rate": 5.130680599717554e-05, "loss": 11.240517616271973, "step": 3803 }, { "epoch": 0.49329324137036706, "grad_norm": 0.875338077545166, "learning_rate": 5.1286391695662874e-05, "loss": 12.539342880249023, "step": 3804 }, { "epoch": 0.4934229188786137, "grad_norm": 1.0336087942123413, "learning_rate": 5.1265977179567634e-05, "loss": 10.89670181274414, "step": 3805 }, { "epoch": 0.49355259638686044, "grad_norm": 0.8208726644515991, "learning_rate": 5.1245562452295196e-05, "loss": 9.435254096984863, "step": 3806 }, { "epoch": 0.4936822738951071, "grad_norm": 0.8380321264266968, "learning_rate": 5.1225147517250895e-05, "loss": 10.253418922424316, "step": 3807 }, { "epoch": 0.49381195140335377, "grad_norm": 0.6171880960464478, "learning_rate": 5.1204732377840156e-05, "loss": 8.943337440490723, "step": 3808 }, { "epoch": 0.4939416289116005, "grad_norm": 0.6785252094268799, "learning_rate": 5.1184317037468445e-05, "loss": 5.718273639678955, "step": 3809 }, { "epoch": 0.49407130641984714, "grad_norm": 0.763952910900116, "learning_rate": 5.11639014995412e-05, "loss": 10.33641242980957, "step": 3810 }, { "epoch": 0.4942009839280938, "grad_norm": 1.014670968055725, "learning_rate": 5.114348576746394e-05, "loss": 10.082498550415039, "step": 3811 }, { "epoch": 0.4943306614363405, "grad_norm": 1.0755594968795776, "learning_rate": 5.112306984464222e-05, "loss": 12.424042701721191, "step": 3812 }, { "epoch": 0.4944603389445872, "grad_norm": 0.6851860284805298, "learning_rate": 5.110265373448161e-05, "loss": 6.886756420135498, "step": 3813 }, { "epoch": 0.49459001645283385, "grad_norm": 0.8361650705337524, "learning_rate": 5.10822374403877e-05, "loss": 9.824772834777832, "step": 3814 }, { "epoch": 0.49471969396108056, "grad_norm": 1.0518251657485962, "learning_rate": 5.106182096576614e-05, "loss": 10.877922058105469, "step": 3815 }, { "epoch": 0.4948493714693272, "grad_norm": 1.1904197931289673, "learning_rate": 5.104140431402259e-05, "loss": 9.488689422607422, "step": 3816 }, { "epoch": 0.4949790489775739, "grad_norm": 0.7651389837265015, "learning_rate": 5.102098748856273e-05, "loss": 8.149639129638672, "step": 3817 }, { "epoch": 0.49510872648582055, "grad_norm": 0.7679510712623596, "learning_rate": 5.100057049279231e-05, "loss": 11.112489700317383, "step": 3818 }, { "epoch": 0.49523840399406727, "grad_norm": 0.887533962726593, "learning_rate": 5.0980153330117066e-05, "loss": 7.390460968017578, "step": 3819 }, { "epoch": 0.49536808150231393, "grad_norm": 0.7589675188064575, "learning_rate": 5.095973600394278e-05, "loss": 8.8723783493042, "step": 3820 }, { "epoch": 0.4954977590105606, "grad_norm": 0.7080494165420532, "learning_rate": 5.093931851767526e-05, "loss": 10.135957717895508, "step": 3821 }, { "epoch": 0.4956274365188073, "grad_norm": 0.7931122779846191, "learning_rate": 5.091890087472035e-05, "loss": 7.479616165161133, "step": 3822 }, { "epoch": 0.49575711402705397, "grad_norm": 1.1633397340774536, "learning_rate": 5.089848307848389e-05, "loss": 10.411344528198242, "step": 3823 }, { "epoch": 0.49588679153530063, "grad_norm": 0.7172092199325562, "learning_rate": 5.08780651323718e-05, "loss": 11.9107084274292, "step": 3824 }, { "epoch": 0.49601646904354735, "grad_norm": 0.8368725776672363, "learning_rate": 5.085764703978994e-05, "loss": 13.09782600402832, "step": 3825 }, { "epoch": 0.496146146551794, "grad_norm": 0.8143326044082642, "learning_rate": 5.0837228804144286e-05, "loss": 11.156578063964844, "step": 3826 }, { "epoch": 0.49627582406004067, "grad_norm": 0.7690153121948242, "learning_rate": 5.0816810428840786e-05, "loss": 8.129948616027832, "step": 3827 }, { "epoch": 0.4964055015682874, "grad_norm": 0.9013156294822693, "learning_rate": 5.0796391917285415e-05, "loss": 9.015557289123535, "step": 3828 }, { "epoch": 0.49653517907653405, "grad_norm": 0.7955037951469421, "learning_rate": 5.077597327288417e-05, "loss": 11.924935340881348, "step": 3829 }, { "epoch": 0.4966648565847807, "grad_norm": 0.8649492263793945, "learning_rate": 5.07555544990431e-05, "loss": 9.559704780578613, "step": 3830 }, { "epoch": 0.49679453409302743, "grad_norm": 0.8787140846252441, "learning_rate": 5.0735135599168234e-05, "loss": 10.838668823242188, "step": 3831 }, { "epoch": 0.4969242116012741, "grad_norm": 0.719954252243042, "learning_rate": 5.071471657666567e-05, "loss": 6.4667229652404785, "step": 3832 }, { "epoch": 0.49705388910952075, "grad_norm": 0.9549972414970398, "learning_rate": 5.069429743494146e-05, "loss": 10.924700736999512, "step": 3833 }, { "epoch": 0.4971835666177674, "grad_norm": 0.702817976474762, "learning_rate": 5.067387817740174e-05, "loss": 7.5018086433410645, "step": 3834 }, { "epoch": 0.49731324412601413, "grad_norm": 0.8147560358047485, "learning_rate": 5.065345880745263e-05, "loss": 9.10917854309082, "step": 3835 }, { "epoch": 0.4974429216342608, "grad_norm": 0.7041993737220764, "learning_rate": 5.063303932850029e-05, "loss": 6.706677436828613, "step": 3836 }, { "epoch": 0.49757259914250745, "grad_norm": 0.7281227707862854, "learning_rate": 5.0612619743950877e-05, "loss": 9.245158195495605, "step": 3837 }, { "epoch": 0.49770227665075417, "grad_norm": 1.093706727027893, "learning_rate": 5.059220005721056e-05, "loss": 10.480618476867676, "step": 3838 }, { "epoch": 0.49783195415900083, "grad_norm": 0.8250797390937805, "learning_rate": 5.0571780271685555e-05, "loss": 12.563263893127441, "step": 3839 }, { "epoch": 0.4979616316672475, "grad_norm": 0.7769240736961365, "learning_rate": 5.055136039078209e-05, "loss": 10.55655288696289, "step": 3840 }, { "epoch": 0.4980913091754942, "grad_norm": 0.885834813117981, "learning_rate": 5.053094041790639e-05, "loss": 10.237945556640625, "step": 3841 }, { "epoch": 0.4982209866837409, "grad_norm": 1.0028516054153442, "learning_rate": 5.05105203564647e-05, "loss": 11.75611686706543, "step": 3842 }, { "epoch": 0.49835066419198754, "grad_norm": 1.146733283996582, "learning_rate": 5.049010020986328e-05, "loss": 7.980444431304932, "step": 3843 }, { "epoch": 0.49848034170023425, "grad_norm": 0.8847466707229614, "learning_rate": 5.046967998150841e-05, "loss": 9.790225982666016, "step": 3844 }, { "epoch": 0.4986100192084809, "grad_norm": 0.9897902607917786, "learning_rate": 5.044925967480638e-05, "loss": 11.647749900817871, "step": 3845 }, { "epoch": 0.4987396967167276, "grad_norm": 0.7565620541572571, "learning_rate": 5.042883929316351e-05, "loss": 10.760053634643555, "step": 3846 }, { "epoch": 0.4988693742249743, "grad_norm": 0.9401645660400391, "learning_rate": 5.040841883998611e-05, "loss": 11.593989372253418, "step": 3847 }, { "epoch": 0.49899905173322096, "grad_norm": 0.720101535320282, "learning_rate": 5.038799831868049e-05, "loss": 8.400684356689453, "step": 3848 }, { "epoch": 0.4991287292414676, "grad_norm": 0.6307227611541748, "learning_rate": 5.0367577732653005e-05, "loss": 8.433475494384766, "step": 3849 }, { "epoch": 0.4992584067497143, "grad_norm": 0.6711888909339905, "learning_rate": 5.034715708531e-05, "loss": 8.045830726623535, "step": 3850 }, { "epoch": 0.499388084257961, "grad_norm": 0.6207488179206848, "learning_rate": 5.032673638005786e-05, "loss": 7.590845584869385, "step": 3851 }, { "epoch": 0.49951776176620766, "grad_norm": 0.8304135799407959, "learning_rate": 5.0306315620302926e-05, "loss": 9.357146263122559, "step": 3852 }, { "epoch": 0.4996474392744543, "grad_norm": 0.9010029435157776, "learning_rate": 5.028589480945158e-05, "loss": 8.127429008483887, "step": 3853 }, { "epoch": 0.49977711678270104, "grad_norm": 0.9687169790267944, "learning_rate": 5.026547395091022e-05, "loss": 9.72959041595459, "step": 3854 }, { "epoch": 0.4999067942909477, "grad_norm": 0.6638097167015076, "learning_rate": 5.024505304808526e-05, "loss": 9.093875885009766, "step": 3855 }, { "epoch": 0.5000364717991944, "grad_norm": 0.7909667491912842, "learning_rate": 5.0224632104383076e-05, "loss": 8.856266975402832, "step": 3856 }, { "epoch": 0.5001661493074411, "grad_norm": 0.6415793895721436, "learning_rate": 5.020421112321009e-05, "loss": 7.985410690307617, "step": 3857 }, { "epoch": 0.5002958268156877, "grad_norm": 0.5974816679954529, "learning_rate": 5.01837901079727e-05, "loss": 9.537633895874023, "step": 3858 }, { "epoch": 0.5004255043239344, "grad_norm": 0.7745687365531921, "learning_rate": 5.016336906207736e-05, "loss": 10.585715293884277, "step": 3859 }, { "epoch": 0.5005551818321811, "grad_norm": 0.7451351284980774, "learning_rate": 5.014294798893048e-05, "loss": 8.665901184082031, "step": 3860 }, { "epoch": 0.5006848593404277, "grad_norm": 0.868720293045044, "learning_rate": 5.012252689193849e-05, "loss": 9.708673477172852, "step": 3861 }, { "epoch": 0.5008145368486745, "grad_norm": 0.9445063471794128, "learning_rate": 5.0102105774507834e-05, "loss": 11.357529640197754, "step": 3862 }, { "epoch": 0.5009442143569212, "grad_norm": 0.8014496564865112, "learning_rate": 5.008168464004496e-05, "loss": 9.558601379394531, "step": 3863 }, { "epoch": 0.5010738918651678, "grad_norm": 1.2388406991958618, "learning_rate": 5.0061263491956275e-05, "loss": 11.950454711914062, "step": 3864 }, { "epoch": 0.5012035693734145, "grad_norm": 0.9342011213302612, "learning_rate": 5.0040842333648275e-05, "loss": 11.437544822692871, "step": 3865 }, { "epoch": 0.5013332468816611, "grad_norm": 0.8075425624847412, "learning_rate": 5.0020421168527366e-05, "loss": 9.9751558303833, "step": 3866 }, { "epoch": 0.5014629243899078, "grad_norm": 0.8432214856147766, "learning_rate": 5e-05, "loss": 10.504950523376465, "step": 3867 }, { "epoch": 0.5015926018981546, "grad_norm": 0.6387094855308533, "learning_rate": 4.997957883147265e-05, "loss": 9.035258293151855, "step": 3868 }, { "epoch": 0.5017222794064012, "grad_norm": 0.7803868055343628, "learning_rate": 4.995915766635174e-05, "loss": 10.00583553314209, "step": 3869 }, { "epoch": 0.5018519569146479, "grad_norm": 0.8128936886787415, "learning_rate": 4.993873650804373e-05, "loss": 11.949959754943848, "step": 3870 }, { "epoch": 0.5019816344228946, "grad_norm": 1.0870437622070312, "learning_rate": 4.991831535995506e-05, "loss": 9.674641609191895, "step": 3871 }, { "epoch": 0.5021113119311412, "grad_norm": 0.8597813844680786, "learning_rate": 4.989789422549218e-05, "loss": 10.120686531066895, "step": 3872 }, { "epoch": 0.5022409894393879, "grad_norm": 0.823850154876709, "learning_rate": 4.987747310806152e-05, "loss": 9.329193115234375, "step": 3873 }, { "epoch": 0.5023706669476345, "grad_norm": 0.7501969337463379, "learning_rate": 4.985705201106954e-05, "loss": 7.606064319610596, "step": 3874 }, { "epoch": 0.5025003444558813, "grad_norm": 0.975874125957489, "learning_rate": 4.983663093792266e-05, "loss": 10.823816299438477, "step": 3875 }, { "epoch": 0.502630021964128, "grad_norm": 0.5814899802207947, "learning_rate": 4.981620989202731e-05, "loss": 8.390593528747559, "step": 3876 }, { "epoch": 0.5027596994723746, "grad_norm": 0.7967005372047424, "learning_rate": 4.9795788876789935e-05, "loss": 9.771562576293945, "step": 3877 }, { "epoch": 0.5028893769806213, "grad_norm": 1.1052240133285522, "learning_rate": 4.977536789561694e-05, "loss": 9.234786987304688, "step": 3878 }, { "epoch": 0.503019054488868, "grad_norm": 0.956632673740387, "learning_rate": 4.9754946951914755e-05, "loss": 11.083100318908691, "step": 3879 }, { "epoch": 0.5031487319971146, "grad_norm": 0.7736469507217407, "learning_rate": 4.973452604908979e-05, "loss": 10.19814682006836, "step": 3880 }, { "epoch": 0.5032784095053614, "grad_norm": 0.7267274856567383, "learning_rate": 4.971410519054843e-05, "loss": 9.604265213012695, "step": 3881 }, { "epoch": 0.5034080870136081, "grad_norm": 0.6798133254051208, "learning_rate": 4.969368437969709e-05, "loss": 10.031381607055664, "step": 3882 }, { "epoch": 0.5035377645218547, "grad_norm": 0.7226709723472595, "learning_rate": 4.967326361994216e-05, "loss": 8.533388137817383, "step": 3883 }, { "epoch": 0.5036674420301014, "grad_norm": 0.7616114020347595, "learning_rate": 4.9652842914690004e-05, "loss": 10.826545715332031, "step": 3884 }, { "epoch": 0.503797119538348, "grad_norm": 0.8475544452667236, "learning_rate": 4.9632422267347006e-05, "loss": 10.990665435791016, "step": 3885 }, { "epoch": 0.5039267970465947, "grad_norm": 1.0330588817596436, "learning_rate": 4.9612001681319525e-05, "loss": 10.117711067199707, "step": 3886 }, { "epoch": 0.5040564745548415, "grad_norm": 0.7218482494354248, "learning_rate": 4.959158116001389e-05, "loss": 7.2156195640563965, "step": 3887 }, { "epoch": 0.5041861520630881, "grad_norm": 0.9726538062095642, "learning_rate": 4.957116070683649e-05, "loss": 12.906126022338867, "step": 3888 }, { "epoch": 0.5043158295713348, "grad_norm": 0.9220515489578247, "learning_rate": 4.955074032519361e-05, "loss": 8.51132583618164, "step": 3889 }, { "epoch": 0.5044455070795815, "grad_norm": 0.8173506259918213, "learning_rate": 4.953032001849159e-05, "loss": 10.995684623718262, "step": 3890 }, { "epoch": 0.5045751845878281, "grad_norm": 0.7733414769172668, "learning_rate": 4.950989979013672e-05, "loss": 12.126702308654785, "step": 3891 }, { "epoch": 0.5047048620960748, "grad_norm": 0.9572435021400452, "learning_rate": 4.9489479643535305e-05, "loss": 11.618929862976074, "step": 3892 }, { "epoch": 0.5048345396043215, "grad_norm": 0.772981584072113, "learning_rate": 4.9469059582093616e-05, "loss": 8.881756782531738, "step": 3893 }, { "epoch": 0.5049642171125682, "grad_norm": 0.7483662962913513, "learning_rate": 4.944863960921791e-05, "loss": 9.828853607177734, "step": 3894 }, { "epoch": 0.5050938946208149, "grad_norm": 0.9327800869941711, "learning_rate": 4.942821972831444e-05, "loss": 10.282361030578613, "step": 3895 }, { "epoch": 0.5052235721290615, "grad_norm": 1.1807949542999268, "learning_rate": 4.940779994278945e-05, "loss": 8.400017738342285, "step": 3896 }, { "epoch": 0.5053532496373082, "grad_norm": 1.1418274641036987, "learning_rate": 4.9387380256049135e-05, "loss": 11.811419486999512, "step": 3897 }, { "epoch": 0.5054829271455549, "grad_norm": 0.680141806602478, "learning_rate": 4.9366960671499715e-05, "loss": 11.470406532287598, "step": 3898 }, { "epoch": 0.5056126046538015, "grad_norm": 0.9878180027008057, "learning_rate": 4.934654119254737e-05, "loss": 8.155542373657227, "step": 3899 }, { "epoch": 0.5057422821620483, "grad_norm": 0.8012754917144775, "learning_rate": 4.932612182259827e-05, "loss": 10.708715438842773, "step": 3900 }, { "epoch": 0.505871959670295, "grad_norm": 0.7021231055259705, "learning_rate": 4.930570256505855e-05, "loss": 7.987115859985352, "step": 3901 }, { "epoch": 0.5060016371785416, "grad_norm": 0.7719669938087463, "learning_rate": 4.928528342333434e-05, "loss": 11.777684211730957, "step": 3902 }, { "epoch": 0.5061313146867883, "grad_norm": 0.7490576505661011, "learning_rate": 4.926486440083177e-05, "loss": 9.981902122497559, "step": 3903 }, { "epoch": 0.506260992195035, "grad_norm": 0.7813612222671509, "learning_rate": 4.924444550095691e-05, "loss": 11.985824584960938, "step": 3904 }, { "epoch": 0.5063906697032816, "grad_norm": 0.8568201661109924, "learning_rate": 4.922402672711584e-05, "loss": 12.856853485107422, "step": 3905 }, { "epoch": 0.5065203472115283, "grad_norm": 0.8919969201087952, "learning_rate": 4.92036080827146e-05, "loss": 8.829134941101074, "step": 3906 }, { "epoch": 0.506650024719775, "grad_norm": 0.9645377397537231, "learning_rate": 4.9183189571159226e-05, "loss": 12.919805526733398, "step": 3907 }, { "epoch": 0.5067797022280217, "grad_norm": 0.6652172803878784, "learning_rate": 4.916277119585572e-05, "loss": 10.703139305114746, "step": 3908 }, { "epoch": 0.5069093797362684, "grad_norm": 0.5814927220344543, "learning_rate": 4.914235296021006e-05, "loss": 6.6374382972717285, "step": 3909 }, { "epoch": 0.507039057244515, "grad_norm": 0.6765544414520264, "learning_rate": 4.912193486762821e-05, "loss": 6.988815784454346, "step": 3910 }, { "epoch": 0.5071687347527617, "grad_norm": 0.763552188873291, "learning_rate": 4.910151692151611e-05, "loss": 9.518548011779785, "step": 3911 }, { "epoch": 0.5072984122610084, "grad_norm": 0.9462231397628784, "learning_rate": 4.908109912527966e-05, "loss": 12.010377883911133, "step": 3912 }, { "epoch": 0.5074280897692551, "grad_norm": 0.8129125833511353, "learning_rate": 4.906068148232474e-05, "loss": 12.484845161437988, "step": 3913 }, { "epoch": 0.5075577672775018, "grad_norm": 0.7913107872009277, "learning_rate": 4.904026399605723e-05, "loss": 11.593862533569336, "step": 3914 }, { "epoch": 0.5076874447857485, "grad_norm": 1.2208503484725952, "learning_rate": 4.9019846669882946e-05, "loss": 13.348651885986328, "step": 3915 }, { "epoch": 0.5078171222939951, "grad_norm": 0.9289389848709106, "learning_rate": 4.89994295072077e-05, "loss": 11.591687202453613, "step": 3916 }, { "epoch": 0.5079467998022418, "grad_norm": 0.6932907700538635, "learning_rate": 4.897901251143729e-05, "loss": 9.222874641418457, "step": 3917 }, { "epoch": 0.5080764773104884, "grad_norm": 0.8059044480323792, "learning_rate": 4.895859568597744e-05, "loss": 6.505439281463623, "step": 3918 }, { "epoch": 0.5082061548187352, "grad_norm": 0.6967765092849731, "learning_rate": 4.893817903423388e-05, "loss": 8.350994110107422, "step": 3919 }, { "epoch": 0.5083358323269819, "grad_norm": 0.8758399486541748, "learning_rate": 4.8917762559612324e-05, "loss": 9.87790298461914, "step": 3920 }, { "epoch": 0.5084655098352285, "grad_norm": 0.8883233666419983, "learning_rate": 4.889734626551842e-05, "loss": 8.157119750976562, "step": 3921 }, { "epoch": 0.5085951873434752, "grad_norm": 0.8728767037391663, "learning_rate": 4.88769301553578e-05, "loss": 9.6219482421875, "step": 3922 }, { "epoch": 0.5087248648517219, "grad_norm": 1.1996736526489258, "learning_rate": 4.885651423253608e-05, "loss": 12.064748764038086, "step": 3923 }, { "epoch": 0.5088545423599685, "grad_norm": 0.9363698363304138, "learning_rate": 4.8836098500458825e-05, "loss": 12.57946491241455, "step": 3924 }, { "epoch": 0.5089842198682152, "grad_norm": 0.7409453392028809, "learning_rate": 4.881568296253156e-05, "loss": 9.306720733642578, "step": 3925 }, { "epoch": 0.509113897376462, "grad_norm": 0.7541502118110657, "learning_rate": 4.8795267622159835e-05, "loss": 9.461207389831543, "step": 3926 }, { "epoch": 0.5092435748847086, "grad_norm": 0.8849387168884277, "learning_rate": 4.87748524827491e-05, "loss": 10.15745735168457, "step": 3927 }, { "epoch": 0.5093732523929553, "grad_norm": 0.7808519005775452, "learning_rate": 4.875443754770481e-05, "loss": 7.485677719116211, "step": 3928 }, { "epoch": 0.5095029299012019, "grad_norm": 0.5691893100738525, "learning_rate": 4.8734022820432364e-05, "loss": 5.127785682678223, "step": 3929 }, { "epoch": 0.5096326074094486, "grad_norm": 1.0788947343826294, "learning_rate": 4.871360830433713e-05, "loss": 12.653449058532715, "step": 3930 }, { "epoch": 0.5097622849176953, "grad_norm": 0.9558882713317871, "learning_rate": 4.869319400282446e-05, "loss": 11.123942375183105, "step": 3931 }, { "epoch": 0.509891962425942, "grad_norm": 1.0104116201400757, "learning_rate": 4.8672779919299645e-05, "loss": 11.537060737609863, "step": 3932 }, { "epoch": 0.5100216399341887, "grad_norm": 1.145586371421814, "learning_rate": 4.865236605716796e-05, "loss": 10.428114891052246, "step": 3933 }, { "epoch": 0.5101513174424354, "grad_norm": 0.8125141859054565, "learning_rate": 4.863195241983464e-05, "loss": 9.94880199432373, "step": 3934 }, { "epoch": 0.510280994950682, "grad_norm": 0.669348418712616, "learning_rate": 4.861153901070485e-05, "loss": 11.322216033935547, "step": 3935 }, { "epoch": 0.5104106724589287, "grad_norm": 0.8049017786979675, "learning_rate": 4.859112583318377e-05, "loss": 13.629810333251953, "step": 3936 }, { "epoch": 0.5105403499671753, "grad_norm": 0.832078218460083, "learning_rate": 4.8570712890676495e-05, "loss": 9.390066146850586, "step": 3937 }, { "epoch": 0.510670027475422, "grad_norm": 0.9385185837745667, "learning_rate": 4.855030018658811e-05, "loss": 14.120662689208984, "step": 3938 }, { "epoch": 0.5107997049836688, "grad_norm": 0.8267498016357422, "learning_rate": 4.852988772432367e-05, "loss": 8.550061225891113, "step": 3939 }, { "epoch": 0.5109293824919154, "grad_norm": 1.1074943542480469, "learning_rate": 4.8509475507288146e-05, "loss": 13.467698097229004, "step": 3940 }, { "epoch": 0.5110590600001621, "grad_norm": 0.8159096240997314, "learning_rate": 4.8489063538886496e-05, "loss": 9.28162956237793, "step": 3941 }, { "epoch": 0.5111887375084088, "grad_norm": 0.8543811440467834, "learning_rate": 4.846865182252364e-05, "loss": 10.924138069152832, "step": 3942 }, { "epoch": 0.5113184150166554, "grad_norm": 0.8661693930625916, "learning_rate": 4.8448240361604454e-05, "loss": 9.366446495056152, "step": 3943 }, { "epoch": 0.5114480925249021, "grad_norm": 0.6635358333587646, "learning_rate": 4.8427829159533746e-05, "loss": 5.989320755004883, "step": 3944 }, { "epoch": 0.5115777700331489, "grad_norm": 1.0875849723815918, "learning_rate": 4.840741821971633e-05, "loss": 12.240568161010742, "step": 3945 }, { "epoch": 0.5117074475413955, "grad_norm": 0.6129525303840637, "learning_rate": 4.838700754555693e-05, "loss": 7.631508827209473, "step": 3946 }, { "epoch": 0.5118371250496422, "grad_norm": 0.8385546207427979, "learning_rate": 4.8366597140460245e-05, "loss": 10.678245544433594, "step": 3947 }, { "epoch": 0.5119668025578888, "grad_norm": 0.6732395887374878, "learning_rate": 4.8346187007830934e-05, "loss": 6.971324920654297, "step": 3948 }, { "epoch": 0.5120964800661355, "grad_norm": 0.5044738054275513, "learning_rate": 4.83257771510736e-05, "loss": 9.319246292114258, "step": 3949 }, { "epoch": 0.5122261575743822, "grad_norm": 0.9446305632591248, "learning_rate": 4.83053675735928e-05, "loss": 11.08983039855957, "step": 3950 }, { "epoch": 0.5123558350826289, "grad_norm": 0.7195689678192139, "learning_rate": 4.828495827879305e-05, "loss": 10.388317108154297, "step": 3951 }, { "epoch": 0.5124855125908756, "grad_norm": 0.8585192561149597, "learning_rate": 4.8264549270078826e-05, "loss": 11.061854362487793, "step": 3952 }, { "epoch": 0.5126151900991223, "grad_norm": 0.7659410238265991, "learning_rate": 4.8244140550854545e-05, "loss": 9.921098709106445, "step": 3953 }, { "epoch": 0.5127448676073689, "grad_norm": 0.7923212647438049, "learning_rate": 4.8223732124524575e-05, "loss": 9.947664260864258, "step": 3954 }, { "epoch": 0.5128745451156156, "grad_norm": 1.2295265197753906, "learning_rate": 4.8203323994493234e-05, "loss": 10.820405960083008, "step": 3955 }, { "epoch": 0.5130042226238622, "grad_norm": 0.5427071452140808, "learning_rate": 4.8182916164164804e-05, "loss": 8.235221862792969, "step": 3956 }, { "epoch": 0.5131339001321089, "grad_norm": 0.9329939484596252, "learning_rate": 4.81625086369435e-05, "loss": 13.210711479187012, "step": 3957 }, { "epoch": 0.5132635776403557, "grad_norm": 0.8195869326591492, "learning_rate": 4.814210141623351e-05, "loss": 9.599055290222168, "step": 3958 }, { "epoch": 0.5133932551486023, "grad_norm": 0.8511806130409241, "learning_rate": 4.812169450543893e-05, "loss": 11.97089958190918, "step": 3959 }, { "epoch": 0.513522932656849, "grad_norm": 0.7834486961364746, "learning_rate": 4.810128790796386e-05, "loss": 7.720414161682129, "step": 3960 }, { "epoch": 0.5136526101650957, "grad_norm": 1.038436770439148, "learning_rate": 4.8080881627212296e-05, "loss": 14.282047271728516, "step": 3961 }, { "epoch": 0.5137822876733423, "grad_norm": 0.7251902222633362, "learning_rate": 4.80604756665882e-05, "loss": 9.740272521972656, "step": 3962 }, { "epoch": 0.513911965181589, "grad_norm": 0.8657886385917664, "learning_rate": 4.80400700294955e-05, "loss": 11.657451629638672, "step": 3963 }, { "epoch": 0.5140416426898358, "grad_norm": 1.0554945468902588, "learning_rate": 4.801966471933804e-05, "loss": 15.0374174118042, "step": 3964 }, { "epoch": 0.5141713201980824, "grad_norm": 0.9831622242927551, "learning_rate": 4.7999259739519625e-05, "loss": 11.406229019165039, "step": 3965 }, { "epoch": 0.5143009977063291, "grad_norm": 0.6437193155288696, "learning_rate": 4.797885509344402e-05, "loss": 6.616147994995117, "step": 3966 }, { "epoch": 0.5144306752145757, "grad_norm": 0.929836094379425, "learning_rate": 4.795845078451489e-05, "loss": 10.956246376037598, "step": 3967 }, { "epoch": 0.5145603527228224, "grad_norm": 0.7037713527679443, "learning_rate": 4.793804681613589e-05, "loss": 8.59537410736084, "step": 3968 }, { "epoch": 0.5146900302310691, "grad_norm": 0.6814921498298645, "learning_rate": 4.7917643191710594e-05, "loss": 9.223849296569824, "step": 3969 }, { "epoch": 0.5148197077393157, "grad_norm": 0.8498775362968445, "learning_rate": 4.7897239914642513e-05, "loss": 9.693428993225098, "step": 3970 }, { "epoch": 0.5149493852475625, "grad_norm": 0.9137595891952515, "learning_rate": 4.787683698833513e-05, "loss": 9.16525650024414, "step": 3971 }, { "epoch": 0.5150790627558092, "grad_norm": 1.104243278503418, "learning_rate": 4.785643441619184e-05, "loss": 12.282512664794922, "step": 3972 }, { "epoch": 0.5152087402640558, "grad_norm": 0.8478916883468628, "learning_rate": 4.783603220161598e-05, "loss": 7.9797210693359375, "step": 3973 }, { "epoch": 0.5153384177723025, "grad_norm": 0.8680679202079773, "learning_rate": 4.781563034801085e-05, "loss": 11.092884063720703, "step": 3974 }, { "epoch": 0.5154680952805492, "grad_norm": 0.7528985738754272, "learning_rate": 4.779522885877967e-05, "loss": 8.678824424743652, "step": 3975 }, { "epoch": 0.5155977727887958, "grad_norm": 0.8756726384162903, "learning_rate": 4.777482773732561e-05, "loss": 8.429698944091797, "step": 3976 }, { "epoch": 0.5157274502970426, "grad_norm": 0.9556937217712402, "learning_rate": 4.775442698705178e-05, "loss": 11.629881858825684, "step": 3977 }, { "epoch": 0.5158571278052893, "grad_norm": 1.0696008205413818, "learning_rate": 4.7734026611361193e-05, "loss": 8.462018966674805, "step": 3978 }, { "epoch": 0.5159868053135359, "grad_norm": 0.8687130212783813, "learning_rate": 4.771362661365686e-05, "loss": 9.253092765808105, "step": 3979 }, { "epoch": 0.5161164828217826, "grad_norm": 0.6554186344146729, "learning_rate": 4.769322699734169e-05, "loss": 9.827850341796875, "step": 3980 }, { "epoch": 0.5162461603300292, "grad_norm": 1.083364486694336, "learning_rate": 4.767282776581852e-05, "loss": 11.650741577148438, "step": 3981 }, { "epoch": 0.5163758378382759, "grad_norm": 1.5236557722091675, "learning_rate": 4.765242892249016e-05, "loss": 11.67576789855957, "step": 3982 }, { "epoch": 0.5165055153465227, "grad_norm": 0.7009593844413757, "learning_rate": 4.7632030470759326e-05, "loss": 10.515165328979492, "step": 3983 }, { "epoch": 0.5166351928547693, "grad_norm": 0.7208053469657898, "learning_rate": 4.761163241402867e-05, "loss": 5.992188453674316, "step": 3984 }, { "epoch": 0.516764870363016, "grad_norm": 0.8213793635368347, "learning_rate": 4.75912347557008e-05, "loss": 10.14766788482666, "step": 3985 }, { "epoch": 0.5168945478712627, "grad_norm": 0.9463720321655273, "learning_rate": 4.7570837499178223e-05, "loss": 12.163161277770996, "step": 3986 }, { "epoch": 0.5170242253795093, "grad_norm": 0.7550551295280457, "learning_rate": 4.7550440647863416e-05, "loss": 10.051375389099121, "step": 3987 }, { "epoch": 0.517153902887756, "grad_norm": 0.8520498871803284, "learning_rate": 4.753004420515876e-05, "loss": 11.319669723510742, "step": 3988 }, { "epoch": 0.5172835803960026, "grad_norm": 0.9806081652641296, "learning_rate": 4.750964817446658e-05, "loss": 9.366620063781738, "step": 3989 }, { "epoch": 0.5174132579042494, "grad_norm": 0.8009794354438782, "learning_rate": 4.748925255918914e-05, "loss": 10.475547790527344, "step": 3990 }, { "epoch": 0.5175429354124961, "grad_norm": 1.2896547317504883, "learning_rate": 4.746885736272861e-05, "loss": 12.455707550048828, "step": 3991 }, { "epoch": 0.5176726129207427, "grad_norm": 0.6376634240150452, "learning_rate": 4.744846258848711e-05, "loss": 10.924413681030273, "step": 3992 }, { "epoch": 0.5178022904289894, "grad_norm": 0.8935121297836304, "learning_rate": 4.74280682398667e-05, "loss": 9.592622756958008, "step": 3993 }, { "epoch": 0.5179319679372361, "grad_norm": 0.8919717669487, "learning_rate": 4.740767432026934e-05, "loss": 10.687987327575684, "step": 3994 }, { "epoch": 0.5180616454454827, "grad_norm": 0.7289046049118042, "learning_rate": 4.7387280833096936e-05, "loss": 10.00118637084961, "step": 3995 }, { "epoch": 0.5181913229537295, "grad_norm": 0.7818265557289124, "learning_rate": 4.7366887781751325e-05, "loss": 9.810912132263184, "step": 3996 }, { "epoch": 0.5183210004619762, "grad_norm": 0.9191657304763794, "learning_rate": 4.7346495169634256e-05, "loss": 9.026086807250977, "step": 3997 }, { "epoch": 0.5184506779702228, "grad_norm": 0.6397749185562134, "learning_rate": 4.732610300014742e-05, "loss": 5.9040727615356445, "step": 3998 }, { "epoch": 0.5185803554784695, "grad_norm": 0.8342471718788147, "learning_rate": 4.730571127669241e-05, "loss": 12.361144065856934, "step": 3999 }, { "epoch": 0.5187100329867161, "grad_norm": 0.8802516460418701, "learning_rate": 4.728532000267079e-05, "loss": 13.318771362304688, "step": 4000 }, { "epoch": 0.5188397104949628, "grad_norm": 1.1359679698944092, "learning_rate": 4.7264929181484e-05, "loss": 9.785347938537598, "step": 4001 }, { "epoch": 0.5189693880032095, "grad_norm": 0.9234712719917297, "learning_rate": 4.724453881653342e-05, "loss": 9.762746810913086, "step": 4002 }, { "epoch": 0.5190990655114562, "grad_norm": 0.7491815090179443, "learning_rate": 4.722414891122039e-05, "loss": 9.422891616821289, "step": 4003 }, { "epoch": 0.5192287430197029, "grad_norm": 0.9451029896736145, "learning_rate": 4.720375946894612e-05, "loss": 10.340686798095703, "step": 4004 }, { "epoch": 0.5193584205279496, "grad_norm": 0.9408575892448425, "learning_rate": 4.718337049311178e-05, "loss": 10.034679412841797, "step": 4005 }, { "epoch": 0.5194880980361962, "grad_norm": 0.8697379231452942, "learning_rate": 4.7162981987118445e-05, "loss": 9.773404121398926, "step": 4006 }, { "epoch": 0.5196177755444429, "grad_norm": 1.0665884017944336, "learning_rate": 4.7142593954367105e-05, "loss": 9.493412971496582, "step": 4007 }, { "epoch": 0.5197474530526895, "grad_norm": 0.9064645171165466, "learning_rate": 4.712220639825869e-05, "loss": 11.138994216918945, "step": 4008 }, { "epoch": 0.5198771305609363, "grad_norm": 1.0951310396194458, "learning_rate": 4.710181932219403e-05, "loss": 13.96871280670166, "step": 4009 }, { "epoch": 0.520006808069183, "grad_norm": 1.4048993587493896, "learning_rate": 4.70814327295739e-05, "loss": 12.841097831726074, "step": 4010 }, { "epoch": 0.5201364855774296, "grad_norm": 1.0062130689620972, "learning_rate": 4.7061046623798974e-05, "loss": 11.379425048828125, "step": 4011 }, { "epoch": 0.5202661630856763, "grad_norm": 0.7150290012359619, "learning_rate": 4.7040661008269845e-05, "loss": 8.714273452758789, "step": 4012 }, { "epoch": 0.520395840593923, "grad_norm": 0.719028651714325, "learning_rate": 4.7020275886387046e-05, "loss": 8.906416893005371, "step": 4013 }, { "epoch": 0.5205255181021696, "grad_norm": 0.6768006682395935, "learning_rate": 4.699989126155099e-05, "loss": 9.447565078735352, "step": 4014 }, { "epoch": 0.5206551956104164, "grad_norm": 0.6307193636894226, "learning_rate": 4.6979507137162057e-05, "loss": 9.782127380371094, "step": 4015 }, { "epoch": 0.5207848731186631, "grad_norm": 0.8109999895095825, "learning_rate": 4.695912351662049e-05, "loss": 8.656057357788086, "step": 4016 }, { "epoch": 0.5209145506269097, "grad_norm": 0.7598525285720825, "learning_rate": 4.693874040332649e-05, "loss": 9.414830207824707, "step": 4017 }, { "epoch": 0.5210442281351564, "grad_norm": 0.9915435910224915, "learning_rate": 4.691835780068015e-05, "loss": 12.837493896484375, "step": 4018 }, { "epoch": 0.521173905643403, "grad_norm": 0.8831403851509094, "learning_rate": 4.6897975712081485e-05, "loss": 10.21687126159668, "step": 4019 }, { "epoch": 0.5213035831516497, "grad_norm": 0.6657143235206604, "learning_rate": 4.6877594140930436e-05, "loss": 8.179842948913574, "step": 4020 }, { "epoch": 0.5214332606598964, "grad_norm": 0.7844367623329163, "learning_rate": 4.685721309062684e-05, "loss": 8.590189933776855, "step": 4021 }, { "epoch": 0.5215629381681431, "grad_norm": 0.8925003409385681, "learning_rate": 4.683683256457044e-05, "loss": 8.412660598754883, "step": 4022 }, { "epoch": 0.5216926156763898, "grad_norm": 0.7386960387229919, "learning_rate": 4.6816452566160905e-05, "loss": 8.151741981506348, "step": 4023 }, { "epoch": 0.5218222931846365, "grad_norm": 0.9175203442573547, "learning_rate": 4.679607309879783e-05, "loss": 9.340176582336426, "step": 4024 }, { "epoch": 0.5219519706928831, "grad_norm": 0.7933153510093689, "learning_rate": 4.677569416588071e-05, "loss": 6.82199239730835, "step": 4025 }, { "epoch": 0.5220816482011298, "grad_norm": 0.854523777961731, "learning_rate": 4.675531577080893e-05, "loss": 9.553264617919922, "step": 4026 }, { "epoch": 0.5222113257093765, "grad_norm": 0.959367573261261, "learning_rate": 4.6734937916981816e-05, "loss": 10.982638359069824, "step": 4027 }, { "epoch": 0.5223410032176232, "grad_norm": 0.9782571792602539, "learning_rate": 4.671456060779858e-05, "loss": 10.041996955871582, "step": 4028 }, { "epoch": 0.5224706807258699, "grad_norm": 0.9314438700675964, "learning_rate": 4.669418384665836e-05, "loss": 10.65495491027832, "step": 4029 }, { "epoch": 0.5226003582341165, "grad_norm": 0.7963306307792664, "learning_rate": 4.6673807636960204e-05, "loss": 11.888680458068848, "step": 4030 }, { "epoch": 0.5227300357423632, "grad_norm": 1.0321115255355835, "learning_rate": 4.665343198210305e-05, "loss": 13.534363746643066, "step": 4031 }, { "epoch": 0.5228597132506099, "grad_norm": 0.6616288423538208, "learning_rate": 4.663305688548576e-05, "loss": 11.31861686706543, "step": 4032 }, { "epoch": 0.5229893907588565, "grad_norm": 0.9091864228248596, "learning_rate": 4.661268235050709e-05, "loss": 10.227185249328613, "step": 4033 }, { "epoch": 0.5231190682671032, "grad_norm": 0.6934260129928589, "learning_rate": 4.6592308380565716e-05, "loss": 8.408472061157227, "step": 4034 }, { "epoch": 0.52324874577535, "grad_norm": 0.7581654191017151, "learning_rate": 4.657193497906022e-05, "loss": 8.385215759277344, "step": 4035 }, { "epoch": 0.5233784232835966, "grad_norm": 0.8705418109893799, "learning_rate": 4.655156214938906e-05, "loss": 11.199045181274414, "step": 4036 }, { "epoch": 0.5235081007918433, "grad_norm": 0.8076537847518921, "learning_rate": 4.653118989495064e-05, "loss": 9.986625671386719, "step": 4037 }, { "epoch": 0.52363777830009, "grad_norm": 0.8441168069839478, "learning_rate": 4.6510818219143244e-05, "loss": 10.490321159362793, "step": 4038 }, { "epoch": 0.5237674558083366, "grad_norm": 0.9132733345031738, "learning_rate": 4.649044712536506e-05, "loss": 9.163110733032227, "step": 4039 }, { "epoch": 0.5238971333165833, "grad_norm": 0.6331081986427307, "learning_rate": 4.647007661701418e-05, "loss": 11.612677574157715, "step": 4040 }, { "epoch": 0.52402681082483, "grad_norm": 0.7177819609642029, "learning_rate": 4.6449706697488596e-05, "loss": 10.239673614501953, "step": 4041 }, { "epoch": 0.5241564883330767, "grad_norm": 1.1973865032196045, "learning_rate": 4.642933737018623e-05, "loss": 11.847597122192383, "step": 4042 }, { "epoch": 0.5242861658413234, "grad_norm": 0.7268187403678894, "learning_rate": 4.640896863850488e-05, "loss": 8.707483291625977, "step": 4043 }, { "epoch": 0.52441584334957, "grad_norm": 0.9196692109107971, "learning_rate": 4.638860050584224e-05, "loss": 8.66935920715332, "step": 4044 }, { "epoch": 0.5245455208578167, "grad_norm": 0.9595401287078857, "learning_rate": 4.63682329755959e-05, "loss": 10.285072326660156, "step": 4045 }, { "epoch": 0.5246751983660634, "grad_norm": 0.6942451596260071, "learning_rate": 4.634786605116338e-05, "loss": 7.95138692855835, "step": 4046 }, { "epoch": 0.5248048758743101, "grad_norm": 0.8785197138786316, "learning_rate": 4.632749973594206e-05, "loss": 10.86507797241211, "step": 4047 }, { "epoch": 0.5249345533825568, "grad_norm": 0.8852591514587402, "learning_rate": 4.630713403332925e-05, "loss": 10.549846649169922, "step": 4048 }, { "epoch": 0.5250642308908035, "grad_norm": 0.7210069298744202, "learning_rate": 4.628676894672215e-05, "loss": 10.248689651489258, "step": 4049 }, { "epoch": 0.5251939083990501, "grad_norm": 0.6742934584617615, "learning_rate": 4.626640447951784e-05, "loss": 6.827592849731445, "step": 4050 }, { "epoch": 0.5253235859072968, "grad_norm": 0.9475855231285095, "learning_rate": 4.6246040635113314e-05, "loss": 10.45724868774414, "step": 4051 }, { "epoch": 0.5254532634155434, "grad_norm": 0.8236655592918396, "learning_rate": 4.622567741690547e-05, "loss": 6.111057281494141, "step": 4052 }, { "epoch": 0.5255829409237901, "grad_norm": 0.7132302522659302, "learning_rate": 4.620531482829107e-05, "loss": 8.212903022766113, "step": 4053 }, { "epoch": 0.5257126184320369, "grad_norm": 0.7895175814628601, "learning_rate": 4.61849528726668e-05, "loss": 8.386661529541016, "step": 4054 }, { "epoch": 0.5258422959402835, "grad_norm": 0.9803051352500916, "learning_rate": 4.6164591553429234e-05, "loss": 9.299866676330566, "step": 4055 }, { "epoch": 0.5259719734485302, "grad_norm": 0.8077079057693481, "learning_rate": 4.614423087397484e-05, "loss": 9.727737426757812, "step": 4056 }, { "epoch": 0.5261016509567769, "grad_norm": 0.69986891746521, "learning_rate": 4.6123870837699964e-05, "loss": 8.783387184143066, "step": 4057 }, { "epoch": 0.5262313284650235, "grad_norm": 1.0156922340393066, "learning_rate": 4.610351144800086e-05, "loss": 6.709911346435547, "step": 4058 }, { "epoch": 0.5263610059732702, "grad_norm": 0.7985063791275024, "learning_rate": 4.608315270827368e-05, "loss": 9.487711906433105, "step": 4059 }, { "epoch": 0.526490683481517, "grad_norm": 0.786224901676178, "learning_rate": 4.606279462191445e-05, "loss": 13.075094223022461, "step": 4060 }, { "epoch": 0.5266203609897636, "grad_norm": 0.883567750453949, "learning_rate": 4.6042437192319094e-05, "loss": 9.899885177612305, "step": 4061 }, { "epoch": 0.5267500384980103, "grad_norm": 0.6810879707336426, "learning_rate": 4.602208042288343e-05, "loss": 10.176248550415039, "step": 4062 }, { "epoch": 0.5268797160062569, "grad_norm": 0.7706116437911987, "learning_rate": 4.600172431700317e-05, "loss": 9.293444633483887, "step": 4063 }, { "epoch": 0.5270093935145036, "grad_norm": 0.7919438481330872, "learning_rate": 4.598136887807389e-05, "loss": 10.299385070800781, "step": 4064 }, { "epoch": 0.5271390710227503, "grad_norm": 1.016703486442566, "learning_rate": 4.5961014109491105e-05, "loss": 9.466764450073242, "step": 4065 }, { "epoch": 0.5272687485309969, "grad_norm": 0.7507076263427734, "learning_rate": 4.594066001465016e-05, "loss": 9.15340805053711, "step": 4066 }, { "epoch": 0.5273984260392437, "grad_norm": 0.6420252323150635, "learning_rate": 4.5920306596946325e-05, "loss": 8.567709922790527, "step": 4067 }, { "epoch": 0.5275281035474904, "grad_norm": 0.6826860308647156, "learning_rate": 4.5899953859774744e-05, "loss": 8.572848320007324, "step": 4068 }, { "epoch": 0.527657781055737, "grad_norm": 0.93901526927948, "learning_rate": 4.587960180653045e-05, "loss": 15.908368110656738, "step": 4069 }, { "epoch": 0.5277874585639837, "grad_norm": 1.1380988359451294, "learning_rate": 4.585925044060837e-05, "loss": 9.70860481262207, "step": 4070 }, { "epoch": 0.5279171360722303, "grad_norm": 1.001173496246338, "learning_rate": 4.58388997654033e-05, "loss": 9.362467765808105, "step": 4071 }, { "epoch": 0.528046813580477, "grad_norm": 0.6828227639198303, "learning_rate": 4.581854978430993e-05, "loss": 8.446693420410156, "step": 4072 }, { "epoch": 0.5281764910887238, "grad_norm": 1.2900276184082031, "learning_rate": 4.5798200500722834e-05, "loss": 8.823010444641113, "step": 4073 }, { "epoch": 0.5283061685969704, "grad_norm": 0.6400370597839355, "learning_rate": 4.577785191803647e-05, "loss": 10.94048023223877, "step": 4074 }, { "epoch": 0.5284358461052171, "grad_norm": 0.7868499755859375, "learning_rate": 4.5757504039645175e-05, "loss": 9.526719093322754, "step": 4075 }, { "epoch": 0.5285655236134638, "grad_norm": 0.6812804341316223, "learning_rate": 4.5737156868943175e-05, "loss": 10.065927505493164, "step": 4076 }, { "epoch": 0.5286952011217104, "grad_norm": 0.9423288106918335, "learning_rate": 4.5716810409324575e-05, "loss": 9.100692749023438, "step": 4077 }, { "epoch": 0.5288248786299571, "grad_norm": 0.9565255641937256, "learning_rate": 4.569646466418336e-05, "loss": 12.174193382263184, "step": 4078 }, { "epoch": 0.5289545561382039, "grad_norm": 1.078912377357483, "learning_rate": 4.567611963691338e-05, "loss": 10.809793472290039, "step": 4079 }, { "epoch": 0.5290842336464505, "grad_norm": 0.9598630666732788, "learning_rate": 4.5655775330908426e-05, "loss": 10.728082656860352, "step": 4080 }, { "epoch": 0.5292139111546972, "grad_norm": 0.909466564655304, "learning_rate": 4.563543174956208e-05, "loss": 7.213595867156982, "step": 4081 }, { "epoch": 0.5293435886629438, "grad_norm": 0.9942327737808228, "learning_rate": 4.561508889626787e-05, "loss": 13.900864601135254, "step": 4082 }, { "epoch": 0.5294732661711905, "grad_norm": 0.8555646538734436, "learning_rate": 4.5594746774419176e-05, "loss": 10.955838203430176, "step": 4083 }, { "epoch": 0.5296029436794372, "grad_norm": 0.8578656315803528, "learning_rate": 4.557440538740926e-05, "loss": 9.525371551513672, "step": 4084 }, { "epoch": 0.5297326211876838, "grad_norm": 0.8921083807945251, "learning_rate": 4.5554064738631256e-05, "loss": 9.828598022460938, "step": 4085 }, { "epoch": 0.5298622986959306, "grad_norm": 0.8380062580108643, "learning_rate": 4.5533724831478197e-05, "loss": 10.154605865478516, "step": 4086 }, { "epoch": 0.5299919762041773, "grad_norm": 0.7604771852493286, "learning_rate": 4.5513385669342954e-05, "loss": 8.212769508361816, "step": 4087 }, { "epoch": 0.5301216537124239, "grad_norm": 0.7257677316665649, "learning_rate": 4.5493047255618316e-05, "loss": 10.333611488342285, "step": 4088 }, { "epoch": 0.5302513312206706, "grad_norm": 0.6219606399536133, "learning_rate": 4.547270959369691e-05, "loss": 7.822074890136719, "step": 4089 }, { "epoch": 0.5303810087289172, "grad_norm": 0.7271294593811035, "learning_rate": 4.545237268697127e-05, "loss": 7.7328314781188965, "step": 4090 }, { "epoch": 0.5305106862371639, "grad_norm": 1.0544875860214233, "learning_rate": 4.5432036538833775e-05, "loss": 12.108089447021484, "step": 4091 }, { "epoch": 0.5306403637454107, "grad_norm": 0.6452759504318237, "learning_rate": 4.5411701152676686e-05, "loss": 9.03043270111084, "step": 4092 }, { "epoch": 0.5307700412536573, "grad_norm": 1.0192373991012573, "learning_rate": 4.5391366531892165e-05, "loss": 11.893614768981934, "step": 4093 }, { "epoch": 0.530899718761904, "grad_norm": 0.9513656497001648, "learning_rate": 4.537103267987221e-05, "loss": 11.871017456054688, "step": 4094 }, { "epoch": 0.5310293962701507, "grad_norm": 0.7309857606887817, "learning_rate": 4.53506996000087e-05, "loss": 10.775505065917969, "step": 4095 }, { "epoch": 0.5311590737783973, "grad_norm": 0.9075210094451904, "learning_rate": 4.533036729569339e-05, "loss": 10.870628356933594, "step": 4096 }, { "epoch": 0.531288751286644, "grad_norm": 0.673610508441925, "learning_rate": 4.531003577031791e-05, "loss": 7.8954362869262695, "step": 4097 }, { "epoch": 0.5314184287948907, "grad_norm": 0.7857381701469421, "learning_rate": 4.528970502727376e-05, "loss": 10.924576759338379, "step": 4098 }, { "epoch": 0.5315481063031374, "grad_norm": 0.8485713601112366, "learning_rate": 4.526937506995229e-05, "loss": 9.239556312561035, "step": 4099 }, { "epoch": 0.5316777838113841, "grad_norm": 1.0544229745864868, "learning_rate": 4.524904590174474e-05, "loss": 15.364801406860352, "step": 4100 }, { "epoch": 0.5318074613196307, "grad_norm": 0.7885229587554932, "learning_rate": 4.522871752604221e-05, "loss": 10.674613952636719, "step": 4101 }, { "epoch": 0.5319371388278774, "grad_norm": 0.8047952055931091, "learning_rate": 4.520838994623567e-05, "loss": 9.855687141418457, "step": 4102 }, { "epoch": 0.5320668163361241, "grad_norm": 0.7766533493995667, "learning_rate": 4.5188063165715966e-05, "loss": 7.451589107513428, "step": 4103 }, { "epoch": 0.5321964938443707, "grad_norm": 0.7639044523239136, "learning_rate": 4.516773718787379e-05, "loss": 9.050993919372559, "step": 4104 }, { "epoch": 0.5323261713526175, "grad_norm": 0.8433435559272766, "learning_rate": 4.514741201609971e-05, "loss": 10.384186744689941, "step": 4105 }, { "epoch": 0.5324558488608642, "grad_norm": 0.8670427799224854, "learning_rate": 4.5127087653784164e-05, "loss": 12.756152153015137, "step": 4106 }, { "epoch": 0.5325855263691108, "grad_norm": 0.9236217141151428, "learning_rate": 4.510676410431747e-05, "loss": 8.571908950805664, "step": 4107 }, { "epoch": 0.5327152038773575, "grad_norm": 0.7954450249671936, "learning_rate": 4.5086441371089764e-05, "loss": 10.305689811706543, "step": 4108 }, { "epoch": 0.5328448813856042, "grad_norm": 0.768521249294281, "learning_rate": 4.506611945749107e-05, "loss": 8.167224884033203, "step": 4109 }, { "epoch": 0.5329745588938508, "grad_norm": 1.2342989444732666, "learning_rate": 4.504579836691131e-05, "loss": 9.721555709838867, "step": 4110 }, { "epoch": 0.5331042364020976, "grad_norm": 0.9412213563919067, "learning_rate": 4.502547810274023e-05, "loss": 9.007220268249512, "step": 4111 }, { "epoch": 0.5332339139103442, "grad_norm": 0.7653121948242188, "learning_rate": 4.500515866836742e-05, "loss": 13.13898754119873, "step": 4112 }, { "epoch": 0.5333635914185909, "grad_norm": 0.6859414577484131, "learning_rate": 4.498484006718239e-05, "loss": 8.872817039489746, "step": 4113 }, { "epoch": 0.5334932689268376, "grad_norm": 0.809933066368103, "learning_rate": 4.4964522302574466e-05, "loss": 11.499948501586914, "step": 4114 }, { "epoch": 0.5336229464350842, "grad_norm": 0.8124129772186279, "learning_rate": 4.494420537793284e-05, "loss": 9.914828300476074, "step": 4115 }, { "epoch": 0.5337526239433309, "grad_norm": 0.8792083859443665, "learning_rate": 4.492388929664658e-05, "loss": 7.609779357910156, "step": 4116 }, { "epoch": 0.5338823014515776, "grad_norm": 0.6992030739784241, "learning_rate": 4.490357406210459e-05, "loss": 9.269498825073242, "step": 4117 }, { "epoch": 0.5340119789598243, "grad_norm": 0.962593138217926, "learning_rate": 4.488325967769567e-05, "loss": 9.628609657287598, "step": 4118 }, { "epoch": 0.534141656468071, "grad_norm": 0.8889840245246887, "learning_rate": 4.486294614680845e-05, "loss": 10.242947578430176, "step": 4119 }, { "epoch": 0.5342713339763177, "grad_norm": 0.894406795501709, "learning_rate": 4.484263347283142e-05, "loss": 7.7443413734436035, "step": 4120 }, { "epoch": 0.5344010114845643, "grad_norm": 0.8802054524421692, "learning_rate": 4.482232165915293e-05, "loss": 7.8440022468566895, "step": 4121 }, { "epoch": 0.534530688992811, "grad_norm": 0.7678064703941345, "learning_rate": 4.480201070916119e-05, "loss": 9.565176010131836, "step": 4122 }, { "epoch": 0.5346603665010576, "grad_norm": 0.6611738204956055, "learning_rate": 4.478170062624425e-05, "loss": 8.880576133728027, "step": 4123 }, { "epoch": 0.5347900440093044, "grad_norm": 0.8813105225563049, "learning_rate": 4.4761391413790045e-05, "loss": 7.703081130981445, "step": 4124 }, { "epoch": 0.5349197215175511, "grad_norm": 0.8579171895980835, "learning_rate": 4.474108307518633e-05, "loss": 9.184945106506348, "step": 4125 }, { "epoch": 0.5350493990257977, "grad_norm": 0.9286500811576843, "learning_rate": 4.472077561382075e-05, "loss": 9.74388313293457, "step": 4126 }, { "epoch": 0.5351790765340444, "grad_norm": 0.6917443871498108, "learning_rate": 4.470046903308077e-05, "loss": 7.729239463806152, "step": 4127 }, { "epoch": 0.5353087540422911, "grad_norm": 1.241034746170044, "learning_rate": 4.468016333635373e-05, "loss": 9.7440767288208, "step": 4128 }, { "epoch": 0.5354384315505377, "grad_norm": 0.7826287150382996, "learning_rate": 4.465985852702682e-05, "loss": 9.819913864135742, "step": 4129 }, { "epoch": 0.5355681090587844, "grad_norm": 0.7619661092758179, "learning_rate": 4.463955460848709e-05, "loss": 8.237598419189453, "step": 4130 }, { "epoch": 0.5356977865670312, "grad_norm": 1.0502712726593018, "learning_rate": 4.46192515841214e-05, "loss": 14.095938682556152, "step": 4131 }, { "epoch": 0.5358274640752778, "grad_norm": 0.8729004263877869, "learning_rate": 4.45989494573165e-05, "loss": 11.23515510559082, "step": 4132 }, { "epoch": 0.5359571415835245, "grad_norm": 0.9543861150741577, "learning_rate": 4.4578648231458995e-05, "loss": 8.88791561126709, "step": 4133 }, { "epoch": 0.5360868190917711, "grad_norm": 0.7596765160560608, "learning_rate": 4.455834790993531e-05, "loss": 16.301143646240234, "step": 4134 }, { "epoch": 0.5362164966000178, "grad_norm": 0.8116589188575745, "learning_rate": 4.4538048496131745e-05, "loss": 8.271319389343262, "step": 4135 }, { "epoch": 0.5363461741082645, "grad_norm": 0.8503918051719666, "learning_rate": 4.451774999343444e-05, "loss": 9.990538597106934, "step": 4136 }, { "epoch": 0.5364758516165112, "grad_norm": 1.0599592924118042, "learning_rate": 4.4497452405229365e-05, "loss": 8.667625427246094, "step": 4137 }, { "epoch": 0.5366055291247579, "grad_norm": 0.7325341701507568, "learning_rate": 4.447715573490237e-05, "loss": 8.061623573303223, "step": 4138 }, { "epoch": 0.5367352066330046, "grad_norm": 0.9693158864974976, "learning_rate": 4.445685998583913e-05, "loss": 8.15432357788086, "step": 4139 }, { "epoch": 0.5368648841412512, "grad_norm": 1.0018798112869263, "learning_rate": 4.443656516142517e-05, "loss": 10.11684799194336, "step": 4140 }, { "epoch": 0.5369945616494979, "grad_norm": 0.7525875568389893, "learning_rate": 4.441627126504587e-05, "loss": 9.767205238342285, "step": 4141 }, { "epoch": 0.5371242391577445, "grad_norm": 0.5921288132667542, "learning_rate": 4.439597830008643e-05, "loss": 7.119791507720947, "step": 4142 }, { "epoch": 0.5372539166659913, "grad_norm": 0.9788644313812256, "learning_rate": 4.4375686269931936e-05, "loss": 13.658400535583496, "step": 4143 }, { "epoch": 0.537383594174238, "grad_norm": 0.8956303596496582, "learning_rate": 4.4355395177967286e-05, "loss": 11.245291709899902, "step": 4144 }, { "epoch": 0.5375132716824846, "grad_norm": 1.261189579963684, "learning_rate": 4.433510502757722e-05, "loss": 9.476727485656738, "step": 4145 }, { "epoch": 0.5376429491907313, "grad_norm": 0.6854592561721802, "learning_rate": 4.4314815822146346e-05, "loss": 8.409154891967773, "step": 4146 }, { "epoch": 0.537772626698978, "grad_norm": 1.0352150201797485, "learning_rate": 4.429452756505908e-05, "loss": 10.858489990234375, "step": 4147 }, { "epoch": 0.5379023042072246, "grad_norm": 0.5822736024856567, "learning_rate": 4.4274240259699725e-05, "loss": 7.112534523010254, "step": 4148 }, { "epoch": 0.5380319817154713, "grad_norm": 0.8298487663269043, "learning_rate": 4.425395390945238e-05, "loss": 6.127121448516846, "step": 4149 }, { "epoch": 0.5381616592237181, "grad_norm": 0.8430182933807373, "learning_rate": 4.423366851770101e-05, "loss": 9.587291717529297, "step": 4150 }, { "epoch": 0.5382913367319647, "grad_norm": 0.8655492067337036, "learning_rate": 4.421338408782942e-05, "loss": 10.982851028442383, "step": 4151 }, { "epoch": 0.5384210142402114, "grad_norm": 0.9566348195075989, "learning_rate": 4.419310062322124e-05, "loss": 10.342745780944824, "step": 4152 }, { "epoch": 0.538550691748458, "grad_norm": 0.8778707385063171, "learning_rate": 4.417281812725994e-05, "loss": 11.380887031555176, "step": 4153 }, { "epoch": 0.5386803692567047, "grad_norm": 0.8746486306190491, "learning_rate": 4.4152536603328856e-05, "loss": 10.1268892288208, "step": 4154 }, { "epoch": 0.5388100467649514, "grad_norm": 0.9697388410568237, "learning_rate": 4.413225605481113e-05, "loss": 10.366447448730469, "step": 4155 }, { "epoch": 0.5389397242731981, "grad_norm": 1.06148099899292, "learning_rate": 4.411197648508975e-05, "loss": 12.883561134338379, "step": 4156 }, { "epoch": 0.5390694017814448, "grad_norm": 0.8772169947624207, "learning_rate": 4.409169789754756e-05, "loss": 9.36402702331543, "step": 4157 }, { "epoch": 0.5391990792896915, "grad_norm": 0.8260094523429871, "learning_rate": 4.4071420295567215e-05, "loss": 10.466312408447266, "step": 4158 }, { "epoch": 0.5393287567979381, "grad_norm": 0.9665601253509521, "learning_rate": 4.4051143682531214e-05, "loss": 12.278945922851562, "step": 4159 }, { "epoch": 0.5394584343061848, "grad_norm": 1.0343568325042725, "learning_rate": 4.403086806182189e-05, "loss": 9.055643081665039, "step": 4160 }, { "epoch": 0.5395881118144314, "grad_norm": 0.8434470295906067, "learning_rate": 4.401059343682142e-05, "loss": 12.544429779052734, "step": 4161 }, { "epoch": 0.5397177893226781, "grad_norm": 0.7310574650764465, "learning_rate": 4.399031981091179e-05, "loss": 7.967405796051025, "step": 4162 }, { "epoch": 0.5398474668309249, "grad_norm": 1.130980372428894, "learning_rate": 4.397004718747486e-05, "loss": 15.024463653564453, "step": 4163 }, { "epoch": 0.5399771443391715, "grad_norm": 0.740634560585022, "learning_rate": 4.394977556989229e-05, "loss": 8.473944664001465, "step": 4164 }, { "epoch": 0.5401068218474182, "grad_norm": 0.9599756598472595, "learning_rate": 4.392950496154557e-05, "loss": 11.825800895690918, "step": 4165 }, { "epoch": 0.5402364993556649, "grad_norm": 0.5418076515197754, "learning_rate": 4.3909235365816046e-05, "loss": 7.216375827789307, "step": 4166 }, { "epoch": 0.5403661768639115, "grad_norm": 0.835477352142334, "learning_rate": 4.388896678608488e-05, "loss": 6.997114181518555, "step": 4167 }, { "epoch": 0.5404958543721582, "grad_norm": 0.8721345067024231, "learning_rate": 4.386869922573306e-05, "loss": 8.608531951904297, "step": 4168 }, { "epoch": 0.540625531880405, "grad_norm": 0.8209207653999329, "learning_rate": 4.3848432688141424e-05, "loss": 7.864556312561035, "step": 4169 }, { "epoch": 0.5407552093886516, "grad_norm": 0.7760331630706787, "learning_rate": 4.382816717669062e-05, "loss": 10.093121528625488, "step": 4170 }, { "epoch": 0.5408848868968983, "grad_norm": 0.7642908096313477, "learning_rate": 4.380790269476112e-05, "loss": 11.166162490844727, "step": 4171 }, { "epoch": 0.541014564405145, "grad_norm": 0.9887101650238037, "learning_rate": 4.3787639245733244e-05, "loss": 8.232547760009766, "step": 4172 }, { "epoch": 0.5411442419133916, "grad_norm": 0.8210060596466064, "learning_rate": 4.376737683298715e-05, "loss": 7.883325099945068, "step": 4173 }, { "epoch": 0.5412739194216383, "grad_norm": 0.7681212425231934, "learning_rate": 4.374711545990277e-05, "loss": 10.978132247924805, "step": 4174 }, { "epoch": 0.541403596929885, "grad_norm": 1.2234928607940674, "learning_rate": 4.372685512985991e-05, "loss": 11.455957412719727, "step": 4175 }, { "epoch": 0.5415332744381317, "grad_norm": 0.5426959991455078, "learning_rate": 4.37065958462382e-05, "loss": 5.650925636291504, "step": 4176 }, { "epoch": 0.5416629519463784, "grad_norm": 1.0432028770446777, "learning_rate": 4.3686337612417076e-05, "loss": 8.82593822479248, "step": 4177 }, { "epoch": 0.541792629454625, "grad_norm": 0.7486379742622375, "learning_rate": 4.36660804317758e-05, "loss": 7.214292526245117, "step": 4178 }, { "epoch": 0.5419223069628717, "grad_norm": 1.015259027481079, "learning_rate": 4.364582430769348e-05, "loss": 12.590336799621582, "step": 4179 }, { "epoch": 0.5420519844711184, "grad_norm": 0.7511208653450012, "learning_rate": 4.3625569243549016e-05, "loss": 8.068760871887207, "step": 4180 }, { "epoch": 0.542181661979365, "grad_norm": 0.721991240978241, "learning_rate": 4.360531524272117e-05, "loss": 11.508042335510254, "step": 4181 }, { "epoch": 0.5423113394876118, "grad_norm": 0.8162355422973633, "learning_rate": 4.358506230858848e-05, "loss": 10.545246124267578, "step": 4182 }, { "epoch": 0.5424410169958584, "grad_norm": 0.8273496627807617, "learning_rate": 4.356481044452935e-05, "loss": 12.88964557647705, "step": 4183 }, { "epoch": 0.5425706945041051, "grad_norm": 0.9134483933448792, "learning_rate": 4.3544559653921976e-05, "loss": 11.737959861755371, "step": 4184 }, { "epoch": 0.5427003720123518, "grad_norm": 0.683530330657959, "learning_rate": 4.352430994014439e-05, "loss": 7.591092109680176, "step": 4185 }, { "epoch": 0.5428300495205984, "grad_norm": 0.9169346690177917, "learning_rate": 4.3504061306574437e-05, "loss": 12.478447914123535, "step": 4186 }, { "epoch": 0.5429597270288451, "grad_norm": 0.877422034740448, "learning_rate": 4.348381375658979e-05, "loss": 9.46059799194336, "step": 4187 }, { "epoch": 0.5430894045370919, "grad_norm": 0.5621384978294373, "learning_rate": 4.346356729356793e-05, "loss": 8.777799606323242, "step": 4188 }, { "epoch": 0.5432190820453385, "grad_norm": 0.6486225724220276, "learning_rate": 4.344332192088617e-05, "loss": 9.681536674499512, "step": 4189 }, { "epoch": 0.5433487595535852, "grad_norm": 0.9296668767929077, "learning_rate": 4.342307764192162e-05, "loss": 9.891663551330566, "step": 4190 }, { "epoch": 0.5434784370618319, "grad_norm": 0.9719076752662659, "learning_rate": 4.340283446005124e-05, "loss": 10.515152931213379, "step": 4191 }, { "epoch": 0.5436081145700785, "grad_norm": 0.8337421417236328, "learning_rate": 4.338259237865177e-05, "loss": 7.478322982788086, "step": 4192 }, { "epoch": 0.5437377920783252, "grad_norm": 0.7795371413230896, "learning_rate": 4.33623514010998e-05, "loss": 10.736794471740723, "step": 4193 }, { "epoch": 0.5438674695865718, "grad_norm": 0.721775233745575, "learning_rate": 4.33421115307717e-05, "loss": 9.791279792785645, "step": 4194 }, { "epoch": 0.5439971470948186, "grad_norm": 0.8461476564407349, "learning_rate": 4.332187277104369e-05, "loss": 7.583614826202393, "step": 4195 }, { "epoch": 0.5441268246030653, "grad_norm": 0.8338163495063782, "learning_rate": 4.3301635125291794e-05, "loss": 13.038569450378418, "step": 4196 }, { "epoch": 0.5442565021113119, "grad_norm": 0.7094146013259888, "learning_rate": 4.328139859689185e-05, "loss": 9.359047889709473, "step": 4197 }, { "epoch": 0.5443861796195586, "grad_norm": 0.7660896182060242, "learning_rate": 4.3261163189219494e-05, "loss": 8.644372940063477, "step": 4198 }, { "epoch": 0.5445158571278053, "grad_norm": 0.8567168712615967, "learning_rate": 4.324092890565019e-05, "loss": 9.617621421813965, "step": 4199 }, { "epoch": 0.5446455346360519, "grad_norm": 0.7760921120643616, "learning_rate": 4.322069574955921e-05, "loss": 9.711160659790039, "step": 4200 }, { "epoch": 0.5447752121442987, "grad_norm": 0.6059878468513489, "learning_rate": 4.320046372432166e-05, "loss": 7.423495769500732, "step": 4201 }, { "epoch": 0.5449048896525454, "grad_norm": 0.9323084354400635, "learning_rate": 4.318023283331241e-05, "loss": 12.334722518920898, "step": 4202 }, { "epoch": 0.545034567160792, "grad_norm": 0.967607319355011, "learning_rate": 4.3160003079906176e-05, "loss": 9.35188102722168, "step": 4203 }, { "epoch": 0.5451642446690387, "grad_norm": 1.0106085538864136, "learning_rate": 4.313977446747748e-05, "loss": 11.74159049987793, "step": 4204 }, { "epoch": 0.5452939221772853, "grad_norm": 0.714042067527771, "learning_rate": 4.3119546999400654e-05, "loss": 10.240531921386719, "step": 4205 }, { "epoch": 0.545423599685532, "grad_norm": 0.9127724170684814, "learning_rate": 4.3099320679049824e-05, "loss": 11.842456817626953, "step": 4206 }, { "epoch": 0.5455532771937788, "grad_norm": 0.7796609997749329, "learning_rate": 4.307909550979895e-05, "loss": 10.295350074768066, "step": 4207 }, { "epoch": 0.5456829547020254, "grad_norm": 0.9543867111206055, "learning_rate": 4.305887149502177e-05, "loss": 10.60958480834961, "step": 4208 }, { "epoch": 0.5458126322102721, "grad_norm": 0.8301588296890259, "learning_rate": 4.3038648638091864e-05, "loss": 9.181173324584961, "step": 4209 }, { "epoch": 0.5459423097185188, "grad_norm": 0.9484232068061829, "learning_rate": 4.301842694238257e-05, "loss": 13.251131057739258, "step": 4210 }, { "epoch": 0.5460719872267654, "grad_norm": 0.5593159198760986, "learning_rate": 4.299820641126709e-05, "loss": 6.07960844039917, "step": 4211 }, { "epoch": 0.5462016647350121, "grad_norm": 1.0507301092147827, "learning_rate": 4.2977987048118395e-05, "loss": 11.971179008483887, "step": 4212 }, { "epoch": 0.5463313422432587, "grad_norm": 0.9689366817474365, "learning_rate": 4.295776885630927e-05, "loss": 12.436474800109863, "step": 4213 }, { "epoch": 0.5464610197515055, "grad_norm": 0.788963794708252, "learning_rate": 4.29375518392123e-05, "loss": 9.279292106628418, "step": 4214 }, { "epoch": 0.5465906972597522, "grad_norm": 0.6179262399673462, "learning_rate": 4.2917336000199874e-05, "loss": 9.941976547241211, "step": 4215 }, { "epoch": 0.5467203747679988, "grad_norm": 1.1820611953735352, "learning_rate": 4.289712134264421e-05, "loss": 12.384964942932129, "step": 4216 }, { "epoch": 0.5468500522762455, "grad_norm": 0.7899194955825806, "learning_rate": 4.28769078699173e-05, "loss": 8.037558555603027, "step": 4217 }, { "epoch": 0.5469797297844922, "grad_norm": 0.875129222869873, "learning_rate": 4.285669558539093e-05, "loss": 10.330917358398438, "step": 4218 }, { "epoch": 0.5471094072927388, "grad_norm": 0.6716732978820801, "learning_rate": 4.2836484492436724e-05, "loss": 8.142512321472168, "step": 4219 }, { "epoch": 0.5472390848009856, "grad_norm": 0.6763362884521484, "learning_rate": 4.281627459442607e-05, "loss": 9.755340576171875, "step": 4220 }, { "epoch": 0.5473687623092323, "grad_norm": 0.7673466205596924, "learning_rate": 4.279606589473019e-05, "loss": 7.916284561157227, "step": 4221 }, { "epoch": 0.5474984398174789, "grad_norm": 0.7737191915512085, "learning_rate": 4.277585839672008e-05, "loss": 8.719712257385254, "step": 4222 }, { "epoch": 0.5476281173257256, "grad_norm": 0.8831226825714111, "learning_rate": 4.2755652103766554e-05, "loss": 10.190553665161133, "step": 4223 }, { "epoch": 0.5477577948339722, "grad_norm": 0.8961924910545349, "learning_rate": 4.273544701924021e-05, "loss": 10.17125415802002, "step": 4224 }, { "epoch": 0.5478874723422189, "grad_norm": 0.9474472999572754, "learning_rate": 4.2715243146511456e-05, "loss": 13.691142082214355, "step": 4225 }, { "epoch": 0.5480171498504656, "grad_norm": 0.8522559404373169, "learning_rate": 4.269504048895049e-05, "loss": 9.96664810180664, "step": 4226 }, { "epoch": 0.5481468273587123, "grad_norm": 0.6030705571174622, "learning_rate": 4.2674839049927314e-05, "loss": 6.10439920425415, "step": 4227 }, { "epoch": 0.548276504866959, "grad_norm": 0.7244048714637756, "learning_rate": 4.265463883281171e-05, "loss": 10.448070526123047, "step": 4228 }, { "epoch": 0.5484061823752057, "grad_norm": 0.7795324325561523, "learning_rate": 4.263443984097329e-05, "loss": 7.626093864440918, "step": 4229 }, { "epoch": 0.5485358598834523, "grad_norm": 0.6529316902160645, "learning_rate": 4.261424207778143e-05, "loss": 9.419718742370605, "step": 4230 }, { "epoch": 0.548665537391699, "grad_norm": 0.6498766541481018, "learning_rate": 4.259404554660531e-05, "loss": 8.393442153930664, "step": 4231 }, { "epoch": 0.5487952148999456, "grad_norm": 1.1065632104873657, "learning_rate": 4.257385025081391e-05, "loss": 8.859124183654785, "step": 4232 }, { "epoch": 0.5489248924081924, "grad_norm": 0.9986581206321716, "learning_rate": 4.255365619377597e-05, "loss": 12.599626541137695, "step": 4233 }, { "epoch": 0.5490545699164391, "grad_norm": 0.7954009175300598, "learning_rate": 4.253346337886012e-05, "loss": 11.139483451843262, "step": 4234 }, { "epoch": 0.5491842474246857, "grad_norm": 0.8016713261604309, "learning_rate": 4.2513271809434665e-05, "loss": 9.988621711730957, "step": 4235 }, { "epoch": 0.5493139249329324, "grad_norm": 0.9265700578689575, "learning_rate": 4.249308148886777e-05, "loss": 12.005020141601562, "step": 4236 }, { "epoch": 0.5494436024411791, "grad_norm": 0.8687149286270142, "learning_rate": 4.247289242052738e-05, "loss": 9.803754806518555, "step": 4237 }, { "epoch": 0.5495732799494257, "grad_norm": 1.055169939994812, "learning_rate": 4.245270460778122e-05, "loss": 10.119693756103516, "step": 4238 }, { "epoch": 0.5497029574576725, "grad_norm": 0.7650930285453796, "learning_rate": 4.2432518053996805e-05, "loss": 8.909507751464844, "step": 4239 }, { "epoch": 0.5498326349659192, "grad_norm": 0.8397281765937805, "learning_rate": 4.241233276254147e-05, "loss": 11.440773963928223, "step": 4240 }, { "epoch": 0.5499623124741658, "grad_norm": 0.7241148948669434, "learning_rate": 4.239214873678229e-05, "loss": 7.220906734466553, "step": 4241 }, { "epoch": 0.5500919899824125, "grad_norm": 0.8962618112564087, "learning_rate": 4.237196598008617e-05, "loss": 11.418717384338379, "step": 4242 }, { "epoch": 0.5502216674906591, "grad_norm": 0.7782111763954163, "learning_rate": 4.235178449581979e-05, "loss": 9.500452041625977, "step": 4243 }, { "epoch": 0.5503513449989058, "grad_norm": 1.073141098022461, "learning_rate": 4.2331604287349596e-05, "loss": 10.55008316040039, "step": 4244 }, { "epoch": 0.5504810225071525, "grad_norm": 0.5907437205314636, "learning_rate": 4.231142535804187e-05, "loss": 9.618668556213379, "step": 4245 }, { "epoch": 0.5506107000153992, "grad_norm": 0.8548005819320679, "learning_rate": 4.2291247711262634e-05, "loss": 8.552434921264648, "step": 4246 }, { "epoch": 0.5507403775236459, "grad_norm": 0.8587753772735596, "learning_rate": 4.2271071350377716e-05, "loss": 12.165016174316406, "step": 4247 }, { "epoch": 0.5508700550318926, "grad_norm": 0.8800547122955322, "learning_rate": 4.225089627875274e-05, "loss": 12.186665534973145, "step": 4248 }, { "epoch": 0.5509997325401392, "grad_norm": 1.1048436164855957, "learning_rate": 4.223072249975309e-05, "loss": 12.161852836608887, "step": 4249 }, { "epoch": 0.5511294100483859, "grad_norm": 0.9453327655792236, "learning_rate": 4.221055001674395e-05, "loss": 11.257555961608887, "step": 4250 }, { "epoch": 0.5512590875566326, "grad_norm": 0.897209882736206, "learning_rate": 4.2190378833090285e-05, "loss": 9.502913475036621, "step": 4251 }, { "epoch": 0.5513887650648793, "grad_norm": 0.8865352869033813, "learning_rate": 4.217020895215685e-05, "loss": 11.724142074584961, "step": 4252 }, { "epoch": 0.551518442573126, "grad_norm": 0.8866050839424133, "learning_rate": 4.215004037730817e-05, "loss": 9.333327293395996, "step": 4253 }, { "epoch": 0.5516481200813727, "grad_norm": 0.6906765699386597, "learning_rate": 4.212987311190856e-05, "loss": 8.219663619995117, "step": 4254 }, { "epoch": 0.5517777975896193, "grad_norm": 0.9747017621994019, "learning_rate": 4.210970715932211e-05, "loss": 12.817535400390625, "step": 4255 }, { "epoch": 0.551907475097866, "grad_norm": 0.7947919964790344, "learning_rate": 4.20895425229127e-05, "loss": 7.500373840332031, "step": 4256 }, { "epoch": 0.5520371526061126, "grad_norm": 1.07587468624115, "learning_rate": 4.2069379206043987e-05, "loss": 11.477189064025879, "step": 4257 }, { "epoch": 0.5521668301143593, "grad_norm": 0.8139875531196594, "learning_rate": 4.20492172120794e-05, "loss": 10.934128761291504, "step": 4258 }, { "epoch": 0.5522965076226061, "grad_norm": 0.8401921987533569, "learning_rate": 4.202905654438216e-05, "loss": 6.269927501678467, "step": 4259 }, { "epoch": 0.5524261851308527, "grad_norm": 0.7196791172027588, "learning_rate": 4.2008897206315266e-05, "loss": 10.408289909362793, "step": 4260 }, { "epoch": 0.5525558626390994, "grad_norm": 0.9752296805381775, "learning_rate": 4.1988739201241476e-05, "loss": 10.561073303222656, "step": 4261 }, { "epoch": 0.552685540147346, "grad_norm": 0.7373898029327393, "learning_rate": 4.196858253252336e-05, "loss": 8.587995529174805, "step": 4262 }, { "epoch": 0.5528152176555927, "grad_norm": 0.9970616698265076, "learning_rate": 4.194842720352322e-05, "loss": 10.870234489440918, "step": 4263 }, { "epoch": 0.5529448951638394, "grad_norm": 0.709791898727417, "learning_rate": 4.192827321760319e-05, "loss": 7.895356178283691, "step": 4264 }, { "epoch": 0.5530745726720862, "grad_norm": 1.2561283111572266, "learning_rate": 4.1908120578125114e-05, "loss": 13.932167053222656, "step": 4265 }, { "epoch": 0.5532042501803328, "grad_norm": 1.0351753234863281, "learning_rate": 4.188796928845068e-05, "loss": 12.604715347290039, "step": 4266 }, { "epoch": 0.5533339276885795, "grad_norm": 0.8745814561843872, "learning_rate": 4.186781935194129e-05, "loss": 12.741762161254883, "step": 4267 }, { "epoch": 0.5534636051968261, "grad_norm": 0.8406123518943787, "learning_rate": 4.184767077195817e-05, "loss": 9.329827308654785, "step": 4268 }, { "epoch": 0.5535932827050728, "grad_norm": 0.9936078786849976, "learning_rate": 4.182752355186229e-05, "loss": 12.072701454162598, "step": 4269 }, { "epoch": 0.5537229602133195, "grad_norm": 1.303184986114502, "learning_rate": 4.180737769501439e-05, "loss": 14.503111839294434, "step": 4270 }, { "epoch": 0.5538526377215662, "grad_norm": 0.8113439083099365, "learning_rate": 4.1787233204775e-05, "loss": 8.470870971679688, "step": 4271 }, { "epoch": 0.5539823152298129, "grad_norm": 0.783501148223877, "learning_rate": 4.176709008450443e-05, "loss": 8.786630630493164, "step": 4272 }, { "epoch": 0.5541119927380596, "grad_norm": 0.8099730014801025, "learning_rate": 4.174694833756274e-05, "loss": 11.169042587280273, "step": 4273 }, { "epoch": 0.5542416702463062, "grad_norm": 0.7578005194664001, "learning_rate": 4.172680796730975e-05, "loss": 7.493168830871582, "step": 4274 }, { "epoch": 0.5543713477545529, "grad_norm": 0.9463997483253479, "learning_rate": 4.170666897710509e-05, "loss": 8.921014785766602, "step": 4275 }, { "epoch": 0.5545010252627995, "grad_norm": 0.6902334094047546, "learning_rate": 4.168653137030813e-05, "loss": 7.527994632720947, "step": 4276 }, { "epoch": 0.5546307027710462, "grad_norm": 0.6549860239028931, "learning_rate": 4.1666395150278015e-05, "loss": 9.402437210083008, "step": 4277 }, { "epoch": 0.554760380279293, "grad_norm": 1.0126174688339233, "learning_rate": 4.164626032037366e-05, "loss": 11.116616249084473, "step": 4278 }, { "epoch": 0.5548900577875396, "grad_norm": 0.6722534894943237, "learning_rate": 4.162612688395376e-05, "loss": 8.454630851745605, "step": 4279 }, { "epoch": 0.5550197352957863, "grad_norm": 0.8214412927627563, "learning_rate": 4.160599484437675e-05, "loss": 8.70781135559082, "step": 4280 }, { "epoch": 0.555149412804033, "grad_norm": 0.7163363695144653, "learning_rate": 4.1585864205000865e-05, "loss": 9.130846977233887, "step": 4281 }, { "epoch": 0.5552790903122796, "grad_norm": 0.7247523665428162, "learning_rate": 4.156573496918408e-05, "loss": 8.581188201904297, "step": 4282 }, { "epoch": 0.5554087678205263, "grad_norm": 0.6907714009284973, "learning_rate": 4.154560714028414e-05, "loss": 9.823230743408203, "step": 4283 }, { "epoch": 0.5555384453287731, "grad_norm": 0.8455949425697327, "learning_rate": 4.152548072165858e-05, "loss": 7.0166401863098145, "step": 4284 }, { "epoch": 0.5556681228370197, "grad_norm": 1.1133743524551392, "learning_rate": 4.1505355716664664e-05, "loss": 10.957141876220703, "step": 4285 }, { "epoch": 0.5557978003452664, "grad_norm": 1.0916470289230347, "learning_rate": 4.148523212865945e-05, "loss": 10.819197654724121, "step": 4286 }, { "epoch": 0.555927477853513, "grad_norm": 0.8487904071807861, "learning_rate": 4.1465109960999735e-05, "loss": 10.450043678283691, "step": 4287 }, { "epoch": 0.5560571553617597, "grad_norm": 1.0034055709838867, "learning_rate": 4.144498921704209e-05, "loss": 11.028993606567383, "step": 4288 }, { "epoch": 0.5561868328700064, "grad_norm": 0.7852100729942322, "learning_rate": 4.1424869900142874e-05, "loss": 8.160137176513672, "step": 4289 }, { "epoch": 0.5563165103782531, "grad_norm": 1.0054627656936646, "learning_rate": 4.140475201365815e-05, "loss": 11.9576416015625, "step": 4290 }, { "epoch": 0.5564461878864998, "grad_norm": 0.7764672040939331, "learning_rate": 4.1384635560943794e-05, "loss": 10.466963768005371, "step": 4291 }, { "epoch": 0.5565758653947465, "grad_norm": 0.7839481830596924, "learning_rate": 4.136452054535542e-05, "loss": 9.816695213317871, "step": 4292 }, { "epoch": 0.5567055429029931, "grad_norm": 0.9878373742103577, "learning_rate": 4.134440697024841e-05, "loss": 10.656944274902344, "step": 4293 }, { "epoch": 0.5568352204112398, "grad_norm": 1.0321317911148071, "learning_rate": 4.13242948389779e-05, "loss": 11.054817199707031, "step": 4294 }, { "epoch": 0.5569648979194864, "grad_norm": 1.2020366191864014, "learning_rate": 4.130418415489879e-05, "loss": 12.307039260864258, "step": 4295 }, { "epoch": 0.5570945754277331, "grad_norm": 0.9707954525947571, "learning_rate": 4.1284074921365735e-05, "loss": 9.33927059173584, "step": 4296 }, { "epoch": 0.5572242529359799, "grad_norm": 0.9998613595962524, "learning_rate": 4.126396714173315e-05, "loss": 9.148552894592285, "step": 4297 }, { "epoch": 0.5573539304442265, "grad_norm": 0.8601602911949158, "learning_rate": 4.12438608193552e-05, "loss": 10.036905288696289, "step": 4298 }, { "epoch": 0.5574836079524732, "grad_norm": 1.0426751375198364, "learning_rate": 4.1223755957585824e-05, "loss": 10.584796905517578, "step": 4299 }, { "epoch": 0.5576132854607199, "grad_norm": 0.7825103402137756, "learning_rate": 4.1203652559778706e-05, "loss": 8.596038818359375, "step": 4300 }, { "epoch": 0.5577429629689665, "grad_norm": 0.6682544350624084, "learning_rate": 4.118355062928728e-05, "loss": 10.242456436157227, "step": 4301 }, { "epoch": 0.5578726404772132, "grad_norm": 0.6213396787643433, "learning_rate": 4.116345016946474e-05, "loss": 10.395064353942871, "step": 4302 }, { "epoch": 0.55800231798546, "grad_norm": 0.7842866778373718, "learning_rate": 4.114335118366405e-05, "loss": 9.275668144226074, "step": 4303 }, { "epoch": 0.5581319954937066, "grad_norm": 0.7747189402580261, "learning_rate": 4.112325367523792e-05, "loss": 10.26329231262207, "step": 4304 }, { "epoch": 0.5582616730019533, "grad_norm": 1.0027974843978882, "learning_rate": 4.110315764753878e-05, "loss": 9.113391876220703, "step": 4305 }, { "epoch": 0.5583913505102, "grad_norm": 1.1175638437271118, "learning_rate": 4.108306310391885e-05, "loss": 15.544465065002441, "step": 4306 }, { "epoch": 0.5585210280184466, "grad_norm": 0.5785732865333557, "learning_rate": 4.106297004773011e-05, "loss": 4.660335540771484, "step": 4307 }, { "epoch": 0.5586507055266933, "grad_norm": 1.0150699615478516, "learning_rate": 4.104287848232426e-05, "loss": 11.559051513671875, "step": 4308 }, { "epoch": 0.5587803830349399, "grad_norm": 1.1352323293685913, "learning_rate": 4.102278841105279e-05, "loss": 10.86176586151123, "step": 4309 }, { "epoch": 0.5589100605431867, "grad_norm": 0.8402100205421448, "learning_rate": 4.1002699837266864e-05, "loss": 8.841367721557617, "step": 4310 }, { "epoch": 0.5590397380514334, "grad_norm": 1.166971206665039, "learning_rate": 4.098261276431752e-05, "loss": 13.001623153686523, "step": 4311 }, { "epoch": 0.55916941555968, "grad_norm": 0.9603792428970337, "learning_rate": 4.0962527195555445e-05, "loss": 10.923389434814453, "step": 4312 }, { "epoch": 0.5592990930679267, "grad_norm": 0.8446981906890869, "learning_rate": 4.09424431343311e-05, "loss": 10.462714195251465, "step": 4313 }, { "epoch": 0.5594287705761734, "grad_norm": 0.8465525507926941, "learning_rate": 4.09223605839947e-05, "loss": 10.686680793762207, "step": 4314 }, { "epoch": 0.55955844808442, "grad_norm": 0.8028733134269714, "learning_rate": 4.090227954789622e-05, "loss": 9.78139877319336, "step": 4315 }, { "epoch": 0.5596881255926668, "grad_norm": 0.8902069330215454, "learning_rate": 4.088220002938536e-05, "loss": 10.098655700683594, "step": 4316 }, { "epoch": 0.5598178031009134, "grad_norm": 0.8348515629768372, "learning_rate": 4.086212203181159e-05, "loss": 10.214326858520508, "step": 4317 }, { "epoch": 0.5599474806091601, "grad_norm": 0.6849381923675537, "learning_rate": 4.08420455585241e-05, "loss": 7.6915082931518555, "step": 4318 }, { "epoch": 0.5600771581174068, "grad_norm": 0.8238711357116699, "learning_rate": 4.082197061287185e-05, "loss": 10.615055084228516, "step": 4319 }, { "epoch": 0.5602068356256534, "grad_norm": 0.5149873495101929, "learning_rate": 4.080189719820352e-05, "loss": 6.197465896606445, "step": 4320 }, { "epoch": 0.5603365131339001, "grad_norm": 0.8077370524406433, "learning_rate": 4.078182531786757e-05, "loss": 11.96178913116455, "step": 4321 }, { "epoch": 0.5604661906421469, "grad_norm": 0.6626466512680054, "learning_rate": 4.076175497521216e-05, "loss": 8.924748420715332, "step": 4322 }, { "epoch": 0.5605958681503935, "grad_norm": 0.7581265568733215, "learning_rate": 4.074168617358524e-05, "loss": 8.21643352508545, "step": 4323 }, { "epoch": 0.5607255456586402, "grad_norm": 0.8748102188110352, "learning_rate": 4.072161891633447e-05, "loss": 11.721465110778809, "step": 4324 }, { "epoch": 0.5608552231668869, "grad_norm": 1.0983248949050903, "learning_rate": 4.0701553206807256e-05, "loss": 12.881925582885742, "step": 4325 }, { "epoch": 0.5609849006751335, "grad_norm": 0.6888283491134644, "learning_rate": 4.0681489048350765e-05, "loss": 11.111605644226074, "step": 4326 }, { "epoch": 0.5611145781833802, "grad_norm": 0.849455714225769, "learning_rate": 4.066142644431189e-05, "loss": 8.176647186279297, "step": 4327 }, { "epoch": 0.5612442556916268, "grad_norm": 0.9270704388618469, "learning_rate": 4.064136539803726e-05, "loss": 9.066597938537598, "step": 4328 }, { "epoch": 0.5613739331998736, "grad_norm": 1.1381683349609375, "learning_rate": 4.062130591287325e-05, "loss": 9.889464378356934, "step": 4329 }, { "epoch": 0.5615036107081203, "grad_norm": 0.6885874271392822, "learning_rate": 4.060124799216597e-05, "loss": 9.468043327331543, "step": 4330 }, { "epoch": 0.5616332882163669, "grad_norm": 0.6924487352371216, "learning_rate": 4.0581191639261294e-05, "loss": 9.251781463623047, "step": 4331 }, { "epoch": 0.5617629657246136, "grad_norm": 1.1382280588150024, "learning_rate": 4.0561136857504814e-05, "loss": 10.294190406799316, "step": 4332 }, { "epoch": 0.5618926432328603, "grad_norm": 0.6056285500526428, "learning_rate": 4.054108365024184e-05, "loss": 9.081460952758789, "step": 4333 }, { "epoch": 0.5620223207411069, "grad_norm": 0.8792160749435425, "learning_rate": 4.0521032020817456e-05, "loss": 12.53713321685791, "step": 4334 }, { "epoch": 0.5621519982493537, "grad_norm": 0.8399565815925598, "learning_rate": 4.050098197257647e-05, "loss": 9.157991409301758, "step": 4335 }, { "epoch": 0.5622816757576004, "grad_norm": 0.8406895399093628, "learning_rate": 4.0480933508863416e-05, "loss": 11.162907600402832, "step": 4336 }, { "epoch": 0.562411353265847, "grad_norm": 0.7764071226119995, "learning_rate": 4.046088663302257e-05, "loss": 8.026854515075684, "step": 4337 }, { "epoch": 0.5625410307740937, "grad_norm": 0.8931798338890076, "learning_rate": 4.044084134839796e-05, "loss": 8.173171043395996, "step": 4338 }, { "epoch": 0.5626707082823403, "grad_norm": 0.7562678456306458, "learning_rate": 4.042079765833331e-05, "loss": 7.471691131591797, "step": 4339 }, { "epoch": 0.562800385790587, "grad_norm": 0.847344160079956, "learning_rate": 4.0400755566172114e-05, "loss": 8.230305671691895, "step": 4340 }, { "epoch": 0.5629300632988337, "grad_norm": 0.8186734318733215, "learning_rate": 4.038071507525759e-05, "loss": 8.366915702819824, "step": 4341 }, { "epoch": 0.5630597408070804, "grad_norm": 0.7195960283279419, "learning_rate": 4.036067618893269e-05, "loss": 8.634562492370605, "step": 4342 }, { "epoch": 0.5631894183153271, "grad_norm": 0.8886789083480835, "learning_rate": 4.0340638910540064e-05, "loss": 9.614042282104492, "step": 4343 }, { "epoch": 0.5633190958235738, "grad_norm": 0.7171433568000793, "learning_rate": 4.0320603243422154e-05, "loss": 10.10730266571045, "step": 4344 }, { "epoch": 0.5634487733318204, "grad_norm": 1.2607769966125488, "learning_rate": 4.03005691909211e-05, "loss": 12.213042259216309, "step": 4345 }, { "epoch": 0.5635784508400671, "grad_norm": 0.8558400273323059, "learning_rate": 4.028053675637876e-05, "loss": 10.45974349975586, "step": 4346 }, { "epoch": 0.5637081283483137, "grad_norm": 0.9039463400840759, "learning_rate": 4.026050594313674e-05, "loss": 9.696287155151367, "step": 4347 }, { "epoch": 0.5638378058565605, "grad_norm": 0.9939180016517639, "learning_rate": 4.024047675453636e-05, "loss": 10.909323692321777, "step": 4348 }, { "epoch": 0.5639674833648072, "grad_norm": 0.8161842226982117, "learning_rate": 4.022044919391873e-05, "loss": 8.526534080505371, "step": 4349 }, { "epoch": 0.5640971608730538, "grad_norm": 0.7842848300933838, "learning_rate": 4.020042326462462e-05, "loss": 9.599444389343262, "step": 4350 }, { "epoch": 0.5642268383813005, "grad_norm": 0.7535830736160278, "learning_rate": 4.0180398969994525e-05, "loss": 6.948762893676758, "step": 4351 }, { "epoch": 0.5643565158895472, "grad_norm": 0.730209231376648, "learning_rate": 4.0160376313368706e-05, "loss": 10.099609375, "step": 4352 }, { "epoch": 0.5644861933977938, "grad_norm": 0.9033828377723694, "learning_rate": 4.014035529808714e-05, "loss": 9.043560028076172, "step": 4353 }, { "epoch": 0.5646158709060406, "grad_norm": 0.9631759524345398, "learning_rate": 4.0120335927489515e-05, "loss": 13.276988983154297, "step": 4354 }, { "epoch": 0.5647455484142873, "grad_norm": 1.1376947164535522, "learning_rate": 4.010031820491526e-05, "loss": 12.590937614440918, "step": 4355 }, { "epoch": 0.5648752259225339, "grad_norm": 0.74467933177948, "learning_rate": 4.008030213370353e-05, "loss": 7.395541191101074, "step": 4356 }, { "epoch": 0.5650049034307806, "grad_norm": 0.7208897471427917, "learning_rate": 4.006028771719319e-05, "loss": 11.231086730957031, "step": 4357 }, { "epoch": 0.5651345809390272, "grad_norm": 0.5411869883537292, "learning_rate": 4.0040274958722846e-05, "loss": 6.8642578125, "step": 4358 }, { "epoch": 0.5652642584472739, "grad_norm": 0.6570701003074646, "learning_rate": 4.0020263861630814e-05, "loss": 6.50565767288208, "step": 4359 }, { "epoch": 0.5653939359555206, "grad_norm": 0.956269383430481, "learning_rate": 4.000025442925514e-05, "loss": 11.57127571105957, "step": 4360 }, { "epoch": 0.5655236134637673, "grad_norm": 0.6578161120414734, "learning_rate": 3.9980246664933596e-05, "loss": 7.714093208312988, "step": 4361 }, { "epoch": 0.565653290972014, "grad_norm": 0.9186057448387146, "learning_rate": 3.996024057200365e-05, "loss": 11.090970993041992, "step": 4362 }, { "epoch": 0.5657829684802607, "grad_norm": 0.9686712622642517, "learning_rate": 3.994023615380254e-05, "loss": 9.6542329788208, "step": 4363 }, { "epoch": 0.5659126459885073, "grad_norm": 0.787592887878418, "learning_rate": 3.992023341366718e-05, "loss": 8.320466995239258, "step": 4364 }, { "epoch": 0.566042323496754, "grad_norm": 1.2077947854995728, "learning_rate": 3.990023235493422e-05, "loss": 12.83811092376709, "step": 4365 }, { "epoch": 0.5661720010050006, "grad_norm": 0.8831261396408081, "learning_rate": 3.988023298094003e-05, "loss": 7.975433826446533, "step": 4366 }, { "epoch": 0.5663016785132474, "grad_norm": 0.9060620665550232, "learning_rate": 3.9860235295020706e-05, "loss": 9.805030822753906, "step": 4367 }, { "epoch": 0.5664313560214941, "grad_norm": 0.8860798478126526, "learning_rate": 3.984023930051205e-05, "loss": 11.268105506896973, "step": 4368 }, { "epoch": 0.5665610335297407, "grad_norm": 0.8664879202842712, "learning_rate": 3.982024500074958e-05, "loss": 8.763129234313965, "step": 4369 }, { "epoch": 0.5666907110379874, "grad_norm": 1.0485018491744995, "learning_rate": 3.980025239906855e-05, "loss": 12.544586181640625, "step": 4370 }, { "epoch": 0.5668203885462341, "grad_norm": 0.8282836675643921, "learning_rate": 3.9780261498803916e-05, "loss": 9.55185317993164, "step": 4371 }, { "epoch": 0.5669500660544807, "grad_norm": 0.5380795001983643, "learning_rate": 3.976027230329035e-05, "loss": 6.910245895385742, "step": 4372 }, { "epoch": 0.5670797435627274, "grad_norm": 0.9265869855880737, "learning_rate": 3.974028481586224e-05, "loss": 10.489418029785156, "step": 4373 }, { "epoch": 0.5672094210709742, "grad_norm": 0.696131706237793, "learning_rate": 3.9720299039853704e-05, "loss": 8.613557815551758, "step": 4374 }, { "epoch": 0.5673390985792208, "grad_norm": 0.9591232538223267, "learning_rate": 3.970031497859855e-05, "loss": 9.266472816467285, "step": 4375 }, { "epoch": 0.5674687760874675, "grad_norm": 0.7220364212989807, "learning_rate": 3.968033263543032e-05, "loss": 6.888695240020752, "step": 4376 }, { "epoch": 0.5675984535957141, "grad_norm": 1.1378744840621948, "learning_rate": 3.966035201368226e-05, "loss": 13.376988410949707, "step": 4377 }, { "epoch": 0.5677281311039608, "grad_norm": 0.8182876110076904, "learning_rate": 3.964037311668733e-05, "loss": 10.238015174865723, "step": 4378 }, { "epoch": 0.5678578086122075, "grad_norm": 0.7682612538337708, "learning_rate": 3.9620395947778196e-05, "loss": 11.525018692016602, "step": 4379 }, { "epoch": 0.5679874861204542, "grad_norm": 1.1208736896514893, "learning_rate": 3.960042051028725e-05, "loss": 11.218973159790039, "step": 4380 }, { "epoch": 0.5681171636287009, "grad_norm": 0.8294427394866943, "learning_rate": 3.958044680754659e-05, "loss": 9.883021354675293, "step": 4381 }, { "epoch": 0.5682468411369476, "grad_norm": 0.7615907192230225, "learning_rate": 3.9560474842888026e-05, "loss": 10.722258567810059, "step": 4382 }, { "epoch": 0.5683765186451942, "grad_norm": 0.9033230543136597, "learning_rate": 3.954050461964306e-05, "loss": 9.984125137329102, "step": 4383 }, { "epoch": 0.5685061961534409, "grad_norm": 1.0279399156570435, "learning_rate": 3.952053614114293e-05, "loss": 11.333868980407715, "step": 4384 }, { "epoch": 0.5686358736616876, "grad_norm": 0.9017779231071472, "learning_rate": 3.9500569410718566e-05, "loss": 10.211834907531738, "step": 4385 }, { "epoch": 0.5687655511699343, "grad_norm": 0.8194003701210022, "learning_rate": 3.948060443170061e-05, "loss": 11.803415298461914, "step": 4386 }, { "epoch": 0.568895228678181, "grad_norm": 0.8234769105911255, "learning_rate": 3.94606412074194e-05, "loss": 10.243700981140137, "step": 4387 }, { "epoch": 0.5690249061864276, "grad_norm": 0.6823790073394775, "learning_rate": 3.9440679741205036e-05, "loss": 9.772153854370117, "step": 4388 }, { "epoch": 0.5691545836946743, "grad_norm": 0.9709606766700745, "learning_rate": 3.942072003638725e-05, "loss": 14.10621166229248, "step": 4389 }, { "epoch": 0.569284261202921, "grad_norm": 0.7609313726425171, "learning_rate": 3.9400762096295517e-05, "loss": 9.935787200927734, "step": 4390 }, { "epoch": 0.5694139387111676, "grad_norm": 0.6723366975784302, "learning_rate": 3.938080592425903e-05, "loss": 9.581403732299805, "step": 4391 }, { "epoch": 0.5695436162194143, "grad_norm": 1.0831608772277832, "learning_rate": 3.9360851523606654e-05, "loss": 10.119475364685059, "step": 4392 }, { "epoch": 0.5696732937276611, "grad_norm": 1.1631115674972534, "learning_rate": 3.934089889766698e-05, "loss": 12.140610694885254, "step": 4393 }, { "epoch": 0.5698029712359077, "grad_norm": 0.8715519309043884, "learning_rate": 3.93209480497683e-05, "loss": 11.492111206054688, "step": 4394 }, { "epoch": 0.5699326487441544, "grad_norm": 0.8741035461425781, "learning_rate": 3.9300998983238606e-05, "loss": 9.721247673034668, "step": 4395 }, { "epoch": 0.570062326252401, "grad_norm": 0.6711577773094177, "learning_rate": 3.92810517014056e-05, "loss": 7.43397331237793, "step": 4396 }, { "epoch": 0.5701920037606477, "grad_norm": 0.7524131536483765, "learning_rate": 3.926110620759668e-05, "loss": 9.263747215270996, "step": 4397 }, { "epoch": 0.5703216812688944, "grad_norm": 0.6617199182510376, "learning_rate": 3.924116250513894e-05, "loss": 9.998286247253418, "step": 4398 }, { "epoch": 0.5704513587771411, "grad_norm": 0.999663770198822, "learning_rate": 3.9221220597359196e-05, "loss": 8.275476455688477, "step": 4399 }, { "epoch": 0.5705810362853878, "grad_norm": 0.7286233305931091, "learning_rate": 3.9201280487583944e-05, "loss": 10.243305206298828, "step": 4400 }, { "epoch": 0.5707107137936345, "grad_norm": 0.7383270859718323, "learning_rate": 3.9181342179139376e-05, "loss": 7.349791526794434, "step": 4401 }, { "epoch": 0.5708403913018811, "grad_norm": 0.745418131351471, "learning_rate": 3.9161405675351405e-05, "loss": 7.625733852386475, "step": 4402 }, { "epoch": 0.5709700688101278, "grad_norm": 0.8358169794082642, "learning_rate": 3.914147097954564e-05, "loss": 12.542442321777344, "step": 4403 }, { "epoch": 0.5710997463183745, "grad_norm": 0.8033991456031799, "learning_rate": 3.912153809504736e-05, "loss": 10.812435150146484, "step": 4404 }, { "epoch": 0.5712294238266211, "grad_norm": 0.8079754710197449, "learning_rate": 3.910160702518158e-05, "loss": 11.017923355102539, "step": 4405 }, { "epoch": 0.5713591013348679, "grad_norm": 0.8051195740699768, "learning_rate": 3.9081677773272985e-05, "loss": 9.511366844177246, "step": 4406 }, { "epoch": 0.5714887788431146, "grad_norm": 0.759467363357544, "learning_rate": 3.906175034264598e-05, "loss": 7.142051696777344, "step": 4407 }, { "epoch": 0.5716184563513612, "grad_norm": 0.8029176592826843, "learning_rate": 3.904182473662463e-05, "loss": 9.071236610412598, "step": 4408 }, { "epoch": 0.5717481338596079, "grad_norm": 0.6421509981155396, "learning_rate": 3.902190095853274e-05, "loss": 7.492922306060791, "step": 4409 }, { "epoch": 0.5718778113678545, "grad_norm": 0.8266140818595886, "learning_rate": 3.9001979011693784e-05, "loss": 12.807830810546875, "step": 4410 }, { "epoch": 0.5720074888761012, "grad_norm": 1.1741199493408203, "learning_rate": 3.8982058899430926e-05, "loss": 8.60164737701416, "step": 4411 }, { "epoch": 0.572137166384348, "grad_norm": 1.0088045597076416, "learning_rate": 3.896214062506704e-05, "loss": 7.727224349975586, "step": 4412 }, { "epoch": 0.5722668438925946, "grad_norm": 1.0135927200317383, "learning_rate": 3.89422241919247e-05, "loss": 11.332080841064453, "step": 4413 }, { "epoch": 0.5723965214008413, "grad_norm": 0.8497774600982666, "learning_rate": 3.8922309603326135e-05, "loss": 9.341963768005371, "step": 4414 }, { "epoch": 0.572526198909088, "grad_norm": 0.889819324016571, "learning_rate": 3.8902396862593306e-05, "loss": 12.527583122253418, "step": 4415 }, { "epoch": 0.5726558764173346, "grad_norm": 0.667293906211853, "learning_rate": 3.888248597304783e-05, "loss": 7.687584400177002, "step": 4416 }, { "epoch": 0.5727855539255813, "grad_norm": 0.7721832394599915, "learning_rate": 3.8862576938011066e-05, "loss": 8.837607383728027, "step": 4417 }, { "epoch": 0.572915231433828, "grad_norm": 0.9898870587348938, "learning_rate": 3.8842669760804006e-05, "loss": 10.488162994384766, "step": 4418 }, { "epoch": 0.5730449089420747, "grad_norm": 0.7006867527961731, "learning_rate": 3.882276444474737e-05, "loss": 8.692145347595215, "step": 4419 }, { "epoch": 0.5731745864503214, "grad_norm": 0.9045264720916748, "learning_rate": 3.8802860993161565e-05, "loss": 9.367941856384277, "step": 4420 }, { "epoch": 0.573304263958568, "grad_norm": 0.8656830787658691, "learning_rate": 3.878295940936667e-05, "loss": 9.389113426208496, "step": 4421 }, { "epoch": 0.5734339414668147, "grad_norm": 0.9118683934211731, "learning_rate": 3.876305969668247e-05, "loss": 7.716753005981445, "step": 4422 }, { "epoch": 0.5735636189750614, "grad_norm": 0.8468695878982544, "learning_rate": 3.874316185842842e-05, "loss": 11.360467910766602, "step": 4423 }, { "epoch": 0.573693296483308, "grad_norm": 1.1470967531204224, "learning_rate": 3.872326589792367e-05, "loss": 11.708464622497559, "step": 4424 }, { "epoch": 0.5738229739915548, "grad_norm": 0.7538755536079407, "learning_rate": 3.8703371818487036e-05, "loss": 9.938854217529297, "step": 4425 }, { "epoch": 0.5739526514998015, "grad_norm": 0.9050289988517761, "learning_rate": 3.86834796234371e-05, "loss": 8.424776077270508, "step": 4426 }, { "epoch": 0.5740823290080481, "grad_norm": 0.7095338702201843, "learning_rate": 3.866358931609204e-05, "loss": 7.9629225730896, "step": 4427 }, { "epoch": 0.5742120065162948, "grad_norm": 1.1682720184326172, "learning_rate": 3.864370089976975e-05, "loss": 11.86011791229248, "step": 4428 }, { "epoch": 0.5743416840245414, "grad_norm": 1.0843032598495483, "learning_rate": 3.862381437778782e-05, "loss": 12.261153221130371, "step": 4429 }, { "epoch": 0.5744713615327881, "grad_norm": 1.0451878309249878, "learning_rate": 3.86039297534635e-05, "loss": 10.73867416381836, "step": 4430 }, { "epoch": 0.5746010390410349, "grad_norm": 0.8243438601493835, "learning_rate": 3.8584047030113746e-05, "loss": 8.549515724182129, "step": 4431 }, { "epoch": 0.5747307165492815, "grad_norm": 0.9631202816963196, "learning_rate": 3.8564166211055185e-05, "loss": 9.716922760009766, "step": 4432 }, { "epoch": 0.5748603940575282, "grad_norm": 0.6383441686630249, "learning_rate": 3.854428729960413e-05, "loss": 8.191244125366211, "step": 4433 }, { "epoch": 0.5749900715657749, "grad_norm": 1.1492366790771484, "learning_rate": 3.852441029907658e-05, "loss": 12.546659469604492, "step": 4434 }, { "epoch": 0.5751197490740215, "grad_norm": 1.0545732975006104, "learning_rate": 3.850453521278821e-05, "loss": 9.691804885864258, "step": 4435 }, { "epoch": 0.5752494265822682, "grad_norm": 1.0027871131896973, "learning_rate": 3.848466204405438e-05, "loss": 9.236687660217285, "step": 4436 }, { "epoch": 0.5753791040905148, "grad_norm": 0.7511997222900391, "learning_rate": 3.8464790796190104e-05, "loss": 13.040681838989258, "step": 4437 }, { "epoch": 0.5755087815987616, "grad_norm": 1.0285121202468872, "learning_rate": 3.844492147251011e-05, "loss": 9.682022094726562, "step": 4438 }, { "epoch": 0.5756384591070083, "grad_norm": 0.9336590766906738, "learning_rate": 3.842505407632881e-05, "loss": 9.245157241821289, "step": 4439 }, { "epoch": 0.5757681366152549, "grad_norm": 0.8590396046638489, "learning_rate": 3.840518861096026e-05, "loss": 11.780939102172852, "step": 4440 }, { "epoch": 0.5758978141235016, "grad_norm": 0.811209499835968, "learning_rate": 3.8385325079718216e-05, "loss": 8.713112831115723, "step": 4441 }, { "epoch": 0.5760274916317483, "grad_norm": 1.01600182056427, "learning_rate": 3.8365463485916106e-05, "loss": 10.663003921508789, "step": 4442 }, { "epoch": 0.5761571691399949, "grad_norm": 1.0124675035476685, "learning_rate": 3.8345603832867035e-05, "loss": 10.134322166442871, "step": 4443 }, { "epoch": 0.5762868466482417, "grad_norm": 0.6564371585845947, "learning_rate": 3.832574612388378e-05, "loss": 8.33043384552002, "step": 4444 }, { "epoch": 0.5764165241564884, "grad_norm": 0.8493540287017822, "learning_rate": 3.830589036227881e-05, "loss": 7.729111671447754, "step": 4445 }, { "epoch": 0.576546201664735, "grad_norm": 0.8293138146400452, "learning_rate": 3.8286036551364256e-05, "loss": 12.283899307250977, "step": 4446 }, { "epoch": 0.5766758791729817, "grad_norm": 0.9419121146202087, "learning_rate": 3.826618469445192e-05, "loss": 10.612476348876953, "step": 4447 }, { "epoch": 0.5768055566812283, "grad_norm": 0.8954209089279175, "learning_rate": 3.8246334794853285e-05, "loss": 9.928788185119629, "step": 4448 }, { "epoch": 0.576935234189475, "grad_norm": 1.179291844367981, "learning_rate": 3.822648685587951e-05, "loss": 11.772513389587402, "step": 4449 }, { "epoch": 0.5770649116977218, "grad_norm": 0.8511631488800049, "learning_rate": 3.820664088084141e-05, "loss": 11.4068021774292, "step": 4450 }, { "epoch": 0.5771945892059684, "grad_norm": 0.6072134375572205, "learning_rate": 3.8186796873049504e-05, "loss": 6.026877403259277, "step": 4451 }, { "epoch": 0.5773242667142151, "grad_norm": 0.9626163244247437, "learning_rate": 3.8166954835813945e-05, "loss": 13.944426536560059, "step": 4452 }, { "epoch": 0.5774539442224618, "grad_norm": 0.6847878098487854, "learning_rate": 3.814711477244459e-05, "loss": 8.805712699890137, "step": 4453 }, { "epoch": 0.5775836217307084, "grad_norm": 0.6536785364151001, "learning_rate": 3.812727668625095e-05, "loss": 6.535151481628418, "step": 4454 }, { "epoch": 0.5777132992389551, "grad_norm": 0.8185533881187439, "learning_rate": 3.810744058054221e-05, "loss": 10.335628509521484, "step": 4455 }, { "epoch": 0.5778429767472018, "grad_norm": 1.0912141799926758, "learning_rate": 3.808760645862722e-05, "loss": 8.487483978271484, "step": 4456 }, { "epoch": 0.5779726542554485, "grad_norm": 0.761163592338562, "learning_rate": 3.8067774323814506e-05, "loss": 9.183295249938965, "step": 4457 }, { "epoch": 0.5781023317636952, "grad_norm": 1.0296908617019653, "learning_rate": 3.8047944179412263e-05, "loss": 10.919387817382812, "step": 4458 }, { "epoch": 0.5782320092719418, "grad_norm": 0.8577470183372498, "learning_rate": 3.802811602872834e-05, "loss": 8.626578330993652, "step": 4459 }, { "epoch": 0.5783616867801885, "grad_norm": 0.7257238626480103, "learning_rate": 3.800828987507028e-05, "loss": 5.790417671203613, "step": 4460 }, { "epoch": 0.5784913642884352, "grad_norm": 1.5387362241744995, "learning_rate": 3.7988465721745256e-05, "loss": 9.936014175415039, "step": 4461 }, { "epoch": 0.5786210417966818, "grad_norm": 0.7220943570137024, "learning_rate": 3.7968643572060146e-05, "loss": 7.418170928955078, "step": 4462 }, { "epoch": 0.5787507193049286, "grad_norm": 0.8940865397453308, "learning_rate": 3.794882342932147e-05, "loss": 10.351653099060059, "step": 4463 }, { "epoch": 0.5788803968131753, "grad_norm": 0.9114300012588501, "learning_rate": 3.792900529683541e-05, "loss": 9.396513938903809, "step": 4464 }, { "epoch": 0.5790100743214219, "grad_norm": 0.6481062769889832, "learning_rate": 3.790918917790784e-05, "loss": 9.404291152954102, "step": 4465 }, { "epoch": 0.5791397518296686, "grad_norm": 0.9263882040977478, "learning_rate": 3.788937507584427e-05, "loss": 10.629740715026855, "step": 4466 }, { "epoch": 0.5792694293379153, "grad_norm": 0.8742741346359253, "learning_rate": 3.786956299394988e-05, "loss": 9.480243682861328, "step": 4467 }, { "epoch": 0.5793991068461619, "grad_norm": 0.8035379648208618, "learning_rate": 3.784975293552953e-05, "loss": 8.904422760009766, "step": 4468 }, { "epoch": 0.5795287843544086, "grad_norm": 0.915899395942688, "learning_rate": 3.782994490388772e-05, "loss": 10.60610294342041, "step": 4469 }, { "epoch": 0.5796584618626553, "grad_norm": 0.8604477047920227, "learning_rate": 3.781013890232861e-05, "loss": 10.629741668701172, "step": 4470 }, { "epoch": 0.579788139370902, "grad_norm": 0.8929058313369751, "learning_rate": 3.779033493415605e-05, "loss": 10.159461975097656, "step": 4471 }, { "epoch": 0.5799178168791487, "grad_norm": 0.827549934387207, "learning_rate": 3.7770533002673525e-05, "loss": 8.816115379333496, "step": 4472 }, { "epoch": 0.5800474943873953, "grad_norm": 1.4735467433929443, "learning_rate": 3.775073311118419e-05, "loss": 12.764856338500977, "step": 4473 }, { "epoch": 0.580177171895642, "grad_norm": 0.750446081161499, "learning_rate": 3.773093526299086e-05, "loss": 9.572184562683105, "step": 4474 }, { "epoch": 0.5803068494038887, "grad_norm": 0.7147075533866882, "learning_rate": 3.7711139461396e-05, "loss": 7.490363597869873, "step": 4475 }, { "epoch": 0.5804365269121354, "grad_norm": 0.7742823958396912, "learning_rate": 3.769134570970175e-05, "loss": 9.117431640625, "step": 4476 }, { "epoch": 0.5805662044203821, "grad_norm": 0.8536288142204285, "learning_rate": 3.76715540112099e-05, "loss": 10.368861198425293, "step": 4477 }, { "epoch": 0.5806958819286288, "grad_norm": 0.9482219815254211, "learning_rate": 3.7651764369221884e-05, "loss": 11.588748931884766, "step": 4478 }, { "epoch": 0.5808255594368754, "grad_norm": 0.7862147092819214, "learning_rate": 3.763197678703881e-05, "loss": 9.5640287399292, "step": 4479 }, { "epoch": 0.5809552369451221, "grad_norm": 1.0560156106948853, "learning_rate": 3.761219126796145e-05, "loss": 10.455394744873047, "step": 4480 }, { "epoch": 0.5810849144533687, "grad_norm": 0.8414122462272644, "learning_rate": 3.7592407815290207e-05, "loss": 12.246912002563477, "step": 4481 }, { "epoch": 0.5812145919616155, "grad_norm": 0.6864312291145325, "learning_rate": 3.757262643232515e-05, "loss": 9.688995361328125, "step": 4482 }, { "epoch": 0.5813442694698622, "grad_norm": 0.8700284957885742, "learning_rate": 3.755284712236601e-05, "loss": 13.35999870300293, "step": 4483 }, { "epoch": 0.5814739469781088, "grad_norm": 1.0282714366912842, "learning_rate": 3.7533069888712176e-05, "loss": 11.035935401916504, "step": 4484 }, { "epoch": 0.5816036244863555, "grad_norm": 0.8813858032226562, "learning_rate": 3.7513294734662665e-05, "loss": 14.202323913574219, "step": 4485 }, { "epoch": 0.5817333019946022, "grad_norm": 0.9628540277481079, "learning_rate": 3.749352166351617e-05, "loss": 11.094277381896973, "step": 4486 }, { "epoch": 0.5818629795028488, "grad_norm": 1.0707169771194458, "learning_rate": 3.7473750678571033e-05, "loss": 12.366697311401367, "step": 4487 }, { "epoch": 0.5819926570110955, "grad_norm": 0.9844120144844055, "learning_rate": 3.7453981783125245e-05, "loss": 7.774473190307617, "step": 4488 }, { "epoch": 0.5821223345193423, "grad_norm": 0.956537663936615, "learning_rate": 3.743421498047644e-05, "loss": 11.409045219421387, "step": 4489 }, { "epoch": 0.5822520120275889, "grad_norm": 0.8995450735092163, "learning_rate": 3.741445027392193e-05, "loss": 12.31192684173584, "step": 4490 }, { "epoch": 0.5823816895358356, "grad_norm": 0.892659068107605, "learning_rate": 3.739468766675864e-05, "loss": 14.50940990447998, "step": 4491 }, { "epoch": 0.5825113670440822, "grad_norm": 0.6404837369918823, "learning_rate": 3.737492716228318e-05, "loss": 8.066264152526855, "step": 4492 }, { "epoch": 0.5826410445523289, "grad_norm": 0.9164557456970215, "learning_rate": 3.735516876379178e-05, "loss": 8.359943389892578, "step": 4493 }, { "epoch": 0.5827707220605756, "grad_norm": 0.8092225193977356, "learning_rate": 3.733541247458033e-05, "loss": 9.665651321411133, "step": 4494 }, { "epoch": 0.5829003995688223, "grad_norm": 0.7202469706535339, "learning_rate": 3.731565829794438e-05, "loss": 10.372004508972168, "step": 4495 }, { "epoch": 0.583030077077069, "grad_norm": 1.145103931427002, "learning_rate": 3.729590623717911e-05, "loss": 9.143585205078125, "step": 4496 }, { "epoch": 0.5831597545853157, "grad_norm": 0.8684026002883911, "learning_rate": 3.727615629557936e-05, "loss": 11.998302459716797, "step": 4497 }, { "epoch": 0.5832894320935623, "grad_norm": 1.1294511556625366, "learning_rate": 3.725640847643961e-05, "loss": 12.66080379486084, "step": 4498 }, { "epoch": 0.583419109601809, "grad_norm": 0.7137777209281921, "learning_rate": 3.723666278305398e-05, "loss": 5.739403247833252, "step": 4499 }, { "epoch": 0.5835487871100556, "grad_norm": 0.6087061762809753, "learning_rate": 3.7216919218716265e-05, "loss": 9.015624046325684, "step": 4500 }, { "epoch": 0.5836784646183023, "grad_norm": 0.7781050205230713, "learning_rate": 3.719717778671985e-05, "loss": 11.002025604248047, "step": 4501 }, { "epoch": 0.5838081421265491, "grad_norm": 0.6248841881752014, "learning_rate": 3.717743849035779e-05, "loss": 7.569186210632324, "step": 4502 }, { "epoch": 0.5839378196347957, "grad_norm": 1.046316385269165, "learning_rate": 3.715770133292283e-05, "loss": 7.106150150299072, "step": 4503 }, { "epoch": 0.5840674971430424, "grad_norm": 0.9501090049743652, "learning_rate": 3.713796631770731e-05, "loss": 10.255986213684082, "step": 4504 }, { "epoch": 0.5841971746512891, "grad_norm": 0.9978872537612915, "learning_rate": 3.7118233448003203e-05, "loss": 12.608124732971191, "step": 4505 }, { "epoch": 0.5843268521595357, "grad_norm": 0.7432085871696472, "learning_rate": 3.709850272710216e-05, "loss": 7.292626857757568, "step": 4506 }, { "epoch": 0.5844565296677824, "grad_norm": 0.8668110966682434, "learning_rate": 3.707877415829544e-05, "loss": 8.113055229187012, "step": 4507 }, { "epoch": 0.5845862071760292, "grad_norm": 0.7966635823249817, "learning_rate": 3.705904774487396e-05, "loss": 7.339634895324707, "step": 4508 }, { "epoch": 0.5847158846842758, "grad_norm": 0.7877048850059509, "learning_rate": 3.703932349012829e-05, "loss": 7.74687385559082, "step": 4509 }, { "epoch": 0.5848455621925225, "grad_norm": 0.7718782424926758, "learning_rate": 3.7019601397348614e-05, "loss": 7.355520725250244, "step": 4510 }, { "epoch": 0.5849752397007691, "grad_norm": 0.9451667666435242, "learning_rate": 3.699988146982477e-05, "loss": 7.624141216278076, "step": 4511 }, { "epoch": 0.5851049172090158, "grad_norm": 0.844683051109314, "learning_rate": 3.6980163710846225e-05, "loss": 10.47152042388916, "step": 4512 }, { "epoch": 0.5852345947172625, "grad_norm": 1.1536850929260254, "learning_rate": 3.696044812370211e-05, "loss": 14.315591812133789, "step": 4513 }, { "epoch": 0.5853642722255092, "grad_norm": 0.8850300908088684, "learning_rate": 3.694073471168117e-05, "loss": 8.763813972473145, "step": 4514 }, { "epoch": 0.5854939497337559, "grad_norm": 0.9027056694030762, "learning_rate": 3.692102347807178e-05, "loss": 8.384346008300781, "step": 4515 }, { "epoch": 0.5856236272420026, "grad_norm": 0.8137493133544922, "learning_rate": 3.690131442616197e-05, "loss": 9.147424697875977, "step": 4516 }, { "epoch": 0.5857533047502492, "grad_norm": 0.7469602823257446, "learning_rate": 3.6881607559239404e-05, "loss": 7.346220970153809, "step": 4517 }, { "epoch": 0.5858829822584959, "grad_norm": 0.9328942894935608, "learning_rate": 3.686190288059138e-05, "loss": 12.852834701538086, "step": 4518 }, { "epoch": 0.5860126597667425, "grad_norm": 0.8398600816726685, "learning_rate": 3.684220039350482e-05, "loss": 7.710803508758545, "step": 4519 }, { "epoch": 0.5861423372749892, "grad_norm": 0.6320206522941589, "learning_rate": 3.68225001012663e-05, "loss": 7.1467180252075195, "step": 4520 }, { "epoch": 0.586272014783236, "grad_norm": 0.7603126764297485, "learning_rate": 3.680280200716201e-05, "loss": 9.681085586547852, "step": 4521 }, { "epoch": 0.5864016922914826, "grad_norm": 0.9010932445526123, "learning_rate": 3.6783106114477784e-05, "loss": 8.697233200073242, "step": 4522 }, { "epoch": 0.5865313697997293, "grad_norm": 1.1242369413375854, "learning_rate": 3.6763412426499094e-05, "loss": 9.9142427444458, "step": 4523 }, { "epoch": 0.586661047307976, "grad_norm": 0.9030037522315979, "learning_rate": 3.6743720946511024e-05, "loss": 8.42510986328125, "step": 4524 }, { "epoch": 0.5867907248162226, "grad_norm": 0.8836542963981628, "learning_rate": 3.6724031677798306e-05, "loss": 9.220314979553223, "step": 4525 }, { "epoch": 0.5869204023244693, "grad_norm": 0.7272501587867737, "learning_rate": 3.6704344623645315e-05, "loss": 7.598121643066406, "step": 4526 }, { "epoch": 0.5870500798327161, "grad_norm": 1.0361013412475586, "learning_rate": 3.6684659787336016e-05, "loss": 10.055233001708984, "step": 4527 }, { "epoch": 0.5871797573409627, "grad_norm": 1.027572512626648, "learning_rate": 3.666497717215406e-05, "loss": 9.076565742492676, "step": 4528 }, { "epoch": 0.5873094348492094, "grad_norm": 0.8803635239601135, "learning_rate": 3.664529678138268e-05, "loss": 11.292191505432129, "step": 4529 }, { "epoch": 0.587439112357456, "grad_norm": 0.9946842193603516, "learning_rate": 3.6625618618304744e-05, "loss": 7.35246467590332, "step": 4530 }, { "epoch": 0.5875687898657027, "grad_norm": 0.9035788774490356, "learning_rate": 3.6605942686202786e-05, "loss": 9.780117988586426, "step": 4531 }, { "epoch": 0.5876984673739494, "grad_norm": 0.9639929533004761, "learning_rate": 3.658626898835892e-05, "loss": 11.036141395568848, "step": 4532 }, { "epoch": 0.587828144882196, "grad_norm": 0.8649172782897949, "learning_rate": 3.6566597528054924e-05, "loss": 14.215023040771484, "step": 4533 }, { "epoch": 0.5879578223904428, "grad_norm": 1.0267136096954346, "learning_rate": 3.654692830857218e-05, "loss": 7.538405895233154, "step": 4534 }, { "epoch": 0.5880874998986895, "grad_norm": 0.821931004524231, "learning_rate": 3.6527261333191704e-05, "loss": 8.818941116333008, "step": 4535 }, { "epoch": 0.5882171774069361, "grad_norm": 0.998033881187439, "learning_rate": 3.6507596605194136e-05, "loss": 13.346566200256348, "step": 4536 }, { "epoch": 0.5883468549151828, "grad_norm": 0.5906928777694702, "learning_rate": 3.6487934127859745e-05, "loss": 5.499337196350098, "step": 4537 }, { "epoch": 0.5884765324234295, "grad_norm": 0.8800438642501831, "learning_rate": 3.6468273904468426e-05, "loss": 8.036972999572754, "step": 4538 }, { "epoch": 0.5886062099316761, "grad_norm": 1.0152183771133423, "learning_rate": 3.6448615938299687e-05, "loss": 9.80577278137207, "step": 4539 }, { "epoch": 0.5887358874399229, "grad_norm": 1.0973671674728394, "learning_rate": 3.642896023263267e-05, "loss": 11.028575897216797, "step": 4540 }, { "epoch": 0.5888655649481696, "grad_norm": 0.8298323154449463, "learning_rate": 3.640930679074611e-05, "loss": 10.063024520874023, "step": 4541 }, { "epoch": 0.5889952424564162, "grad_norm": 0.825183093547821, "learning_rate": 3.638965561591844e-05, "loss": 7.533440113067627, "step": 4542 }, { "epoch": 0.5891249199646629, "grad_norm": 0.9099153280258179, "learning_rate": 3.6370006711427643e-05, "loss": 9.756765365600586, "step": 4543 }, { "epoch": 0.5892545974729095, "grad_norm": 1.0007332563400269, "learning_rate": 3.635036008055135e-05, "loss": 8.528068542480469, "step": 4544 }, { "epoch": 0.5893842749811562, "grad_norm": 0.8413167595863342, "learning_rate": 3.63307157265668e-05, "loss": 12.060235023498535, "step": 4545 }, { "epoch": 0.589513952489403, "grad_norm": 0.9178656339645386, "learning_rate": 3.6311073652750875e-05, "loss": 7.518836498260498, "step": 4546 }, { "epoch": 0.5896436299976496, "grad_norm": 0.6928701400756836, "learning_rate": 3.629143386238004e-05, "loss": 10.06767463684082, "step": 4547 }, { "epoch": 0.5897733075058963, "grad_norm": 1.1402205228805542, "learning_rate": 3.627179635873042e-05, "loss": 10.62914752960205, "step": 4548 }, { "epoch": 0.589902985014143, "grad_norm": 0.6869508028030396, "learning_rate": 3.6252161145077735e-05, "loss": 9.367648124694824, "step": 4549 }, { "epoch": 0.5900326625223896, "grad_norm": 0.857056736946106, "learning_rate": 3.623252822469733e-05, "loss": 13.46601676940918, "step": 4550 }, { "epoch": 0.5901623400306363, "grad_norm": 1.0842281579971313, "learning_rate": 3.621289760086415e-05, "loss": 10.11556625366211, "step": 4551 }, { "epoch": 0.5902920175388829, "grad_norm": 0.7959002256393433, "learning_rate": 3.61932692768528e-05, "loss": 8.611181259155273, "step": 4552 }, { "epoch": 0.5904216950471297, "grad_norm": 0.9900549054145813, "learning_rate": 3.617364325593745e-05, "loss": 9.3605318069458, "step": 4553 }, { "epoch": 0.5905513725553764, "grad_norm": 0.6817309856414795, "learning_rate": 3.615401954139193e-05, "loss": 6.820900917053223, "step": 4554 }, { "epoch": 0.590681050063623, "grad_norm": 0.7075349688529968, "learning_rate": 3.613439813648965e-05, "loss": 8.399896621704102, "step": 4555 }, { "epoch": 0.5908107275718697, "grad_norm": 1.0645596981048584, "learning_rate": 3.611477904450366e-05, "loss": 11.632207870483398, "step": 4556 }, { "epoch": 0.5909404050801164, "grad_norm": 0.7947684526443481, "learning_rate": 3.6095162268706595e-05, "loss": 10.791790962219238, "step": 4557 }, { "epoch": 0.591070082588363, "grad_norm": 0.6999990940093994, "learning_rate": 3.6075547812370745e-05, "loss": 10.432868003845215, "step": 4558 }, { "epoch": 0.5911997600966098, "grad_norm": 0.9546265602111816, "learning_rate": 3.6055935678767986e-05, "loss": 14.49551773071289, "step": 4559 }, { "epoch": 0.5913294376048565, "grad_norm": 0.7274544835090637, "learning_rate": 3.603632587116981e-05, "loss": 8.277348518371582, "step": 4560 }, { "epoch": 0.5914591151131031, "grad_norm": 0.7126508951187134, "learning_rate": 3.601671839284731e-05, "loss": 10.387914657592773, "step": 4561 }, { "epoch": 0.5915887926213498, "grad_norm": 0.5965462923049927, "learning_rate": 3.599711324707122e-05, "loss": 6.62672758102417, "step": 4562 }, { "epoch": 0.5917184701295964, "grad_norm": 0.7750075459480286, "learning_rate": 3.5977510437111854e-05, "loss": 10.650434494018555, "step": 4563 }, { "epoch": 0.5918481476378431, "grad_norm": 0.8971617817878723, "learning_rate": 3.595790996623916e-05, "loss": 8.360685348510742, "step": 4564 }, { "epoch": 0.5919778251460898, "grad_norm": 0.8778061270713806, "learning_rate": 3.5938311837722684e-05, "loss": 10.147431373596191, "step": 4565 }, { "epoch": 0.5921075026543365, "grad_norm": 0.8651867508888245, "learning_rate": 3.591871605483158e-05, "loss": 12.695535659790039, "step": 4566 }, { "epoch": 0.5922371801625832, "grad_norm": 0.7502747774124146, "learning_rate": 3.589912262083461e-05, "loss": 9.076433181762695, "step": 4567 }, { "epoch": 0.5923668576708299, "grad_norm": 0.7681984901428223, "learning_rate": 3.587953153900015e-05, "loss": 8.52619457244873, "step": 4568 }, { "epoch": 0.5924965351790765, "grad_norm": 1.4137390851974487, "learning_rate": 3.58599428125962e-05, "loss": 9.090690612792969, "step": 4569 }, { "epoch": 0.5926262126873232, "grad_norm": 0.9098421931266785, "learning_rate": 3.584035644489032e-05, "loss": 10.595878601074219, "step": 4570 }, { "epoch": 0.5927558901955698, "grad_norm": 0.671593964099884, "learning_rate": 3.5820772439149725e-05, "loss": 9.059771537780762, "step": 4571 }, { "epoch": 0.5928855677038166, "grad_norm": 1.1239268779754639, "learning_rate": 3.580119079864121e-05, "loss": 12.761222839355469, "step": 4572 }, { "epoch": 0.5930152452120633, "grad_norm": 0.8143115639686584, "learning_rate": 3.5781611526631174e-05, "loss": 10.672635078430176, "step": 4573 }, { "epoch": 0.5931449227203099, "grad_norm": 0.7374706268310547, "learning_rate": 3.576203462638564e-05, "loss": 9.746257781982422, "step": 4574 }, { "epoch": 0.5932746002285566, "grad_norm": 0.9270442724227905, "learning_rate": 3.574246010117022e-05, "loss": 10.763725280761719, "step": 4575 }, { "epoch": 0.5934042777368033, "grad_norm": 0.8352506160736084, "learning_rate": 3.572288795425014e-05, "loss": 11.416561126708984, "step": 4576 }, { "epoch": 0.5935339552450499, "grad_norm": 0.6731907725334167, "learning_rate": 3.5703318188890214e-05, "loss": 8.07433795928955, "step": 4577 }, { "epoch": 0.5936636327532967, "grad_norm": 1.0965778827667236, "learning_rate": 3.5683750808354863e-05, "loss": 10.029027938842773, "step": 4578 }, { "epoch": 0.5937933102615434, "grad_norm": 0.9989977478981018, "learning_rate": 3.566418581590813e-05, "loss": 10.843364715576172, "step": 4579 }, { "epoch": 0.59392298776979, "grad_norm": 0.6548342704772949, "learning_rate": 3.5644623214813624e-05, "loss": 9.026982307434082, "step": 4580 }, { "epoch": 0.5940526652780367, "grad_norm": 0.4514755606651306, "learning_rate": 3.5625063008334605e-05, "loss": 7.298060417175293, "step": 4581 }, { "epoch": 0.5941823427862833, "grad_norm": 0.6789869666099548, "learning_rate": 3.560550519973388e-05, "loss": 8.852612495422363, "step": 4582 }, { "epoch": 0.59431202029453, "grad_norm": 0.65744549036026, "learning_rate": 3.5585949792273895e-05, "loss": 9.749446868896484, "step": 4583 }, { "epoch": 0.5944416978027767, "grad_norm": 0.9427091479301453, "learning_rate": 3.556639678921668e-05, "loss": 9.484618186950684, "step": 4584 }, { "epoch": 0.5945713753110234, "grad_norm": 0.748043954372406, "learning_rate": 3.5546846193823855e-05, "loss": 8.14783763885498, "step": 4585 }, { "epoch": 0.5947010528192701, "grad_norm": 0.7036560773849487, "learning_rate": 3.5527298009356654e-05, "loss": 9.89401626586914, "step": 4586 }, { "epoch": 0.5948307303275168, "grad_norm": 1.0879095792770386, "learning_rate": 3.55077522390759e-05, "loss": 9.2285795211792, "step": 4587 }, { "epoch": 0.5949604078357634, "grad_norm": 0.8135930299758911, "learning_rate": 3.548820888624201e-05, "loss": 9.35839557647705, "step": 4588 }, { "epoch": 0.5950900853440101, "grad_norm": 0.9693294167518616, "learning_rate": 3.5468667954115026e-05, "loss": 8.349942207336426, "step": 4589 }, { "epoch": 0.5952197628522568, "grad_norm": 0.9404480457305908, "learning_rate": 3.5449129445954544e-05, "loss": 11.509288787841797, "step": 4590 }, { "epoch": 0.5953494403605035, "grad_norm": 1.0105098485946655, "learning_rate": 3.542959336501978e-05, "loss": 7.928623199462891, "step": 4591 }, { "epoch": 0.5954791178687502, "grad_norm": 0.7624886631965637, "learning_rate": 3.541005971456954e-05, "loss": 11.239078521728516, "step": 4592 }, { "epoch": 0.5956087953769968, "grad_norm": 0.9068053364753723, "learning_rate": 3.539052849786224e-05, "loss": 12.265582084655762, "step": 4593 }, { "epoch": 0.5957384728852435, "grad_norm": 0.8185057044029236, "learning_rate": 3.537099971815585e-05, "loss": 12.391437530517578, "step": 4594 }, { "epoch": 0.5958681503934902, "grad_norm": 0.9652115106582642, "learning_rate": 3.5351473378707985e-05, "loss": 8.986255645751953, "step": 4595 }, { "epoch": 0.5959978279017368, "grad_norm": 1.0099283456802368, "learning_rate": 3.533194948277581e-05, "loss": 12.080432891845703, "step": 4596 }, { "epoch": 0.5961275054099835, "grad_norm": 0.7235724925994873, "learning_rate": 3.5312428033616096e-05, "loss": 8.246481895446777, "step": 4597 }, { "epoch": 0.5962571829182303, "grad_norm": 0.7694550156593323, "learning_rate": 3.529290903448523e-05, "loss": 11.064701080322266, "step": 4598 }, { "epoch": 0.5963868604264769, "grad_norm": 0.7361782193183899, "learning_rate": 3.527339248863915e-05, "loss": 10.260150909423828, "step": 4599 }, { "epoch": 0.5965165379347236, "grad_norm": 0.9160913228988647, "learning_rate": 3.5253878399333417e-05, "loss": 12.323530197143555, "step": 4600 }, { "epoch": 0.5966462154429703, "grad_norm": 0.8313539028167725, "learning_rate": 3.523436676982316e-05, "loss": 11.76928424835205, "step": 4601 }, { "epoch": 0.5967758929512169, "grad_norm": 0.9038023948669434, "learning_rate": 3.5214857603363106e-05, "loss": 8.900247573852539, "step": 4602 }, { "epoch": 0.5969055704594636, "grad_norm": 0.7865744829177856, "learning_rate": 3.5195350903207585e-05, "loss": 9.169344902038574, "step": 4603 }, { "epoch": 0.5970352479677103, "grad_norm": 0.9226729273796082, "learning_rate": 3.517584667261049e-05, "loss": 8.175233840942383, "step": 4604 }, { "epoch": 0.597164925475957, "grad_norm": 0.833049476146698, "learning_rate": 3.5156344914825325e-05, "loss": 11.25358772277832, "step": 4605 }, { "epoch": 0.5972946029842037, "grad_norm": 0.8326337337493896, "learning_rate": 3.513684563310516e-05, "loss": 8.856170654296875, "step": 4606 }, { "epoch": 0.5974242804924503, "grad_norm": 0.8711010813713074, "learning_rate": 3.511734883070268e-05, "loss": 9.525716781616211, "step": 4607 }, { "epoch": 0.597553958000697, "grad_norm": 1.0300978422164917, "learning_rate": 3.509785451087013e-05, "loss": 10.392763137817383, "step": 4608 }, { "epoch": 0.5976836355089437, "grad_norm": 0.9306237697601318, "learning_rate": 3.507836267685934e-05, "loss": 7.749938488006592, "step": 4609 }, { "epoch": 0.5978133130171904, "grad_norm": 0.8256074786186218, "learning_rate": 3.505887333192177e-05, "loss": 10.047600746154785, "step": 4610 }, { "epoch": 0.5979429905254371, "grad_norm": 0.7674829959869385, "learning_rate": 3.5039386479308396e-05, "loss": 7.99718713760376, "step": 4611 }, { "epoch": 0.5980726680336838, "grad_norm": 1.3161671161651611, "learning_rate": 3.501990212226982e-05, "loss": 12.216038703918457, "step": 4612 }, { "epoch": 0.5982023455419304, "grad_norm": 1.2806437015533447, "learning_rate": 3.500042026405623e-05, "loss": 15.325568199157715, "step": 4613 }, { "epoch": 0.5983320230501771, "grad_norm": 0.7297218441963196, "learning_rate": 3.498094090791738e-05, "loss": 8.707527160644531, "step": 4614 }, { "epoch": 0.5984617005584237, "grad_norm": 0.9779185652732849, "learning_rate": 3.496146405710262e-05, "loss": 6.041141510009766, "step": 4615 }, { "epoch": 0.5985913780666704, "grad_norm": 0.8945653438568115, "learning_rate": 3.494198971486088e-05, "loss": 9.326343536376953, "step": 4616 }, { "epoch": 0.5987210555749172, "grad_norm": 0.9491593837738037, "learning_rate": 3.492251788444064e-05, "loss": 10.586783409118652, "step": 4617 }, { "epoch": 0.5988507330831638, "grad_norm": 0.6309866309165955, "learning_rate": 3.4903048569090035e-05, "loss": 7.701427459716797, "step": 4618 }, { "epoch": 0.5989804105914105, "grad_norm": 0.6645802855491638, "learning_rate": 3.4883581772056715e-05, "loss": 8.435921669006348, "step": 4619 }, { "epoch": 0.5991100880996572, "grad_norm": 1.1872605085372925, "learning_rate": 3.4864117496587936e-05, "loss": 11.260419845581055, "step": 4620 }, { "epoch": 0.5992397656079038, "grad_norm": 1.0200084447860718, "learning_rate": 3.4844655745930504e-05, "loss": 11.025506019592285, "step": 4621 }, { "epoch": 0.5993694431161505, "grad_norm": 1.1876535415649414, "learning_rate": 3.482519652333085e-05, "loss": 8.9063138961792, "step": 4622 }, { "epoch": 0.5994991206243973, "grad_norm": 0.6212384104728699, "learning_rate": 3.480573983203495e-05, "loss": 9.233030319213867, "step": 4623 }, { "epoch": 0.5996287981326439, "grad_norm": 1.0720572471618652, "learning_rate": 3.478628567528837e-05, "loss": 8.700054168701172, "step": 4624 }, { "epoch": 0.5997584756408906, "grad_norm": 1.0170516967773438, "learning_rate": 3.476683405633625e-05, "loss": 12.55591869354248, "step": 4625 }, { "epoch": 0.5998881531491372, "grad_norm": 1.162032961845398, "learning_rate": 3.474738497842332e-05, "loss": 14.217386245727539, "step": 4626 }, { "epoch": 0.6000178306573839, "grad_norm": 1.0392870903015137, "learning_rate": 3.4727938444793854e-05, "loss": 10.543582916259766, "step": 4627 }, { "epoch": 0.6001475081656306, "grad_norm": 0.6960374712944031, "learning_rate": 3.470849445869173e-05, "loss": 8.19919490814209, "step": 4628 }, { "epoch": 0.6002771856738772, "grad_norm": 1.3335115909576416, "learning_rate": 3.4689053023360394e-05, "loss": 10.286993980407715, "step": 4629 }, { "epoch": 0.600406863182124, "grad_norm": 0.9127882719039917, "learning_rate": 3.4669614142042874e-05, "loss": 10.312010765075684, "step": 4630 }, { "epoch": 0.6005365406903707, "grad_norm": 1.0511807203292847, "learning_rate": 3.465017781798174e-05, "loss": 14.04603099822998, "step": 4631 }, { "epoch": 0.6006662181986173, "grad_norm": 1.0727996826171875, "learning_rate": 3.4630744054419184e-05, "loss": 12.13599681854248, "step": 4632 }, { "epoch": 0.600795895706864, "grad_norm": 0.9533147215843201, "learning_rate": 3.4611312854596936e-05, "loss": 8.858724594116211, "step": 4633 }, { "epoch": 0.6009255732151106, "grad_norm": 1.1786712408065796, "learning_rate": 3.45918842217563e-05, "loss": 13.900224685668945, "step": 4634 }, { "epoch": 0.6010552507233573, "grad_norm": 0.8280442357063293, "learning_rate": 3.4572458159138165e-05, "loss": 9.498339653015137, "step": 4635 }, { "epoch": 0.6011849282316041, "grad_norm": 0.8139671683311462, "learning_rate": 3.455303466998299e-05, "loss": 8.061529159545898, "step": 4636 }, { "epoch": 0.6013146057398507, "grad_norm": 0.597672700881958, "learning_rate": 3.453361375753079e-05, "loss": 7.0645060539245605, "step": 4637 }, { "epoch": 0.6014442832480974, "grad_norm": 0.9387645721435547, "learning_rate": 3.451419542502117e-05, "loss": 13.082042694091797, "step": 4638 }, { "epoch": 0.6015739607563441, "grad_norm": 0.8384819626808167, "learning_rate": 3.4494779675693296e-05, "loss": 13.265789031982422, "step": 4639 }, { "epoch": 0.6017036382645907, "grad_norm": 1.0219099521636963, "learning_rate": 3.44753665127859e-05, "loss": 10.368791580200195, "step": 4640 }, { "epoch": 0.6018333157728374, "grad_norm": 0.7494152784347534, "learning_rate": 3.4455955939537273e-05, "loss": 10.905707359313965, "step": 4641 }, { "epoch": 0.6019629932810842, "grad_norm": 0.8645635843276978, "learning_rate": 3.4436547959185303e-05, "loss": 8.138875007629395, "step": 4642 }, { "epoch": 0.6020926707893308, "grad_norm": 0.6883842945098877, "learning_rate": 3.441714257496743e-05, "loss": 9.49161434173584, "step": 4643 }, { "epoch": 0.6022223482975775, "grad_norm": 0.980394184589386, "learning_rate": 3.4397739790120645e-05, "loss": 11.888057708740234, "step": 4644 }, { "epoch": 0.6023520258058241, "grad_norm": 0.7866372466087341, "learning_rate": 3.4378339607881525e-05, "loss": 9.352187156677246, "step": 4645 }, { "epoch": 0.6024817033140708, "grad_norm": 0.9332238435745239, "learning_rate": 3.4358942031486216e-05, "loss": 9.724251747131348, "step": 4646 }, { "epoch": 0.6026113808223175, "grad_norm": 0.819322407245636, "learning_rate": 3.433954706417042e-05, "loss": 10.197736740112305, "step": 4647 }, { "epoch": 0.6027410583305641, "grad_norm": 1.1772054433822632, "learning_rate": 3.4320154709169395e-05, "loss": 8.554816246032715, "step": 4648 }, { "epoch": 0.6028707358388109, "grad_norm": 0.7684087753295898, "learning_rate": 3.430076496971797e-05, "loss": 8.873092651367188, "step": 4649 }, { "epoch": 0.6030004133470576, "grad_norm": 0.7839000225067139, "learning_rate": 3.4281377849050557e-05, "loss": 7.547467231750488, "step": 4650 }, { "epoch": 0.6031300908553042, "grad_norm": 0.8198671936988831, "learning_rate": 3.4261993350401104e-05, "loss": 10.497072219848633, "step": 4651 }, { "epoch": 0.6032597683635509, "grad_norm": 0.726381778717041, "learning_rate": 3.424261147700314e-05, "loss": 7.868891716003418, "step": 4652 }, { "epoch": 0.6033894458717975, "grad_norm": 0.9516142010688782, "learning_rate": 3.4223232232089733e-05, "loss": 10.43648910522461, "step": 4653 }, { "epoch": 0.6035191233800442, "grad_norm": 0.831863522529602, "learning_rate": 3.420385561889355e-05, "loss": 7.478342056274414, "step": 4654 }, { "epoch": 0.603648800888291, "grad_norm": 0.7649266719818115, "learning_rate": 3.418448164064678e-05, "loss": 8.35755443572998, "step": 4655 }, { "epoch": 0.6037784783965376, "grad_norm": 0.7465689778327942, "learning_rate": 3.4165110300581185e-05, "loss": 7.13839054107666, "step": 4656 }, { "epoch": 0.6039081559047843, "grad_norm": 0.8986688256263733, "learning_rate": 3.4145741601928114e-05, "loss": 10.26867389678955, "step": 4657 }, { "epoch": 0.604037833413031, "grad_norm": 0.790802001953125, "learning_rate": 3.412637554791845e-05, "loss": 9.513063430786133, "step": 4658 }, { "epoch": 0.6041675109212776, "grad_norm": 0.7284097671508789, "learning_rate": 3.4107012141782626e-05, "loss": 10.570182800292969, "step": 4659 }, { "epoch": 0.6042971884295243, "grad_norm": 0.8935545086860657, "learning_rate": 3.4087651386750644e-05, "loss": 9.414302825927734, "step": 4660 }, { "epoch": 0.604426865937771, "grad_norm": 0.6423092484474182, "learning_rate": 3.406829328605207e-05, "loss": 7.774046897888184, "step": 4661 }, { "epoch": 0.6045565434460177, "grad_norm": 0.6532108783721924, "learning_rate": 3.4048937842916015e-05, "loss": 6.200345039367676, "step": 4662 }, { "epoch": 0.6046862209542644, "grad_norm": 1.0187710523605347, "learning_rate": 3.4029585060571166e-05, "loss": 12.599807739257812, "step": 4663 }, { "epoch": 0.604815898462511, "grad_norm": 0.8106988668441772, "learning_rate": 3.4010234942245754e-05, "loss": 9.074533462524414, "step": 4664 }, { "epoch": 0.6049455759707577, "grad_norm": 0.9826228618621826, "learning_rate": 3.3990887491167556e-05, "loss": 10.697858810424805, "step": 4665 }, { "epoch": 0.6050752534790044, "grad_norm": 0.7675603032112122, "learning_rate": 3.397154271056391e-05, "loss": 9.60464859008789, "step": 4666 }, { "epoch": 0.605204930987251, "grad_norm": 0.7775033116340637, "learning_rate": 3.395220060366173e-05, "loss": 9.714552879333496, "step": 4667 }, { "epoch": 0.6053346084954978, "grad_norm": 0.8766688108444214, "learning_rate": 3.3932861173687455e-05, "loss": 7.2681169509887695, "step": 4668 }, { "epoch": 0.6054642860037445, "grad_norm": 0.8476055264472961, "learning_rate": 3.391352442386708e-05, "loss": 9.765488624572754, "step": 4669 }, { "epoch": 0.6055939635119911, "grad_norm": 0.8449630737304688, "learning_rate": 3.389419035742618e-05, "loss": 10.812847137451172, "step": 4670 }, { "epoch": 0.6057236410202378, "grad_norm": 0.6175012588500977, "learning_rate": 3.387485897758985e-05, "loss": 7.655002593994141, "step": 4671 }, { "epoch": 0.6058533185284845, "grad_norm": 1.0715248584747314, "learning_rate": 3.3855530287582766e-05, "loss": 10.388614654541016, "step": 4672 }, { "epoch": 0.6059829960367311, "grad_norm": 0.8484711050987244, "learning_rate": 3.3836204290629124e-05, "loss": 11.133843421936035, "step": 4673 }, { "epoch": 0.6061126735449779, "grad_norm": 0.7703144550323486, "learning_rate": 3.381688098995269e-05, "loss": 6.249692916870117, "step": 4674 }, { "epoch": 0.6062423510532245, "grad_norm": 1.1911653280258179, "learning_rate": 3.3797560388776786e-05, "loss": 9.39081859588623, "step": 4675 }, { "epoch": 0.6063720285614712, "grad_norm": 0.7262590527534485, "learning_rate": 3.377824249032426e-05, "loss": 6.405818462371826, "step": 4676 }, { "epoch": 0.6065017060697179, "grad_norm": 0.6979170441627502, "learning_rate": 3.375892729781754e-05, "loss": 8.010231018066406, "step": 4677 }, { "epoch": 0.6066313835779645, "grad_norm": 0.7117998600006104, "learning_rate": 3.373961481447858e-05, "loss": 8.90934944152832, "step": 4678 }, { "epoch": 0.6067610610862112, "grad_norm": 0.6698656678199768, "learning_rate": 3.372030504352889e-05, "loss": 5.89674186706543, "step": 4679 }, { "epoch": 0.6068907385944579, "grad_norm": 0.8234989047050476, "learning_rate": 3.370099798818952e-05, "loss": 7.817464828491211, "step": 4680 }, { "epoch": 0.6070204161027046, "grad_norm": 0.6702801585197449, "learning_rate": 3.368169365168108e-05, "loss": 8.12693977355957, "step": 4681 }, { "epoch": 0.6071500936109513, "grad_norm": 0.6015859246253967, "learning_rate": 3.366239203722371e-05, "loss": 7.4075798988342285, "step": 4682 }, { "epoch": 0.607279771119198, "grad_norm": 0.8467558026313782, "learning_rate": 3.3643093148037125e-05, "loss": 7.941405773162842, "step": 4683 }, { "epoch": 0.6074094486274446, "grad_norm": 0.7219114899635315, "learning_rate": 3.362379698734055e-05, "loss": 7.682496547698975, "step": 4684 }, { "epoch": 0.6075391261356913, "grad_norm": 0.7027954459190369, "learning_rate": 3.360450355835278e-05, "loss": 11.359989166259766, "step": 4685 }, { "epoch": 0.6076688036439379, "grad_norm": 0.7102304100990295, "learning_rate": 3.358521286429213e-05, "loss": 7.168757438659668, "step": 4686 }, { "epoch": 0.6077984811521847, "grad_norm": 1.011231541633606, "learning_rate": 3.356592490837649e-05, "loss": 13.40389633178711, "step": 4687 }, { "epoch": 0.6079281586604314, "grad_norm": 1.1982213258743286, "learning_rate": 3.3546639693823276e-05, "loss": 12.492423057556152, "step": 4688 }, { "epoch": 0.608057836168678, "grad_norm": 0.9920386075973511, "learning_rate": 3.3527357223849445e-05, "loss": 11.92780876159668, "step": 4689 }, { "epoch": 0.6081875136769247, "grad_norm": 0.6623721122741699, "learning_rate": 3.350807750167149e-05, "loss": 5.868074417114258, "step": 4690 }, { "epoch": 0.6083171911851714, "grad_norm": 0.9146478176116943, "learning_rate": 3.3488800530505466e-05, "loss": 11.763620376586914, "step": 4691 }, { "epoch": 0.608446868693418, "grad_norm": 0.6449342370033264, "learning_rate": 3.3469526313566955e-05, "loss": 6.263585090637207, "step": 4692 }, { "epoch": 0.6085765462016647, "grad_norm": 0.75013667345047, "learning_rate": 3.345025485407108e-05, "loss": 7.390512943267822, "step": 4693 }, { "epoch": 0.6087062237099115, "grad_norm": 0.8336730599403381, "learning_rate": 3.343098615523251e-05, "loss": 9.294113159179688, "step": 4694 }, { "epoch": 0.6088359012181581, "grad_norm": 1.0226620435714722, "learning_rate": 3.3411720220265455e-05, "loss": 8.420454025268555, "step": 4695 }, { "epoch": 0.6089655787264048, "grad_norm": 0.5435344576835632, "learning_rate": 3.339245705238364e-05, "loss": 9.135526657104492, "step": 4696 }, { "epoch": 0.6090952562346514, "grad_norm": 0.7378359436988831, "learning_rate": 3.3373196654800375e-05, "loss": 7.591791152954102, "step": 4697 }, { "epoch": 0.6092249337428981, "grad_norm": 0.7892722487449646, "learning_rate": 3.335393903072846e-05, "loss": 7.878573894500732, "step": 4698 }, { "epoch": 0.6093546112511448, "grad_norm": 0.7196400165557861, "learning_rate": 3.333468418338026e-05, "loss": 7.769919395446777, "step": 4699 }, { "epoch": 0.6094842887593915, "grad_norm": 0.7100630402565002, "learning_rate": 3.3315432115967664e-05, "loss": 7.855522632598877, "step": 4700 }, { "epoch": 0.6096139662676382, "grad_norm": 0.8423293828964233, "learning_rate": 3.329618283170211e-05, "loss": 7.854236125946045, "step": 4701 }, { "epoch": 0.6097436437758849, "grad_norm": 1.0232117176055908, "learning_rate": 3.327693633379456e-05, "loss": 11.147332191467285, "step": 4702 }, { "epoch": 0.6098733212841315, "grad_norm": 1.0781655311584473, "learning_rate": 3.325769262545552e-05, "loss": 9.592458724975586, "step": 4703 }, { "epoch": 0.6100029987923782, "grad_norm": 0.9213435053825378, "learning_rate": 3.323845170989502e-05, "loss": 8.759370803833008, "step": 4704 }, { "epoch": 0.6101326763006248, "grad_norm": 0.6058790683746338, "learning_rate": 3.321921359032264e-05, "loss": 7.189439296722412, "step": 4705 }, { "epoch": 0.6102623538088716, "grad_norm": 0.9810937643051147, "learning_rate": 3.3199978269947476e-05, "loss": 12.079915046691895, "step": 4706 }, { "epoch": 0.6103920313171183, "grad_norm": 0.8576740622520447, "learning_rate": 3.318074575197816e-05, "loss": 11.37607479095459, "step": 4707 }, { "epoch": 0.6105217088253649, "grad_norm": 0.8711262345314026, "learning_rate": 3.3161516039622874e-05, "loss": 10.570344924926758, "step": 4708 }, { "epoch": 0.6106513863336116, "grad_norm": 0.7291814088821411, "learning_rate": 3.3142289136089314e-05, "loss": 9.714130401611328, "step": 4709 }, { "epoch": 0.6107810638418583, "grad_norm": 0.8620819449424744, "learning_rate": 3.312306504458471e-05, "loss": 9.319379806518555, "step": 4710 }, { "epoch": 0.6109107413501049, "grad_norm": 0.7316655516624451, "learning_rate": 3.3103843768315835e-05, "loss": 10.34874153137207, "step": 4711 }, { "epoch": 0.6110404188583516, "grad_norm": 1.0686700344085693, "learning_rate": 3.308462531048897e-05, "loss": 9.5079345703125, "step": 4712 }, { "epoch": 0.6111700963665984, "grad_norm": 0.6786089539527893, "learning_rate": 3.3065409674309954e-05, "loss": 6.949512958526611, "step": 4713 }, { "epoch": 0.611299773874845, "grad_norm": 0.9737909436225891, "learning_rate": 3.3046196862984134e-05, "loss": 10.715397834777832, "step": 4714 }, { "epoch": 0.6114294513830917, "grad_norm": 0.8963040709495544, "learning_rate": 3.30269868797164e-05, "loss": 8.376116752624512, "step": 4715 }, { "epoch": 0.6115591288913383, "grad_norm": 0.5530466437339783, "learning_rate": 3.300777972771115e-05, "loss": 8.680220603942871, "step": 4716 }, { "epoch": 0.611688806399585, "grad_norm": 0.6593091487884521, "learning_rate": 3.2988575410172326e-05, "loss": 8.010496139526367, "step": 4717 }, { "epoch": 0.6118184839078317, "grad_norm": 1.0775858163833618, "learning_rate": 3.296937393030339e-05, "loss": 13.003765106201172, "step": 4718 }, { "epoch": 0.6119481614160784, "grad_norm": 0.8553515076637268, "learning_rate": 3.295017529130736e-05, "loss": 9.955313682556152, "step": 4719 }, { "epoch": 0.6120778389243251, "grad_norm": 0.849409818649292, "learning_rate": 3.2930979496386714e-05, "loss": 10.826172828674316, "step": 4720 }, { "epoch": 0.6122075164325718, "grad_norm": 0.9038370847702026, "learning_rate": 3.2911786548743526e-05, "loss": 11.972704887390137, "step": 4721 }, { "epoch": 0.6123371939408184, "grad_norm": 0.8957101106643677, "learning_rate": 3.289259645157935e-05, "loss": 10.612011909484863, "step": 4722 }, { "epoch": 0.6124668714490651, "grad_norm": 1.1561135053634644, "learning_rate": 3.287340920809529e-05, "loss": 10.295012474060059, "step": 4723 }, { "epoch": 0.6125965489573117, "grad_norm": 0.9818467497825623, "learning_rate": 3.285422482149195e-05, "loss": 10.987683296203613, "step": 4724 }, { "epoch": 0.6127262264655584, "grad_norm": 0.8596866726875305, "learning_rate": 3.2835043294969484e-05, "loss": 9.028217315673828, "step": 4725 }, { "epoch": 0.6128559039738052, "grad_norm": 0.6092216372489929, "learning_rate": 3.281586463172755e-05, "loss": 7.320558071136475, "step": 4726 }, { "epoch": 0.6129855814820518, "grad_norm": 0.7302206754684448, "learning_rate": 3.2796688834965334e-05, "loss": 6.154566764831543, "step": 4727 }, { "epoch": 0.6131152589902985, "grad_norm": 0.8390267491340637, "learning_rate": 3.277751590788155e-05, "loss": 7.2640767097473145, "step": 4728 }, { "epoch": 0.6132449364985452, "grad_norm": 0.7050285339355469, "learning_rate": 3.275834585367441e-05, "loss": 8.294333457946777, "step": 4729 }, { "epoch": 0.6133746140067918, "grad_norm": 0.8797138929367065, "learning_rate": 3.2739178675541694e-05, "loss": 10.396602630615234, "step": 4730 }, { "epoch": 0.6135042915150385, "grad_norm": 0.8525869846343994, "learning_rate": 3.2720014376680644e-05, "loss": 7.627668380737305, "step": 4731 }, { "epoch": 0.6136339690232853, "grad_norm": 0.8564395308494568, "learning_rate": 3.270085296028806e-05, "loss": 10.697531700134277, "step": 4732 }, { "epoch": 0.6137636465315319, "grad_norm": 1.1244502067565918, "learning_rate": 3.2681694429560236e-05, "loss": 9.636983871459961, "step": 4733 }, { "epoch": 0.6138933240397786, "grad_norm": 1.188859224319458, "learning_rate": 3.266253878769304e-05, "loss": 11.47677993774414, "step": 4734 }, { "epoch": 0.6140230015480252, "grad_norm": 0.6581576466560364, "learning_rate": 3.264338603788179e-05, "loss": 8.329015731811523, "step": 4735 }, { "epoch": 0.6141526790562719, "grad_norm": 1.1947858333587646, "learning_rate": 3.262423618332135e-05, "loss": 11.655113220214844, "step": 4736 }, { "epoch": 0.6142823565645186, "grad_norm": 0.9740300178527832, "learning_rate": 3.260508922720612e-05, "loss": 9.999832153320312, "step": 4737 }, { "epoch": 0.6144120340727653, "grad_norm": 0.7279183864593506, "learning_rate": 3.258594517272997e-05, "loss": 7.978835105895996, "step": 4738 }, { "epoch": 0.614541711581012, "grad_norm": 0.7155054211616516, "learning_rate": 3.256680402308633e-05, "loss": 9.7598237991333, "step": 4739 }, { "epoch": 0.6146713890892587, "grad_norm": 1.0684555768966675, "learning_rate": 3.2547665781468116e-05, "loss": 9.87458324432373, "step": 4740 }, { "epoch": 0.6148010665975053, "grad_norm": 0.8356285095214844, "learning_rate": 3.2528530451067786e-05, "loss": 7.875716209411621, "step": 4741 }, { "epoch": 0.614930744105752, "grad_norm": 1.1576472520828247, "learning_rate": 3.25093980350773e-05, "loss": 12.366754531860352, "step": 4742 }, { "epoch": 0.6150604216139987, "grad_norm": 0.9085551500320435, "learning_rate": 3.2490268536688114e-05, "loss": 9.52559757232666, "step": 4743 }, { "epoch": 0.6151900991222453, "grad_norm": 0.8885136246681213, "learning_rate": 3.247114195909121e-05, "loss": 9.421338081359863, "step": 4744 }, { "epoch": 0.6153197766304921, "grad_norm": 0.8197653293609619, "learning_rate": 3.24520183054771e-05, "loss": 10.014771461486816, "step": 4745 }, { "epoch": 0.6154494541387387, "grad_norm": 0.6346478462219238, "learning_rate": 3.2432897579035806e-05, "loss": 8.263703346252441, "step": 4746 }, { "epoch": 0.6155791316469854, "grad_norm": 1.0516011714935303, "learning_rate": 3.241377978295681e-05, "loss": 10.903261184692383, "step": 4747 }, { "epoch": 0.6157088091552321, "grad_norm": 0.7785004377365112, "learning_rate": 3.2394664920429184e-05, "loss": 9.35763931274414, "step": 4748 }, { "epoch": 0.6158384866634787, "grad_norm": 0.8016407489776611, "learning_rate": 3.237555299464145e-05, "loss": 7.746971607208252, "step": 4749 }, { "epoch": 0.6159681641717254, "grad_norm": 0.8279991149902344, "learning_rate": 3.235644400878167e-05, "loss": 8.395153045654297, "step": 4750 }, { "epoch": 0.6160978416799722, "grad_norm": 0.7028660178184509, "learning_rate": 3.2337337966037404e-05, "loss": 8.851558685302734, "step": 4751 }, { "epoch": 0.6162275191882188, "grad_norm": 0.8827502131462097, "learning_rate": 3.231823486959573e-05, "loss": 8.978109359741211, "step": 4752 }, { "epoch": 0.6163571966964655, "grad_norm": 0.8200672268867493, "learning_rate": 3.229913472264322e-05, "loss": 9.380903244018555, "step": 4753 }, { "epoch": 0.6164868742047122, "grad_norm": 0.8214842677116394, "learning_rate": 3.228003752836597e-05, "loss": 7.495499610900879, "step": 4754 }, { "epoch": 0.6166165517129588, "grad_norm": 0.9455710053443909, "learning_rate": 3.2260943289949575e-05, "loss": 11.611021041870117, "step": 4755 }, { "epoch": 0.6167462292212055, "grad_norm": 1.0848731994628906, "learning_rate": 3.224185201057913e-05, "loss": 10.015952110290527, "step": 4756 }, { "epoch": 0.6168759067294521, "grad_norm": 0.8593799471855164, "learning_rate": 3.222276369343926e-05, "loss": 10.704282760620117, "step": 4757 }, { "epoch": 0.6170055842376989, "grad_norm": 0.8210050463676453, "learning_rate": 3.2203678341714075e-05, "loss": 7.178097724914551, "step": 4758 }, { "epoch": 0.6171352617459456, "grad_norm": 0.7388744950294495, "learning_rate": 3.218459595858719e-05, "loss": 8.00688648223877, "step": 4759 }, { "epoch": 0.6172649392541922, "grad_norm": 0.8546148538589478, "learning_rate": 3.2165516547241735e-05, "loss": 10.21773624420166, "step": 4760 }, { "epoch": 0.6173946167624389, "grad_norm": 0.8470679521560669, "learning_rate": 3.214644011086034e-05, "loss": 9.429256439208984, "step": 4761 }, { "epoch": 0.6175242942706856, "grad_norm": 0.7337941527366638, "learning_rate": 3.212736665262514e-05, "loss": 9.733375549316406, "step": 4762 }, { "epoch": 0.6176539717789322, "grad_norm": 0.8278391361236572, "learning_rate": 3.2108296175717765e-05, "loss": 10.587179183959961, "step": 4763 }, { "epoch": 0.617783649287179, "grad_norm": 0.8884491920471191, "learning_rate": 3.208922868331936e-05, "loss": 11.69318962097168, "step": 4764 }, { "epoch": 0.6179133267954257, "grad_norm": 0.7885341048240662, "learning_rate": 3.2070164178610576e-05, "loss": 8.544168472290039, "step": 4765 }, { "epoch": 0.6180430043036723, "grad_norm": 0.7923682332038879, "learning_rate": 3.205110266477154e-05, "loss": 7.604401588439941, "step": 4766 }, { "epoch": 0.618172681811919, "grad_norm": 0.6445578336715698, "learning_rate": 3.203204414498191e-05, "loss": 8.044867515563965, "step": 4767 }, { "epoch": 0.6183023593201656, "grad_norm": 0.8612692952156067, "learning_rate": 3.201298862242082e-05, "loss": 7.150617599487305, "step": 4768 }, { "epoch": 0.6184320368284123, "grad_norm": 0.9782787561416626, "learning_rate": 3.199393610026693e-05, "loss": 10.679039001464844, "step": 4769 }, { "epoch": 0.6185617143366591, "grad_norm": 0.7021231651306152, "learning_rate": 3.1974886581698365e-05, "loss": 9.631206512451172, "step": 4770 }, { "epoch": 0.6186913918449057, "grad_norm": 0.8621488809585571, "learning_rate": 3.1955840069892774e-05, "loss": 9.134733200073242, "step": 4771 }, { "epoch": 0.6188210693531524, "grad_norm": 0.8991274237632751, "learning_rate": 3.193679656802731e-05, "loss": 8.899630546569824, "step": 4772 }, { "epoch": 0.6189507468613991, "grad_norm": 0.8267920613288879, "learning_rate": 3.191775607927861e-05, "loss": 10.550132751464844, "step": 4773 }, { "epoch": 0.6190804243696457, "grad_norm": 0.9717512726783752, "learning_rate": 3.189871860682281e-05, "loss": 11.070923805236816, "step": 4774 }, { "epoch": 0.6192101018778924, "grad_norm": 0.9340200424194336, "learning_rate": 3.187968415383554e-05, "loss": 10.137592315673828, "step": 4775 }, { "epoch": 0.619339779386139, "grad_norm": 0.8808977603912354, "learning_rate": 3.186065272349193e-05, "loss": 7.625162601470947, "step": 4776 }, { "epoch": 0.6194694568943858, "grad_norm": 0.7522274851799011, "learning_rate": 3.184162431896661e-05, "loss": 8.169905662536621, "step": 4777 }, { "epoch": 0.6195991344026325, "grad_norm": 0.8203619718551636, "learning_rate": 3.182259894343371e-05, "loss": 10.471414566040039, "step": 4778 }, { "epoch": 0.6197288119108791, "grad_norm": 0.743886411190033, "learning_rate": 3.180357660006683e-05, "loss": 8.360481262207031, "step": 4779 }, { "epoch": 0.6198584894191258, "grad_norm": 0.8674502372741699, "learning_rate": 3.178455729203909e-05, "loss": 7.564647674560547, "step": 4780 }, { "epoch": 0.6199881669273725, "grad_norm": 1.032112717628479, "learning_rate": 3.176554102252309e-05, "loss": 10.515254974365234, "step": 4781 }, { "epoch": 0.6201178444356191, "grad_norm": 0.559445321559906, "learning_rate": 3.174652779469093e-05, "loss": 7.719876766204834, "step": 4782 }, { "epoch": 0.6202475219438659, "grad_norm": 0.862405002117157, "learning_rate": 3.17275176117142e-05, "loss": 7.604101657867432, "step": 4783 }, { "epoch": 0.6203771994521126, "grad_norm": 0.8438173532485962, "learning_rate": 3.1708510476763984e-05, "loss": 11.256324768066406, "step": 4784 }, { "epoch": 0.6205068769603592, "grad_norm": 1.0649142265319824, "learning_rate": 3.168950639301086e-05, "loss": 11.480265617370605, "step": 4785 }, { "epoch": 0.6206365544686059, "grad_norm": 0.7576923966407776, "learning_rate": 3.1670505363624876e-05, "loss": 7.113148212432861, "step": 4786 }, { "epoch": 0.6207662319768525, "grad_norm": 0.841801643371582, "learning_rate": 3.16515073917756e-05, "loss": 11.378801345825195, "step": 4787 }, { "epoch": 0.6208959094850992, "grad_norm": 1.259743571281433, "learning_rate": 3.163251248063207e-05, "loss": 11.591483116149902, "step": 4788 }, { "epoch": 0.6210255869933459, "grad_norm": 1.1153299808502197, "learning_rate": 3.161352063336283e-05, "loss": 10.987405776977539, "step": 4789 }, { "epoch": 0.6211552645015926, "grad_norm": 1.3093335628509521, "learning_rate": 3.15945318531359e-05, "loss": 13.973479270935059, "step": 4790 }, { "epoch": 0.6212849420098393, "grad_norm": 0.7605413198471069, "learning_rate": 3.157554614311879e-05, "loss": 8.270622253417969, "step": 4791 }, { "epoch": 0.621414619518086, "grad_norm": 0.9399951100349426, "learning_rate": 3.15565635064785e-05, "loss": 11.178701400756836, "step": 4792 }, { "epoch": 0.6215442970263326, "grad_norm": 0.7180965542793274, "learning_rate": 3.153758394638151e-05, "loss": 5.89818000793457, "step": 4793 }, { "epoch": 0.6216739745345793, "grad_norm": 0.8833959102630615, "learning_rate": 3.151860746599381e-05, "loss": 9.610138893127441, "step": 4794 }, { "epoch": 0.621803652042826, "grad_norm": 0.9428449273109436, "learning_rate": 3.149963406848086e-05, "loss": 12.669783592224121, "step": 4795 }, { "epoch": 0.6219333295510727, "grad_norm": 0.917892336845398, "learning_rate": 3.148066375700758e-05, "loss": 10.102529525756836, "step": 4796 }, { "epoch": 0.6220630070593194, "grad_norm": 0.9135167002677917, "learning_rate": 3.1461696534738425e-05, "loss": 9.932629585266113, "step": 4797 }, { "epoch": 0.622192684567566, "grad_norm": 0.9857338666915894, "learning_rate": 3.14427324048373e-05, "loss": 9.327312469482422, "step": 4798 }, { "epoch": 0.6223223620758127, "grad_norm": 1.1042686700820923, "learning_rate": 3.1423771370467616e-05, "loss": 11.043011665344238, "step": 4799 }, { "epoch": 0.6224520395840594, "grad_norm": 1.0626404285430908, "learning_rate": 3.140481343479225e-05, "loss": 9.013334274291992, "step": 4800 }, { "epoch": 0.622581717092306, "grad_norm": 0.865551233291626, "learning_rate": 3.138585860097356e-05, "loss": 8.9346342086792, "step": 4801 }, { "epoch": 0.6227113946005528, "grad_norm": 0.8214853405952454, "learning_rate": 3.136690687217341e-05, "loss": 10.178102493286133, "step": 4802 }, { "epoch": 0.6228410721087995, "grad_norm": 0.7063064575195312, "learning_rate": 3.134795825155313e-05, "loss": 5.098569869995117, "step": 4803 }, { "epoch": 0.6229707496170461, "grad_norm": 0.7143144011497498, "learning_rate": 3.132901274227352e-05, "loss": 12.051878929138184, "step": 4804 }, { "epoch": 0.6231004271252928, "grad_norm": 0.9882458448410034, "learning_rate": 3.131007034749489e-05, "loss": 9.9002685546875, "step": 4805 }, { "epoch": 0.6232301046335395, "grad_norm": 0.9568392634391785, "learning_rate": 3.1291131070377014e-05, "loss": 9.301193237304688, "step": 4806 }, { "epoch": 0.6233597821417861, "grad_norm": 1.0909018516540527, "learning_rate": 3.127219491407912e-05, "loss": 13.17979907989502, "step": 4807 }, { "epoch": 0.6234894596500328, "grad_norm": 0.8902022838592529, "learning_rate": 3.125326188175998e-05, "loss": 7.004453182220459, "step": 4808 }, { "epoch": 0.6236191371582795, "grad_norm": 0.8897815346717834, "learning_rate": 3.123433197657777e-05, "loss": 10.381818771362305, "step": 4809 }, { "epoch": 0.6237488146665262, "grad_norm": 1.1225181818008423, "learning_rate": 3.1215405201690186e-05, "loss": 10.891632080078125, "step": 4810 }, { "epoch": 0.6238784921747729, "grad_norm": 0.8336370587348938, "learning_rate": 3.119648156025442e-05, "loss": 8.959819793701172, "step": 4811 }, { "epoch": 0.6240081696830195, "grad_norm": 1.1078131198883057, "learning_rate": 3.1177561055427117e-05, "loss": 11.248960494995117, "step": 4812 }, { "epoch": 0.6241378471912662, "grad_norm": 1.024543285369873, "learning_rate": 3.1158643690364386e-05, "loss": 10.902965545654297, "step": 4813 }, { "epoch": 0.6242675246995129, "grad_norm": 0.8659440875053406, "learning_rate": 3.113972946822183e-05, "loss": 6.259806156158447, "step": 4814 }, { "epoch": 0.6243972022077596, "grad_norm": 0.8174930214881897, "learning_rate": 3.112081839215453e-05, "loss": 10.602813720703125, "step": 4815 }, { "epoch": 0.6245268797160063, "grad_norm": 0.8028150200843811, "learning_rate": 3.1101910465317016e-05, "loss": 8.913505554199219, "step": 4816 }, { "epoch": 0.624656557224253, "grad_norm": 1.0210355520248413, "learning_rate": 3.108300569086334e-05, "loss": 9.560986518859863, "step": 4817 }, { "epoch": 0.6247862347324996, "grad_norm": 0.962767481803894, "learning_rate": 3.106410407194699e-05, "loss": 8.15137004852295, "step": 4818 }, { "epoch": 0.6249159122407463, "grad_norm": 1.0292413234710693, "learning_rate": 3.1045205611720926e-05, "loss": 9.723268508911133, "step": 4819 }, { "epoch": 0.6250455897489929, "grad_norm": 0.9535102844238281, "learning_rate": 3.102631031333761e-05, "loss": 9.530670166015625, "step": 4820 }, { "epoch": 0.6251752672572396, "grad_norm": 0.8443005681037903, "learning_rate": 3.100741817994896e-05, "loss": 8.359664916992188, "step": 4821 }, { "epoch": 0.6253049447654864, "grad_norm": 0.9983400702476501, "learning_rate": 3.0988529214706354e-05, "loss": 13.33322525024414, "step": 4822 }, { "epoch": 0.625434622273733, "grad_norm": 1.0179630517959595, "learning_rate": 3.0969643420760655e-05, "loss": 9.808442115783691, "step": 4823 }, { "epoch": 0.6255642997819797, "grad_norm": 0.7447241544723511, "learning_rate": 3.0950760801262206e-05, "loss": 10.62268352508545, "step": 4824 }, { "epoch": 0.6256939772902264, "grad_norm": 0.9576059579849243, "learning_rate": 3.093188135936081e-05, "loss": 6.62615966796875, "step": 4825 }, { "epoch": 0.625823654798473, "grad_norm": 0.7723658084869385, "learning_rate": 3.0913005098205726e-05, "loss": 7.102057933807373, "step": 4826 }, { "epoch": 0.6259533323067197, "grad_norm": 1.054061770439148, "learning_rate": 3.089413202094572e-05, "loss": 9.90230941772461, "step": 4827 }, { "epoch": 0.6260830098149665, "grad_norm": 0.9595156311988831, "learning_rate": 3.087526213072898e-05, "loss": 10.19410514831543, "step": 4828 }, { "epoch": 0.6262126873232131, "grad_norm": 0.7530204653739929, "learning_rate": 3.0856395430703214e-05, "loss": 6.5859527587890625, "step": 4829 }, { "epoch": 0.6263423648314598, "grad_norm": 0.7679187059402466, "learning_rate": 3.0837531924015525e-05, "loss": 6.541668891906738, "step": 4830 }, { "epoch": 0.6264720423397064, "grad_norm": 1.0602666139602661, "learning_rate": 3.0818671613812574e-05, "loss": 12.271754264831543, "step": 4831 }, { "epoch": 0.6266017198479531, "grad_norm": 1.1552925109863281, "learning_rate": 3.0799814503240415e-05, "loss": 12.174210548400879, "step": 4832 }, { "epoch": 0.6267313973561998, "grad_norm": 0.8895775675773621, "learning_rate": 3.0780960595444605e-05, "loss": 9.724352836608887, "step": 4833 }, { "epoch": 0.6268610748644465, "grad_norm": 0.8807516694068909, "learning_rate": 3.0762109893570156e-05, "loss": 6.8243913650512695, "step": 4834 }, { "epoch": 0.6269907523726932, "grad_norm": 1.159224510192871, "learning_rate": 3.0743262400761554e-05, "loss": 12.845566749572754, "step": 4835 }, { "epoch": 0.6271204298809399, "grad_norm": 1.0720192193984985, "learning_rate": 3.072441812016273e-05, "loss": 8.835820198059082, "step": 4836 }, { "epoch": 0.6272501073891865, "grad_norm": 0.992239773273468, "learning_rate": 3.070557705491711e-05, "loss": 9.625040054321289, "step": 4837 }, { "epoch": 0.6273797848974332, "grad_norm": 0.89598149061203, "learning_rate": 3.068673920816755e-05, "loss": 8.4546480178833, "step": 4838 }, { "epoch": 0.6275094624056798, "grad_norm": 0.6121299862861633, "learning_rate": 3.0667904583056385e-05, "loss": 7.626204013824463, "step": 4839 }, { "epoch": 0.6276391399139265, "grad_norm": 0.7194380760192871, "learning_rate": 3.064907318272542e-05, "loss": 6.933538913726807, "step": 4840 }, { "epoch": 0.6277688174221733, "grad_norm": 0.7199390530586243, "learning_rate": 3.063024501031591e-05, "loss": 6.2643961906433105, "step": 4841 }, { "epoch": 0.6278984949304199, "grad_norm": 0.8535076379776001, "learning_rate": 3.0611420068968585e-05, "loss": 13.181070327758789, "step": 4842 }, { "epoch": 0.6280281724386666, "grad_norm": 0.8691511750221252, "learning_rate": 3.059259836182361e-05, "loss": 11.346120834350586, "step": 4843 }, { "epoch": 0.6281578499469133, "grad_norm": 1.0657325983047485, "learning_rate": 3.057377989202064e-05, "loss": 11.402166366577148, "step": 4844 }, { "epoch": 0.6282875274551599, "grad_norm": 0.8358966708183289, "learning_rate": 3.055496466269878e-05, "loss": 6.79878568649292, "step": 4845 }, { "epoch": 0.6284172049634066, "grad_norm": 0.9680917859077454, "learning_rate": 3.053615267699658e-05, "loss": 13.232285499572754, "step": 4846 }, { "epoch": 0.6285468824716534, "grad_norm": 1.4080721139907837, "learning_rate": 3.0517343938052073e-05, "loss": 13.193492889404297, "step": 4847 }, { "epoch": 0.6286765599799, "grad_norm": 0.9510368704795837, "learning_rate": 3.0498538449002713e-05, "loss": 9.89961051940918, "step": 4848 }, { "epoch": 0.6288062374881467, "grad_norm": 0.9317164421081543, "learning_rate": 3.0479736212985466e-05, "loss": 10.601844787597656, "step": 4849 }, { "epoch": 0.6289359149963933, "grad_norm": 0.902201235294342, "learning_rate": 3.0460937233136728e-05, "loss": 11.142145156860352, "step": 4850 }, { "epoch": 0.62906559250464, "grad_norm": 0.97474205493927, "learning_rate": 3.044214151259234e-05, "loss": 8.602822303771973, "step": 4851 }, { "epoch": 0.6291952700128867, "grad_norm": 0.8491238951683044, "learning_rate": 3.0423349054487615e-05, "loss": 10.569402694702148, "step": 4852 }, { "epoch": 0.6293249475211333, "grad_norm": 0.875068724155426, "learning_rate": 3.04045598619573e-05, "loss": 8.101827621459961, "step": 4853 }, { "epoch": 0.6294546250293801, "grad_norm": 0.8761893510818481, "learning_rate": 3.0385773938135633e-05, "loss": 7.634902477264404, "step": 4854 }, { "epoch": 0.6295843025376268, "grad_norm": 0.9581679701805115, "learning_rate": 3.0366991286156266e-05, "loss": 10.595166206359863, "step": 4855 }, { "epoch": 0.6297139800458734, "grad_norm": 1.1588034629821777, "learning_rate": 3.0348211909152347e-05, "loss": 9.892313957214355, "step": 4856 }, { "epoch": 0.6298436575541201, "grad_norm": 0.8025848269462585, "learning_rate": 3.032943581025645e-05, "loss": 9.764847755432129, "step": 4857 }, { "epoch": 0.6299733350623667, "grad_norm": 0.7016123533248901, "learning_rate": 3.0310662992600615e-05, "loss": 10.004192352294922, "step": 4858 }, { "epoch": 0.6301030125706134, "grad_norm": 0.8425335884094238, "learning_rate": 3.0291893459316312e-05, "loss": 11.27983570098877, "step": 4859 }, { "epoch": 0.6302326900788602, "grad_norm": 0.4764502942562103, "learning_rate": 3.0273127213534503e-05, "loss": 4.546441555023193, "step": 4860 }, { "epoch": 0.6303623675871068, "grad_norm": 0.6656271815299988, "learning_rate": 3.025436425838557e-05, "loss": 8.998265266418457, "step": 4861 }, { "epoch": 0.6304920450953535, "grad_norm": 0.9651265144348145, "learning_rate": 3.0235604596999345e-05, "loss": 9.726051330566406, "step": 4862 }, { "epoch": 0.6306217226036002, "grad_norm": 0.77359938621521, "learning_rate": 3.0216848232505136e-05, "loss": 9.394481658935547, "step": 4863 }, { "epoch": 0.6307514001118468, "grad_norm": 0.930255115032196, "learning_rate": 3.0198095168031677e-05, "loss": 9.181968688964844, "step": 4864 }, { "epoch": 0.6308810776200935, "grad_norm": 0.9341420531272888, "learning_rate": 3.017934540670716e-05, "loss": 8.104841232299805, "step": 4865 }, { "epoch": 0.6310107551283403, "grad_norm": 0.9026255011558533, "learning_rate": 3.016059895165923e-05, "loss": 10.287581443786621, "step": 4866 }, { "epoch": 0.6311404326365869, "grad_norm": 1.1371591091156006, "learning_rate": 3.0141855806014974e-05, "loss": 8.54751205444336, "step": 4867 }, { "epoch": 0.6312701101448336, "grad_norm": 1.107379674911499, "learning_rate": 3.012311597290094e-05, "loss": 8.772019386291504, "step": 4868 }, { "epoch": 0.6313997876530802, "grad_norm": 1.576286792755127, "learning_rate": 3.0104379455443092e-05, "loss": 8.792388916015625, "step": 4869 }, { "epoch": 0.6315294651613269, "grad_norm": 0.790498673915863, "learning_rate": 3.0085646256766882e-05, "loss": 8.318045616149902, "step": 4870 }, { "epoch": 0.6316591426695736, "grad_norm": 0.9416093826293945, "learning_rate": 3.006691637999718e-05, "loss": 10.758583068847656, "step": 4871 }, { "epoch": 0.6317888201778202, "grad_norm": 0.8132476210594177, "learning_rate": 3.0048189828258304e-05, "loss": 9.491302490234375, "step": 4872 }, { "epoch": 0.631918497686067, "grad_norm": 0.5956004858016968, "learning_rate": 3.002946660467404e-05, "loss": 7.042006969451904, "step": 4873 }, { "epoch": 0.6320481751943137, "grad_norm": 1.0024945735931396, "learning_rate": 3.0010746712367587e-05, "loss": 8.851435661315918, "step": 4874 }, { "epoch": 0.6321778527025603, "grad_norm": 0.6695559620857239, "learning_rate": 2.9992030154461614e-05, "loss": 6.850353240966797, "step": 4875 }, { "epoch": 0.632307530210807, "grad_norm": 1.1595653295516968, "learning_rate": 2.997331693407821e-05, "loss": 10.774730682373047, "step": 4876 }, { "epoch": 0.6324372077190537, "grad_norm": 0.6026251912117004, "learning_rate": 2.9954607054338944e-05, "loss": 10.028450965881348, "step": 4877 }, { "epoch": 0.6325668852273003, "grad_norm": 0.9124496579170227, "learning_rate": 2.993590051836478e-05, "loss": 8.174210548400879, "step": 4878 }, { "epoch": 0.6326965627355471, "grad_norm": 1.0157723426818848, "learning_rate": 2.991719732927617e-05, "loss": 9.043537139892578, "step": 4879 }, { "epoch": 0.6328262402437937, "grad_norm": 1.0515962839126587, "learning_rate": 2.9898497490192966e-05, "loss": 11.703956604003906, "step": 4880 }, { "epoch": 0.6329559177520404, "grad_norm": 1.149607539176941, "learning_rate": 2.9879801004234498e-05, "loss": 12.865161895751953, "step": 4881 }, { "epoch": 0.6330855952602871, "grad_norm": 0.731583297252655, "learning_rate": 2.9861107874519512e-05, "loss": 6.857309341430664, "step": 4882 }, { "epoch": 0.6332152727685337, "grad_norm": 0.7442476749420166, "learning_rate": 2.9842418104166208e-05, "loss": 8.153467178344727, "step": 4883 }, { "epoch": 0.6333449502767804, "grad_norm": 1.1192618608474731, "learning_rate": 2.9823731696292218e-05, "loss": 12.91268539428711, "step": 4884 }, { "epoch": 0.6334746277850271, "grad_norm": 0.893189013004303, "learning_rate": 2.980504865401461e-05, "loss": 7.514439582824707, "step": 4885 }, { "epoch": 0.6336043052932738, "grad_norm": 1.2482842206954956, "learning_rate": 2.9786368980449902e-05, "loss": 14.609513282775879, "step": 4886 }, { "epoch": 0.6337339828015205, "grad_norm": 0.832666277885437, "learning_rate": 2.976769267871403e-05, "loss": 8.914485931396484, "step": 4887 }, { "epoch": 0.6338636603097672, "grad_norm": 0.9219530820846558, "learning_rate": 2.9749019751922414e-05, "loss": 10.134851455688477, "step": 4888 }, { "epoch": 0.6339933378180138, "grad_norm": 1.045357584953308, "learning_rate": 2.973035020318985e-05, "loss": 11.793601989746094, "step": 4889 }, { "epoch": 0.6341230153262605, "grad_norm": 1.1490837335586548, "learning_rate": 2.9711684035630605e-05, "loss": 13.997142791748047, "step": 4890 }, { "epoch": 0.6342526928345071, "grad_norm": 0.6633259654045105, "learning_rate": 2.9693021252358382e-05, "loss": 6.252723217010498, "step": 4891 }, { "epoch": 0.6343823703427539, "grad_norm": 0.7729999423027039, "learning_rate": 2.967436185648631e-05, "loss": 7.579959392547607, "step": 4892 }, { "epoch": 0.6345120478510006, "grad_norm": 0.8678035736083984, "learning_rate": 2.9655705851126957e-05, "loss": 10.358784675598145, "step": 4893 }, { "epoch": 0.6346417253592472, "grad_norm": 1.1584914922714233, "learning_rate": 2.963705323939232e-05, "loss": 11.617799758911133, "step": 4894 }, { "epoch": 0.6347714028674939, "grad_norm": 0.7468295693397522, "learning_rate": 2.9618404024393843e-05, "loss": 8.95218563079834, "step": 4895 }, { "epoch": 0.6349010803757406, "grad_norm": 0.8581597805023193, "learning_rate": 2.959975820924239e-05, "loss": 6.926490783691406, "step": 4896 }, { "epoch": 0.6350307578839872, "grad_norm": 0.8725395202636719, "learning_rate": 2.9581115797048264e-05, "loss": 6.936730861663818, "step": 4897 }, { "epoch": 0.635160435392234, "grad_norm": 0.9743935465812683, "learning_rate": 2.9562476790921197e-05, "loss": 7.264726638793945, "step": 4898 }, { "epoch": 0.6352901129004807, "grad_norm": 0.5957101583480835, "learning_rate": 2.9543841193970357e-05, "loss": 8.760034561157227, "step": 4899 }, { "epoch": 0.6354197904087273, "grad_norm": 0.841722309589386, "learning_rate": 2.9525209009304343e-05, "loss": 7.323392868041992, "step": 4900 }, { "epoch": 0.635549467916974, "grad_norm": 1.0691653490066528, "learning_rate": 2.950658024003119e-05, "loss": 8.904861450195312, "step": 4901 }, { "epoch": 0.6356791454252206, "grad_norm": 1.1544458866119385, "learning_rate": 2.948795488925834e-05, "loss": 11.23766040802002, "step": 4902 }, { "epoch": 0.6358088229334673, "grad_norm": 0.97279953956604, "learning_rate": 2.946933296009269e-05, "loss": 9.824674606323242, "step": 4903 }, { "epoch": 0.635938500441714, "grad_norm": 1.2466844320297241, "learning_rate": 2.945071445564056e-05, "loss": 11.194999694824219, "step": 4904 }, { "epoch": 0.6360681779499607, "grad_norm": 0.8229658603668213, "learning_rate": 2.94320993790077e-05, "loss": 9.34874439239502, "step": 4905 }, { "epoch": 0.6361978554582074, "grad_norm": 1.075683355331421, "learning_rate": 2.9413487733299272e-05, "loss": 12.412837028503418, "step": 4906 }, { "epoch": 0.6363275329664541, "grad_norm": 0.9112797379493713, "learning_rate": 2.939487952161989e-05, "loss": 11.2381591796875, "step": 4907 }, { "epoch": 0.6364572104747007, "grad_norm": 0.9770417809486389, "learning_rate": 2.9376274747073574e-05, "loss": 11.230798721313477, "step": 4908 }, { "epoch": 0.6365868879829474, "grad_norm": 0.6842162609100342, "learning_rate": 2.9357673412763788e-05, "loss": 8.837505340576172, "step": 4909 }, { "epoch": 0.636716565491194, "grad_norm": 0.7173904776573181, "learning_rate": 2.9339075521793407e-05, "loss": 8.47362232208252, "step": 4910 }, { "epoch": 0.6368462429994408, "grad_norm": 0.9586758613586426, "learning_rate": 2.9320481077264746e-05, "loss": 10.479453086853027, "step": 4911 }, { "epoch": 0.6369759205076875, "grad_norm": 0.6618956923484802, "learning_rate": 2.930189008227953e-05, "loss": 6.160170555114746, "step": 4912 }, { "epoch": 0.6371055980159341, "grad_norm": 0.6505143046379089, "learning_rate": 2.928330253993893e-05, "loss": 5.106342315673828, "step": 4913 }, { "epoch": 0.6372352755241808, "grad_norm": 0.9434051513671875, "learning_rate": 2.926471845334351e-05, "loss": 10.441506385803223, "step": 4914 }, { "epoch": 0.6373649530324275, "grad_norm": 1.0155963897705078, "learning_rate": 2.9246137825593285e-05, "loss": 13.1731595993042, "step": 4915 }, { "epoch": 0.6374946305406741, "grad_norm": 0.6696951985359192, "learning_rate": 2.922756065978769e-05, "loss": 8.624177932739258, "step": 4916 }, { "epoch": 0.6376243080489208, "grad_norm": 1.040756106376648, "learning_rate": 2.9208986959025563e-05, "loss": 9.302696228027344, "step": 4917 }, { "epoch": 0.6377539855571676, "grad_norm": 0.9543461799621582, "learning_rate": 2.9190416726405167e-05, "loss": 10.721558570861816, "step": 4918 }, { "epoch": 0.6378836630654142, "grad_norm": 0.7193475961685181, "learning_rate": 2.9171849965024235e-05, "loss": 7.544010639190674, "step": 4919 }, { "epoch": 0.6380133405736609, "grad_norm": 0.9058685302734375, "learning_rate": 2.915328667797983e-05, "loss": 9.361337661743164, "step": 4920 }, { "epoch": 0.6381430180819075, "grad_norm": 0.8561416268348694, "learning_rate": 2.913472686836854e-05, "loss": 8.990403175354004, "step": 4921 }, { "epoch": 0.6382726955901542, "grad_norm": 0.7861694693565369, "learning_rate": 2.911617053928627e-05, "loss": 10.71741771697998, "step": 4922 }, { "epoch": 0.6384023730984009, "grad_norm": 1.3373968601226807, "learning_rate": 2.909761769382845e-05, "loss": 9.807299613952637, "step": 4923 }, { "epoch": 0.6385320506066476, "grad_norm": 0.6265674233436584, "learning_rate": 2.907906833508981e-05, "loss": 8.553878784179688, "step": 4924 }, { "epoch": 0.6386617281148943, "grad_norm": 0.7921228408813477, "learning_rate": 2.9060522466164607e-05, "loss": 9.574156761169434, "step": 4925 }, { "epoch": 0.638791405623141, "grad_norm": 0.9915903806686401, "learning_rate": 2.9041980090146477e-05, "loss": 10.763733863830566, "step": 4926 }, { "epoch": 0.6389210831313876, "grad_norm": 0.9241407513618469, "learning_rate": 2.9023441210128433e-05, "loss": 11.962875366210938, "step": 4927 }, { "epoch": 0.6390507606396343, "grad_norm": 0.7246354222297668, "learning_rate": 2.9004905829202973e-05, "loss": 7.155545234680176, "step": 4928 }, { "epoch": 0.639180438147881, "grad_norm": 0.9442535042762756, "learning_rate": 2.8986373950461942e-05, "loss": 9.15970230102539, "step": 4929 }, { "epoch": 0.6393101156561277, "grad_norm": 1.096835970878601, "learning_rate": 2.8967845576996672e-05, "loss": 13.186769485473633, "step": 4930 }, { "epoch": 0.6394397931643744, "grad_norm": 0.9373850226402283, "learning_rate": 2.894932071189784e-05, "loss": 10.926410675048828, "step": 4931 }, { "epoch": 0.639569470672621, "grad_norm": 0.7903071641921997, "learning_rate": 2.8930799358255612e-05, "loss": 6.80826473236084, "step": 4932 }, { "epoch": 0.6396991481808677, "grad_norm": 0.9121434092521667, "learning_rate": 2.891228151915949e-05, "loss": 8.33617115020752, "step": 4933 }, { "epoch": 0.6398288256891144, "grad_norm": 0.8326963782310486, "learning_rate": 2.8893767197698463e-05, "loss": 8.170631408691406, "step": 4934 }, { "epoch": 0.639958503197361, "grad_norm": 0.7036893963813782, "learning_rate": 2.887525639696086e-05, "loss": 5.594693660736084, "step": 4935 }, { "epoch": 0.6400881807056077, "grad_norm": 0.7458931803703308, "learning_rate": 2.8856749120034504e-05, "loss": 10.02328872680664, "step": 4936 }, { "epoch": 0.6402178582138545, "grad_norm": 1.0629743337631226, "learning_rate": 2.8838245370006544e-05, "loss": 13.101361274719238, "step": 4937 }, { "epoch": 0.6403475357221011, "grad_norm": 1.1279175281524658, "learning_rate": 2.8819745149963627e-05, "loss": 9.412561416625977, "step": 4938 }, { "epoch": 0.6404772132303478, "grad_norm": 0.8566469550132751, "learning_rate": 2.8801248462991732e-05, "loss": 11.724859237670898, "step": 4939 }, { "epoch": 0.6406068907385944, "grad_norm": 1.3327172994613647, "learning_rate": 2.8782755312176312e-05, "loss": 14.820533752441406, "step": 4940 }, { "epoch": 0.6407365682468411, "grad_norm": 1.1450903415679932, "learning_rate": 2.8764265700602177e-05, "loss": 11.442119598388672, "step": 4941 }, { "epoch": 0.6408662457550878, "grad_norm": 0.9973031878471375, "learning_rate": 2.874577963135361e-05, "loss": 8.679819107055664, "step": 4942 }, { "epoch": 0.6409959232633345, "grad_norm": 0.8335708975791931, "learning_rate": 2.8727297107514218e-05, "loss": 10.262434005737305, "step": 4943 }, { "epoch": 0.6411256007715812, "grad_norm": 0.9520320296287537, "learning_rate": 2.8708818132167116e-05, "loss": 10.952750205993652, "step": 4944 }, { "epoch": 0.6412552782798279, "grad_norm": 1.13309645652771, "learning_rate": 2.8690342708394725e-05, "loss": 13.881988525390625, "step": 4945 }, { "epoch": 0.6413849557880745, "grad_norm": 1.0818530321121216, "learning_rate": 2.8671870839278964e-05, "loss": 9.432482719421387, "step": 4946 }, { "epoch": 0.6415146332963212, "grad_norm": 1.1389007568359375, "learning_rate": 2.8653402527901085e-05, "loss": 11.240142822265625, "step": 4947 }, { "epoch": 0.6416443108045679, "grad_norm": 1.1306487321853638, "learning_rate": 2.8634937777341825e-05, "loss": 10.278396606445312, "step": 4948 }, { "epoch": 0.6417739883128145, "grad_norm": 0.561013400554657, "learning_rate": 2.861647659068123e-05, "loss": 5.148392677307129, "step": 4949 }, { "epoch": 0.6419036658210613, "grad_norm": 1.144761562347412, "learning_rate": 2.8598018970998857e-05, "loss": 6.433663368225098, "step": 4950 }, { "epoch": 0.642033343329308, "grad_norm": 0.9251754879951477, "learning_rate": 2.8579564921373568e-05, "loss": 10.802090644836426, "step": 4951 }, { "epoch": 0.6421630208375546, "grad_norm": 1.1159182786941528, "learning_rate": 2.8561114444883718e-05, "loss": 11.859102249145508, "step": 4952 }, { "epoch": 0.6422926983458013, "grad_norm": 0.8710925579071045, "learning_rate": 2.8542667544606982e-05, "loss": 6.247552871704102, "step": 4953 }, { "epoch": 0.6424223758540479, "grad_norm": 0.9411748051643372, "learning_rate": 2.8524224223620523e-05, "loss": 11.096647262573242, "step": 4954 }, { "epoch": 0.6425520533622946, "grad_norm": 0.7330266833305359, "learning_rate": 2.850578448500083e-05, "loss": 8.638396263122559, "step": 4955 }, { "epoch": 0.6426817308705414, "grad_norm": 1.3563909530639648, "learning_rate": 2.8487348331823858e-05, "loss": 8.841400146484375, "step": 4956 }, { "epoch": 0.642811408378788, "grad_norm": 0.7221941947937012, "learning_rate": 2.8468915767164915e-05, "loss": 7.020867824554443, "step": 4957 }, { "epoch": 0.6429410858870347, "grad_norm": 1.1139798164367676, "learning_rate": 2.845048679409875e-05, "loss": 8.897643089294434, "step": 4958 }, { "epoch": 0.6430707633952814, "grad_norm": 1.0243382453918457, "learning_rate": 2.8432061415699486e-05, "loss": 10.782678604125977, "step": 4959 }, { "epoch": 0.643200440903528, "grad_norm": 0.7735705971717834, "learning_rate": 2.8413639635040623e-05, "loss": 9.386479377746582, "step": 4960 }, { "epoch": 0.6433301184117747, "grad_norm": 1.1427842378616333, "learning_rate": 2.8395221455195143e-05, "loss": 12.483131408691406, "step": 4961 }, { "epoch": 0.6434597959200214, "grad_norm": 0.6342519521713257, "learning_rate": 2.837680687923533e-05, "loss": 7.350094318389893, "step": 4962 }, { "epoch": 0.6435894734282681, "grad_norm": 0.8449109196662903, "learning_rate": 2.835839591023296e-05, "loss": 10.62481689453125, "step": 4963 }, { "epoch": 0.6437191509365148, "grad_norm": 0.8304232954978943, "learning_rate": 2.8339988551259095e-05, "loss": 10.72813892364502, "step": 4964 }, { "epoch": 0.6438488284447614, "grad_norm": 0.872793436050415, "learning_rate": 2.8321584805384337e-05, "loss": 8.182371139526367, "step": 4965 }, { "epoch": 0.6439785059530081, "grad_norm": 0.8248372673988342, "learning_rate": 2.8303184675678558e-05, "loss": 11.151629447937012, "step": 4966 }, { "epoch": 0.6441081834612548, "grad_norm": 0.8376092314720154, "learning_rate": 2.8284788165211108e-05, "loss": 9.633890151977539, "step": 4967 }, { "epoch": 0.6442378609695014, "grad_norm": 0.783057689666748, "learning_rate": 2.8266395277050668e-05, "loss": 7.290022850036621, "step": 4968 }, { "epoch": 0.6443675384777482, "grad_norm": 0.8000997304916382, "learning_rate": 2.824800601426538e-05, "loss": 7.152478218078613, "step": 4969 }, { "epoch": 0.6444972159859949, "grad_norm": 0.9592974185943604, "learning_rate": 2.8229620379922727e-05, "loss": 8.112422943115234, "step": 4970 }, { "epoch": 0.6446268934942415, "grad_norm": 0.8664504289627075, "learning_rate": 2.8211238377089645e-05, "loss": 9.046710968017578, "step": 4971 }, { "epoch": 0.6447565710024882, "grad_norm": 0.6614881157875061, "learning_rate": 2.8192860008832378e-05, "loss": 6.601409912109375, "step": 4972 }, { "epoch": 0.6448862485107348, "grad_norm": 1.034663438796997, "learning_rate": 2.8174485278216667e-05, "loss": 6.7523345947265625, "step": 4973 }, { "epoch": 0.6450159260189815, "grad_norm": 0.8873386383056641, "learning_rate": 2.815611418830755e-05, "loss": 9.096787452697754, "step": 4974 }, { "epoch": 0.6451456035272283, "grad_norm": 0.9234120845794678, "learning_rate": 2.8137746742169546e-05, "loss": 7.464312553405762, "step": 4975 }, { "epoch": 0.6452752810354749, "grad_norm": 0.975204348564148, "learning_rate": 2.811938294286648e-05, "loss": 7.292767524719238, "step": 4976 }, { "epoch": 0.6454049585437216, "grad_norm": 1.0248658657073975, "learning_rate": 2.810102279346165e-05, "loss": 10.779975891113281, "step": 4977 }, { "epoch": 0.6455346360519683, "grad_norm": 0.580348789691925, "learning_rate": 2.8082666297017668e-05, "loss": 7.711015701293945, "step": 4978 }, { "epoch": 0.6456643135602149, "grad_norm": 0.9445729851722717, "learning_rate": 2.8064313456596607e-05, "loss": 10.698928833007812, "step": 4979 }, { "epoch": 0.6457939910684616, "grad_norm": 0.8596693277359009, "learning_rate": 2.8045964275259874e-05, "loss": 12.463884353637695, "step": 4980 }, { "epoch": 0.6459236685767082, "grad_norm": 0.7258192896842957, "learning_rate": 2.802761875606831e-05, "loss": 9.3995943069458, "step": 4981 }, { "epoch": 0.646053346084955, "grad_norm": 0.6543010473251343, "learning_rate": 2.8009276902082127e-05, "loss": 9.58934497833252, "step": 4982 }, { "epoch": 0.6461830235932017, "grad_norm": 1.0345107316970825, "learning_rate": 2.799093871636088e-05, "loss": 8.972929954528809, "step": 4983 }, { "epoch": 0.6463127011014483, "grad_norm": 0.7218173742294312, "learning_rate": 2.7972604201963613e-05, "loss": 7.120004653930664, "step": 4984 }, { "epoch": 0.646442378609695, "grad_norm": 0.739714503288269, "learning_rate": 2.7954273361948658e-05, "loss": 7.874845027923584, "step": 4985 }, { "epoch": 0.6465720561179417, "grad_norm": 0.8482235670089722, "learning_rate": 2.793594619937381e-05, "loss": 7.768548965454102, "step": 4986 }, { "epoch": 0.6467017336261883, "grad_norm": 0.9597770571708679, "learning_rate": 2.7917622717296172e-05, "loss": 10.975621223449707, "step": 4987 }, { "epoch": 0.6468314111344351, "grad_norm": 0.8744484782218933, "learning_rate": 2.789930291877233e-05, "loss": 7.765430450439453, "step": 4988 }, { "epoch": 0.6469610886426818, "grad_norm": 0.7292494177818298, "learning_rate": 2.7880986806858156e-05, "loss": 9.46678352355957, "step": 4989 }, { "epoch": 0.6470907661509284, "grad_norm": 1.1527798175811768, "learning_rate": 2.7862674384609e-05, "loss": 12.547521591186523, "step": 4990 }, { "epoch": 0.6472204436591751, "grad_norm": 0.864955723285675, "learning_rate": 2.7844365655079508e-05, "loss": 8.3906831741333, "step": 4991 }, { "epoch": 0.6473501211674217, "grad_norm": 0.8883989453315735, "learning_rate": 2.7826060621323785e-05, "loss": 10.931571960449219, "step": 4992 }, { "epoch": 0.6474797986756684, "grad_norm": 0.7853997349739075, "learning_rate": 2.7807759286395256e-05, "loss": 7.775432586669922, "step": 4993 }, { "epoch": 0.6476094761839152, "grad_norm": 0.962685763835907, "learning_rate": 2.7789461653346804e-05, "loss": 10.016670227050781, "step": 4994 }, { "epoch": 0.6477391536921618, "grad_norm": 1.018371343612671, "learning_rate": 2.7771167725230608e-05, "loss": 10.302947044372559, "step": 4995 }, { "epoch": 0.6478688312004085, "grad_norm": 1.1601839065551758, "learning_rate": 2.77528775050983e-05, "loss": 11.47224235534668, "step": 4996 }, { "epoch": 0.6479985087086552, "grad_norm": 1.0047402381896973, "learning_rate": 2.773459099600084e-05, "loss": 7.443029880523682, "step": 4997 }, { "epoch": 0.6481281862169018, "grad_norm": 1.0662261247634888, "learning_rate": 2.771630820098863e-05, "loss": 11.297884941101074, "step": 4998 }, { "epoch": 0.6482578637251485, "grad_norm": 0.7828722596168518, "learning_rate": 2.7698029123111374e-05, "loss": 7.9994330406188965, "step": 4999 }, { "epoch": 0.6483875412333951, "grad_norm": 1.0675060749053955, "learning_rate": 2.7679753765418243e-05, "loss": 10.449555397033691, "step": 5000 }, { "epoch": 0.6485172187416419, "grad_norm": 0.910192608833313, "learning_rate": 2.7661482130957694e-05, "loss": 7.850570201873779, "step": 5001 }, { "epoch": 0.6486468962498886, "grad_norm": 0.892307460308075, "learning_rate": 2.7643214222777646e-05, "loss": 8.858000755310059, "step": 5002 }, { "epoch": 0.6487765737581352, "grad_norm": 0.8638333678245544, "learning_rate": 2.7624950043925357e-05, "loss": 9.007485389709473, "step": 5003 }, { "epoch": 0.6489062512663819, "grad_norm": 0.733394980430603, "learning_rate": 2.7606689597447484e-05, "loss": 8.486174583435059, "step": 5004 }, { "epoch": 0.6490359287746286, "grad_norm": 0.9057901501655579, "learning_rate": 2.7588432886390024e-05, "loss": 8.697627067565918, "step": 5005 }, { "epoch": 0.6491656062828752, "grad_norm": 1.0073786973953247, "learning_rate": 2.7570179913798366e-05, "loss": 8.925445556640625, "step": 5006 }, { "epoch": 0.649295283791122, "grad_norm": 1.0342230796813965, "learning_rate": 2.755193068271731e-05, "loss": 8.203510284423828, "step": 5007 }, { "epoch": 0.6494249612993687, "grad_norm": 0.6573958992958069, "learning_rate": 2.753368519619096e-05, "loss": 6.324462890625, "step": 5008 }, { "epoch": 0.6495546388076153, "grad_norm": 1.066753625869751, "learning_rate": 2.75154434572629e-05, "loss": 10.172144889831543, "step": 5009 }, { "epoch": 0.649684316315862, "grad_norm": 0.5162827372550964, "learning_rate": 2.7497205468975968e-05, "loss": 5.267126560211182, "step": 5010 }, { "epoch": 0.6498139938241086, "grad_norm": 0.6829078197479248, "learning_rate": 2.747897123437248e-05, "loss": 7.0276031494140625, "step": 5011 }, { "epoch": 0.6499436713323553, "grad_norm": 0.9758620858192444, "learning_rate": 2.7460740756494052e-05, "loss": 9.933566093444824, "step": 5012 }, { "epoch": 0.6500733488406021, "grad_norm": 1.0307526588439941, "learning_rate": 2.7442514038381735e-05, "loss": 12.793781280517578, "step": 5013 }, { "epoch": 0.6502030263488487, "grad_norm": 0.9246707558631897, "learning_rate": 2.742429108307588e-05, "loss": 10.910511016845703, "step": 5014 }, { "epoch": 0.6503327038570954, "grad_norm": 1.0197683572769165, "learning_rate": 2.7406071893616303e-05, "loss": 10.081451416015625, "step": 5015 }, { "epoch": 0.6504623813653421, "grad_norm": 1.195848822593689, "learning_rate": 2.7387856473042096e-05, "loss": 11.892945289611816, "step": 5016 }, { "epoch": 0.6505920588735887, "grad_norm": 0.7520760297775269, "learning_rate": 2.7369644824391794e-05, "loss": 8.31669807434082, "step": 5017 }, { "epoch": 0.6507217363818354, "grad_norm": 0.734954297542572, "learning_rate": 2.7351436950703257e-05, "loss": 9.23308277130127, "step": 5018 }, { "epoch": 0.650851413890082, "grad_norm": 0.7211245894432068, "learning_rate": 2.7333232855013758e-05, "loss": 9.090690612792969, "step": 5019 }, { "epoch": 0.6509810913983288, "grad_norm": 1.056696891784668, "learning_rate": 2.7315032540359887e-05, "loss": 11.04392147064209, "step": 5020 }, { "epoch": 0.6511107689065755, "grad_norm": 0.8496816158294678, "learning_rate": 2.729683600977766e-05, "loss": 9.097342491149902, "step": 5021 }, { "epoch": 0.6512404464148221, "grad_norm": 0.8631885647773743, "learning_rate": 2.7278643266302406e-05, "loss": 8.048078536987305, "step": 5022 }, { "epoch": 0.6513701239230688, "grad_norm": 0.8663167357444763, "learning_rate": 2.7260454312968888e-05, "loss": 11.7732572555542, "step": 5023 }, { "epoch": 0.6514998014313155, "grad_norm": 1.1427757740020752, "learning_rate": 2.724226915281115e-05, "loss": 9.36408805847168, "step": 5024 }, { "epoch": 0.6516294789395621, "grad_norm": 0.9137476682662964, "learning_rate": 2.7224087788862706e-05, "loss": 9.100711822509766, "step": 5025 }, { "epoch": 0.6517591564478089, "grad_norm": 1.1989600658416748, "learning_rate": 2.7205910224156326e-05, "loss": 13.236783981323242, "step": 5026 }, { "epoch": 0.6518888339560556, "grad_norm": 0.7126905918121338, "learning_rate": 2.7187736461724262e-05, "loss": 8.568026542663574, "step": 5027 }, { "epoch": 0.6520185114643022, "grad_norm": 0.8783485889434814, "learning_rate": 2.716956650459802e-05, "loss": 11.319190979003906, "step": 5028 }, { "epoch": 0.6521481889725489, "grad_norm": 1.0747445821762085, "learning_rate": 2.7151400355808572e-05, "loss": 11.857959747314453, "step": 5029 }, { "epoch": 0.6522778664807956, "grad_norm": 0.9589406251907349, "learning_rate": 2.7133238018386165e-05, "loss": 10.102189064025879, "step": 5030 }, { "epoch": 0.6524075439890422, "grad_norm": 0.8294435739517212, "learning_rate": 2.7115079495360483e-05, "loss": 10.310846328735352, "step": 5031 }, { "epoch": 0.6525372214972889, "grad_norm": 0.9098931550979614, "learning_rate": 2.7096924789760513e-05, "loss": 9.351471900939941, "step": 5032 }, { "epoch": 0.6526668990055357, "grad_norm": 1.0710395574569702, "learning_rate": 2.707877390461468e-05, "loss": 7.982139587402344, "step": 5033 }, { "epoch": 0.6527965765137823, "grad_norm": 0.823407769203186, "learning_rate": 2.7060626842950676e-05, "loss": 6.783425807952881, "step": 5034 }, { "epoch": 0.652926254022029, "grad_norm": 0.7276312112808228, "learning_rate": 2.7042483607795648e-05, "loss": 8.745603561401367, "step": 5035 }, { "epoch": 0.6530559315302756, "grad_norm": 1.4821778535842896, "learning_rate": 2.7024344202176026e-05, "loss": 14.11630630493164, "step": 5036 }, { "epoch": 0.6531856090385223, "grad_norm": 0.8635435700416565, "learning_rate": 2.700620862911768e-05, "loss": 8.975055694580078, "step": 5037 }, { "epoch": 0.653315286546769, "grad_norm": 0.7665860652923584, "learning_rate": 2.6988076891645753e-05, "loss": 9.46064567565918, "step": 5038 }, { "epoch": 0.6534449640550157, "grad_norm": 0.6851433515548706, "learning_rate": 2.6969948992784833e-05, "loss": 6.974678039550781, "step": 5039 }, { "epoch": 0.6535746415632624, "grad_norm": 0.5378909111022949, "learning_rate": 2.69518249355588e-05, "loss": 7.09033727645874, "step": 5040 }, { "epoch": 0.653704319071509, "grad_norm": 0.7402740716934204, "learning_rate": 2.6933704722990925e-05, "loss": 8.900596618652344, "step": 5041 }, { "epoch": 0.6538339965797557, "grad_norm": 0.9061734080314636, "learning_rate": 2.691558835810387e-05, "loss": 8.560454368591309, "step": 5042 }, { "epoch": 0.6539636740880024, "grad_norm": 0.9853019118309021, "learning_rate": 2.6897475843919572e-05, "loss": 10.802488327026367, "step": 5043 }, { "epoch": 0.654093351596249, "grad_norm": 1.0944666862487793, "learning_rate": 2.6879367183459413e-05, "loss": 15.048016548156738, "step": 5044 }, { "epoch": 0.6542230291044958, "grad_norm": 0.9493936896324158, "learning_rate": 2.6861262379744056e-05, "loss": 9.93271255493164, "step": 5045 }, { "epoch": 0.6543527066127425, "grad_norm": 0.9678588509559631, "learning_rate": 2.6843161435793597e-05, "loss": 11.44360065460205, "step": 5046 }, { "epoch": 0.6544823841209891, "grad_norm": 0.8505994081497192, "learning_rate": 2.6825064354627406e-05, "loss": 6.805205821990967, "step": 5047 }, { "epoch": 0.6546120616292358, "grad_norm": 0.5576269030570984, "learning_rate": 2.680697113926428e-05, "loss": 6.286295413970947, "step": 5048 }, { "epoch": 0.6547417391374825, "grad_norm": 1.0115851163864136, "learning_rate": 2.678888179272232e-05, "loss": 9.10064697265625, "step": 5049 }, { "epoch": 0.6548714166457291, "grad_norm": 0.8352214694023132, "learning_rate": 2.6770796318019042e-05, "loss": 5.923681259155273, "step": 5050 }, { "epoch": 0.6550010941539758, "grad_norm": 0.8651370406150818, "learning_rate": 2.6752714718171223e-05, "loss": 7.599997043609619, "step": 5051 }, { "epoch": 0.6551307716622226, "grad_norm": 0.9929762482643127, "learning_rate": 2.6734636996195105e-05, "loss": 11.042423248291016, "step": 5052 }, { "epoch": 0.6552604491704692, "grad_norm": 1.0041697025299072, "learning_rate": 2.671656315510618e-05, "loss": 7.594548225402832, "step": 5053 }, { "epoch": 0.6553901266787159, "grad_norm": 0.902812659740448, "learning_rate": 2.669849319791937e-05, "loss": 7.978048324584961, "step": 5054 }, { "epoch": 0.6555198041869625, "grad_norm": 0.9747381210327148, "learning_rate": 2.66804271276489e-05, "loss": 8.537566184997559, "step": 5055 }, { "epoch": 0.6556494816952092, "grad_norm": 0.9821406602859497, "learning_rate": 2.6662364947308383e-05, "loss": 8.157307624816895, "step": 5056 }, { "epoch": 0.6557791592034559, "grad_norm": 1.0530469417572021, "learning_rate": 2.6644306659910733e-05, "loss": 10.276403427124023, "step": 5057 }, { "epoch": 0.6559088367117026, "grad_norm": 1.0258435010910034, "learning_rate": 2.6626252268468294e-05, "loss": 9.794341087341309, "step": 5058 }, { "epoch": 0.6560385142199493, "grad_norm": 1.2322875261306763, "learning_rate": 2.6608201775992663e-05, "loss": 7.38394832611084, "step": 5059 }, { "epoch": 0.656168191728196, "grad_norm": 0.794636070728302, "learning_rate": 2.659015518549488e-05, "loss": 9.520535469055176, "step": 5060 }, { "epoch": 0.6562978692364426, "grad_norm": 0.8277735710144043, "learning_rate": 2.657211249998525e-05, "loss": 9.34242057800293, "step": 5061 }, { "epoch": 0.6564275467446893, "grad_norm": 0.7772183418273926, "learning_rate": 2.6554073722473517e-05, "loss": 7.832113742828369, "step": 5062 }, { "epoch": 0.656557224252936, "grad_norm": 0.683660626411438, "learning_rate": 2.653603885596866e-05, "loss": 7.128417015075684, "step": 5063 }, { "epoch": 0.6566869017611826, "grad_norm": 0.7789179682731628, "learning_rate": 2.651800790347913e-05, "loss": 9.994285583496094, "step": 5064 }, { "epoch": 0.6568165792694294, "grad_norm": 1.1245009899139404, "learning_rate": 2.649998086801262e-05, "loss": 8.060342788696289, "step": 5065 }, { "epoch": 0.656946256777676, "grad_norm": 0.7895206809043884, "learning_rate": 2.648195775257625e-05, "loss": 10.538413047790527, "step": 5066 }, { "epoch": 0.6570759342859227, "grad_norm": 1.0634512901306152, "learning_rate": 2.6463938560176403e-05, "loss": 11.033185958862305, "step": 5067 }, { "epoch": 0.6572056117941694, "grad_norm": 1.1050382852554321, "learning_rate": 2.6445923293818907e-05, "loss": 10.430862426757812, "step": 5068 }, { "epoch": 0.657335289302416, "grad_norm": 1.6179654598236084, "learning_rate": 2.6427911956508854e-05, "loss": 13.572032928466797, "step": 5069 }, { "epoch": 0.6574649668106627, "grad_norm": 1.0742684602737427, "learning_rate": 2.6409904551250696e-05, "loss": 11.130098342895508, "step": 5070 }, { "epoch": 0.6575946443189095, "grad_norm": 1.0888596773147583, "learning_rate": 2.639190108104828e-05, "loss": 11.318507194519043, "step": 5071 }, { "epoch": 0.6577243218271561, "grad_norm": 0.9010010361671448, "learning_rate": 2.637390154890471e-05, "loss": 11.683945655822754, "step": 5072 }, { "epoch": 0.6578539993354028, "grad_norm": 1.0276583433151245, "learning_rate": 2.6355905957822536e-05, "loss": 6.398054599761963, "step": 5073 }, { "epoch": 0.6579836768436494, "grad_norm": 0.9731137156486511, "learning_rate": 2.6337914310803547e-05, "loss": 10.380642890930176, "step": 5074 }, { "epoch": 0.6581133543518961, "grad_norm": 0.8205057382583618, "learning_rate": 2.6319926610848967e-05, "loss": 7.232493877410889, "step": 5075 }, { "epoch": 0.6582430318601428, "grad_norm": 0.9710333943367004, "learning_rate": 2.630194286095927e-05, "loss": 12.165815353393555, "step": 5076 }, { "epoch": 0.6583727093683895, "grad_norm": 0.8707737326622009, "learning_rate": 2.628396306413437e-05, "loss": 10.044084548950195, "step": 5077 }, { "epoch": 0.6585023868766362, "grad_norm": 0.9387595653533936, "learning_rate": 2.6265987223373423e-05, "loss": 11.945396423339844, "step": 5078 }, { "epoch": 0.6586320643848829, "grad_norm": 1.1415016651153564, "learning_rate": 2.6248015341675003e-05, "loss": 13.996502876281738, "step": 5079 }, { "epoch": 0.6587617418931295, "grad_norm": 0.8575733304023743, "learning_rate": 2.6230047422036978e-05, "loss": 11.713789939880371, "step": 5080 }, { "epoch": 0.6588914194013762, "grad_norm": 0.9202203154563904, "learning_rate": 2.6212083467456605e-05, "loss": 11.27547836303711, "step": 5081 }, { "epoch": 0.6590210969096228, "grad_norm": 1.1734991073608398, "learning_rate": 2.61941234809304e-05, "loss": 10.38459300994873, "step": 5082 }, { "epoch": 0.6591507744178695, "grad_norm": 0.8704180121421814, "learning_rate": 2.6176167465454293e-05, "loss": 7.76045036315918, "step": 5083 }, { "epoch": 0.6592804519261163, "grad_norm": 1.0563775300979614, "learning_rate": 2.61582154240235e-05, "loss": 11.152535438537598, "step": 5084 }, { "epoch": 0.659410129434363, "grad_norm": 1.1853511333465576, "learning_rate": 2.614026735963262e-05, "loss": 11.281938552856445, "step": 5085 }, { "epoch": 0.6595398069426096, "grad_norm": 1.1048623323440552, "learning_rate": 2.612232327527553e-05, "loss": 12.366442680358887, "step": 5086 }, { "epoch": 0.6596694844508563, "grad_norm": 0.7615442276000977, "learning_rate": 2.6104383173945507e-05, "loss": 8.769938468933105, "step": 5087 }, { "epoch": 0.6597991619591029, "grad_norm": 1.1965776681900024, "learning_rate": 2.60864470586351e-05, "loss": 10.662277221679688, "step": 5088 }, { "epoch": 0.6599288394673496, "grad_norm": 0.5080127716064453, "learning_rate": 2.6068514932336275e-05, "loss": 4.855563640594482, "step": 5089 }, { "epoch": 0.6600585169755964, "grad_norm": 0.8642823696136475, "learning_rate": 2.605058679804023e-05, "loss": 7.963737964630127, "step": 5090 }, { "epoch": 0.660188194483843, "grad_norm": 0.7847434878349304, "learning_rate": 2.6032662658737596e-05, "loss": 6.399192810058594, "step": 5091 }, { "epoch": 0.6603178719920897, "grad_norm": 0.9836119413375854, "learning_rate": 2.6014742517418273e-05, "loss": 9.311333656311035, "step": 5092 }, { "epoch": 0.6604475495003364, "grad_norm": 0.8234129548072815, "learning_rate": 2.599682637707149e-05, "loss": 6.043230056762695, "step": 5093 }, { "epoch": 0.660577227008583, "grad_norm": 1.017181158065796, "learning_rate": 2.597891424068588e-05, "loss": 9.781452178955078, "step": 5094 }, { "epoch": 0.6607069045168297, "grad_norm": 0.76573646068573, "learning_rate": 2.5961006111249318e-05, "loss": 9.11951732635498, "step": 5095 }, { "epoch": 0.6608365820250763, "grad_norm": 1.0669703483581543, "learning_rate": 2.5943101991749087e-05, "loss": 10.662214279174805, "step": 5096 }, { "epoch": 0.6609662595333231, "grad_norm": 0.7108951210975647, "learning_rate": 2.5925201885171734e-05, "loss": 8.960124015808105, "step": 5097 }, { "epoch": 0.6610959370415698, "grad_norm": 0.7225047945976257, "learning_rate": 2.59073057945032e-05, "loss": 11.273990631103516, "step": 5098 }, { "epoch": 0.6612256145498164, "grad_norm": 0.7833425402641296, "learning_rate": 2.5889413722728696e-05, "loss": 7.043799877166748, "step": 5099 }, { "epoch": 0.6613552920580631, "grad_norm": 1.1128885746002197, "learning_rate": 2.5871525672832832e-05, "loss": 12.517223358154297, "step": 5100 }, { "epoch": 0.6614849695663098, "grad_norm": 0.868484377861023, "learning_rate": 2.585364164779946e-05, "loss": 8.95670223236084, "step": 5101 }, { "epoch": 0.6616146470745564, "grad_norm": 0.6150869131088257, "learning_rate": 2.5835761650611852e-05, "loss": 5.794485569000244, "step": 5102 }, { "epoch": 0.6617443245828032, "grad_norm": 1.3964897394180298, "learning_rate": 2.5817885684252525e-05, "loss": 14.842803001403809, "step": 5103 }, { "epoch": 0.6618740020910499, "grad_norm": 1.0969016551971436, "learning_rate": 2.58000137517034e-05, "loss": 11.731915473937988, "step": 5104 }, { "epoch": 0.6620036795992965, "grad_norm": 0.8253207802772522, "learning_rate": 2.578214585594565e-05, "loss": 7.591456890106201, "step": 5105 }, { "epoch": 0.6621333571075432, "grad_norm": 0.7388143539428711, "learning_rate": 2.576428199995986e-05, "loss": 6.440877914428711, "step": 5106 }, { "epoch": 0.6622630346157898, "grad_norm": 1.0094377994537354, "learning_rate": 2.574642218672584e-05, "loss": 10.513631820678711, "step": 5107 }, { "epoch": 0.6623927121240365, "grad_norm": 0.8526279926300049, "learning_rate": 2.5728566419222823e-05, "loss": 7.663385391235352, "step": 5108 }, { "epoch": 0.6625223896322833, "grad_norm": 1.2142711877822876, "learning_rate": 2.571071470042929e-05, "loss": 13.956950187683105, "step": 5109 }, { "epoch": 0.6626520671405299, "grad_norm": 0.9057555198669434, "learning_rate": 2.5692867033323115e-05, "loss": 6.983813285827637, "step": 5110 }, { "epoch": 0.6627817446487766, "grad_norm": 0.6696953177452087, "learning_rate": 2.5675023420881422e-05, "loss": 6.972714900970459, "step": 5111 }, { "epoch": 0.6629114221570233, "grad_norm": 0.9205620288848877, "learning_rate": 2.5657183866080735e-05, "loss": 10.368660926818848, "step": 5112 }, { "epoch": 0.6630410996652699, "grad_norm": 0.8957619071006775, "learning_rate": 2.563934837189683e-05, "loss": 7.95017147064209, "step": 5113 }, { "epoch": 0.6631707771735166, "grad_norm": 0.8382812142372131, "learning_rate": 2.562151694130488e-05, "loss": 9.153846740722656, "step": 5114 }, { "epoch": 0.6633004546817632, "grad_norm": 0.9356840252876282, "learning_rate": 2.56036895772793e-05, "loss": 8.161262512207031, "step": 5115 }, { "epoch": 0.66343013219001, "grad_norm": 0.8590646982192993, "learning_rate": 2.558586628279389e-05, "loss": 10.824382781982422, "step": 5116 }, { "epoch": 0.6635598096982567, "grad_norm": 0.7752134203910828, "learning_rate": 2.556804706082173e-05, "loss": 6.558254718780518, "step": 5117 }, { "epoch": 0.6636894872065033, "grad_norm": 0.9122812151908875, "learning_rate": 2.5550231914335247e-05, "loss": 9.090088844299316, "step": 5118 }, { "epoch": 0.66381916471475, "grad_norm": 0.7755042314529419, "learning_rate": 2.5532420846306204e-05, "loss": 8.738387107849121, "step": 5119 }, { "epoch": 0.6639488422229967, "grad_norm": 1.1026250123977661, "learning_rate": 2.551461385970561e-05, "loss": 9.342416763305664, "step": 5120 }, { "epoch": 0.6640785197312433, "grad_norm": 0.7406139373779297, "learning_rate": 2.5496810957503893e-05, "loss": 7.307826042175293, "step": 5121 }, { "epoch": 0.6642081972394901, "grad_norm": 1.0114892721176147, "learning_rate": 2.5479012142670707e-05, "loss": 10.280919075012207, "step": 5122 }, { "epoch": 0.6643378747477368, "grad_norm": 1.0141197443008423, "learning_rate": 2.54612174181751e-05, "loss": 12.09228515625, "step": 5123 }, { "epoch": 0.6644675522559834, "grad_norm": 0.8144813776016235, "learning_rate": 2.544342678698537e-05, "loss": 9.778071403503418, "step": 5124 }, { "epoch": 0.6645972297642301, "grad_norm": 0.7518331408500671, "learning_rate": 2.5425640252069204e-05, "loss": 8.189417839050293, "step": 5125 }, { "epoch": 0.6647269072724767, "grad_norm": 1.0188815593719482, "learning_rate": 2.5407857816393533e-05, "loss": 8.500584602355957, "step": 5126 }, { "epoch": 0.6648565847807234, "grad_norm": 1.1691093444824219, "learning_rate": 2.5390079482924666e-05, "loss": 10.419638633728027, "step": 5127 }, { "epoch": 0.6649862622889701, "grad_norm": 0.7329731583595276, "learning_rate": 2.537230525462817e-05, "loss": 9.76602554321289, "step": 5128 }, { "epoch": 0.6651159397972168, "grad_norm": 0.9035617709159851, "learning_rate": 2.5354535134469e-05, "loss": 9.572212219238281, "step": 5129 }, { "epoch": 0.6652456173054635, "grad_norm": 0.846876323223114, "learning_rate": 2.5336769125411335e-05, "loss": 9.321157455444336, "step": 5130 }, { "epoch": 0.6653752948137102, "grad_norm": 0.8886515498161316, "learning_rate": 2.5319007230418768e-05, "loss": 7.9470624923706055, "step": 5131 }, { "epoch": 0.6655049723219568, "grad_norm": 0.7814303636550903, "learning_rate": 2.5301249452454102e-05, "loss": 7.897145748138428, "step": 5132 }, { "epoch": 0.6656346498302035, "grad_norm": 0.987016499042511, "learning_rate": 2.5283495794479556e-05, "loss": 10.912430763244629, "step": 5133 }, { "epoch": 0.6657643273384501, "grad_norm": 0.8696178793907166, "learning_rate": 2.526574625945657e-05, "loss": 7.534451961517334, "step": 5134 }, { "epoch": 0.6658940048466969, "grad_norm": 0.7884777188301086, "learning_rate": 2.524800085034597e-05, "loss": 7.791383266448975, "step": 5135 }, { "epoch": 0.6660236823549436, "grad_norm": 1.4558414220809937, "learning_rate": 2.5230259570107833e-05, "loss": 10.527212142944336, "step": 5136 }, { "epoch": 0.6661533598631902, "grad_norm": 1.019431471824646, "learning_rate": 2.521252242170161e-05, "loss": 9.512166023254395, "step": 5137 }, { "epoch": 0.6662830373714369, "grad_norm": 1.0083893537521362, "learning_rate": 2.5194789408085984e-05, "loss": 7.251402378082275, "step": 5138 }, { "epoch": 0.6664127148796836, "grad_norm": 0.9699774980545044, "learning_rate": 2.5177060532219045e-05, "loss": 8.007609367370605, "step": 5139 }, { "epoch": 0.6665423923879302, "grad_norm": 0.7370617985725403, "learning_rate": 2.5159335797058092e-05, "loss": 6.506278991699219, "step": 5140 }, { "epoch": 0.666672069896177, "grad_norm": 1.0431982278823853, "learning_rate": 2.514161520555982e-05, "loss": 10.639716148376465, "step": 5141 }, { "epoch": 0.6668017474044237, "grad_norm": 0.6699467301368713, "learning_rate": 2.512389876068016e-05, "loss": 6.503354072570801, "step": 5142 }, { "epoch": 0.6669314249126703, "grad_norm": 0.8965701460838318, "learning_rate": 2.5106186465374425e-05, "loss": 7.7250871658325195, "step": 5143 }, { "epoch": 0.667061102420917, "grad_norm": 1.1842344999313354, "learning_rate": 2.5088478322597152e-05, "loss": 8.466426849365234, "step": 5144 }, { "epoch": 0.6671907799291636, "grad_norm": 1.0289068222045898, "learning_rate": 2.5070774335302284e-05, "loss": 11.378077507019043, "step": 5145 }, { "epoch": 0.6673204574374103, "grad_norm": 0.8274596333503723, "learning_rate": 2.5053074506442952e-05, "loss": 9.532018661499023, "step": 5146 }, { "epoch": 0.667450134945657, "grad_norm": 0.726851761341095, "learning_rate": 2.503537883897173e-05, "loss": 6.418034076690674, "step": 5147 }, { "epoch": 0.6675798124539037, "grad_norm": 1.3165147304534912, "learning_rate": 2.501768733584036e-05, "loss": 7.52365255355835, "step": 5148 }, { "epoch": 0.6677094899621504, "grad_norm": 0.9396947026252747, "learning_rate": 2.500000000000001e-05, "loss": 6.22675085067749, "step": 5149 }, { "epoch": 0.6678391674703971, "grad_norm": 1.6425871849060059, "learning_rate": 2.4982316834401054e-05, "loss": 15.322137832641602, "step": 5150 }, { "epoch": 0.6679688449786437, "grad_norm": 0.9233659505844116, "learning_rate": 2.4964637841993256e-05, "loss": 12.25538444519043, "step": 5151 }, { "epoch": 0.6680985224868904, "grad_norm": 0.7869453430175781, "learning_rate": 2.49469630257256e-05, "loss": 8.591964721679688, "step": 5152 }, { "epoch": 0.668228199995137, "grad_norm": 0.9896265268325806, "learning_rate": 2.4929292388546454e-05, "loss": 9.832792282104492, "step": 5153 }, { "epoch": 0.6683578775033838, "grad_norm": 0.9177928566932678, "learning_rate": 2.4911625933403422e-05, "loss": 8.777830123901367, "step": 5154 }, { "epoch": 0.6684875550116305, "grad_norm": 1.0118534564971924, "learning_rate": 2.489396366324347e-05, "loss": 11.81280517578125, "step": 5155 }, { "epoch": 0.6686172325198771, "grad_norm": 0.9194180965423584, "learning_rate": 2.4876305581012792e-05, "loss": 11.95287036895752, "step": 5156 }, { "epoch": 0.6687469100281238, "grad_norm": 0.9203320145606995, "learning_rate": 2.4858651689656952e-05, "loss": 6.133281707763672, "step": 5157 }, { "epoch": 0.6688765875363705, "grad_norm": 0.9938702583312988, "learning_rate": 2.4841001992120815e-05, "loss": 9.153589248657227, "step": 5158 }, { "epoch": 0.6690062650446171, "grad_norm": 0.7366145253181458, "learning_rate": 2.482335649134847e-05, "loss": 11.80258846282959, "step": 5159 }, { "epoch": 0.6691359425528638, "grad_norm": 0.7412990927696228, "learning_rate": 2.4805715190283407e-05, "loss": 6.64081335067749, "step": 5160 }, { "epoch": 0.6692656200611106, "grad_norm": 0.8132412433624268, "learning_rate": 2.4788078091868316e-05, "loss": 6.987839698791504, "step": 5161 }, { "epoch": 0.6693952975693572, "grad_norm": 0.717228889465332, "learning_rate": 2.4770445199045284e-05, "loss": 7.77498722076416, "step": 5162 }, { "epoch": 0.6695249750776039, "grad_norm": 0.9390698671340942, "learning_rate": 2.47528165147556e-05, "loss": 7.419919967651367, "step": 5163 }, { "epoch": 0.6696546525858506, "grad_norm": 1.424211859703064, "learning_rate": 2.4735192041939947e-05, "loss": 12.338834762573242, "step": 5164 }, { "epoch": 0.6697843300940972, "grad_norm": 0.6774596571922302, "learning_rate": 2.4717571783538208e-05, "loss": 7.294820308685303, "step": 5165 }, { "epoch": 0.6699140076023439, "grad_norm": 0.9277650117874146, "learning_rate": 2.4699955742489655e-05, "loss": 12.924705505371094, "step": 5166 }, { "epoch": 0.6700436851105906, "grad_norm": 1.202364206314087, "learning_rate": 2.468234392173278e-05, "loss": 13.72245979309082, "step": 5167 }, { "epoch": 0.6701733626188373, "grad_norm": 0.6422247290611267, "learning_rate": 2.4664736324205438e-05, "loss": 6.559562683105469, "step": 5168 }, { "epoch": 0.670303040127084, "grad_norm": 1.316726803779602, "learning_rate": 2.4647132952844714e-05, "loss": 13.357914924621582, "step": 5169 }, { "epoch": 0.6704327176353306, "grad_norm": 0.8250283002853394, "learning_rate": 2.4629533810587053e-05, "loss": 7.938155174255371, "step": 5170 }, { "epoch": 0.6705623951435773, "grad_norm": 1.174203872680664, "learning_rate": 2.461193890036812e-05, "loss": 9.634186744689941, "step": 5171 }, { "epoch": 0.670692072651824, "grad_norm": 0.7517443299293518, "learning_rate": 2.4594348225122964e-05, "loss": 11.48833179473877, "step": 5172 }, { "epoch": 0.6708217501600707, "grad_norm": 1.0461950302124023, "learning_rate": 2.4576761787785833e-05, "loss": 10.762557029724121, "step": 5173 }, { "epoch": 0.6709514276683174, "grad_norm": 0.7966043949127197, "learning_rate": 2.455917959129036e-05, "loss": 12.204643249511719, "step": 5174 }, { "epoch": 0.671081105176564, "grad_norm": 1.2874356508255005, "learning_rate": 2.454160163856938e-05, "loss": 12.201680183410645, "step": 5175 }, { "epoch": 0.6712107826848107, "grad_norm": 1.2581889629364014, "learning_rate": 2.4524027932555104e-05, "loss": 11.32523250579834, "step": 5176 }, { "epoch": 0.6713404601930574, "grad_norm": 0.858386218547821, "learning_rate": 2.4506458476178963e-05, "loss": 8.694921493530273, "step": 5177 }, { "epoch": 0.671470137701304, "grad_norm": 1.178192377090454, "learning_rate": 2.4488893272371748e-05, "loss": 13.21712875366211, "step": 5178 }, { "epoch": 0.6715998152095507, "grad_norm": 0.8700788021087646, "learning_rate": 2.4471332324063484e-05, "loss": 9.236956596374512, "step": 5179 }, { "epoch": 0.6717294927177975, "grad_norm": 0.8219951391220093, "learning_rate": 2.445377563418349e-05, "loss": 9.012832641601562, "step": 5180 }, { "epoch": 0.6718591702260441, "grad_norm": 1.046960711479187, "learning_rate": 2.4436223205660424e-05, "loss": 12.921340942382812, "step": 5181 }, { "epoch": 0.6719888477342908, "grad_norm": 0.8655885457992554, "learning_rate": 2.4418675041422166e-05, "loss": 9.513084411621094, "step": 5182 }, { "epoch": 0.6721185252425375, "grad_norm": 0.7337485551834106, "learning_rate": 2.4401131144395967e-05, "loss": 7.149022102355957, "step": 5183 }, { "epoch": 0.6722482027507841, "grad_norm": 0.8699536919593811, "learning_rate": 2.4383591517508264e-05, "loss": 12.889233589172363, "step": 5184 }, { "epoch": 0.6723778802590308, "grad_norm": 0.8840482234954834, "learning_rate": 2.4366056163684882e-05, "loss": 9.181903839111328, "step": 5185 }, { "epoch": 0.6725075577672776, "grad_norm": 0.8340387344360352, "learning_rate": 2.434852508585085e-05, "loss": 8.024754524230957, "step": 5186 }, { "epoch": 0.6726372352755242, "grad_norm": 0.6810439229011536, "learning_rate": 2.4330998286930557e-05, "loss": 6.842522621154785, "step": 5187 }, { "epoch": 0.6727669127837709, "grad_norm": 0.8163902163505554, "learning_rate": 2.4313475769847604e-05, "loss": 8.370217323303223, "step": 5188 }, { "epoch": 0.6728965902920175, "grad_norm": 0.9348554015159607, "learning_rate": 2.4295957537524955e-05, "loss": 11.79965877532959, "step": 5189 }, { "epoch": 0.6730262678002642, "grad_norm": 0.4578361511230469, "learning_rate": 2.4278443592884786e-05, "loss": 5.661981582641602, "step": 5190 }, { "epoch": 0.6731559453085109, "grad_norm": 1.0420737266540527, "learning_rate": 2.4260933938848622e-05, "loss": 8.705546379089355, "step": 5191 }, { "epoch": 0.6732856228167575, "grad_norm": 1.0422765016555786, "learning_rate": 2.4243428578337212e-05, "loss": 10.294745445251465, "step": 5192 }, { "epoch": 0.6734153003250043, "grad_norm": 0.6840249300003052, "learning_rate": 2.422592751427066e-05, "loss": 5.874569892883301, "step": 5193 }, { "epoch": 0.673544977833251, "grad_norm": 0.7945894598960876, "learning_rate": 2.420843074956824e-05, "loss": 6.447649955749512, "step": 5194 }, { "epoch": 0.6736746553414976, "grad_norm": 0.8302642107009888, "learning_rate": 2.4190938287148668e-05, "loss": 8.753827095031738, "step": 5195 }, { "epoch": 0.6738043328497443, "grad_norm": 1.1091135740280151, "learning_rate": 2.4173450129929797e-05, "loss": 8.729265213012695, "step": 5196 }, { "epoch": 0.6739340103579909, "grad_norm": 0.8517442345619202, "learning_rate": 2.415596628082887e-05, "loss": 9.262459754943848, "step": 5197 }, { "epoch": 0.6740636878662376, "grad_norm": 0.7179979085922241, "learning_rate": 2.41384867427623e-05, "loss": 9.212130546569824, "step": 5198 }, { "epoch": 0.6741933653744844, "grad_norm": 0.4160095453262329, "learning_rate": 2.41210115186459e-05, "loss": 5.358565807342529, "step": 5199 }, { "epoch": 0.674323042882731, "grad_norm": 0.7982378602027893, "learning_rate": 2.410354061139466e-05, "loss": 10.127557754516602, "step": 5200 }, { "epoch": 0.6744527203909777, "grad_norm": 0.7395320534706116, "learning_rate": 2.408607402392294e-05, "loss": 7.628262042999268, "step": 5201 }, { "epoch": 0.6745823978992244, "grad_norm": 1.0202009677886963, "learning_rate": 2.4068611759144306e-05, "loss": 9.745285034179688, "step": 5202 }, { "epoch": 0.674712075407471, "grad_norm": 0.5901804566383362, "learning_rate": 2.4051153819971623e-05, "loss": 7.216884136199951, "step": 5203 }, { "epoch": 0.6748417529157177, "grad_norm": 1.079352855682373, "learning_rate": 2.4033700209317078e-05, "loss": 10.896544456481934, "step": 5204 }, { "epoch": 0.6749714304239645, "grad_norm": 0.8955376148223877, "learning_rate": 2.401625093009206e-05, "loss": 7.372275352478027, "step": 5205 }, { "epoch": 0.6751011079322111, "grad_norm": 0.9947144389152527, "learning_rate": 2.399880598520733e-05, "loss": 11.972302436828613, "step": 5206 }, { "epoch": 0.6752307854404578, "grad_norm": 0.8924961686134338, "learning_rate": 2.398136537757282e-05, "loss": 11.71983814239502, "step": 5207 }, { "epoch": 0.6753604629487044, "grad_norm": 1.1051568984985352, "learning_rate": 2.3963929110097826e-05, "loss": 11.32588005065918, "step": 5208 }, { "epoch": 0.6754901404569511, "grad_norm": 0.9235348105430603, "learning_rate": 2.394649718569087e-05, "loss": 7.599402904510498, "step": 5209 }, { "epoch": 0.6756198179651978, "grad_norm": 0.9015533924102783, "learning_rate": 2.392906960725978e-05, "loss": 10.418837547302246, "step": 5210 }, { "epoch": 0.6757494954734444, "grad_norm": 0.9430243968963623, "learning_rate": 2.391164637771162e-05, "loss": 11.398999214172363, "step": 5211 }, { "epoch": 0.6758791729816912, "grad_norm": 1.140872597694397, "learning_rate": 2.3894227499952787e-05, "loss": 12.451600074768066, "step": 5212 }, { "epoch": 0.6760088504899379, "grad_norm": 1.0983258485794067, "learning_rate": 2.387681297688888e-05, "loss": 8.332859992980957, "step": 5213 }, { "epoch": 0.6761385279981845, "grad_norm": 1.0036951303482056, "learning_rate": 2.3859402811424847e-05, "loss": 8.28455924987793, "step": 5214 }, { "epoch": 0.6762682055064312, "grad_norm": 0.7470367550849915, "learning_rate": 2.3841997006464838e-05, "loss": 8.382922172546387, "step": 5215 }, { "epoch": 0.6763978830146778, "grad_norm": 0.7386993169784546, "learning_rate": 2.382459556491234e-05, "loss": 8.140296936035156, "step": 5216 }, { "epoch": 0.6765275605229245, "grad_norm": 0.6469051241874695, "learning_rate": 2.3807198489670048e-05, "loss": 6.226133823394775, "step": 5217 }, { "epoch": 0.6766572380311713, "grad_norm": 0.9330136775970459, "learning_rate": 2.3789805783640003e-05, "loss": 11.527752876281738, "step": 5218 }, { "epoch": 0.6767869155394179, "grad_norm": 1.0055224895477295, "learning_rate": 2.377241744972343e-05, "loss": 10.574394226074219, "step": 5219 }, { "epoch": 0.6769165930476646, "grad_norm": 0.6489219069480896, "learning_rate": 2.3755033490820914e-05, "loss": 6.744993209838867, "step": 5220 }, { "epoch": 0.6770462705559113, "grad_norm": 0.7592036724090576, "learning_rate": 2.373765390983223e-05, "loss": 8.683375358581543, "step": 5221 }, { "epoch": 0.6771759480641579, "grad_norm": 1.2923094034194946, "learning_rate": 2.3720278709656496e-05, "loss": 13.548358917236328, "step": 5222 }, { "epoch": 0.6773056255724046, "grad_norm": 0.8473970293998718, "learning_rate": 2.3702907893192027e-05, "loss": 11.170645713806152, "step": 5223 }, { "epoch": 0.6774353030806513, "grad_norm": 0.7150459289550781, "learning_rate": 2.368554146333648e-05, "loss": 7.13893461227417, "step": 5224 }, { "epoch": 0.677564980588898, "grad_norm": 0.701984703540802, "learning_rate": 2.366817942298671e-05, "loss": 7.704203128814697, "step": 5225 }, { "epoch": 0.6776946580971447, "grad_norm": 0.8342140913009644, "learning_rate": 2.36508217750389e-05, "loss": 8.704279899597168, "step": 5226 }, { "epoch": 0.6778243356053913, "grad_norm": 0.9869472980499268, "learning_rate": 2.363346852238844e-05, "loss": 8.608449935913086, "step": 5227 }, { "epoch": 0.677954013113638, "grad_norm": 1.0519230365753174, "learning_rate": 2.3616119667930064e-05, "loss": 11.72964096069336, "step": 5228 }, { "epoch": 0.6780836906218847, "grad_norm": 0.7086394429206848, "learning_rate": 2.3598775214557684e-05, "loss": 7.527560234069824, "step": 5229 }, { "epoch": 0.6782133681301313, "grad_norm": 0.7245563268661499, "learning_rate": 2.3581435165164567e-05, "loss": 9.594749450683594, "step": 5230 }, { "epoch": 0.6783430456383781, "grad_norm": 0.8810352087020874, "learning_rate": 2.356409952264315e-05, "loss": 10.95544719696045, "step": 5231 }, { "epoch": 0.6784727231466248, "grad_norm": 0.9318565726280212, "learning_rate": 2.354676828988524e-05, "loss": 11.179525375366211, "step": 5232 }, { "epoch": 0.6786024006548714, "grad_norm": 0.8571442365646362, "learning_rate": 2.3529441469781804e-05, "loss": 7.743783473968506, "step": 5233 }, { "epoch": 0.6787320781631181, "grad_norm": 0.8755283355712891, "learning_rate": 2.3512119065223143e-05, "loss": 5.918860912322998, "step": 5234 }, { "epoch": 0.6788617556713648, "grad_norm": 1.1179448366165161, "learning_rate": 2.3494801079098827e-05, "loss": 11.478310585021973, "step": 5235 }, { "epoch": 0.6789914331796114, "grad_norm": 0.8110019564628601, "learning_rate": 2.347748751429762e-05, "loss": 8.989544868469238, "step": 5236 }, { "epoch": 0.6791211106878582, "grad_norm": 0.8670118451118469, "learning_rate": 2.3460178373707625e-05, "loss": 8.010139465332031, "step": 5237 }, { "epoch": 0.6792507881961048, "grad_norm": 1.2605410814285278, "learning_rate": 2.3442873660216146e-05, "loss": 9.07260513305664, "step": 5238 }, { "epoch": 0.6793804657043515, "grad_norm": 0.7998138666152954, "learning_rate": 2.3425573376709813e-05, "loss": 9.664527893066406, "step": 5239 }, { "epoch": 0.6795101432125982, "grad_norm": 1.2036198377609253, "learning_rate": 2.340827752607444e-05, "loss": 13.170221328735352, "step": 5240 }, { "epoch": 0.6796398207208448, "grad_norm": 0.7013399600982666, "learning_rate": 2.3390986111195174e-05, "loss": 7.1038594245910645, "step": 5241 }, { "epoch": 0.6797694982290915, "grad_norm": 0.9128169417381287, "learning_rate": 2.337369913495636e-05, "loss": 6.856722831726074, "step": 5242 }, { "epoch": 0.6798991757373382, "grad_norm": 0.9461127519607544, "learning_rate": 2.335641660024167e-05, "loss": 14.096874237060547, "step": 5243 }, { "epoch": 0.6800288532455849, "grad_norm": 0.7591800093650818, "learning_rate": 2.3339138509933955e-05, "loss": 7.186279773712158, "step": 5244 }, { "epoch": 0.6801585307538316, "grad_norm": 1.005750060081482, "learning_rate": 2.3321864866915405e-05, "loss": 10.362939834594727, "step": 5245 }, { "epoch": 0.6802882082620783, "grad_norm": 0.8179032206535339, "learning_rate": 2.3304595674067403e-05, "loss": 8.371757507324219, "step": 5246 }, { "epoch": 0.6804178857703249, "grad_norm": 0.6711539030075073, "learning_rate": 2.328733093427064e-05, "loss": 6.810558319091797, "step": 5247 }, { "epoch": 0.6805475632785716, "grad_norm": 1.0524146556854248, "learning_rate": 2.3270070650405017e-05, "loss": 13.297934532165527, "step": 5248 }, { "epoch": 0.6806772407868182, "grad_norm": 0.8453881144523621, "learning_rate": 2.3252814825349744e-05, "loss": 11.94461441040039, "step": 5249 }, { "epoch": 0.680806918295065, "grad_norm": 0.9272341132164001, "learning_rate": 2.323556346198323e-05, "loss": 12.310079574584961, "step": 5250 }, { "epoch": 0.6809365958033117, "grad_norm": 0.6768457889556885, "learning_rate": 2.3218316563183196e-05, "loss": 10.69997501373291, "step": 5251 }, { "epoch": 0.6810662733115583, "grad_norm": 0.6539787650108337, "learning_rate": 2.3201074131826563e-05, "loss": 8.775208473205566, "step": 5252 }, { "epoch": 0.681195950819805, "grad_norm": 1.1225318908691406, "learning_rate": 2.3183836170789562e-05, "loss": 8.992563247680664, "step": 5253 }, { "epoch": 0.6813256283280517, "grad_norm": 0.713982343673706, "learning_rate": 2.316660268294763e-05, "loss": 6.0852437019348145, "step": 5254 }, { "epoch": 0.6814553058362983, "grad_norm": 1.0469450950622559, "learning_rate": 2.3149373671175506e-05, "loss": 14.2011137008667, "step": 5255 }, { "epoch": 0.681584983344545, "grad_norm": 1.3259848356246948, "learning_rate": 2.3132149138347113e-05, "loss": 12.338485717773438, "step": 5256 }, { "epoch": 0.6817146608527918, "grad_norm": 0.9827983379364014, "learning_rate": 2.3114929087335725e-05, "loss": 9.930167198181152, "step": 5257 }, { "epoch": 0.6818443383610384, "grad_norm": 0.8795430660247803, "learning_rate": 2.3097713521013753e-05, "loss": 9.80187702178955, "step": 5258 }, { "epoch": 0.6819740158692851, "grad_norm": 0.9492084980010986, "learning_rate": 2.308050244225297e-05, "loss": 8.59495735168457, "step": 5259 }, { "epoch": 0.6821036933775317, "grad_norm": 0.7855455875396729, "learning_rate": 2.3063295853924315e-05, "loss": 7.543326377868652, "step": 5260 }, { "epoch": 0.6822333708857784, "grad_norm": 1.4506957530975342, "learning_rate": 2.3046093758898048e-05, "loss": 14.626436233520508, "step": 5261 }, { "epoch": 0.6823630483940251, "grad_norm": 0.6456313729286194, "learning_rate": 2.30288961600436e-05, "loss": 9.967455863952637, "step": 5262 }, { "epoch": 0.6824927259022718, "grad_norm": 0.8115907311439514, "learning_rate": 2.3011703060229745e-05, "loss": 10.805359840393066, "step": 5263 }, { "epoch": 0.6826224034105185, "grad_norm": 0.8542612791061401, "learning_rate": 2.2994514462324413e-05, "loss": 9.859734535217285, "step": 5264 }, { "epoch": 0.6827520809187652, "grad_norm": 0.9902028441429138, "learning_rate": 2.2977330369194865e-05, "loss": 9.583504676818848, "step": 5265 }, { "epoch": 0.6828817584270118, "grad_norm": 0.8295624852180481, "learning_rate": 2.2960150783707555e-05, "loss": 8.384557723999023, "step": 5266 }, { "epoch": 0.6830114359352585, "grad_norm": 0.9673526883125305, "learning_rate": 2.2942975708728193e-05, "loss": 9.92573356628418, "step": 5267 }, { "epoch": 0.6831411134435051, "grad_norm": 0.9869945049285889, "learning_rate": 2.2925805147121783e-05, "loss": 8.083054542541504, "step": 5268 }, { "epoch": 0.6832707909517519, "grad_norm": 0.906688928604126, "learning_rate": 2.2908639101752495e-05, "loss": 8.778523445129395, "step": 5269 }, { "epoch": 0.6834004684599986, "grad_norm": 0.8510981202125549, "learning_rate": 2.2891477575483827e-05, "loss": 6.793665409088135, "step": 5270 }, { "epoch": 0.6835301459682452, "grad_norm": 0.7246623039245605, "learning_rate": 2.2874320571178447e-05, "loss": 7.197179794311523, "step": 5271 }, { "epoch": 0.6836598234764919, "grad_norm": 1.0923641920089722, "learning_rate": 2.2857168091698365e-05, "loss": 9.540711402893066, "step": 5272 }, { "epoch": 0.6837895009847386, "grad_norm": 0.9260254502296448, "learning_rate": 2.2840020139904743e-05, "loss": 8.262057304382324, "step": 5273 }, { "epoch": 0.6839191784929852, "grad_norm": 0.602284848690033, "learning_rate": 2.282287671865805e-05, "loss": 5.501136302947998, "step": 5274 }, { "epoch": 0.6840488560012319, "grad_norm": 0.7241485714912415, "learning_rate": 2.2805737830817937e-05, "loss": 7.306522846221924, "step": 5275 }, { "epoch": 0.6841785335094787, "grad_norm": 0.9732916951179504, "learning_rate": 2.278860347924337e-05, "loss": 9.46986198425293, "step": 5276 }, { "epoch": 0.6843082110177253, "grad_norm": 1.0046576261520386, "learning_rate": 2.2771473666792498e-05, "loss": 9.254732131958008, "step": 5277 }, { "epoch": 0.684437888525972, "grad_norm": 1.0039476156234741, "learning_rate": 2.2754348396322774e-05, "loss": 9.192014694213867, "step": 5278 }, { "epoch": 0.6845675660342186, "grad_norm": 0.7009773850440979, "learning_rate": 2.2737227670690815e-05, "loss": 8.969292640686035, "step": 5279 }, { "epoch": 0.6846972435424653, "grad_norm": 0.8006728291511536, "learning_rate": 2.2720111492752567e-05, "loss": 9.943241119384766, "step": 5280 }, { "epoch": 0.684826921050712, "grad_norm": 0.9850025773048401, "learning_rate": 2.270299986536313e-05, "loss": 9.633190155029297, "step": 5281 }, { "epoch": 0.6849565985589587, "grad_norm": 0.8916730284690857, "learning_rate": 2.2685892791376927e-05, "loss": 8.208847045898438, "step": 5282 }, { "epoch": 0.6850862760672054, "grad_norm": 0.8155078887939453, "learning_rate": 2.2668790273647554e-05, "loss": 8.242262840270996, "step": 5283 }, { "epoch": 0.6852159535754521, "grad_norm": 1.0700620412826538, "learning_rate": 2.2651692315027904e-05, "loss": 11.940051078796387, "step": 5284 }, { "epoch": 0.6853456310836987, "grad_norm": 0.8063937425613403, "learning_rate": 2.2634598918370048e-05, "loss": 7.386655807495117, "step": 5285 }, { "epoch": 0.6854753085919454, "grad_norm": 0.9765474796295166, "learning_rate": 2.2617510086525367e-05, "loss": 7.297825336456299, "step": 5286 }, { "epoch": 0.685604986100192, "grad_norm": 1.0554794073104858, "learning_rate": 2.2600425822344406e-05, "loss": 9.528127670288086, "step": 5287 }, { "epoch": 0.6857346636084387, "grad_norm": 1.1736817359924316, "learning_rate": 2.2583346128677024e-05, "loss": 10.385137557983398, "step": 5288 }, { "epoch": 0.6858643411166855, "grad_norm": 1.071919560432434, "learning_rate": 2.256627100837226e-05, "loss": 8.28737735748291, "step": 5289 }, { "epoch": 0.6859940186249321, "grad_norm": 0.8315131664276123, "learning_rate": 2.2549200464278387e-05, "loss": 11.253034591674805, "step": 5290 }, { "epoch": 0.6861236961331788, "grad_norm": 1.0626815557479858, "learning_rate": 2.2532134499242974e-05, "loss": 9.49354076385498, "step": 5291 }, { "epoch": 0.6862533736414255, "grad_norm": 1.0301144123077393, "learning_rate": 2.251507311611275e-05, "loss": 9.660149574279785, "step": 5292 }, { "epoch": 0.6863830511496721, "grad_norm": 0.9954848885536194, "learning_rate": 2.2498016317733765e-05, "loss": 11.065669059753418, "step": 5293 }, { "epoch": 0.6865127286579188, "grad_norm": 1.0866525173187256, "learning_rate": 2.2480964106951213e-05, "loss": 6.8071112632751465, "step": 5294 }, { "epoch": 0.6866424061661656, "grad_norm": 1.1040925979614258, "learning_rate": 2.2463916486609604e-05, "loss": 10.934064865112305, "step": 5295 }, { "epoch": 0.6867720836744122, "grad_norm": 1.0802266597747803, "learning_rate": 2.244687345955261e-05, "loss": 13.984566688537598, "step": 5296 }, { "epoch": 0.6869017611826589, "grad_norm": 0.9730914235115051, "learning_rate": 2.2429835028623214e-05, "loss": 8.509878158569336, "step": 5297 }, { "epoch": 0.6870314386909055, "grad_norm": 0.9824655652046204, "learning_rate": 2.2412801196663547e-05, "loss": 9.807419776916504, "step": 5298 }, { "epoch": 0.6871611161991522, "grad_norm": 0.8836339116096497, "learning_rate": 2.2395771966515056e-05, "loss": 13.107853889465332, "step": 5299 }, { "epoch": 0.6872907937073989, "grad_norm": 0.9358702301979065, "learning_rate": 2.2378747341018346e-05, "loss": 7.30417537689209, "step": 5300 }, { "epoch": 0.6874204712156456, "grad_norm": 0.8915132284164429, "learning_rate": 2.236172732301332e-05, "loss": 8.23781681060791, "step": 5301 }, { "epoch": 0.6875501487238923, "grad_norm": 1.1355212926864624, "learning_rate": 2.2344711915339056e-05, "loss": 8.05876636505127, "step": 5302 }, { "epoch": 0.687679826232139, "grad_norm": 0.6807695031166077, "learning_rate": 2.2327701120833914e-05, "loss": 6.323291778564453, "step": 5303 }, { "epoch": 0.6878095037403856, "grad_norm": 0.8171533346176147, "learning_rate": 2.2310694942335426e-05, "loss": 7.86356782913208, "step": 5304 }, { "epoch": 0.6879391812486323, "grad_norm": 0.6263673901557922, "learning_rate": 2.2293693382680426e-05, "loss": 10.1926851272583, "step": 5305 }, { "epoch": 0.688068858756879, "grad_norm": 0.8203395009040833, "learning_rate": 2.2276696444704896e-05, "loss": 8.26791763305664, "step": 5306 }, { "epoch": 0.6881985362651256, "grad_norm": 0.7592834234237671, "learning_rate": 2.225970413124413e-05, "loss": 8.990200996398926, "step": 5307 }, { "epoch": 0.6883282137733724, "grad_norm": 1.0361179113388062, "learning_rate": 2.224271644513257e-05, "loss": 11.76041316986084, "step": 5308 }, { "epoch": 0.688457891281619, "grad_norm": 0.8960956931114197, "learning_rate": 2.2225733389203972e-05, "loss": 7.477031230926514, "step": 5309 }, { "epoch": 0.6885875687898657, "grad_norm": 0.9071250557899475, "learning_rate": 2.2208754966291206e-05, "loss": 9.47321891784668, "step": 5310 }, { "epoch": 0.6887172462981124, "grad_norm": 0.567359209060669, "learning_rate": 2.219178117922652e-05, "loss": 6.110758304595947, "step": 5311 }, { "epoch": 0.688846923806359, "grad_norm": 0.9306678771972656, "learning_rate": 2.2174812030841262e-05, "loss": 9.216883659362793, "step": 5312 }, { "epoch": 0.6889766013146057, "grad_norm": 0.931033730506897, "learning_rate": 2.215784752396604e-05, "loss": 10.532546043395996, "step": 5313 }, { "epoch": 0.6891062788228525, "grad_norm": 1.0295897722244263, "learning_rate": 2.2140887661430727e-05, "loss": 11.461613655090332, "step": 5314 }, { "epoch": 0.6892359563310991, "grad_norm": 0.9050365686416626, "learning_rate": 2.2123932446064356e-05, "loss": 9.650641441345215, "step": 5315 }, { "epoch": 0.6893656338393458, "grad_norm": 0.9356322288513184, "learning_rate": 2.2106981880695255e-05, "loss": 6.901517868041992, "step": 5316 }, { "epoch": 0.6894953113475925, "grad_norm": 0.9173954725265503, "learning_rate": 2.2090035968150913e-05, "loss": 9.2923583984375, "step": 5317 }, { "epoch": 0.6896249888558391, "grad_norm": 1.1508740186691284, "learning_rate": 2.2073094711258103e-05, "loss": 11.477425575256348, "step": 5318 }, { "epoch": 0.6897546663640858, "grad_norm": 0.6988860368728638, "learning_rate": 2.2056158112842755e-05, "loss": 7.40053653717041, "step": 5319 }, { "epoch": 0.6898843438723324, "grad_norm": 0.7840965986251831, "learning_rate": 2.2039226175730092e-05, "loss": 7.680447578430176, "step": 5320 }, { "epoch": 0.6900140213805792, "grad_norm": 0.8098912835121155, "learning_rate": 2.202229890274449e-05, "loss": 8.052828788757324, "step": 5321 }, { "epoch": 0.6901436988888259, "grad_norm": 0.9705098271369934, "learning_rate": 2.2005376296709623e-05, "loss": 7.872656345367432, "step": 5322 }, { "epoch": 0.6902733763970725, "grad_norm": 0.8476381897926331, "learning_rate": 2.1988458360448294e-05, "loss": 8.284026145935059, "step": 5323 }, { "epoch": 0.6904030539053192, "grad_norm": 0.679311215877533, "learning_rate": 2.197154509678263e-05, "loss": 10.661789894104004, "step": 5324 }, { "epoch": 0.6905327314135659, "grad_norm": 0.866855800151825, "learning_rate": 2.195463650853389e-05, "loss": 10.692971229553223, "step": 5325 }, { "epoch": 0.6906624089218125, "grad_norm": 0.896503210067749, "learning_rate": 2.1937732598522607e-05, "loss": 8.94180965423584, "step": 5326 }, { "epoch": 0.6907920864300593, "grad_norm": 0.6872888803482056, "learning_rate": 2.1920833369568507e-05, "loss": 6.669284820556641, "step": 5327 }, { "epoch": 0.690921763938306, "grad_norm": 0.9045247435569763, "learning_rate": 2.1903938824490554e-05, "loss": 7.9660115242004395, "step": 5328 }, { "epoch": 0.6910514414465526, "grad_norm": 0.8138790726661682, "learning_rate": 2.188704896610691e-05, "loss": 7.575140953063965, "step": 5329 }, { "epoch": 0.6911811189547993, "grad_norm": 1.0043838024139404, "learning_rate": 2.1870163797234984e-05, "loss": 10.267558097839355, "step": 5330 }, { "epoch": 0.6913107964630459, "grad_norm": 0.45882996916770935, "learning_rate": 2.185328332069136e-05, "loss": 6.355679512023926, "step": 5331 }, { "epoch": 0.6914404739712926, "grad_norm": 0.846171498298645, "learning_rate": 2.1836407539291894e-05, "loss": 9.288606643676758, "step": 5332 }, { "epoch": 0.6915701514795394, "grad_norm": 1.4736435413360596, "learning_rate": 2.1819536455851596e-05, "loss": 8.146768569946289, "step": 5333 }, { "epoch": 0.691699828987786, "grad_norm": 1.0693358182907104, "learning_rate": 2.1802670073184767e-05, "loss": 7.761246204376221, "step": 5334 }, { "epoch": 0.6918295064960327, "grad_norm": 0.9585523009300232, "learning_rate": 2.1785808394104833e-05, "loss": 7.2209153175354, "step": 5335 }, { "epoch": 0.6919591840042794, "grad_norm": 1.0007966756820679, "learning_rate": 2.1768951421424533e-05, "loss": 8.457660675048828, "step": 5336 }, { "epoch": 0.692088861512526, "grad_norm": 0.7502652406692505, "learning_rate": 2.1752099157955736e-05, "loss": 10.150689125061035, "step": 5337 }, { "epoch": 0.6922185390207727, "grad_norm": 0.9811396598815918, "learning_rate": 2.1735251606509595e-05, "loss": 10.704477310180664, "step": 5338 }, { "epoch": 0.6923482165290193, "grad_norm": 0.9664120078086853, "learning_rate": 2.1718408769896405e-05, "loss": 9.258047103881836, "step": 5339 }, { "epoch": 0.6924778940372661, "grad_norm": 0.7058826088905334, "learning_rate": 2.1701570650925752e-05, "loss": 9.652825355529785, "step": 5340 }, { "epoch": 0.6926075715455128, "grad_norm": 1.2429349422454834, "learning_rate": 2.1684737252406362e-05, "loss": 11.314435958862305, "step": 5341 }, { "epoch": 0.6927372490537594, "grad_norm": 0.6559330224990845, "learning_rate": 2.1667908577146246e-05, "loss": 7.67290735244751, "step": 5342 }, { "epoch": 0.6928669265620061, "grad_norm": 1.0802332162857056, "learning_rate": 2.165108462795255e-05, "loss": 12.772021293640137, "step": 5343 }, { "epoch": 0.6929966040702528, "grad_norm": 0.8912749290466309, "learning_rate": 2.1634265407631714e-05, "loss": 11.121626853942871, "step": 5344 }, { "epoch": 0.6931262815784994, "grad_norm": 1.0346109867095947, "learning_rate": 2.16174509189893e-05, "loss": 11.26523208618164, "step": 5345 }, { "epoch": 0.6932559590867462, "grad_norm": 0.9556483030319214, "learning_rate": 2.1600641164830175e-05, "loss": 9.192200660705566, "step": 5346 }, { "epoch": 0.6933856365949929, "grad_norm": 1.0083352327346802, "learning_rate": 2.1583836147958326e-05, "loss": 10.521482467651367, "step": 5347 }, { "epoch": 0.6935153141032395, "grad_norm": 1.042155385017395, "learning_rate": 2.1567035871177006e-05, "loss": 11.91384506225586, "step": 5348 }, { "epoch": 0.6936449916114862, "grad_norm": 1.1162109375, "learning_rate": 2.1550240337288684e-05, "loss": 10.290369033813477, "step": 5349 }, { "epoch": 0.6937746691197328, "grad_norm": 0.7255839705467224, "learning_rate": 2.1533449549094987e-05, "loss": 9.912511825561523, "step": 5350 }, { "epoch": 0.6939043466279795, "grad_norm": 0.6881080269813538, "learning_rate": 2.151666350939681e-05, "loss": 6.376795768737793, "step": 5351 }, { "epoch": 0.6940340241362262, "grad_norm": 0.7995132207870483, "learning_rate": 2.1499882220994194e-05, "loss": 7.4098076820373535, "step": 5352 }, { "epoch": 0.6941637016444729, "grad_norm": 0.7670912146568298, "learning_rate": 2.148310568668645e-05, "loss": 9.930537223815918, "step": 5353 }, { "epoch": 0.6942933791527196, "grad_norm": 0.909239649772644, "learning_rate": 2.146633390927204e-05, "loss": 8.308783531188965, "step": 5354 }, { "epoch": 0.6944230566609663, "grad_norm": 0.945521354675293, "learning_rate": 2.144956689154869e-05, "loss": 6.759042263031006, "step": 5355 }, { "epoch": 0.6945527341692129, "grad_norm": 0.7027114033699036, "learning_rate": 2.1432804636313258e-05, "loss": 8.810389518737793, "step": 5356 }, { "epoch": 0.6946824116774596, "grad_norm": 0.7244935631752014, "learning_rate": 2.1416047146361894e-05, "loss": 4.868971347808838, "step": 5357 }, { "epoch": 0.6948120891857062, "grad_norm": 0.7988734245300293, "learning_rate": 2.1399294424489874e-05, "loss": 8.393521308898926, "step": 5358 }, { "epoch": 0.694941766693953, "grad_norm": 1.0404514074325562, "learning_rate": 2.1382546473491743e-05, "loss": 12.773833274841309, "step": 5359 }, { "epoch": 0.6950714442021997, "grad_norm": 0.8164035081863403, "learning_rate": 2.136580329616119e-05, "loss": 6.766005516052246, "step": 5360 }, { "epoch": 0.6952011217104463, "grad_norm": 0.6177158951759338, "learning_rate": 2.1349064895291176e-05, "loss": 9.646099090576172, "step": 5361 }, { "epoch": 0.695330799218693, "grad_norm": 0.6352272629737854, "learning_rate": 2.1332331273673788e-05, "loss": 7.7751665115356445, "step": 5362 }, { "epoch": 0.6954604767269397, "grad_norm": 1.0077228546142578, "learning_rate": 2.131560243410039e-05, "loss": 12.017627716064453, "step": 5363 }, { "epoch": 0.6955901542351863, "grad_norm": 0.9286916851997375, "learning_rate": 2.129887837936148e-05, "loss": 9.086913108825684, "step": 5364 }, { "epoch": 0.6957198317434331, "grad_norm": 1.1175967454910278, "learning_rate": 2.128215911224683e-05, "loss": 6.780285358428955, "step": 5365 }, { "epoch": 0.6958495092516798, "grad_norm": 0.9471895694732666, "learning_rate": 2.126544463554534e-05, "loss": 10.413182258605957, "step": 5366 }, { "epoch": 0.6959791867599264, "grad_norm": 0.787143349647522, "learning_rate": 2.124873495204518e-05, "loss": 10.416664123535156, "step": 5367 }, { "epoch": 0.6961088642681731, "grad_norm": 0.850269079208374, "learning_rate": 2.123203006453365e-05, "loss": 9.460272789001465, "step": 5368 }, { "epoch": 0.6962385417764198, "grad_norm": 1.061248779296875, "learning_rate": 2.121532997579732e-05, "loss": 7.920608997344971, "step": 5369 }, { "epoch": 0.6963682192846664, "grad_norm": 0.8847149014472961, "learning_rate": 2.1198634688621893e-05, "loss": 9.52124309539795, "step": 5370 }, { "epoch": 0.6964978967929131, "grad_norm": 0.906870424747467, "learning_rate": 2.118194420579234e-05, "loss": 9.292656898498535, "step": 5371 }, { "epoch": 0.6966275743011598, "grad_norm": 0.7793776988983154, "learning_rate": 2.1165258530092762e-05, "loss": 9.583417892456055, "step": 5372 }, { "epoch": 0.6967572518094065, "grad_norm": 0.8099340796470642, "learning_rate": 2.1148577664306524e-05, "loss": 9.711668014526367, "step": 5373 }, { "epoch": 0.6968869293176532, "grad_norm": 0.9573941230773926, "learning_rate": 2.1131901611216115e-05, "loss": 9.572305679321289, "step": 5374 }, { "epoch": 0.6970166068258998, "grad_norm": 0.9240430593490601, "learning_rate": 2.1115230373603305e-05, "loss": 8.561101913452148, "step": 5375 }, { "epoch": 0.6971462843341465, "grad_norm": 0.6292369365692139, "learning_rate": 2.1098563954249002e-05, "loss": 7.520822525024414, "step": 5376 }, { "epoch": 0.6972759618423932, "grad_norm": 1.093201994895935, "learning_rate": 2.108190235593329e-05, "loss": 8.076834678649902, "step": 5377 }, { "epoch": 0.6974056393506399, "grad_norm": 0.9099239706993103, "learning_rate": 2.1065245581435537e-05, "loss": 13.006954193115234, "step": 5378 }, { "epoch": 0.6975353168588866, "grad_norm": 0.7020778059959412, "learning_rate": 2.1048593633534218e-05, "loss": 7.174769401550293, "step": 5379 }, { "epoch": 0.6976649943671333, "grad_norm": 0.8422185182571411, "learning_rate": 2.1031946515007067e-05, "loss": 5.981759071350098, "step": 5380 }, { "epoch": 0.6977946718753799, "grad_norm": 0.660965621471405, "learning_rate": 2.101530422863095e-05, "loss": 8.301152229309082, "step": 5381 }, { "epoch": 0.6979243493836266, "grad_norm": 0.6943963170051575, "learning_rate": 2.0998666777182002e-05, "loss": 6.5286688804626465, "step": 5382 }, { "epoch": 0.6980540268918732, "grad_norm": 0.9239564538002014, "learning_rate": 2.098203416343547e-05, "loss": 12.07065200805664, "step": 5383 }, { "epoch": 0.6981837044001199, "grad_norm": 0.9468674063682556, "learning_rate": 2.096540639016587e-05, "loss": 10.994624137878418, "step": 5384 }, { "epoch": 0.6983133819083667, "grad_norm": 0.8921528458595276, "learning_rate": 2.0948783460146835e-05, "loss": 8.289515495300293, "step": 5385 }, { "epoch": 0.6984430594166133, "grad_norm": 0.7168510556221008, "learning_rate": 2.093216537615128e-05, "loss": 7.380796909332275, "step": 5386 }, { "epoch": 0.69857273692486, "grad_norm": 0.9925438761711121, "learning_rate": 2.09155521409512e-05, "loss": 13.378217697143555, "step": 5387 }, { "epoch": 0.6987024144331067, "grad_norm": 0.9295891523361206, "learning_rate": 2.0898943757317913e-05, "loss": 10.739012718200684, "step": 5388 }, { "epoch": 0.6988320919413533, "grad_norm": 1.0368355512619019, "learning_rate": 2.0882340228021802e-05, "loss": 10.758484840393066, "step": 5389 }, { "epoch": 0.6989617694496, "grad_norm": 0.7114872932434082, "learning_rate": 2.0865741555832535e-05, "loss": 8.749482154846191, "step": 5390 }, { "epoch": 0.6990914469578468, "grad_norm": 0.7030739188194275, "learning_rate": 2.08491477435189e-05, "loss": 6.037009239196777, "step": 5391 }, { "epoch": 0.6992211244660934, "grad_norm": 1.115174412727356, "learning_rate": 2.0832558793848934e-05, "loss": 8.097472190856934, "step": 5392 }, { "epoch": 0.6993508019743401, "grad_norm": 0.8891645669937134, "learning_rate": 2.0815974709589804e-05, "loss": 7.500103950500488, "step": 5393 }, { "epoch": 0.6994804794825867, "grad_norm": 1.039348840713501, "learning_rate": 2.0799395493507923e-05, "loss": 8.441307067871094, "step": 5394 }, { "epoch": 0.6996101569908334, "grad_norm": 0.9266243577003479, "learning_rate": 2.0782821148368837e-05, "loss": 7.289034366607666, "step": 5395 }, { "epoch": 0.6997398344990801, "grad_norm": 0.7601134777069092, "learning_rate": 2.0766251676937338e-05, "loss": 8.951727867126465, "step": 5396 }, { "epoch": 0.6998695120073268, "grad_norm": 1.0754932165145874, "learning_rate": 2.0749687081977336e-05, "loss": 9.053912162780762, "step": 5397 }, { "epoch": 0.6999991895155735, "grad_norm": 0.6969127655029297, "learning_rate": 2.073312736625201e-05, "loss": 7.235550403594971, "step": 5398 }, { "epoch": 0.7001288670238202, "grad_norm": 1.0426864624023438, "learning_rate": 2.071657253252365e-05, "loss": 7.516812801361084, "step": 5399 }, { "epoch": 0.7002585445320668, "grad_norm": 0.9163370132446289, "learning_rate": 2.0700022583553752e-05, "loss": 7.847023963928223, "step": 5400 }, { "epoch": 0.7003882220403135, "grad_norm": 1.007779836654663, "learning_rate": 2.0683477522103045e-05, "loss": 9.7041597366333, "step": 5401 }, { "epoch": 0.7005178995485601, "grad_norm": 0.8567949533462524, "learning_rate": 2.0666937350931366e-05, "loss": 8.008288383483887, "step": 5402 }, { "epoch": 0.7006475770568068, "grad_norm": 0.5864836573600769, "learning_rate": 2.0650402072797813e-05, "loss": 6.8021368980407715, "step": 5403 }, { "epoch": 0.7007772545650536, "grad_norm": 0.8158031702041626, "learning_rate": 2.0633871690460587e-05, "loss": 10.89797306060791, "step": 5404 }, { "epoch": 0.7009069320733002, "grad_norm": 1.327601671218872, "learning_rate": 2.0617346206677163e-05, "loss": 11.949983596801758, "step": 5405 }, { "epoch": 0.7010366095815469, "grad_norm": 1.022524118423462, "learning_rate": 2.0600825624204108e-05, "loss": 8.633235931396484, "step": 5406 }, { "epoch": 0.7011662870897936, "grad_norm": 1.0686694383621216, "learning_rate": 2.0584309945797246e-05, "loss": 6.8554205894470215, "step": 5407 }, { "epoch": 0.7012959645980402, "grad_norm": 1.05577552318573, "learning_rate": 2.0567799174211533e-05, "loss": 8.451729774475098, "step": 5408 }, { "epoch": 0.7014256421062869, "grad_norm": 0.6920284032821655, "learning_rate": 2.0551293312201144e-05, "loss": 6.3569016456604, "step": 5409 }, { "epoch": 0.7015553196145337, "grad_norm": 1.040794849395752, "learning_rate": 2.0534792362519385e-05, "loss": 7.864495277404785, "step": 5410 }, { "epoch": 0.7016849971227803, "grad_norm": 0.5955052375793457, "learning_rate": 2.0518296327918806e-05, "loss": 5.741418361663818, "step": 5411 }, { "epoch": 0.701814674631027, "grad_norm": 1.0160373449325562, "learning_rate": 2.050180521115108e-05, "loss": 7.4160237312316895, "step": 5412 }, { "epoch": 0.7019443521392736, "grad_norm": 0.849759578704834, "learning_rate": 2.0485319014967113e-05, "loss": 7.410754680633545, "step": 5413 }, { "epoch": 0.7020740296475203, "grad_norm": 0.742063581943512, "learning_rate": 2.0468837742116926e-05, "loss": 7.758786201477051, "step": 5414 }, { "epoch": 0.702203707155767, "grad_norm": 0.9820054173469543, "learning_rate": 2.045236139534979e-05, "loss": 11.490815162658691, "step": 5415 }, { "epoch": 0.7023333846640136, "grad_norm": 0.67436683177948, "learning_rate": 2.0435889977414085e-05, "loss": 7.735679626464844, "step": 5416 }, { "epoch": 0.7024630621722604, "grad_norm": 0.8975594639778137, "learning_rate": 2.0419423491057437e-05, "loss": 9.516154289245605, "step": 5417 }, { "epoch": 0.7025927396805071, "grad_norm": 1.0389817953109741, "learning_rate": 2.040296193902658e-05, "loss": 8.539433479309082, "step": 5418 }, { "epoch": 0.7027224171887537, "grad_norm": 0.8014415502548218, "learning_rate": 2.0386505324067496e-05, "loss": 7.171384811401367, "step": 5419 }, { "epoch": 0.7028520946970004, "grad_norm": 0.8582507967948914, "learning_rate": 2.0370053648925268e-05, "loss": 8.436896324157715, "step": 5420 }, { "epoch": 0.702981772205247, "grad_norm": 0.8622596859931946, "learning_rate": 2.0353606916344238e-05, "loss": 9.023338317871094, "step": 5421 }, { "epoch": 0.7031114497134937, "grad_norm": 0.9882696866989136, "learning_rate": 2.0337165129067838e-05, "loss": 9.08625316619873, "step": 5422 }, { "epoch": 0.7032411272217405, "grad_norm": 1.0154701471328735, "learning_rate": 2.0320728289838747e-05, "loss": 8.465089797973633, "step": 5423 }, { "epoch": 0.7033708047299871, "grad_norm": 0.9829505681991577, "learning_rate": 2.0304296401398763e-05, "loss": 9.718876838684082, "step": 5424 }, { "epoch": 0.7035004822382338, "grad_norm": 0.8075791001319885, "learning_rate": 2.0287869466488903e-05, "loss": 8.854580879211426, "step": 5425 }, { "epoch": 0.7036301597464805, "grad_norm": 1.0684584379196167, "learning_rate": 2.0271447487849343e-05, "loss": 10.995675086975098, "step": 5426 }, { "epoch": 0.7037598372547271, "grad_norm": 1.1194459199905396, "learning_rate": 2.0255030468219404e-05, "loss": 11.245954513549805, "step": 5427 }, { "epoch": 0.7038895147629738, "grad_norm": 1.1284911632537842, "learning_rate": 2.0238618410337633e-05, "loss": 9.053231239318848, "step": 5428 }, { "epoch": 0.7040191922712206, "grad_norm": 0.7350820302963257, "learning_rate": 2.0222211316941685e-05, "loss": 5.689398765563965, "step": 5429 }, { "epoch": 0.7041488697794672, "grad_norm": 1.0776125192642212, "learning_rate": 2.020580919076846e-05, "loss": 10.749398231506348, "step": 5430 }, { "epoch": 0.7042785472877139, "grad_norm": 1.2207612991333008, "learning_rate": 2.0189412034553957e-05, "loss": 10.715105056762695, "step": 5431 }, { "epoch": 0.7044082247959605, "grad_norm": 0.8546812534332275, "learning_rate": 2.017301985103341e-05, "loss": 7.869419097900391, "step": 5432 }, { "epoch": 0.7045379023042072, "grad_norm": 1.1430050134658813, "learning_rate": 2.015663264294116e-05, "loss": 12.041977882385254, "step": 5433 }, { "epoch": 0.7046675798124539, "grad_norm": 1.1024503707885742, "learning_rate": 2.0140250413010786e-05, "loss": 10.088443756103516, "step": 5434 }, { "epoch": 0.7047972573207005, "grad_norm": 0.7543153762817383, "learning_rate": 2.0123873163974965e-05, "loss": 9.000398635864258, "step": 5435 }, { "epoch": 0.7049269348289473, "grad_norm": 1.1077098846435547, "learning_rate": 2.0107500898565622e-05, "loss": 8.338399887084961, "step": 5436 }, { "epoch": 0.705056612337194, "grad_norm": 0.7492190003395081, "learning_rate": 2.0091133619513774e-05, "loss": 11.257882118225098, "step": 5437 }, { "epoch": 0.7051862898454406, "grad_norm": 0.6681104302406311, "learning_rate": 2.0074771329549668e-05, "loss": 7.168365001678467, "step": 5438 }, { "epoch": 0.7053159673536873, "grad_norm": 0.9198975563049316, "learning_rate": 2.0058414031402668e-05, "loss": 10.783097267150879, "step": 5439 }, { "epoch": 0.705445644861934, "grad_norm": 1.0565121173858643, "learning_rate": 2.0042061727801353e-05, "loss": 10.263460159301758, "step": 5440 }, { "epoch": 0.7055753223701806, "grad_norm": 0.8720030784606934, "learning_rate": 2.0025714421473412e-05, "loss": 8.148776054382324, "step": 5441 }, { "epoch": 0.7057049998784274, "grad_norm": 0.809492826461792, "learning_rate": 2.0009372115145776e-05, "loss": 7.678422451019287, "step": 5442 }, { "epoch": 0.705834677386674, "grad_norm": 0.7947193384170532, "learning_rate": 1.999303481154446e-05, "loss": 6.921628952026367, "step": 5443 }, { "epoch": 0.7059643548949207, "grad_norm": 0.931642472743988, "learning_rate": 1.997670251339472e-05, "loss": 11.953412055969238, "step": 5444 }, { "epoch": 0.7060940324031674, "grad_norm": 1.1912546157836914, "learning_rate": 1.9960375223420902e-05, "loss": 10.6983060836792, "step": 5445 }, { "epoch": 0.706223709911414, "grad_norm": 0.7819059491157532, "learning_rate": 1.9944052944346597e-05, "loss": 9.094184875488281, "step": 5446 }, { "epoch": 0.7063533874196607, "grad_norm": 1.2153456211090088, "learning_rate": 1.9927735678894476e-05, "loss": 9.355611801147461, "step": 5447 }, { "epoch": 0.7064830649279074, "grad_norm": 0.8431050181388855, "learning_rate": 1.9911423429786462e-05, "loss": 7.282803535461426, "step": 5448 }, { "epoch": 0.7066127424361541, "grad_norm": 1.1797024011611938, "learning_rate": 1.989511619974355e-05, "loss": 7.049310684204102, "step": 5449 }, { "epoch": 0.7067424199444008, "grad_norm": 0.6956495642662048, "learning_rate": 1.9878813991485982e-05, "loss": 4.924487590789795, "step": 5450 }, { "epoch": 0.7068720974526475, "grad_norm": 1.3007973432540894, "learning_rate": 1.9862516807733094e-05, "loss": 13.890420913696289, "step": 5451 }, { "epoch": 0.7070017749608941, "grad_norm": 0.7444718480110168, "learning_rate": 1.984622465120344e-05, "loss": 6.765076637268066, "step": 5452 }, { "epoch": 0.7071314524691408, "grad_norm": 1.1573600769042969, "learning_rate": 1.982993752461469e-05, "loss": 10.695296287536621, "step": 5453 }, { "epoch": 0.7072611299773874, "grad_norm": 0.8417676091194153, "learning_rate": 1.9813655430683707e-05, "loss": 7.699193000793457, "step": 5454 }, { "epoch": 0.7073908074856342, "grad_norm": 1.2097129821777344, "learning_rate": 1.979737837212648e-05, "loss": 11.660201072692871, "step": 5455 }, { "epoch": 0.7075204849938809, "grad_norm": 0.7905622720718384, "learning_rate": 1.9781106351658212e-05, "loss": 8.3821382522583, "step": 5456 }, { "epoch": 0.7076501625021275, "grad_norm": 0.6865084767341614, "learning_rate": 1.97648393719932e-05, "loss": 8.002596855163574, "step": 5457 }, { "epoch": 0.7077798400103742, "grad_norm": 1.1109833717346191, "learning_rate": 1.974857743584496e-05, "loss": 10.10630989074707, "step": 5458 }, { "epoch": 0.7079095175186209, "grad_norm": 0.8619446158409119, "learning_rate": 1.9732320545926113e-05, "loss": 12.155610084533691, "step": 5459 }, { "epoch": 0.7080391950268675, "grad_norm": 0.9890413284301758, "learning_rate": 1.9716068704948498e-05, "loss": 8.623029708862305, "step": 5460 }, { "epoch": 0.7081688725351143, "grad_norm": 0.8313897252082825, "learning_rate": 1.9699821915623068e-05, "loss": 8.917532920837402, "step": 5461 }, { "epoch": 0.708298550043361, "grad_norm": 0.7971683740615845, "learning_rate": 1.9683580180659918e-05, "loss": 6.759172439575195, "step": 5462 }, { "epoch": 0.7084282275516076, "grad_norm": 1.2859519720077515, "learning_rate": 1.966734350276836e-05, "loss": 13.248282432556152, "step": 5463 }, { "epoch": 0.7085579050598543, "grad_norm": 1.0532739162445068, "learning_rate": 1.965111188465678e-05, "loss": 5.849549770355225, "step": 5464 }, { "epoch": 0.7086875825681009, "grad_norm": 0.8316953778266907, "learning_rate": 1.9634885329032838e-05, "loss": 10.625113487243652, "step": 5465 }, { "epoch": 0.7088172600763476, "grad_norm": 0.7542009353637695, "learning_rate": 1.961866383860323e-05, "loss": 10.099152565002441, "step": 5466 }, { "epoch": 0.7089469375845943, "grad_norm": 0.8600478768348694, "learning_rate": 1.9602447416073892e-05, "loss": 9.383421897888184, "step": 5467 }, { "epoch": 0.709076615092841, "grad_norm": 0.5432096719741821, "learning_rate": 1.9586236064149845e-05, "loss": 6.444488048553467, "step": 5468 }, { "epoch": 0.7092062926010877, "grad_norm": 0.8590654134750366, "learning_rate": 1.9570029785535333e-05, "loss": 7.038569450378418, "step": 5469 }, { "epoch": 0.7093359701093344, "grad_norm": 0.9272621273994446, "learning_rate": 1.9553828582933674e-05, "loss": 8.241316795349121, "step": 5470 }, { "epoch": 0.709465647617581, "grad_norm": 0.8381741046905518, "learning_rate": 1.9537632459047444e-05, "loss": 9.587545394897461, "step": 5471 }, { "epoch": 0.7095953251258277, "grad_norm": 0.7542586326599121, "learning_rate": 1.9521441416578255e-05, "loss": 8.836511611938477, "step": 5472 }, { "epoch": 0.7097250026340743, "grad_norm": 0.9848942160606384, "learning_rate": 1.9505255458226977e-05, "loss": 9.828222274780273, "step": 5473 }, { "epoch": 0.7098546801423211, "grad_norm": 0.941726803779602, "learning_rate": 1.9489074586693544e-05, "loss": 7.196441650390625, "step": 5474 }, { "epoch": 0.7099843576505678, "grad_norm": 1.109517216682434, "learning_rate": 1.947289880467712e-05, "loss": 8.800692558288574, "step": 5475 }, { "epoch": 0.7101140351588144, "grad_norm": 0.9748290777206421, "learning_rate": 1.9456728114875943e-05, "loss": 12.56844425201416, "step": 5476 }, { "epoch": 0.7102437126670611, "grad_norm": 0.810466468334198, "learning_rate": 1.9440562519987483e-05, "loss": 10.337713241577148, "step": 5477 }, { "epoch": 0.7103733901753078, "grad_norm": 1.0778288841247559, "learning_rate": 1.9424402022708264e-05, "loss": 11.922471046447754, "step": 5478 }, { "epoch": 0.7105030676835544, "grad_norm": 0.9086779952049255, "learning_rate": 1.940824662573407e-05, "loss": 10.383301734924316, "step": 5479 }, { "epoch": 0.7106327451918011, "grad_norm": 0.7736936211585999, "learning_rate": 1.9392096331759735e-05, "loss": 7.286220073699951, "step": 5480 }, { "epoch": 0.7107624227000479, "grad_norm": 0.9677636623382568, "learning_rate": 1.937595114347931e-05, "loss": 8.03748607635498, "step": 5481 }, { "epoch": 0.7108921002082945, "grad_norm": 0.5779348015785217, "learning_rate": 1.935981106358594e-05, "loss": 7.618823051452637, "step": 5482 }, { "epoch": 0.7110217777165412, "grad_norm": 0.988920271396637, "learning_rate": 1.9343676094771986e-05, "loss": 11.108003616333008, "step": 5483 }, { "epoch": 0.7111514552247878, "grad_norm": 0.7267476916313171, "learning_rate": 1.9327546239728888e-05, "loss": 6.541676998138428, "step": 5484 }, { "epoch": 0.7112811327330345, "grad_norm": 0.9087883234024048, "learning_rate": 1.9311421501147254e-05, "loss": 10.658256530761719, "step": 5485 }, { "epoch": 0.7114108102412812, "grad_norm": 0.8000058531761169, "learning_rate": 1.9295301881716877e-05, "loss": 7.585365295410156, "step": 5486 }, { "epoch": 0.7115404877495279, "grad_norm": 1.0799483060836792, "learning_rate": 1.927918738412663e-05, "loss": 11.299530029296875, "step": 5487 }, { "epoch": 0.7116701652577746, "grad_norm": 0.6518272161483765, "learning_rate": 1.9263078011064607e-05, "loss": 8.186370849609375, "step": 5488 }, { "epoch": 0.7117998427660213, "grad_norm": 0.8325561285018921, "learning_rate": 1.9246973765217964e-05, "loss": 8.246156692504883, "step": 5489 }, { "epoch": 0.7119295202742679, "grad_norm": 1.0163661241531372, "learning_rate": 1.923087464927308e-05, "loss": 12.958235740661621, "step": 5490 }, { "epoch": 0.7120591977825146, "grad_norm": 0.805907666683197, "learning_rate": 1.9214780665915412e-05, "loss": 9.482943534851074, "step": 5491 }, { "epoch": 0.7121888752907612, "grad_norm": 1.1530427932739258, "learning_rate": 1.9198691817829628e-05, "loss": 11.02029800415039, "step": 5492 }, { "epoch": 0.712318552799008, "grad_norm": 0.8618877530097961, "learning_rate": 1.9182608107699464e-05, "loss": 12.025674819946289, "step": 5493 }, { "epoch": 0.7124482303072547, "grad_norm": 0.6463292837142944, "learning_rate": 1.916652953820788e-05, "loss": 7.711493492126465, "step": 5494 }, { "epoch": 0.7125779078155013, "grad_norm": 0.6986920833587646, "learning_rate": 1.915045611203689e-05, "loss": 6.574398994445801, "step": 5495 }, { "epoch": 0.712707585323748, "grad_norm": 0.8018855452537537, "learning_rate": 1.9134387831867746e-05, "loss": 10.051185607910156, "step": 5496 }, { "epoch": 0.7128372628319947, "grad_norm": 0.983475387096405, "learning_rate": 1.911832470038074e-05, "loss": 10.385711669921875, "step": 5497 }, { "epoch": 0.7129669403402413, "grad_norm": 0.8482627272605896, "learning_rate": 1.9102266720255414e-05, "loss": 6.238028526306152, "step": 5498 }, { "epoch": 0.713096617848488, "grad_norm": 1.1338698863983154, "learning_rate": 1.908621389417034e-05, "loss": 13.525694847106934, "step": 5499 }, { "epoch": 0.7132262953567348, "grad_norm": 1.0975282192230225, "learning_rate": 1.9070166224803326e-05, "loss": 8.428576469421387, "step": 5500 }, { "epoch": 0.7133559728649814, "grad_norm": 0.9211544394493103, "learning_rate": 1.905412371483125e-05, "loss": 9.0911226272583, "step": 5501 }, { "epoch": 0.7134856503732281, "grad_norm": 0.9933562278747559, "learning_rate": 1.9038086366930165e-05, "loss": 10.188343048095703, "step": 5502 }, { "epoch": 0.7136153278814747, "grad_norm": 1.1461164951324463, "learning_rate": 1.9022054183775263e-05, "loss": 8.588313102722168, "step": 5503 }, { "epoch": 0.7137450053897214, "grad_norm": 1.0442184209823608, "learning_rate": 1.900602716804088e-05, "loss": 12.540472984313965, "step": 5504 }, { "epoch": 0.7138746828979681, "grad_norm": 1.447767972946167, "learning_rate": 1.8990005322400438e-05, "loss": 11.35180950164795, "step": 5505 }, { "epoch": 0.7140043604062148, "grad_norm": 0.7499863505363464, "learning_rate": 1.8973988649526587e-05, "loss": 7.591308116912842, "step": 5506 }, { "epoch": 0.7141340379144615, "grad_norm": 0.9145460724830627, "learning_rate": 1.8957977152091004e-05, "loss": 6.802785396575928, "step": 5507 }, { "epoch": 0.7142637154227082, "grad_norm": 1.2343157529830933, "learning_rate": 1.8941970832764617e-05, "loss": 8.659595489501953, "step": 5508 }, { "epoch": 0.7143933929309548, "grad_norm": 0.7112570405006409, "learning_rate": 1.8925969694217406e-05, "loss": 7.633080005645752, "step": 5509 }, { "epoch": 0.7145230704392015, "grad_norm": 0.6762117743492126, "learning_rate": 1.8909973739118503e-05, "loss": 6.9498090744018555, "step": 5510 }, { "epoch": 0.7146527479474482, "grad_norm": 0.9479188323020935, "learning_rate": 1.889398297013622e-05, "loss": 9.194295883178711, "step": 5511 }, { "epoch": 0.7147824254556948, "grad_norm": 0.9217292070388794, "learning_rate": 1.8877997389937936e-05, "loss": 10.317343711853027, "step": 5512 }, { "epoch": 0.7149121029639416, "grad_norm": 0.7246597409248352, "learning_rate": 1.8862017001190237e-05, "loss": 9.46413516998291, "step": 5513 }, { "epoch": 0.7150417804721882, "grad_norm": 0.7501819729804993, "learning_rate": 1.8846041806558766e-05, "loss": 11.39745807647705, "step": 5514 }, { "epoch": 0.7151714579804349, "grad_norm": 1.0096784830093384, "learning_rate": 1.8830071808708388e-05, "loss": 12.555395126342773, "step": 5515 }, { "epoch": 0.7153011354886816, "grad_norm": 0.8537389636039734, "learning_rate": 1.8814107010303006e-05, "loss": 9.002203941345215, "step": 5516 }, { "epoch": 0.7154308129969282, "grad_norm": 0.902781069278717, "learning_rate": 1.879814741400574e-05, "loss": 6.035604476928711, "step": 5517 }, { "epoch": 0.7155604905051749, "grad_norm": 0.8127384781837463, "learning_rate": 1.8782193022478772e-05, "loss": 15.142292976379395, "step": 5518 }, { "epoch": 0.7156901680134217, "grad_norm": 1.2538999319076538, "learning_rate": 1.8766243838383478e-05, "loss": 12.933883666992188, "step": 5519 }, { "epoch": 0.7158198455216683, "grad_norm": 0.901213526725769, "learning_rate": 1.875029986438031e-05, "loss": 9.88615608215332, "step": 5520 }, { "epoch": 0.715949523029915, "grad_norm": 0.5622350573539734, "learning_rate": 1.8734361103128905e-05, "loss": 5.902910232543945, "step": 5521 }, { "epoch": 0.7160792005381617, "grad_norm": 0.698438286781311, "learning_rate": 1.871842755728797e-05, "loss": 7.147611618041992, "step": 5522 }, { "epoch": 0.7162088780464083, "grad_norm": 0.7048574090003967, "learning_rate": 1.8702499229515408e-05, "loss": 6.858699798583984, "step": 5523 }, { "epoch": 0.716338555554655, "grad_norm": 0.58216392993927, "learning_rate": 1.868657612246818e-05, "loss": 6.1611223220825195, "step": 5524 }, { "epoch": 0.7164682330629017, "grad_norm": 0.9303081035614014, "learning_rate": 1.8670658238802456e-05, "loss": 8.355582237243652, "step": 5525 }, { "epoch": 0.7165979105711484, "grad_norm": 0.9469187259674072, "learning_rate": 1.8654745581173455e-05, "loss": 13.885151863098145, "step": 5526 }, { "epoch": 0.7167275880793951, "grad_norm": 0.7988415956497192, "learning_rate": 1.8638838152235594e-05, "loss": 10.402817726135254, "step": 5527 }, { "epoch": 0.7168572655876417, "grad_norm": 0.9110346436500549, "learning_rate": 1.8622935954642357e-05, "loss": 9.024986267089844, "step": 5528 }, { "epoch": 0.7169869430958884, "grad_norm": 0.9023231863975525, "learning_rate": 1.860703899104641e-05, "loss": 7.633073806762695, "step": 5529 }, { "epoch": 0.7171166206041351, "grad_norm": 0.7131081819534302, "learning_rate": 1.859114726409949e-05, "loss": 6.1850080490112305, "step": 5530 }, { "epoch": 0.7172462981123817, "grad_norm": 0.8460307121276855, "learning_rate": 1.8575260776452523e-05, "loss": 9.51320743560791, "step": 5531 }, { "epoch": 0.7173759756206285, "grad_norm": 0.8632845878601074, "learning_rate": 1.8559379530755507e-05, "loss": 9.718311309814453, "step": 5532 }, { "epoch": 0.7175056531288752, "grad_norm": 1.0005971193313599, "learning_rate": 1.85435035296576e-05, "loss": 10.06037425994873, "step": 5533 }, { "epoch": 0.7176353306371218, "grad_norm": 1.190398931503296, "learning_rate": 1.852763277580705e-05, "loss": 10.119117736816406, "step": 5534 }, { "epoch": 0.7177650081453685, "grad_norm": 1.2244805097579956, "learning_rate": 1.851176727185129e-05, "loss": 9.376157760620117, "step": 5535 }, { "epoch": 0.7178946856536151, "grad_norm": 0.9257404208183289, "learning_rate": 1.8495907020436797e-05, "loss": 9.686066627502441, "step": 5536 }, { "epoch": 0.7180243631618618, "grad_norm": 1.0366857051849365, "learning_rate": 1.8480052024209253e-05, "loss": 12.582457542419434, "step": 5537 }, { "epoch": 0.7181540406701086, "grad_norm": 0.9067603945732117, "learning_rate": 1.8464202285813386e-05, "loss": 7.928398132324219, "step": 5538 }, { "epoch": 0.7182837181783552, "grad_norm": 0.5398454666137695, "learning_rate": 1.844835780789313e-05, "loss": 6.593112945556641, "step": 5539 }, { "epoch": 0.7184133956866019, "grad_norm": 1.025194525718689, "learning_rate": 1.8432518593091446e-05, "loss": 11.042410850524902, "step": 5540 }, { "epoch": 0.7185430731948486, "grad_norm": 1.2253535985946655, "learning_rate": 1.8416684644050496e-05, "loss": 8.803253173828125, "step": 5541 }, { "epoch": 0.7186727507030952, "grad_norm": 0.8463313579559326, "learning_rate": 1.8400855963411556e-05, "loss": 7.756769180297852, "step": 5542 }, { "epoch": 0.7188024282113419, "grad_norm": 0.7559575438499451, "learning_rate": 1.8385032553814957e-05, "loss": 10.21492862701416, "step": 5543 }, { "epoch": 0.7189321057195885, "grad_norm": 0.7231060266494751, "learning_rate": 1.8369214417900244e-05, "loss": 7.5151190757751465, "step": 5544 }, { "epoch": 0.7190617832278353, "grad_norm": 1.1132112741470337, "learning_rate": 1.8353401558305988e-05, "loss": 10.059982299804688, "step": 5545 }, { "epoch": 0.719191460736082, "grad_norm": 1.1184430122375488, "learning_rate": 1.8337593977669964e-05, "loss": 9.493947982788086, "step": 5546 }, { "epoch": 0.7193211382443286, "grad_norm": 0.9592131972312927, "learning_rate": 1.8321791678629e-05, "loss": 7.564813137054443, "step": 5547 }, { "epoch": 0.7194508157525753, "grad_norm": 1.1260343790054321, "learning_rate": 1.830599466381911e-05, "loss": 12.10582160949707, "step": 5548 }, { "epoch": 0.719580493260822, "grad_norm": 0.715935468673706, "learning_rate": 1.8290202935875345e-05, "loss": 10.147767066955566, "step": 5549 }, { "epoch": 0.7197101707690686, "grad_norm": 0.9144628643989563, "learning_rate": 1.827441649743195e-05, "loss": 10.476081848144531, "step": 5550 }, { "epoch": 0.7198398482773154, "grad_norm": 0.8792417049407959, "learning_rate": 1.8258635351122232e-05, "loss": 10.064390182495117, "step": 5551 }, { "epoch": 0.7199695257855621, "grad_norm": 1.118333339691162, "learning_rate": 1.824285949957867e-05, "loss": 10.638578414916992, "step": 5552 }, { "epoch": 0.7200992032938087, "grad_norm": 0.8159320950508118, "learning_rate": 1.8227088945432786e-05, "loss": 8.868685722351074, "step": 5553 }, { "epoch": 0.7202288808020554, "grad_norm": 0.7028814554214478, "learning_rate": 1.8211323691315297e-05, "loss": 7.467286586761475, "step": 5554 }, { "epoch": 0.720358558310302, "grad_norm": 1.0433485507965088, "learning_rate": 1.8195563739855976e-05, "loss": 7.138751029968262, "step": 5555 }, { "epoch": 0.7204882358185487, "grad_norm": 0.8023721575737, "learning_rate": 1.8179809093683754e-05, "loss": 6.847019195556641, "step": 5556 }, { "epoch": 0.7206179133267955, "grad_norm": 0.9001631736755371, "learning_rate": 1.8164059755426638e-05, "loss": 9.984114646911621, "step": 5557 }, { "epoch": 0.7207475908350421, "grad_norm": 0.8074120879173279, "learning_rate": 1.8148315727711797e-05, "loss": 11.047887802124023, "step": 5558 }, { "epoch": 0.7208772683432888, "grad_norm": 1.1053013801574707, "learning_rate": 1.8132577013165454e-05, "loss": 9.969034194946289, "step": 5559 }, { "epoch": 0.7210069458515355, "grad_norm": 0.690030038356781, "learning_rate": 1.811684361441301e-05, "loss": 8.226313591003418, "step": 5560 }, { "epoch": 0.7211366233597821, "grad_norm": 0.7293067574501038, "learning_rate": 1.810111553407891e-05, "loss": 6.721833229064941, "step": 5561 }, { "epoch": 0.7212663008680288, "grad_norm": 0.9862430691719055, "learning_rate": 1.8085392774786796e-05, "loss": 10.397486686706543, "step": 5562 }, { "epoch": 0.7213959783762754, "grad_norm": 0.7993158102035522, "learning_rate": 1.8069675339159337e-05, "loss": 7.898286819458008, "step": 5563 }, { "epoch": 0.7215256558845222, "grad_norm": 0.9552704095840454, "learning_rate": 1.8053963229818388e-05, "loss": 11.040939331054688, "step": 5564 }, { "epoch": 0.7216553333927689, "grad_norm": 0.8238338828086853, "learning_rate": 1.803825644938484e-05, "loss": 10.157553672790527, "step": 5565 }, { "epoch": 0.7217850109010155, "grad_norm": 0.9296931028366089, "learning_rate": 1.8022555000478776e-05, "loss": 7.650491237640381, "step": 5566 }, { "epoch": 0.7219146884092622, "grad_norm": 0.9590789079666138, "learning_rate": 1.8006858885719307e-05, "loss": 7.545560836791992, "step": 5567 }, { "epoch": 0.7220443659175089, "grad_norm": 0.7572529911994934, "learning_rate": 1.7991168107724743e-05, "loss": 7.755287170410156, "step": 5568 }, { "epoch": 0.7221740434257555, "grad_norm": 1.2145212888717651, "learning_rate": 1.7975482669112415e-05, "loss": 8.701169967651367, "step": 5569 }, { "epoch": 0.7223037209340023, "grad_norm": 1.290354609489441, "learning_rate": 1.795980257249884e-05, "loss": 11.923404693603516, "step": 5570 }, { "epoch": 0.722433398442249, "grad_norm": 0.7535591125488281, "learning_rate": 1.7944127820499597e-05, "loss": 6.612511157989502, "step": 5571 }, { "epoch": 0.7225630759504956, "grad_norm": 0.6989402174949646, "learning_rate": 1.7928458415729366e-05, "loss": 8.814227104187012, "step": 5572 }, { "epoch": 0.7226927534587423, "grad_norm": 1.021163821220398, "learning_rate": 1.7912794360801987e-05, "loss": 7.027766227722168, "step": 5573 }, { "epoch": 0.722822430966989, "grad_norm": 0.8684087991714478, "learning_rate": 1.789713565833035e-05, "loss": 10.73574161529541, "step": 5574 }, { "epoch": 0.7229521084752356, "grad_norm": 0.9559865593910217, "learning_rate": 1.7881482310926502e-05, "loss": 10.371110916137695, "step": 5575 }, { "epoch": 0.7230817859834823, "grad_norm": 0.6722229719161987, "learning_rate": 1.7865834321201535e-05, "loss": 9.397625923156738, "step": 5576 }, { "epoch": 0.723211463491729, "grad_norm": 0.9267637729644775, "learning_rate": 1.7850191691765732e-05, "loss": 9.156343460083008, "step": 5577 }, { "epoch": 0.7233411409999757, "grad_norm": 0.8904090523719788, "learning_rate": 1.7834554425228396e-05, "loss": 6.840391159057617, "step": 5578 }, { "epoch": 0.7234708185082224, "grad_norm": 0.9533516764640808, "learning_rate": 1.781892252419799e-05, "loss": 10.250297546386719, "step": 5579 }, { "epoch": 0.723600496016469, "grad_norm": 0.8382953405380249, "learning_rate": 1.780329599128206e-05, "loss": 10.187238693237305, "step": 5580 }, { "epoch": 0.7237301735247157, "grad_norm": 1.2625229358673096, "learning_rate": 1.7787674829087292e-05, "loss": 9.974414825439453, "step": 5581 }, { "epoch": 0.7238598510329624, "grad_norm": 1.0221633911132812, "learning_rate": 1.77720590402194e-05, "loss": 7.687809944152832, "step": 5582 }, { "epoch": 0.7239895285412091, "grad_norm": 0.8841068744659424, "learning_rate": 1.7756448627283296e-05, "loss": 7.832972526550293, "step": 5583 }, { "epoch": 0.7241192060494558, "grad_norm": 1.1538608074188232, "learning_rate": 1.77408435928829e-05, "loss": 11.52134895324707, "step": 5584 }, { "epoch": 0.7242488835577024, "grad_norm": 0.693841278553009, "learning_rate": 1.772524393962132e-05, "loss": 4.848850727081299, "step": 5585 }, { "epoch": 0.7243785610659491, "grad_norm": 0.6207788586616516, "learning_rate": 1.7709649670100702e-05, "loss": 6.314074993133545, "step": 5586 }, { "epoch": 0.7245082385741958, "grad_norm": 1.115759253501892, "learning_rate": 1.769406078692235e-05, "loss": 8.889435768127441, "step": 5587 }, { "epoch": 0.7246379160824424, "grad_norm": 0.8928805589675903, "learning_rate": 1.76784772926866e-05, "loss": 7.774936676025391, "step": 5588 }, { "epoch": 0.7247675935906892, "grad_norm": 0.7240104675292969, "learning_rate": 1.7662899189992977e-05, "loss": 8.92231559753418, "step": 5589 }, { "epoch": 0.7248972710989359, "grad_norm": 0.8975764513015747, "learning_rate": 1.7647326481440013e-05, "loss": 8.314501762390137, "step": 5590 }, { "epoch": 0.7250269486071825, "grad_norm": 0.9762169122695923, "learning_rate": 1.763175916962543e-05, "loss": 8.495543479919434, "step": 5591 }, { "epoch": 0.7251566261154292, "grad_norm": 0.8326175212860107, "learning_rate": 1.761619725714596e-05, "loss": 9.0722074508667, "step": 5592 }, { "epoch": 0.7252863036236759, "grad_norm": 0.8328027725219727, "learning_rate": 1.7600640746597525e-05, "loss": 8.345436096191406, "step": 5593 }, { "epoch": 0.7254159811319225, "grad_norm": 0.8137236833572388, "learning_rate": 1.7585089640575082e-05, "loss": 8.9151611328125, "step": 5594 }, { "epoch": 0.7255456586401692, "grad_norm": 0.8089928030967712, "learning_rate": 1.756954394167268e-05, "loss": 8.899270057678223, "step": 5595 }, { "epoch": 0.725675336148416, "grad_norm": 0.8934457898139954, "learning_rate": 1.7554003652483536e-05, "loss": 9.733235359191895, "step": 5596 }, { "epoch": 0.7258050136566626, "grad_norm": 1.1714924573898315, "learning_rate": 1.753846877559988e-05, "loss": 12.208372116088867, "step": 5597 }, { "epoch": 0.7259346911649093, "grad_norm": 0.9270420670509338, "learning_rate": 1.7522939313613117e-05, "loss": 11.370792388916016, "step": 5598 }, { "epoch": 0.7260643686731559, "grad_norm": 1.062382459640503, "learning_rate": 1.750741526911368e-05, "loss": 10.115527153015137, "step": 5599 }, { "epoch": 0.7261940461814026, "grad_norm": 1.0629812479019165, "learning_rate": 1.749189664469116e-05, "loss": 11.626459121704102, "step": 5600 }, { "epoch": 0.7263237236896493, "grad_norm": 0.955750584602356, "learning_rate": 1.7476383442934173e-05, "loss": 7.855014324188232, "step": 5601 }, { "epoch": 0.726453401197896, "grad_norm": 0.9421610236167908, "learning_rate": 1.7460875666430516e-05, "loss": 6.8225483894348145, "step": 5602 }, { "epoch": 0.7265830787061427, "grad_norm": 0.914915144443512, "learning_rate": 1.7445373317767e-05, "loss": 8.0003080368042, "step": 5603 }, { "epoch": 0.7267127562143894, "grad_norm": 0.9425815343856812, "learning_rate": 1.7429876399529593e-05, "loss": 8.582601547241211, "step": 5604 }, { "epoch": 0.726842433722636, "grad_norm": 0.6651062965393066, "learning_rate": 1.74143849143033e-05, "loss": 7.82194709777832, "step": 5605 }, { "epoch": 0.7269721112308827, "grad_norm": 0.6244687438011169, "learning_rate": 1.7398898864672285e-05, "loss": 8.200831413269043, "step": 5606 }, { "epoch": 0.7271017887391293, "grad_norm": 1.0387614965438843, "learning_rate": 1.738341825321974e-05, "loss": 11.13438892364502, "step": 5607 }, { "epoch": 0.727231466247376, "grad_norm": 0.8270686864852905, "learning_rate": 1.736794308252801e-05, "loss": 9.587994575500488, "step": 5608 }, { "epoch": 0.7273611437556228, "grad_norm": 1.2213671207427979, "learning_rate": 1.7352473355178472e-05, "loss": 10.534235954284668, "step": 5609 }, { "epoch": 0.7274908212638694, "grad_norm": 0.6896965503692627, "learning_rate": 1.7337009073751658e-05, "loss": 7.085715293884277, "step": 5610 }, { "epoch": 0.7276204987721161, "grad_norm": 0.9159055948257446, "learning_rate": 1.7321550240827124e-05, "loss": 8.870988845825195, "step": 5611 }, { "epoch": 0.7277501762803628, "grad_norm": 1.0749551057815552, "learning_rate": 1.730609685898359e-05, "loss": 7.2315673828125, "step": 5612 }, { "epoch": 0.7278798537886094, "grad_norm": 0.6577610373497009, "learning_rate": 1.729064893079879e-05, "loss": 7.41486930847168, "step": 5613 }, { "epoch": 0.7280095312968561, "grad_norm": 0.7100976705551147, "learning_rate": 1.727520645884963e-05, "loss": 8.417808532714844, "step": 5614 }, { "epoch": 0.7281392088051029, "grad_norm": 1.0255999565124512, "learning_rate": 1.7259769445712026e-05, "loss": 8.089003562927246, "step": 5615 }, { "epoch": 0.7282688863133495, "grad_norm": 0.7700716257095337, "learning_rate": 1.7244337893961047e-05, "loss": 8.029197692871094, "step": 5616 }, { "epoch": 0.7283985638215962, "grad_norm": 0.7944751381874084, "learning_rate": 1.7228911806170804e-05, "loss": 10.977972984313965, "step": 5617 }, { "epoch": 0.7285282413298428, "grad_norm": 0.8681538105010986, "learning_rate": 1.7213491184914525e-05, "loss": 8.321988105773926, "step": 5618 }, { "epoch": 0.7286579188380895, "grad_norm": 0.87191241979599, "learning_rate": 1.7198076032764544e-05, "loss": 6.357160568237305, "step": 5619 }, { "epoch": 0.7287875963463362, "grad_norm": 0.776475191116333, "learning_rate": 1.718266635229221e-05, "loss": 8.834117889404297, "step": 5620 }, { "epoch": 0.7289172738545829, "grad_norm": 0.8128113150596619, "learning_rate": 1.7167262146068054e-05, "loss": 7.620642185211182, "step": 5621 }, { "epoch": 0.7290469513628296, "grad_norm": 0.9390064477920532, "learning_rate": 1.71518634166616e-05, "loss": 10.583780288696289, "step": 5622 }, { "epoch": 0.7291766288710763, "grad_norm": 1.003129482269287, "learning_rate": 1.713647016664155e-05, "loss": 11.396256446838379, "step": 5623 }, { "epoch": 0.7293063063793229, "grad_norm": 1.4030102491378784, "learning_rate": 1.7121082398575604e-05, "loss": 10.950901985168457, "step": 5624 }, { "epoch": 0.7294359838875696, "grad_norm": 0.7435871362686157, "learning_rate": 1.7105700115030633e-05, "loss": 6.971834182739258, "step": 5625 }, { "epoch": 0.7295656613958162, "grad_norm": 1.2112996578216553, "learning_rate": 1.709032331857251e-05, "loss": 7.0142927169799805, "step": 5626 }, { "epoch": 0.7296953389040629, "grad_norm": 0.8317955136299133, "learning_rate": 1.7074952011766264e-05, "loss": 7.123628616333008, "step": 5627 }, { "epoch": 0.7298250164123097, "grad_norm": 1.0034739971160889, "learning_rate": 1.705958619717595e-05, "loss": 8.096173286437988, "step": 5628 }, { "epoch": 0.7299546939205563, "grad_norm": 1.0132691860198975, "learning_rate": 1.7044225877364774e-05, "loss": 11.165844917297363, "step": 5629 }, { "epoch": 0.730084371428803, "grad_norm": 1.2328922748565674, "learning_rate": 1.702887105489494e-05, "loss": 13.908906936645508, "step": 5630 }, { "epoch": 0.7302140489370497, "grad_norm": 0.7986921072006226, "learning_rate": 1.701352173232782e-05, "loss": 11.144340515136719, "step": 5631 }, { "epoch": 0.7303437264452963, "grad_norm": 0.8778841495513916, "learning_rate": 1.699817791222379e-05, "loss": 9.421072959899902, "step": 5632 }, { "epoch": 0.730473403953543, "grad_norm": 1.4815458059310913, "learning_rate": 1.698283959714239e-05, "loss": 12.136791229248047, "step": 5633 }, { "epoch": 0.7306030814617898, "grad_norm": 0.6408952474594116, "learning_rate": 1.6967506789642158e-05, "loss": 6.970175266265869, "step": 5634 }, { "epoch": 0.7307327589700364, "grad_norm": 0.8800951838493347, "learning_rate": 1.6952179492280796e-05, "loss": 8.638350486755371, "step": 5635 }, { "epoch": 0.7308624364782831, "grad_norm": 1.1540342569351196, "learning_rate": 1.6936857707615e-05, "loss": 10.728414535522461, "step": 5636 }, { "epoch": 0.7309921139865297, "grad_norm": 0.9772174954414368, "learning_rate": 1.6921541438200633e-05, "loss": 9.034954071044922, "step": 5637 }, { "epoch": 0.7311217914947764, "grad_norm": 1.0663940906524658, "learning_rate": 1.690623068659256e-05, "loss": 9.847009658813477, "step": 5638 }, { "epoch": 0.7312514690030231, "grad_norm": 0.6695917844772339, "learning_rate": 1.6890925455344803e-05, "loss": 8.327768325805664, "step": 5639 }, { "epoch": 0.7313811465112697, "grad_norm": 1.2729171514511108, "learning_rate": 1.6875625747010377e-05, "loss": 13.284502029418945, "step": 5640 }, { "epoch": 0.7315108240195165, "grad_norm": 0.8423110842704773, "learning_rate": 1.6860331564141463e-05, "loss": 8.743651390075684, "step": 5641 }, { "epoch": 0.7316405015277632, "grad_norm": 1.1796541213989258, "learning_rate": 1.6845042909289242e-05, "loss": 9.139788627624512, "step": 5642 }, { "epoch": 0.7317701790360098, "grad_norm": 0.9559906721115112, "learning_rate": 1.6829759785004044e-05, "loss": 6.842536449432373, "step": 5643 }, { "epoch": 0.7318998565442565, "grad_norm": 0.594031035900116, "learning_rate": 1.6814482193835213e-05, "loss": 6.76305627822876, "step": 5644 }, { "epoch": 0.7320295340525032, "grad_norm": 0.5284383296966553, "learning_rate": 1.679921013833122e-05, "loss": 6.212497234344482, "step": 5645 }, { "epoch": 0.7321592115607498, "grad_norm": 0.9492420554161072, "learning_rate": 1.6783943621039568e-05, "loss": 8.549778938293457, "step": 5646 }, { "epoch": 0.7322888890689966, "grad_norm": 0.851267397403717, "learning_rate": 1.676868264450689e-05, "loss": 9.170435905456543, "step": 5647 }, { "epoch": 0.7324185665772432, "grad_norm": 1.1931382417678833, "learning_rate": 1.6753427211278828e-05, "loss": 10.121976852416992, "step": 5648 }, { "epoch": 0.7325482440854899, "grad_norm": 1.101669430732727, "learning_rate": 1.6738177323900173e-05, "loss": 6.555028915405273, "step": 5649 }, { "epoch": 0.7326779215937366, "grad_norm": 0.8930280208587646, "learning_rate": 1.672293298491472e-05, "loss": 7.7138566970825195, "step": 5650 }, { "epoch": 0.7328075991019832, "grad_norm": 0.8618791699409485, "learning_rate": 1.6707694196865405e-05, "loss": 7.135411262512207, "step": 5651 }, { "epoch": 0.7329372766102299, "grad_norm": 1.0119025707244873, "learning_rate": 1.6692460962294166e-05, "loss": 8.682448387145996, "step": 5652 }, { "epoch": 0.7330669541184767, "grad_norm": 0.9994485974311829, "learning_rate": 1.6677233283742096e-05, "loss": 9.649288177490234, "step": 5653 }, { "epoch": 0.7331966316267233, "grad_norm": 0.8305063247680664, "learning_rate": 1.6662011163749274e-05, "loss": 9.79699993133545, "step": 5654 }, { "epoch": 0.73332630913497, "grad_norm": 0.7466878294944763, "learning_rate": 1.6646794604854943e-05, "loss": 7.743341445922852, "step": 5655 }, { "epoch": 0.7334559866432167, "grad_norm": 0.5528193116188049, "learning_rate": 1.6631583609597336e-05, "loss": 8.103473663330078, "step": 5656 }, { "epoch": 0.7335856641514633, "grad_norm": 1.3850516080856323, "learning_rate": 1.6616378180513802e-05, "loss": 10.954047203063965, "step": 5657 }, { "epoch": 0.73371534165971, "grad_norm": 0.9785155653953552, "learning_rate": 1.6601178320140774e-05, "loss": 9.697343826293945, "step": 5658 }, { "epoch": 0.7338450191679566, "grad_norm": 0.7765417098999023, "learning_rate": 1.6585984031013708e-05, "loss": 7.0570759773254395, "step": 5659 }, { "epoch": 0.7339746966762034, "grad_norm": 0.7840708494186401, "learning_rate": 1.657079531566718e-05, "loss": 10.263099670410156, "step": 5660 }, { "epoch": 0.7341043741844501, "grad_norm": 0.8018361926078796, "learning_rate": 1.6555612176634793e-05, "loss": 10.306984901428223, "step": 5661 }, { "epoch": 0.7342340516926967, "grad_norm": 1.1438928842544556, "learning_rate": 1.6540434616449272e-05, "loss": 11.250663757324219, "step": 5662 }, { "epoch": 0.7343637292009434, "grad_norm": 0.8467040657997131, "learning_rate": 1.6525262637642342e-05, "loss": 8.119624137878418, "step": 5663 }, { "epoch": 0.73449340670919, "grad_norm": 0.6721634864807129, "learning_rate": 1.6510096242744873e-05, "loss": 8.988986015319824, "step": 5664 }, { "epoch": 0.7346230842174367, "grad_norm": 0.9890714883804321, "learning_rate": 1.649493543428673e-05, "loss": 9.86978816986084, "step": 5665 }, { "epoch": 0.7347527617256835, "grad_norm": 0.955051064491272, "learning_rate": 1.6479780214796913e-05, "loss": 10.240073204040527, "step": 5666 }, { "epoch": 0.7348824392339302, "grad_norm": 1.188841700553894, "learning_rate": 1.6464630586803437e-05, "loss": 13.734231948852539, "step": 5667 }, { "epoch": 0.7350121167421768, "grad_norm": 0.9082068800926208, "learning_rate": 1.6449486552833425e-05, "loss": 9.411592483520508, "step": 5668 }, { "epoch": 0.7351417942504235, "grad_norm": 1.2747743129730225, "learning_rate": 1.6434348115413027e-05, "loss": 11.402216911315918, "step": 5669 }, { "epoch": 0.7352714717586701, "grad_norm": 1.0257675647735596, "learning_rate": 1.641921527706751e-05, "loss": 11.286077499389648, "step": 5670 }, { "epoch": 0.7354011492669168, "grad_norm": 0.9659031629562378, "learning_rate": 1.640408804032114e-05, "loss": 7.881206035614014, "step": 5671 }, { "epoch": 0.7355308267751635, "grad_norm": 1.014542818069458, "learning_rate": 1.6388966407697326e-05, "loss": 6.407600402832031, "step": 5672 }, { "epoch": 0.7356605042834102, "grad_norm": 0.9285289645195007, "learning_rate": 1.6373850381718463e-05, "loss": 9.33957290649414, "step": 5673 }, { "epoch": 0.7357901817916569, "grad_norm": 1.0839697122573853, "learning_rate": 1.635873996490609e-05, "loss": 8.903843879699707, "step": 5674 }, { "epoch": 0.7359198592999036, "grad_norm": 0.7836478352546692, "learning_rate": 1.634363515978073e-05, "loss": 9.261960983276367, "step": 5675 }, { "epoch": 0.7360495368081502, "grad_norm": 0.7209866046905518, "learning_rate": 1.6328535968862045e-05, "loss": 9.56467342376709, "step": 5676 }, { "epoch": 0.7361792143163969, "grad_norm": 0.789404571056366, "learning_rate": 1.63134423946687e-05, "loss": 4.841297149658203, "step": 5677 }, { "epoch": 0.7363088918246435, "grad_norm": 1.0352414846420288, "learning_rate": 1.6298354439718473e-05, "loss": 8.938953399658203, "step": 5678 }, { "epoch": 0.7364385693328903, "grad_norm": 1.089359164237976, "learning_rate": 1.6283272106528146e-05, "loss": 9.672892570495605, "step": 5679 }, { "epoch": 0.736568246841137, "grad_norm": 1.0595853328704834, "learning_rate": 1.6268195397613638e-05, "loss": 8.06527328491211, "step": 5680 }, { "epoch": 0.7366979243493836, "grad_norm": 0.6851950287818909, "learning_rate": 1.6253124315489877e-05, "loss": 10.926310539245605, "step": 5681 }, { "epoch": 0.7368276018576303, "grad_norm": 1.3912773132324219, "learning_rate": 1.623805886267084e-05, "loss": 9.06851863861084, "step": 5682 }, { "epoch": 0.736957279365877, "grad_norm": 0.5413646101951599, "learning_rate": 1.6222999041669624e-05, "loss": 5.698758125305176, "step": 5683 }, { "epoch": 0.7370869568741236, "grad_norm": 1.2219616174697876, "learning_rate": 1.6207944854998325e-05, "loss": 12.020833969116211, "step": 5684 }, { "epoch": 0.7372166343823704, "grad_norm": 0.9371863603591919, "learning_rate": 1.6192896305168154e-05, "loss": 7.491979598999023, "step": 5685 }, { "epoch": 0.7373463118906171, "grad_norm": 1.3250538110733032, "learning_rate": 1.6177853394689318e-05, "loss": 7.45565128326416, "step": 5686 }, { "epoch": 0.7374759893988637, "grad_norm": 0.8961927890777588, "learning_rate": 1.616281612607117e-05, "loss": 8.466031074523926, "step": 5687 }, { "epoch": 0.7376056669071104, "grad_norm": 0.6395536661148071, "learning_rate": 1.6147784501822018e-05, "loss": 6.275444507598877, "step": 5688 }, { "epoch": 0.737735344415357, "grad_norm": 0.8081601858139038, "learning_rate": 1.613275852444932e-05, "loss": 9.312792778015137, "step": 5689 }, { "epoch": 0.7378650219236037, "grad_norm": 1.1193381547927856, "learning_rate": 1.611773819645953e-05, "loss": 9.153032302856445, "step": 5690 }, { "epoch": 0.7379946994318504, "grad_norm": 1.1481225490570068, "learning_rate": 1.6102723520358215e-05, "loss": 13.371525764465332, "step": 5691 }, { "epoch": 0.7381243769400971, "grad_norm": 0.9364641904830933, "learning_rate": 1.6087714498649926e-05, "loss": 10.549816131591797, "step": 5692 }, { "epoch": 0.7382540544483438, "grad_norm": 1.0104784965515137, "learning_rate": 1.6072711133838354e-05, "loss": 11.04200267791748, "step": 5693 }, { "epoch": 0.7383837319565905, "grad_norm": 0.5737379193305969, "learning_rate": 1.6057713428426154e-05, "loss": 7.729524612426758, "step": 5694 }, { "epoch": 0.7385134094648371, "grad_norm": 0.9907150268554688, "learning_rate": 1.6042721384915148e-05, "loss": 9.301163673400879, "step": 5695 }, { "epoch": 0.7386430869730838, "grad_norm": 0.9108614921569824, "learning_rate": 1.602773500580611e-05, "loss": 8.882491111755371, "step": 5696 }, { "epoch": 0.7387727644813304, "grad_norm": 1.004197359085083, "learning_rate": 1.6012754293598954e-05, "loss": 9.205992698669434, "step": 5697 }, { "epoch": 0.7389024419895772, "grad_norm": 1.256699800491333, "learning_rate": 1.5997779250792555e-05, "loss": 10.125773429870605, "step": 5698 }, { "epoch": 0.7390321194978239, "grad_norm": 0.975898027420044, "learning_rate": 1.598280987988494e-05, "loss": 9.178667068481445, "step": 5699 }, { "epoch": 0.7391617970060705, "grad_norm": 0.8572267293930054, "learning_rate": 1.5967846183373115e-05, "loss": 10.263376235961914, "step": 5700 }, { "epoch": 0.7392914745143172, "grad_norm": 0.8641577959060669, "learning_rate": 1.5952888163753195e-05, "loss": 7.6739821434021, "step": 5701 }, { "epoch": 0.7394211520225639, "grad_norm": 0.8352086544036865, "learning_rate": 1.593793582352029e-05, "loss": 9.692669868469238, "step": 5702 }, { "epoch": 0.7395508295308105, "grad_norm": 0.7492668032646179, "learning_rate": 1.5922989165168622e-05, "loss": 8.987388610839844, "step": 5703 }, { "epoch": 0.7396805070390572, "grad_norm": 0.718762993812561, "learning_rate": 1.590804819119144e-05, "loss": 8.753534317016602, "step": 5704 }, { "epoch": 0.739810184547304, "grad_norm": 0.8161287307739258, "learning_rate": 1.5893112904081004e-05, "loss": 8.772930145263672, "step": 5705 }, { "epoch": 0.7399398620555506, "grad_norm": 1.6181745529174805, "learning_rate": 1.5878183306328713e-05, "loss": 12.872390747070312, "step": 5706 }, { "epoch": 0.7400695395637973, "grad_norm": 1.1135860681533813, "learning_rate": 1.5863259400424928e-05, "loss": 10.271413803100586, "step": 5707 }, { "epoch": 0.740199217072044, "grad_norm": 0.779879629611969, "learning_rate": 1.584834118885914e-05, "loss": 8.77488899230957, "step": 5708 }, { "epoch": 0.7403288945802906, "grad_norm": 0.6664450764656067, "learning_rate": 1.5833428674119804e-05, "loss": 6.445511341094971, "step": 5709 }, { "epoch": 0.7404585720885373, "grad_norm": 0.97674560546875, "learning_rate": 1.581852185869452e-05, "loss": 8.590441703796387, "step": 5710 }, { "epoch": 0.740588249596784, "grad_norm": 0.8597200512886047, "learning_rate": 1.5803620745069845e-05, "loss": 8.141327857971191, "step": 5711 }, { "epoch": 0.7407179271050307, "grad_norm": 0.9581713080406189, "learning_rate": 1.5788725335731468e-05, "loss": 7.544291973114014, "step": 5712 }, { "epoch": 0.7408476046132774, "grad_norm": 0.9188758134841919, "learning_rate": 1.5773835633164054e-05, "loss": 11.956608772277832, "step": 5713 }, { "epoch": 0.740977282121524, "grad_norm": 0.8035790324211121, "learning_rate": 1.5758951639851377e-05, "loss": 8.031414031982422, "step": 5714 }, { "epoch": 0.7411069596297707, "grad_norm": 0.8412436246871948, "learning_rate": 1.5744073358276207e-05, "loss": 9.223011016845703, "step": 5715 }, { "epoch": 0.7412366371380174, "grad_norm": 0.7715737223625183, "learning_rate": 1.5729200790920408e-05, "loss": 9.69924545288086, "step": 5716 }, { "epoch": 0.7413663146462641, "grad_norm": 0.7357975840568542, "learning_rate": 1.5714333940264837e-05, "loss": 7.597690582275391, "step": 5717 }, { "epoch": 0.7414959921545108, "grad_norm": 0.9432018399238586, "learning_rate": 1.569947280878946e-05, "loss": 8.924920082092285, "step": 5718 }, { "epoch": 0.7416256696627574, "grad_norm": 1.0235979557037354, "learning_rate": 1.5684617398973235e-05, "loss": 10.438897132873535, "step": 5719 }, { "epoch": 0.7417553471710041, "grad_norm": 0.9764136672019958, "learning_rate": 1.5669767713294204e-05, "loss": 9.264886856079102, "step": 5720 }, { "epoch": 0.7418850246792508, "grad_norm": 0.8236489295959473, "learning_rate": 1.5654923754229423e-05, "loss": 9.37096881866455, "step": 5721 }, { "epoch": 0.7420147021874974, "grad_norm": 0.8803822994232178, "learning_rate": 1.5640085524255026e-05, "loss": 10.022197723388672, "step": 5722 }, { "epoch": 0.7421443796957441, "grad_norm": 0.8707570433616638, "learning_rate": 1.562525302584615e-05, "loss": 7.210288047790527, "step": 5723 }, { "epoch": 0.7422740572039909, "grad_norm": 0.6828855276107788, "learning_rate": 1.5610426261477024e-05, "loss": 9.662002563476562, "step": 5724 }, { "epoch": 0.7424037347122375, "grad_norm": 1.0774948596954346, "learning_rate": 1.559560523362087e-05, "loss": 10.074917793273926, "step": 5725 }, { "epoch": 0.7425334122204842, "grad_norm": 0.7232702374458313, "learning_rate": 1.558078994475e-05, "loss": 9.231480598449707, "step": 5726 }, { "epoch": 0.7426630897287309, "grad_norm": 1.170309066772461, "learning_rate": 1.5565980397335734e-05, "loss": 9.14931583404541, "step": 5727 }, { "epoch": 0.7427927672369775, "grad_norm": 0.9607827067375183, "learning_rate": 1.555117659384846e-05, "loss": 6.169600963592529, "step": 5728 }, { "epoch": 0.7429224447452242, "grad_norm": 0.8070986270904541, "learning_rate": 1.5536378536757574e-05, "loss": 10.283819198608398, "step": 5729 }, { "epoch": 0.743052122253471, "grad_norm": 0.9089943766593933, "learning_rate": 1.5521586228531565e-05, "loss": 10.889375686645508, "step": 5730 }, { "epoch": 0.7431817997617176, "grad_norm": 0.8241091966629028, "learning_rate": 1.5506799671637895e-05, "loss": 8.227517127990723, "step": 5731 }, { "epoch": 0.7433114772699643, "grad_norm": 1.2608612775802612, "learning_rate": 1.5492018868543145e-05, "loss": 9.746793746948242, "step": 5732 }, { "epoch": 0.7434411547782109, "grad_norm": 0.9214163422584534, "learning_rate": 1.547724382171286e-05, "loss": 9.608041763305664, "step": 5733 }, { "epoch": 0.7435708322864576, "grad_norm": 1.0702706575393677, "learning_rate": 1.546247453361168e-05, "loss": 10.118298530578613, "step": 5734 }, { "epoch": 0.7437005097947043, "grad_norm": 1.1382362842559814, "learning_rate": 1.5447711006703274e-05, "loss": 10.283916473388672, "step": 5735 }, { "epoch": 0.743830187302951, "grad_norm": 1.003736138343811, "learning_rate": 1.5432953243450314e-05, "loss": 10.038139343261719, "step": 5736 }, { "epoch": 0.7439598648111977, "grad_norm": 0.9971024990081787, "learning_rate": 1.541820124631457e-05, "loss": 10.262113571166992, "step": 5737 }, { "epoch": 0.7440895423194444, "grad_norm": 1.0558863878250122, "learning_rate": 1.5403455017756778e-05, "loss": 8.919426918029785, "step": 5738 }, { "epoch": 0.744219219827691, "grad_norm": 0.962232768535614, "learning_rate": 1.5388714560236796e-05, "loss": 8.353034973144531, "step": 5739 }, { "epoch": 0.7443488973359377, "grad_norm": 0.7950736880302429, "learning_rate": 1.537397987621343e-05, "loss": 7.820736885070801, "step": 5740 }, { "epoch": 0.7444785748441843, "grad_norm": 1.061136245727539, "learning_rate": 1.5359250968144605e-05, "loss": 8.904288291931152, "step": 5741 }, { "epoch": 0.744608252352431, "grad_norm": 0.7846531271934509, "learning_rate": 1.5344527838487217e-05, "loss": 8.377557754516602, "step": 5742 }, { "epoch": 0.7447379298606778, "grad_norm": 1.0143537521362305, "learning_rate": 1.5329810489697256e-05, "loss": 10.97751235961914, "step": 5743 }, { "epoch": 0.7448676073689244, "grad_norm": 0.9255151748657227, "learning_rate": 1.5315098924229676e-05, "loss": 8.748897552490234, "step": 5744 }, { "epoch": 0.7449972848771711, "grad_norm": 1.0539770126342773, "learning_rate": 1.5300393144538553e-05, "loss": 12.57059383392334, "step": 5745 }, { "epoch": 0.7451269623854178, "grad_norm": 0.674304187297821, "learning_rate": 1.5285693153076918e-05, "loss": 8.200778007507324, "step": 5746 }, { "epoch": 0.7452566398936644, "grad_norm": 1.0442497730255127, "learning_rate": 1.52709989522969e-05, "loss": 8.452170372009277, "step": 5747 }, { "epoch": 0.7453863174019111, "grad_norm": 0.7129436135292053, "learning_rate": 1.5256310544649604e-05, "loss": 6.617553234100342, "step": 5748 }, { "epoch": 0.7455159949101579, "grad_norm": 1.02951979637146, "learning_rate": 1.5241627932585234e-05, "loss": 9.016398429870605, "step": 5749 }, { "epoch": 0.7456456724184045, "grad_norm": 1.1026030778884888, "learning_rate": 1.5226951118552952e-05, "loss": 14.690524101257324, "step": 5750 }, { "epoch": 0.7457753499266512, "grad_norm": 0.9403489232063293, "learning_rate": 1.5212280105001025e-05, "loss": 9.294432640075684, "step": 5751 }, { "epoch": 0.7459050274348978, "grad_norm": 0.8695266246795654, "learning_rate": 1.5197614894376693e-05, "loss": 10.508635520935059, "step": 5752 }, { "epoch": 0.7460347049431445, "grad_norm": 0.947059154510498, "learning_rate": 1.5182955489126282e-05, "loss": 8.56029987335205, "step": 5753 }, { "epoch": 0.7461643824513912, "grad_norm": 0.9939416646957397, "learning_rate": 1.5168301891695092e-05, "loss": 8.426453590393066, "step": 5754 }, { "epoch": 0.7462940599596378, "grad_norm": 1.0749964714050293, "learning_rate": 1.5153654104527521e-05, "loss": 9.169367790222168, "step": 5755 }, { "epoch": 0.7464237374678846, "grad_norm": 1.251776933670044, "learning_rate": 1.5139012130066922e-05, "loss": 13.076810836791992, "step": 5756 }, { "epoch": 0.7465534149761313, "grad_norm": 1.1664702892303467, "learning_rate": 1.5124375970755755e-05, "loss": 8.538931846618652, "step": 5757 }, { "epoch": 0.7466830924843779, "grad_norm": 1.0764672756195068, "learning_rate": 1.5109745629035437e-05, "loss": 10.193824768066406, "step": 5758 }, { "epoch": 0.7468127699926246, "grad_norm": 0.7954508662223816, "learning_rate": 1.5095121107346487e-05, "loss": 8.931798934936523, "step": 5759 }, { "epoch": 0.7469424475008712, "grad_norm": 0.7153353095054626, "learning_rate": 1.5080502408128383e-05, "loss": 8.33531665802002, "step": 5760 }, { "epoch": 0.7470721250091179, "grad_norm": 1.186800241470337, "learning_rate": 1.5065889533819688e-05, "loss": 9.349974632263184, "step": 5761 }, { "epoch": 0.7472018025173647, "grad_norm": 0.9563515186309814, "learning_rate": 1.5051282486857953e-05, "loss": 10.415960311889648, "step": 5762 }, { "epoch": 0.7473314800256113, "grad_norm": 0.8644459843635559, "learning_rate": 1.5036681269679797e-05, "loss": 5.938355922698975, "step": 5763 }, { "epoch": 0.747461157533858, "grad_norm": 1.516305685043335, "learning_rate": 1.5022085884720816e-05, "loss": 9.456453323364258, "step": 5764 }, { "epoch": 0.7475908350421047, "grad_norm": 0.6417722105979919, "learning_rate": 1.5007496334415694e-05, "loss": 7.557039260864258, "step": 5765 }, { "epoch": 0.7477205125503513, "grad_norm": 0.8907778859138489, "learning_rate": 1.4992912621198074e-05, "loss": 9.198266983032227, "step": 5766 }, { "epoch": 0.747850190058598, "grad_norm": 0.8805529475212097, "learning_rate": 1.4978334747500689e-05, "loss": 7.813214302062988, "step": 5767 }, { "epoch": 0.7479798675668448, "grad_norm": 0.8675059080123901, "learning_rate": 1.4963762715755258e-05, "loss": 7.484882831573486, "step": 5768 }, { "epoch": 0.7481095450750914, "grad_norm": 0.8296695947647095, "learning_rate": 1.4949196528392524e-05, "loss": 7.867065906524658, "step": 5769 }, { "epoch": 0.7482392225833381, "grad_norm": 0.9753895998001099, "learning_rate": 1.4934636187842293e-05, "loss": 10.917905807495117, "step": 5770 }, { "epoch": 0.7483689000915847, "grad_norm": 0.8620312213897705, "learning_rate": 1.4920081696533322e-05, "loss": 5.8895392417907715, "step": 5771 }, { "epoch": 0.7484985775998314, "grad_norm": 0.8910173177719116, "learning_rate": 1.490553305689351e-05, "loss": 7.931213855743408, "step": 5772 }, { "epoch": 0.7486282551080781, "grad_norm": 0.8137096762657166, "learning_rate": 1.4890990271349664e-05, "loss": 9.684272766113281, "step": 5773 }, { "epoch": 0.7487579326163247, "grad_norm": 1.3049275875091553, "learning_rate": 1.4876453342327685e-05, "loss": 10.96179485321045, "step": 5774 }, { "epoch": 0.7488876101245715, "grad_norm": 0.9865659475326538, "learning_rate": 1.4861922272252443e-05, "loss": 9.444040298461914, "step": 5775 }, { "epoch": 0.7490172876328182, "grad_norm": 0.7495754957199097, "learning_rate": 1.4847397063547896e-05, "loss": 8.392494201660156, "step": 5776 }, { "epoch": 0.7491469651410648, "grad_norm": 0.8169391751289368, "learning_rate": 1.4832877718636961e-05, "loss": 8.149834632873535, "step": 5777 }, { "epoch": 0.7492766426493115, "grad_norm": 0.7614285945892334, "learning_rate": 1.4818364239941629e-05, "loss": 6.236757278442383, "step": 5778 }, { "epoch": 0.7494063201575581, "grad_norm": 0.8354405164718628, "learning_rate": 1.4803856629882862e-05, "loss": 6.829475402832031, "step": 5779 }, { "epoch": 0.7495359976658048, "grad_norm": 1.1896206140518188, "learning_rate": 1.4789354890880697e-05, "loss": 10.581073760986328, "step": 5780 }, { "epoch": 0.7496656751740516, "grad_norm": 0.8932399749755859, "learning_rate": 1.4774859025354131e-05, "loss": 9.127266883850098, "step": 5781 }, { "epoch": 0.7497953526822982, "grad_norm": 1.3553128242492676, "learning_rate": 1.476036903572125e-05, "loss": 9.398191452026367, "step": 5782 }, { "epoch": 0.7499250301905449, "grad_norm": 0.7798241972923279, "learning_rate": 1.4745884924399095e-05, "loss": 10.123954772949219, "step": 5783 }, { "epoch": 0.7500547076987916, "grad_norm": 0.9276896119117737, "learning_rate": 1.4731406693803785e-05, "loss": 9.517451286315918, "step": 5784 }, { "epoch": 0.7501843852070382, "grad_norm": 0.9932178258895874, "learning_rate": 1.4716934346350392e-05, "loss": 8.57667064666748, "step": 5785 }, { "epoch": 0.7503140627152849, "grad_norm": 1.1000118255615234, "learning_rate": 1.4702467884453086e-05, "loss": 6.657549858093262, "step": 5786 }, { "epoch": 0.7504437402235316, "grad_norm": 0.738191545009613, "learning_rate": 1.4688007310524965e-05, "loss": 6.337273597717285, "step": 5787 }, { "epoch": 0.7505734177317783, "grad_norm": 1.0720157623291016, "learning_rate": 1.4673552626978238e-05, "loss": 9.783285140991211, "step": 5788 }, { "epoch": 0.750703095240025, "grad_norm": 0.9336608052253723, "learning_rate": 1.4659103836224048e-05, "loss": 9.337373733520508, "step": 5789 }, { "epoch": 0.7508327727482716, "grad_norm": 0.8756052255630493, "learning_rate": 1.4644660940672627e-05, "loss": 8.626930236816406, "step": 5790 }, { "epoch": 0.7509624502565183, "grad_norm": 0.9272647500038147, "learning_rate": 1.4630223942733173e-05, "loss": 7.350929260253906, "step": 5791 }, { "epoch": 0.751092127764765, "grad_norm": 0.9865864515304565, "learning_rate": 1.4615792844813897e-05, "loss": 9.48397159576416, "step": 5792 }, { "epoch": 0.7512218052730116, "grad_norm": 0.8416360020637512, "learning_rate": 1.4601367649322085e-05, "loss": 11.187240600585938, "step": 5793 }, { "epoch": 0.7513514827812584, "grad_norm": 1.129902958869934, "learning_rate": 1.4586948358663954e-05, "loss": 9.092010498046875, "step": 5794 }, { "epoch": 0.7514811602895051, "grad_norm": 0.887823224067688, "learning_rate": 1.4572534975244827e-05, "loss": 12.657995223999023, "step": 5795 }, { "epoch": 0.7516108377977517, "grad_norm": 1.0602084398269653, "learning_rate": 1.455812750146896e-05, "loss": 10.382365226745605, "step": 5796 }, { "epoch": 0.7517405153059984, "grad_norm": 0.9073916673660278, "learning_rate": 1.4543725939739683e-05, "loss": 8.277356147766113, "step": 5797 }, { "epoch": 0.751870192814245, "grad_norm": 0.9296297430992126, "learning_rate": 1.4529330292459293e-05, "loss": 10.053483963012695, "step": 5798 }, { "epoch": 0.7519998703224917, "grad_norm": 0.7252909541130066, "learning_rate": 1.451494056202915e-05, "loss": 6.7005181312561035, "step": 5799 }, { "epoch": 0.7521295478307385, "grad_norm": 0.9431643486022949, "learning_rate": 1.450055675084957e-05, "loss": 9.94852066040039, "step": 5800 }, { "epoch": 0.7522592253389851, "grad_norm": 0.8040807247161865, "learning_rate": 1.4486178861319939e-05, "loss": 7.4649271965026855, "step": 5801 }, { "epoch": 0.7523889028472318, "grad_norm": 0.8112024664878845, "learning_rate": 1.4471806895838602e-05, "loss": 7.035241603851318, "step": 5802 }, { "epoch": 0.7525185803554785, "grad_norm": 0.8894829750061035, "learning_rate": 1.4457440856802974e-05, "loss": 11.480588912963867, "step": 5803 }, { "epoch": 0.7526482578637251, "grad_norm": 1.317232608795166, "learning_rate": 1.4443080746609411e-05, "loss": 9.121025085449219, "step": 5804 }, { "epoch": 0.7527779353719718, "grad_norm": 0.756061315536499, "learning_rate": 1.442872656765335e-05, "loss": 7.649343490600586, "step": 5805 }, { "epoch": 0.7529076128802185, "grad_norm": 0.8660774827003479, "learning_rate": 1.4414378322329181e-05, "loss": 5.715575695037842, "step": 5806 }, { "epoch": 0.7530372903884652, "grad_norm": 0.7762756943702698, "learning_rate": 1.440003601303036e-05, "loss": 6.803373336791992, "step": 5807 }, { "epoch": 0.7531669678967119, "grad_norm": 0.9590229392051697, "learning_rate": 1.4385699642149287e-05, "loss": 8.662582397460938, "step": 5808 }, { "epoch": 0.7532966454049586, "grad_norm": 0.9751831293106079, "learning_rate": 1.4371369212077446e-05, "loss": 8.36738395690918, "step": 5809 }, { "epoch": 0.7534263229132052, "grad_norm": 0.9742197394371033, "learning_rate": 1.4357044725205232e-05, "loss": 8.347150802612305, "step": 5810 }, { "epoch": 0.7535560004214519, "grad_norm": 0.8351473808288574, "learning_rate": 1.4342726183922183e-05, "loss": 11.235639572143555, "step": 5811 }, { "epoch": 0.7536856779296985, "grad_norm": 0.711330235004425, "learning_rate": 1.4328413590616718e-05, "loss": 7.622499465942383, "step": 5812 }, { "epoch": 0.7538153554379453, "grad_norm": 0.7322642207145691, "learning_rate": 1.4314106947676342e-05, "loss": 9.46120834350586, "step": 5813 }, { "epoch": 0.753945032946192, "grad_norm": 0.7683553099632263, "learning_rate": 1.4299806257487536e-05, "loss": 6.768919467926025, "step": 5814 }, { "epoch": 0.7540747104544386, "grad_norm": 0.9919949173927307, "learning_rate": 1.4285511522435769e-05, "loss": 8.519418716430664, "step": 5815 }, { "epoch": 0.7542043879626853, "grad_norm": 0.9827864766120911, "learning_rate": 1.4271222744905577e-05, "loss": 7.417905807495117, "step": 5816 }, { "epoch": 0.754334065470932, "grad_norm": 1.0071841478347778, "learning_rate": 1.4256939927280428e-05, "loss": 10.028279304504395, "step": 5817 }, { "epoch": 0.7544637429791786, "grad_norm": 1.1100540161132812, "learning_rate": 1.4242663071942874e-05, "loss": 10.438447952270508, "step": 5818 }, { "epoch": 0.7545934204874253, "grad_norm": 0.6651888489723206, "learning_rate": 1.4228392181274398e-05, "loss": 7.353510856628418, "step": 5819 }, { "epoch": 0.754723097995672, "grad_norm": 0.7994949221611023, "learning_rate": 1.4214127257655546e-05, "loss": 12.777478218078613, "step": 5820 }, { "epoch": 0.7548527755039187, "grad_norm": 0.7251792550086975, "learning_rate": 1.4199868303465825e-05, "loss": 8.320033073425293, "step": 5821 }, { "epoch": 0.7549824530121654, "grad_norm": 1.1975301504135132, "learning_rate": 1.4185615321083795e-05, "loss": 10.106736183166504, "step": 5822 }, { "epoch": 0.755112130520412, "grad_norm": 1.1028677225112915, "learning_rate": 1.4171368312886956e-05, "loss": 7.945384979248047, "step": 5823 }, { "epoch": 0.7552418080286587, "grad_norm": 0.9356367588043213, "learning_rate": 1.415712728125188e-05, "loss": 9.464128494262695, "step": 5824 }, { "epoch": 0.7553714855369054, "grad_norm": 1.1741628646850586, "learning_rate": 1.4142892228554078e-05, "loss": 8.038617134094238, "step": 5825 }, { "epoch": 0.7555011630451521, "grad_norm": 0.6699219942092896, "learning_rate": 1.412866315716812e-05, "loss": 8.101263999938965, "step": 5826 }, { "epoch": 0.7556308405533988, "grad_norm": 0.8694753646850586, "learning_rate": 1.4114440069467528e-05, "loss": 10.049571990966797, "step": 5827 }, { "epoch": 0.7557605180616455, "grad_norm": 0.6613695621490479, "learning_rate": 1.4100222967824878e-05, "loss": 7.153995513916016, "step": 5828 }, { "epoch": 0.7558901955698921, "grad_norm": 0.8090588450431824, "learning_rate": 1.4086011854611692e-05, "loss": 6.115611553192139, "step": 5829 }, { "epoch": 0.7560198730781388, "grad_norm": 1.090197205543518, "learning_rate": 1.407180673219855e-05, "loss": 11.633204460144043, "step": 5830 }, { "epoch": 0.7561495505863854, "grad_norm": 0.9670327305793762, "learning_rate": 1.405760760295497e-05, "loss": 10.838214874267578, "step": 5831 }, { "epoch": 0.7562792280946322, "grad_norm": 1.2078567743301392, "learning_rate": 1.4043414469249544e-05, "loss": 6.587271690368652, "step": 5832 }, { "epoch": 0.7564089056028789, "grad_norm": 0.958065390586853, "learning_rate": 1.402922733344978e-05, "loss": 9.836784362792969, "step": 5833 }, { "epoch": 0.7565385831111255, "grad_norm": 1.1410908699035645, "learning_rate": 1.4015046197922265e-05, "loss": 8.131264686584473, "step": 5834 }, { "epoch": 0.7566682606193722, "grad_norm": 1.1857802867889404, "learning_rate": 1.4000871065032522e-05, "loss": 10.695479393005371, "step": 5835 }, { "epoch": 0.7567979381276189, "grad_norm": 1.547316312789917, "learning_rate": 1.398670193714513e-05, "loss": 12.291049003601074, "step": 5836 }, { "epoch": 0.7569276156358655, "grad_norm": 1.0970277786254883, "learning_rate": 1.3972538816623603e-05, "loss": 10.86402702331543, "step": 5837 }, { "epoch": 0.7570572931441122, "grad_norm": 0.9873675107955933, "learning_rate": 1.395838170583052e-05, "loss": 9.557168960571289, "step": 5838 }, { "epoch": 0.757186970652359, "grad_norm": 0.9495062232017517, "learning_rate": 1.394423060712739e-05, "loss": 8.13808536529541, "step": 5839 }, { "epoch": 0.7573166481606056, "grad_norm": 0.7160542011260986, "learning_rate": 1.3930085522874786e-05, "loss": 8.586027145385742, "step": 5840 }, { "epoch": 0.7574463256688523, "grad_norm": 1.1113510131835938, "learning_rate": 1.3915946455432217e-05, "loss": 8.968059539794922, "step": 5841 }, { "epoch": 0.7575760031770989, "grad_norm": 0.9680543541908264, "learning_rate": 1.3901813407158231e-05, "loss": 9.029557228088379, "step": 5842 }, { "epoch": 0.7577056806853456, "grad_norm": 0.829075813293457, "learning_rate": 1.3887686380410348e-05, "loss": 6.99722957611084, "step": 5843 }, { "epoch": 0.7578353581935923, "grad_norm": 0.8733194470405579, "learning_rate": 1.3873565377545112e-05, "loss": 9.707036972045898, "step": 5844 }, { "epoch": 0.757965035701839, "grad_norm": 0.9131604433059692, "learning_rate": 1.3859450400918006e-05, "loss": 7.716733932495117, "step": 5845 }, { "epoch": 0.7580947132100857, "grad_norm": 0.7712038159370422, "learning_rate": 1.3845341452883587e-05, "loss": 7.9572529792785645, "step": 5846 }, { "epoch": 0.7582243907183324, "grad_norm": 0.6672071814537048, "learning_rate": 1.3831238535795332e-05, "loss": 6.530402183532715, "step": 5847 }, { "epoch": 0.758354068226579, "grad_norm": 0.8974782228469849, "learning_rate": 1.381714165200575e-05, "loss": 8.897127151489258, "step": 5848 }, { "epoch": 0.7584837457348257, "grad_norm": 1.0911664962768555, "learning_rate": 1.3803050803866358e-05, "loss": 9.306300163269043, "step": 5849 }, { "epoch": 0.7586134232430723, "grad_norm": 0.8081110119819641, "learning_rate": 1.3788965993727614e-05, "loss": 6.523008823394775, "step": 5850 }, { "epoch": 0.758743100751319, "grad_norm": 1.2462010383605957, "learning_rate": 1.3774887223939037e-05, "loss": 9.006866455078125, "step": 5851 }, { "epoch": 0.7588727782595658, "grad_norm": 1.4224907159805298, "learning_rate": 1.3760814496849061e-05, "loss": 10.217071533203125, "step": 5852 }, { "epoch": 0.7590024557678124, "grad_norm": 1.362805962562561, "learning_rate": 1.3746747814805189e-05, "loss": 11.414278030395508, "step": 5853 }, { "epoch": 0.7591321332760591, "grad_norm": 0.8860002160072327, "learning_rate": 1.3732687180153848e-05, "loss": 9.295713424682617, "step": 5854 }, { "epoch": 0.7592618107843058, "grad_norm": 1.0026763677597046, "learning_rate": 1.3718632595240522e-05, "loss": 8.697168350219727, "step": 5855 }, { "epoch": 0.7593914882925524, "grad_norm": 0.9181576371192932, "learning_rate": 1.3704584062409615e-05, "loss": 9.698150634765625, "step": 5856 }, { "epoch": 0.7595211658007991, "grad_norm": 0.8926102519035339, "learning_rate": 1.369054158400459e-05, "loss": 10.909029006958008, "step": 5857 }, { "epoch": 0.7596508433090459, "grad_norm": 1.0420243740081787, "learning_rate": 1.3676505162367836e-05, "loss": 9.303832054138184, "step": 5858 }, { "epoch": 0.7597805208172925, "grad_norm": 1.2020655870437622, "learning_rate": 1.3662474799840797e-05, "loss": 7.827509880065918, "step": 5859 }, { "epoch": 0.7599101983255392, "grad_norm": 1.05704927444458, "learning_rate": 1.3648450498763843e-05, "loss": 9.280858993530273, "step": 5860 }, { "epoch": 0.7600398758337858, "grad_norm": 1.5104894638061523, "learning_rate": 1.3634432261476393e-05, "loss": 10.590035438537598, "step": 5861 }, { "epoch": 0.7601695533420325, "grad_norm": 1.1646970510482788, "learning_rate": 1.3620420090316793e-05, "loss": 7.597377300262451, "step": 5862 }, { "epoch": 0.7602992308502792, "grad_norm": 0.8899390697479248, "learning_rate": 1.360641398762244e-05, "loss": 8.96182632446289, "step": 5863 }, { "epoch": 0.760428908358526, "grad_norm": 1.0802310705184937, "learning_rate": 1.3592413955729654e-05, "loss": 13.826019287109375, "step": 5864 }, { "epoch": 0.7605585858667726, "grad_norm": 1.2864350080490112, "learning_rate": 1.3578419996973807e-05, "loss": 10.05420970916748, "step": 5865 }, { "epoch": 0.7606882633750193, "grad_norm": 1.5759443044662476, "learning_rate": 1.3564432113689202e-05, "loss": 9.852514266967773, "step": 5866 }, { "epoch": 0.7608179408832659, "grad_norm": 1.0135902166366577, "learning_rate": 1.3550450308209172e-05, "loss": 9.576017379760742, "step": 5867 }, { "epoch": 0.7609476183915126, "grad_norm": 0.7413110733032227, "learning_rate": 1.3536474582865994e-05, "loss": 9.069538116455078, "step": 5868 }, { "epoch": 0.7610772958997593, "grad_norm": 0.8107073903083801, "learning_rate": 1.3522504939990982e-05, "loss": 8.855182647705078, "step": 5869 }, { "epoch": 0.7612069734080059, "grad_norm": 0.6720375418663025, "learning_rate": 1.3508541381914381e-05, "loss": 6.400132179260254, "step": 5870 }, { "epoch": 0.7613366509162527, "grad_norm": 0.9669879078865051, "learning_rate": 1.3494583910965474e-05, "loss": 9.130714416503906, "step": 5871 }, { "epoch": 0.7614663284244994, "grad_norm": 1.0031276941299438, "learning_rate": 1.348063252947247e-05, "loss": 11.065872192382812, "step": 5872 }, { "epoch": 0.761596005932746, "grad_norm": 0.5569759011268616, "learning_rate": 1.3466687239762632e-05, "loss": 6.88283634185791, "step": 5873 }, { "epoch": 0.7617256834409927, "grad_norm": 0.9621927738189697, "learning_rate": 1.3452748044162128e-05, "loss": 8.668366432189941, "step": 5874 }, { "epoch": 0.7618553609492393, "grad_norm": 0.9132298827171326, "learning_rate": 1.3438814944996191e-05, "loss": 8.920797348022461, "step": 5875 }, { "epoch": 0.761985038457486, "grad_norm": 0.7707778811454773, "learning_rate": 1.342488794458896e-05, "loss": 5.862091541290283, "step": 5876 }, { "epoch": 0.7621147159657328, "grad_norm": 0.7865983843803406, "learning_rate": 1.3410967045263622e-05, "loss": 7.505589485168457, "step": 5877 }, { "epoch": 0.7622443934739794, "grad_norm": 1.1960901021957397, "learning_rate": 1.3397052249342313e-05, "loss": 10.519437789916992, "step": 5878 }, { "epoch": 0.7623740709822261, "grad_norm": 0.8475636839866638, "learning_rate": 1.3383143559146122e-05, "loss": 8.064061164855957, "step": 5879 }, { "epoch": 0.7625037484904728, "grad_norm": 0.8069463968276978, "learning_rate": 1.3369240976995201e-05, "loss": 9.233595848083496, "step": 5880 }, { "epoch": 0.7626334259987194, "grad_norm": 0.6658185720443726, "learning_rate": 1.3355344505208594e-05, "loss": 6.249923229217529, "step": 5881 }, { "epoch": 0.7627631035069661, "grad_norm": 1.049437165260315, "learning_rate": 1.3341454146104405e-05, "loss": 8.606840133666992, "step": 5882 }, { "epoch": 0.7628927810152127, "grad_norm": 0.8467989563941956, "learning_rate": 1.3327569901999637e-05, "loss": 6.759850978851318, "step": 5883 }, { "epoch": 0.7630224585234595, "grad_norm": 0.750124454498291, "learning_rate": 1.3313691775210357e-05, "loss": 6.097696304321289, "step": 5884 }, { "epoch": 0.7631521360317062, "grad_norm": 0.7627707123756409, "learning_rate": 1.3299819768051541e-05, "loss": 6.791970252990723, "step": 5885 }, { "epoch": 0.7632818135399528, "grad_norm": 0.992662250995636, "learning_rate": 1.3285953882837199e-05, "loss": 8.23179817199707, "step": 5886 }, { "epoch": 0.7634114910481995, "grad_norm": 0.6810060143470764, "learning_rate": 1.3272094121880246e-05, "loss": 8.939311027526855, "step": 5887 }, { "epoch": 0.7635411685564462, "grad_norm": 0.7401176691055298, "learning_rate": 1.3258240487492697e-05, "loss": 8.553297996520996, "step": 5888 }, { "epoch": 0.7636708460646928, "grad_norm": 1.436790108680725, "learning_rate": 1.3244392981985416e-05, "loss": 10.44275188446045, "step": 5889 }, { "epoch": 0.7638005235729396, "grad_norm": 1.3524889945983887, "learning_rate": 1.3230551607668329e-05, "loss": 15.347841262817383, "step": 5890 }, { "epoch": 0.7639302010811863, "grad_norm": 1.2630552053451538, "learning_rate": 1.321671636685029e-05, "loss": 10.423074722290039, "step": 5891 }, { "epoch": 0.7640598785894329, "grad_norm": 0.7787883281707764, "learning_rate": 1.3202887261839175e-05, "loss": 6.26908016204834, "step": 5892 }, { "epoch": 0.7641895560976796, "grad_norm": 0.9599921107292175, "learning_rate": 1.3189064294941784e-05, "loss": 8.04896354675293, "step": 5893 }, { "epoch": 0.7643192336059262, "grad_norm": 0.7462447285652161, "learning_rate": 1.3175247468463952e-05, "loss": 7.816433906555176, "step": 5894 }, { "epoch": 0.7644489111141729, "grad_norm": 1.1477065086364746, "learning_rate": 1.3161436784710424e-05, "loss": 9.917534828186035, "step": 5895 }, { "epoch": 0.7645785886224197, "grad_norm": 0.804155707359314, "learning_rate": 1.3147632245984992e-05, "loss": 7.721893310546875, "step": 5896 }, { "epoch": 0.7647082661306663, "grad_norm": 0.7407350540161133, "learning_rate": 1.3133833854590355e-05, "loss": 9.912117004394531, "step": 5897 }, { "epoch": 0.764837943638913, "grad_norm": 0.9025952219963074, "learning_rate": 1.3120041612828243e-05, "loss": 8.097795486450195, "step": 5898 }, { "epoch": 0.7649676211471597, "grad_norm": 0.8764932751655579, "learning_rate": 1.3106255522999311e-05, "loss": 8.298211097717285, "step": 5899 }, { "epoch": 0.7650972986554063, "grad_norm": 0.7172849178314209, "learning_rate": 1.3092475587403236e-05, "loss": 6.7054524421691895, "step": 5900 }, { "epoch": 0.765226976163653, "grad_norm": 0.8440815210342407, "learning_rate": 1.3078701808338639e-05, "loss": 9.473535537719727, "step": 5901 }, { "epoch": 0.7653566536718996, "grad_norm": 0.949428141117096, "learning_rate": 1.3064934188103096e-05, "loss": 8.639724731445312, "step": 5902 }, { "epoch": 0.7654863311801464, "grad_norm": 1.3307921886444092, "learning_rate": 1.305117272899321e-05, "loss": 10.87578296661377, "step": 5903 }, { "epoch": 0.7656160086883931, "grad_norm": 1.2244364023208618, "learning_rate": 1.3037417433304495e-05, "loss": 10.177035331726074, "step": 5904 }, { "epoch": 0.7657456861966397, "grad_norm": 0.6723634600639343, "learning_rate": 1.3023668303331504e-05, "loss": 8.267990112304688, "step": 5905 }, { "epoch": 0.7658753637048864, "grad_norm": 0.9668869972229004, "learning_rate": 1.3009925341367685e-05, "loss": 11.75329303741455, "step": 5906 }, { "epoch": 0.7660050412131331, "grad_norm": 0.8901361227035522, "learning_rate": 1.2996188549705534e-05, "loss": 8.506471633911133, "step": 5907 }, { "epoch": 0.7661347187213797, "grad_norm": 0.9842115044593811, "learning_rate": 1.2982457930636443e-05, "loss": 10.151565551757812, "step": 5908 }, { "epoch": 0.7662643962296265, "grad_norm": 0.8893540501594543, "learning_rate": 1.296873348645085e-05, "loss": 10.3126220703125, "step": 5909 }, { "epoch": 0.7663940737378732, "grad_norm": 0.9167189002037048, "learning_rate": 1.2955015219438093e-05, "loss": 11.736865043640137, "step": 5910 }, { "epoch": 0.7665237512461198, "grad_norm": 1.308146357536316, "learning_rate": 1.2941303131886534e-05, "loss": 12.425440788269043, "step": 5911 }, { "epoch": 0.7666534287543665, "grad_norm": 0.7486586570739746, "learning_rate": 1.2927597226083466e-05, "loss": 8.086092948913574, "step": 5912 }, { "epoch": 0.7667831062626131, "grad_norm": 0.9898008108139038, "learning_rate": 1.291389750431518e-05, "loss": 11.512163162231445, "step": 5913 }, { "epoch": 0.7669127837708598, "grad_norm": 1.2559881210327148, "learning_rate": 1.2900203968866902e-05, "loss": 13.014338493347168, "step": 5914 }, { "epoch": 0.7670424612791065, "grad_norm": 1.2294597625732422, "learning_rate": 1.2886516622022876e-05, "loss": 7.560159683227539, "step": 5915 }, { "epoch": 0.7671721387873532, "grad_norm": 0.8218717575073242, "learning_rate": 1.2872835466066246e-05, "loss": 6.981655597686768, "step": 5916 }, { "epoch": 0.7673018162955999, "grad_norm": 0.8356359601020813, "learning_rate": 1.2859160503279199e-05, "loss": 9.090872764587402, "step": 5917 }, { "epoch": 0.7674314938038466, "grad_norm": 0.799265444278717, "learning_rate": 1.2845491735942822e-05, "loss": 5.807271957397461, "step": 5918 }, { "epoch": 0.7675611713120932, "grad_norm": 0.9987497925758362, "learning_rate": 1.2831829166337217e-05, "loss": 8.136833190917969, "step": 5919 }, { "epoch": 0.7676908488203399, "grad_norm": 0.8147956132888794, "learning_rate": 1.2818172796741412e-05, "loss": 8.109060287475586, "step": 5920 }, { "epoch": 0.7678205263285866, "grad_norm": 1.0089613199234009, "learning_rate": 1.2804522629433447e-05, "loss": 7.620807647705078, "step": 5921 }, { "epoch": 0.7679502038368333, "grad_norm": 1.1336519718170166, "learning_rate": 1.2790878666690276e-05, "loss": 9.32950496673584, "step": 5922 }, { "epoch": 0.76807988134508, "grad_norm": 1.1572209596633911, "learning_rate": 1.2777240910787868e-05, "loss": 11.833612442016602, "step": 5923 }, { "epoch": 0.7682095588533266, "grad_norm": 0.5953912138938904, "learning_rate": 1.2763609364001105e-05, "loss": 5.655547142028809, "step": 5924 }, { "epoch": 0.7683392363615733, "grad_norm": 0.9594862461090088, "learning_rate": 1.2749984028603879e-05, "loss": 8.890883445739746, "step": 5925 }, { "epoch": 0.76846891386982, "grad_norm": 0.8844215273857117, "learning_rate": 1.2736364906869036e-05, "loss": 8.689608573913574, "step": 5926 }, { "epoch": 0.7685985913780666, "grad_norm": 0.6844722032546997, "learning_rate": 1.2722752001068349e-05, "loss": 6.378763198852539, "step": 5927 }, { "epoch": 0.7687282688863134, "grad_norm": 0.9302980303764343, "learning_rate": 1.2709145313472614e-05, "loss": 11.767199516296387, "step": 5928 }, { "epoch": 0.7688579463945601, "grad_norm": 0.9345502257347107, "learning_rate": 1.269554484635153e-05, "loss": 7.300846576690674, "step": 5929 }, { "epoch": 0.7689876239028067, "grad_norm": 0.965905487537384, "learning_rate": 1.2681950601973813e-05, "loss": 8.124418258666992, "step": 5930 }, { "epoch": 0.7691173014110534, "grad_norm": 0.9797568321228027, "learning_rate": 1.2668362582607084e-05, "loss": 10.370807647705078, "step": 5931 }, { "epoch": 0.7692469789193, "grad_norm": 1.3355764150619507, "learning_rate": 1.2654780790517984e-05, "loss": 11.003752708435059, "step": 5932 }, { "epoch": 0.7693766564275467, "grad_norm": 0.8946962952613831, "learning_rate": 1.2641205227972064e-05, "loss": 12.66658878326416, "step": 5933 }, { "epoch": 0.7695063339357934, "grad_norm": 0.7893247008323669, "learning_rate": 1.2627635897233881e-05, "loss": 6.667574405670166, "step": 5934 }, { "epoch": 0.7696360114440401, "grad_norm": 0.814119279384613, "learning_rate": 1.2614072800566907e-05, "loss": 7.736254692077637, "step": 5935 }, { "epoch": 0.7697656889522868, "grad_norm": 1.245893955230713, "learning_rate": 1.2600515940233626e-05, "loss": 7.115786075592041, "step": 5936 }, { "epoch": 0.7698953664605335, "grad_norm": 0.9336667656898499, "learning_rate": 1.2586965318495419e-05, "loss": 11.359065055847168, "step": 5937 }, { "epoch": 0.7700250439687801, "grad_norm": 1.096704363822937, "learning_rate": 1.257342093761269e-05, "loss": 9.122984886169434, "step": 5938 }, { "epoch": 0.7701547214770268, "grad_norm": 1.112545371055603, "learning_rate": 1.2559882799844753e-05, "loss": 11.608826637268066, "step": 5939 }, { "epoch": 0.7702843989852735, "grad_norm": 0.9461756944656372, "learning_rate": 1.2546350907449916e-05, "loss": 10.018054008483887, "step": 5940 }, { "epoch": 0.7704140764935202, "grad_norm": 1.2724106311798096, "learning_rate": 1.253282526268541e-05, "loss": 11.316317558288574, "step": 5941 }, { "epoch": 0.7705437540017669, "grad_norm": 0.6228131651878357, "learning_rate": 1.2519305867807468e-05, "loss": 8.216154098510742, "step": 5942 }, { "epoch": 0.7706734315100136, "grad_norm": 0.6867682933807373, "learning_rate": 1.250579272507123e-05, "loss": 7.163250923156738, "step": 5943 }, { "epoch": 0.7708031090182602, "grad_norm": 0.8575926423072815, "learning_rate": 1.2492285836730844e-05, "loss": 9.474472045898438, "step": 5944 }, { "epoch": 0.7709327865265069, "grad_norm": 0.7948387265205383, "learning_rate": 1.2478785205039362e-05, "loss": 5.978663444519043, "step": 5945 }, { "epoch": 0.7710624640347535, "grad_norm": 1.5068080425262451, "learning_rate": 1.2465290832248844e-05, "loss": 10.713293075561523, "step": 5946 }, { "epoch": 0.7711921415430002, "grad_norm": 0.5808953642845154, "learning_rate": 1.2451802720610261e-05, "loss": 8.632841110229492, "step": 5947 }, { "epoch": 0.771321819051247, "grad_norm": 0.8844883441925049, "learning_rate": 1.2438320872373588e-05, "loss": 8.544987678527832, "step": 5948 }, { "epoch": 0.7714514965594936, "grad_norm": 1.1034471988677979, "learning_rate": 1.242484528978769e-05, "loss": 7.550306797027588, "step": 5949 }, { "epoch": 0.7715811740677403, "grad_norm": 0.9101323485374451, "learning_rate": 1.2411375975100459e-05, "loss": 8.499202728271484, "step": 5950 }, { "epoch": 0.771710851575987, "grad_norm": 0.7395991086959839, "learning_rate": 1.2397912930558681e-05, "loss": 7.119911193847656, "step": 5951 }, { "epoch": 0.7718405290842336, "grad_norm": 0.8899067044258118, "learning_rate": 1.2384456158408147e-05, "loss": 9.272686004638672, "step": 5952 }, { "epoch": 0.7719702065924803, "grad_norm": 0.9262390732765198, "learning_rate": 1.2371005660893548e-05, "loss": 8.541237831115723, "step": 5953 }, { "epoch": 0.772099884100727, "grad_norm": 1.009453296661377, "learning_rate": 1.2357561440258591e-05, "loss": 10.206730842590332, "step": 5954 }, { "epoch": 0.7722295616089737, "grad_norm": 1.1706608533859253, "learning_rate": 1.2344123498745863e-05, "loss": 8.836663246154785, "step": 5955 }, { "epoch": 0.7723592391172204, "grad_norm": 0.8247917890548706, "learning_rate": 1.2330691838596974e-05, "loss": 9.350005149841309, "step": 5956 }, { "epoch": 0.772488916625467, "grad_norm": 0.8594232201576233, "learning_rate": 1.2317266462052429e-05, "loss": 8.531072616577148, "step": 5957 }, { "epoch": 0.7726185941337137, "grad_norm": 1.0853368043899536, "learning_rate": 1.2303847371351746e-05, "loss": 11.922469139099121, "step": 5958 }, { "epoch": 0.7727482716419604, "grad_norm": 0.9477919936180115, "learning_rate": 1.2290434568733311e-05, "loss": 9.335661888122559, "step": 5959 }, { "epoch": 0.7728779491502071, "grad_norm": 0.8032530546188354, "learning_rate": 1.2277028056434559e-05, "loss": 8.857840538024902, "step": 5960 }, { "epoch": 0.7730076266584538, "grad_norm": 1.0435320138931274, "learning_rate": 1.2263627836691782e-05, "loss": 8.702071189880371, "step": 5961 }, { "epoch": 0.7731373041667005, "grad_norm": 0.6856096982955933, "learning_rate": 1.22502339117403e-05, "loss": 10.793781280517578, "step": 5962 }, { "epoch": 0.7732669816749471, "grad_norm": 1.3234281539916992, "learning_rate": 1.2236846283814324e-05, "loss": 10.172091484069824, "step": 5963 }, { "epoch": 0.7733966591831938, "grad_norm": 0.8909966945648193, "learning_rate": 1.2223464955147046e-05, "loss": 9.829703330993652, "step": 5964 }, { "epoch": 0.7735263366914404, "grad_norm": 0.9879781007766724, "learning_rate": 1.221008992797062e-05, "loss": 7.589824199676514, "step": 5965 }, { "epoch": 0.7736560141996871, "grad_norm": 0.9173818230628967, "learning_rate": 1.2196721204516099e-05, "loss": 10.009281158447266, "step": 5966 }, { "epoch": 0.7737856917079339, "grad_norm": 0.7781651616096497, "learning_rate": 1.2183358787013549e-05, "loss": 6.628066539764404, "step": 5967 }, { "epoch": 0.7739153692161805, "grad_norm": 0.7328930497169495, "learning_rate": 1.2170002677691916e-05, "loss": 8.923538208007812, "step": 5968 }, { "epoch": 0.7740450467244272, "grad_norm": 0.6757060885429382, "learning_rate": 1.2156652878779156e-05, "loss": 5.476437568664551, "step": 5969 }, { "epoch": 0.7741747242326739, "grad_norm": 0.8461986780166626, "learning_rate": 1.2143309392502122e-05, "loss": 6.839296340942383, "step": 5970 }, { "epoch": 0.7743044017409205, "grad_norm": 1.051311731338501, "learning_rate": 1.212997222108666e-05, "loss": 8.506023406982422, "step": 5971 }, { "epoch": 0.7744340792491672, "grad_norm": 1.1847903728485107, "learning_rate": 1.2116641366757515e-05, "loss": 10.279105186462402, "step": 5972 }, { "epoch": 0.774563756757414, "grad_norm": 0.8923234343528748, "learning_rate": 1.2103316831738426e-05, "loss": 10.730314254760742, "step": 5973 }, { "epoch": 0.7746934342656606, "grad_norm": 0.9333352446556091, "learning_rate": 1.2089998618252024e-05, "loss": 10.04842758178711, "step": 5974 }, { "epoch": 0.7748231117739073, "grad_norm": 1.1317763328552246, "learning_rate": 1.2076686728519949e-05, "loss": 10.434372901916504, "step": 5975 }, { "epoch": 0.7749527892821539, "grad_norm": 0.7036005854606628, "learning_rate": 1.2063381164762721e-05, "loss": 7.447272300720215, "step": 5976 }, { "epoch": 0.7750824667904006, "grad_norm": 1.39826500415802, "learning_rate": 1.205008192919987e-05, "loss": 10.101980209350586, "step": 5977 }, { "epoch": 0.7752121442986473, "grad_norm": 0.9841285347938538, "learning_rate": 1.20367890240498e-05, "loss": 9.617746353149414, "step": 5978 }, { "epoch": 0.7753418218068939, "grad_norm": 0.8382953405380249, "learning_rate": 1.2023502451529933e-05, "loss": 10.813209533691406, "step": 5979 }, { "epoch": 0.7754714993151407, "grad_norm": 0.7270647883415222, "learning_rate": 1.2010222213856558e-05, "loss": 9.470956802368164, "step": 5980 }, { "epoch": 0.7756011768233874, "grad_norm": 0.7395016551017761, "learning_rate": 1.1996948313244993e-05, "loss": 7.684569835662842, "step": 5981 }, { "epoch": 0.775730854331634, "grad_norm": 0.8677623271942139, "learning_rate": 1.198368075190941e-05, "loss": 9.212004661560059, "step": 5982 }, { "epoch": 0.7758605318398807, "grad_norm": 1.071628212928772, "learning_rate": 1.1970419532062999e-05, "loss": 11.777058601379395, "step": 5983 }, { "epoch": 0.7759902093481273, "grad_norm": 1.180331826210022, "learning_rate": 1.195716465591783e-05, "loss": 12.172086715698242, "step": 5984 }, { "epoch": 0.776119886856374, "grad_norm": 0.8428787589073181, "learning_rate": 1.1943916125684973e-05, "loss": 6.287586688995361, "step": 5985 }, { "epoch": 0.7762495643646208, "grad_norm": 0.9971534609794617, "learning_rate": 1.1930673943574383e-05, "loss": 12.690296173095703, "step": 5986 }, { "epoch": 0.7763792418728674, "grad_norm": 0.8242786526679993, "learning_rate": 1.1917438111795014e-05, "loss": 9.413261413574219, "step": 5987 }, { "epoch": 0.7765089193811141, "grad_norm": 0.9393954277038574, "learning_rate": 1.1904208632554714e-05, "loss": 7.525407791137695, "step": 5988 }, { "epoch": 0.7766385968893608, "grad_norm": 0.9631324410438538, "learning_rate": 1.1890985508060276e-05, "loss": 8.582751274108887, "step": 5989 }, { "epoch": 0.7767682743976074, "grad_norm": 1.13941490650177, "learning_rate": 1.1877768740517469e-05, "loss": 10.478031158447266, "step": 5990 }, { "epoch": 0.7768979519058541, "grad_norm": 0.9435611963272095, "learning_rate": 1.1864558332130955e-05, "loss": 8.887110710144043, "step": 5991 }, { "epoch": 0.7770276294141009, "grad_norm": 1.0545282363891602, "learning_rate": 1.1851354285104382e-05, "loss": 11.930885314941406, "step": 5992 }, { "epoch": 0.7771573069223475, "grad_norm": 1.0625191926956177, "learning_rate": 1.1838156601640282e-05, "loss": 9.215737342834473, "step": 5993 }, { "epoch": 0.7772869844305942, "grad_norm": 0.944955050945282, "learning_rate": 1.1824965283940188e-05, "loss": 11.340630531311035, "step": 5994 }, { "epoch": 0.7774166619388408, "grad_norm": 1.00762939453125, "learning_rate": 1.1811780334204515e-05, "loss": 9.334970474243164, "step": 5995 }, { "epoch": 0.7775463394470875, "grad_norm": 1.0034140348434448, "learning_rate": 1.179860175463266e-05, "loss": 8.426335334777832, "step": 5996 }, { "epoch": 0.7776760169553342, "grad_norm": 1.1124345064163208, "learning_rate": 1.178542954742291e-05, "loss": 6.907621383666992, "step": 5997 }, { "epoch": 0.7778056944635808, "grad_norm": 1.0026137828826904, "learning_rate": 1.1772263714772547e-05, "loss": 7.603261947631836, "step": 5998 }, { "epoch": 0.7779353719718276, "grad_norm": 0.9297968149185181, "learning_rate": 1.1759104258877734e-05, "loss": 9.024571418762207, "step": 5999 }, { "epoch": 0.7780650494800743, "grad_norm": 0.8938484787940979, "learning_rate": 1.1745951181933624e-05, "loss": 7.37606954574585, "step": 6000 }, { "epoch": 0.7781947269883209, "grad_norm": 0.9041647911071777, "learning_rate": 1.1732804486134236e-05, "loss": 10.053776741027832, "step": 6001 }, { "epoch": 0.7783244044965676, "grad_norm": 0.7973816394805908, "learning_rate": 1.1719664173672595e-05, "loss": 9.124267578125, "step": 6002 }, { "epoch": 0.7784540820048143, "grad_norm": 1.060920238494873, "learning_rate": 1.1706530246740627e-05, "loss": 10.464970588684082, "step": 6003 }, { "epoch": 0.7785837595130609, "grad_norm": 0.8936932682991028, "learning_rate": 1.169340270752921e-05, "loss": 10.302916526794434, "step": 6004 }, { "epoch": 0.7787134370213077, "grad_norm": 1.0908901691436768, "learning_rate": 1.1680281558228118e-05, "loss": 7.428224563598633, "step": 6005 }, { "epoch": 0.7788431145295543, "grad_norm": 1.1407310962677002, "learning_rate": 1.1667166801026113e-05, "loss": 10.297964096069336, "step": 6006 }, { "epoch": 0.778972792037801, "grad_norm": 1.5697523355484009, "learning_rate": 1.1654058438110837e-05, "loss": 11.951988220214844, "step": 6007 }, { "epoch": 0.7791024695460477, "grad_norm": 0.9287818074226379, "learning_rate": 1.1640956471668913e-05, "loss": 9.06055736541748, "step": 6008 }, { "epoch": 0.7792321470542943, "grad_norm": 0.9011440873146057, "learning_rate": 1.1627860903885856e-05, "loss": 7.998577117919922, "step": 6009 }, { "epoch": 0.779361824562541, "grad_norm": 0.9791204929351807, "learning_rate": 1.1614771736946155e-05, "loss": 8.383262634277344, "step": 6010 }, { "epoch": 0.7794915020707877, "grad_norm": 0.9510983824729919, "learning_rate": 1.1601688973033198e-05, "loss": 7.296055793762207, "step": 6011 }, { "epoch": 0.7796211795790344, "grad_norm": 1.2574715614318848, "learning_rate": 1.1588612614329302e-05, "loss": 8.815966606140137, "step": 6012 }, { "epoch": 0.7797508570872811, "grad_norm": 0.8871055245399475, "learning_rate": 1.1575542663015759e-05, "loss": 8.843381881713867, "step": 6013 }, { "epoch": 0.7798805345955278, "grad_norm": 0.7285643219947815, "learning_rate": 1.1562479121272735e-05, "loss": 7.042809009552002, "step": 6014 }, { "epoch": 0.7800102121037744, "grad_norm": 0.9582728147506714, "learning_rate": 1.154942199127938e-05, "loss": 8.972856521606445, "step": 6015 }, { "epoch": 0.7801398896120211, "grad_norm": 1.2057974338531494, "learning_rate": 1.1536371275213726e-05, "loss": 10.531731605529785, "step": 6016 }, { "epoch": 0.7802695671202677, "grad_norm": 0.8798713684082031, "learning_rate": 1.1523326975252785e-05, "loss": 8.819250106811523, "step": 6017 }, { "epoch": 0.7803992446285145, "grad_norm": 0.9454664587974548, "learning_rate": 1.151028909357244e-05, "loss": 8.999638557434082, "step": 6018 }, { "epoch": 0.7805289221367612, "grad_norm": 0.8995844721794128, "learning_rate": 1.1497257632347563e-05, "loss": 11.250816345214844, "step": 6019 }, { "epoch": 0.7806585996450078, "grad_norm": 0.9201555252075195, "learning_rate": 1.1484232593751909e-05, "loss": 9.271358489990234, "step": 6020 }, { "epoch": 0.7807882771532545, "grad_norm": 0.7448098659515381, "learning_rate": 1.14712139799582e-05, "loss": 5.507317066192627, "step": 6021 }, { "epoch": 0.7809179546615012, "grad_norm": 0.9286510944366455, "learning_rate": 1.145820179313804e-05, "loss": 8.046150207519531, "step": 6022 }, { "epoch": 0.7810476321697478, "grad_norm": 0.894336462020874, "learning_rate": 1.144519603546202e-05, "loss": 11.923842430114746, "step": 6023 }, { "epoch": 0.7811773096779946, "grad_norm": 0.7332093119621277, "learning_rate": 1.1432196709099591e-05, "loss": 9.142148971557617, "step": 6024 }, { "epoch": 0.7813069871862413, "grad_norm": 1.0393513441085815, "learning_rate": 1.1419203816219199e-05, "loss": 8.620269775390625, "step": 6025 }, { "epoch": 0.7814366646944879, "grad_norm": 1.1054116487503052, "learning_rate": 1.1406217358988153e-05, "loss": 9.484224319458008, "step": 6026 }, { "epoch": 0.7815663422027346, "grad_norm": 1.0740176439285278, "learning_rate": 1.1393237339572748e-05, "loss": 7.109877109527588, "step": 6027 }, { "epoch": 0.7816960197109812, "grad_norm": 1.1581370830535889, "learning_rate": 1.1380263760138155e-05, "loss": 7.37915563583374, "step": 6028 }, { "epoch": 0.7818256972192279, "grad_norm": 0.9466854333877563, "learning_rate": 1.1367296622848511e-05, "loss": 10.228015899658203, "step": 6029 }, { "epoch": 0.7819553747274746, "grad_norm": 0.8158121109008789, "learning_rate": 1.1354335929866833e-05, "loss": 6.795218467712402, "step": 6030 }, { "epoch": 0.7820850522357213, "grad_norm": 0.8362112641334534, "learning_rate": 1.1341381683355124e-05, "loss": 7.517951011657715, "step": 6031 }, { "epoch": 0.782214729743968, "grad_norm": 0.8659874200820923, "learning_rate": 1.1328433885474243e-05, "loss": 11.323136329650879, "step": 6032 }, { "epoch": 0.7823444072522147, "grad_norm": 0.8976375460624695, "learning_rate": 1.1315492538384043e-05, "loss": 9.318507194519043, "step": 6033 }, { "epoch": 0.7824740847604613, "grad_norm": 1.0845837593078613, "learning_rate": 1.1302557644243227e-05, "loss": 9.755411148071289, "step": 6034 }, { "epoch": 0.782603762268708, "grad_norm": 1.25894033908844, "learning_rate": 1.1289629205209495e-05, "loss": 9.78315258026123, "step": 6035 }, { "epoch": 0.7827334397769546, "grad_norm": 0.9736824631690979, "learning_rate": 1.1276707223439408e-05, "loss": 9.104013442993164, "step": 6036 }, { "epoch": 0.7828631172852014, "grad_norm": 1.2153035402297974, "learning_rate": 1.126379170108851e-05, "loss": 10.808244705200195, "step": 6037 }, { "epoch": 0.7829927947934481, "grad_norm": 1.2807413339614868, "learning_rate": 1.1250882640311194e-05, "loss": 11.561357498168945, "step": 6038 }, { "epoch": 0.7831224723016947, "grad_norm": 1.1723458766937256, "learning_rate": 1.1237980043260854e-05, "loss": 8.639720916748047, "step": 6039 }, { "epoch": 0.7832521498099414, "grad_norm": 1.234288215637207, "learning_rate": 1.1225083912089735e-05, "loss": 11.366549491882324, "step": 6040 }, { "epoch": 0.7833818273181881, "grad_norm": 0.9849532246589661, "learning_rate": 1.1212194248949054e-05, "loss": 13.571921348571777, "step": 6041 }, { "epoch": 0.7835115048264347, "grad_norm": 1.0880228281021118, "learning_rate": 1.1199311055988942e-05, "loss": 7.752306938171387, "step": 6042 }, { "epoch": 0.7836411823346814, "grad_norm": 1.2283074855804443, "learning_rate": 1.1186434335358414e-05, "loss": 7.688050270080566, "step": 6043 }, { "epoch": 0.7837708598429282, "grad_norm": 1.081426978111267, "learning_rate": 1.1173564089205457e-05, "loss": 9.545825004577637, "step": 6044 }, { "epoch": 0.7839005373511748, "grad_norm": 0.6980364918708801, "learning_rate": 1.1160700319676926e-05, "loss": 8.573237419128418, "step": 6045 }, { "epoch": 0.7840302148594215, "grad_norm": 0.747265636920929, "learning_rate": 1.1147843028918653e-05, "loss": 8.405424118041992, "step": 6046 }, { "epoch": 0.7841598923676681, "grad_norm": 0.9777131676673889, "learning_rate": 1.1134992219075325e-05, "loss": 6.78264045715332, "step": 6047 }, { "epoch": 0.7842895698759148, "grad_norm": 1.0134748220443726, "learning_rate": 1.1122147892290612e-05, "loss": 11.152793884277344, "step": 6048 }, { "epoch": 0.7844192473841615, "grad_norm": 0.9764230847358704, "learning_rate": 1.1109310050707039e-05, "loss": 9.406368255615234, "step": 6049 }, { "epoch": 0.7845489248924082, "grad_norm": 0.7148395776748657, "learning_rate": 1.1096478696466117e-05, "loss": 8.098737716674805, "step": 6050 }, { "epoch": 0.7846786024006549, "grad_norm": 0.6387185454368591, "learning_rate": 1.1083653831708212e-05, "loss": 5.5153656005859375, "step": 6051 }, { "epoch": 0.7848082799089016, "grad_norm": 1.4369417428970337, "learning_rate": 1.107083545857266e-05, "loss": 8.490317344665527, "step": 6052 }, { "epoch": 0.7849379574171482, "grad_norm": 1.13041090965271, "learning_rate": 1.1058023579197658e-05, "loss": 10.932185173034668, "step": 6053 }, { "epoch": 0.7850676349253949, "grad_norm": 1.1227974891662598, "learning_rate": 1.1045218195720381e-05, "loss": 9.01180648803711, "step": 6054 }, { "epoch": 0.7851973124336415, "grad_norm": 0.8393257260322571, "learning_rate": 1.1032419310276871e-05, "loss": 8.778606414794922, "step": 6055 }, { "epoch": 0.7853269899418883, "grad_norm": 1.146296739578247, "learning_rate": 1.1019626925002118e-05, "loss": 7.514533996582031, "step": 6056 }, { "epoch": 0.785456667450135, "grad_norm": 0.7942802309989929, "learning_rate": 1.1006841042030003e-05, "loss": 5.969908714294434, "step": 6057 }, { "epoch": 0.7855863449583816, "grad_norm": 0.7331419587135315, "learning_rate": 1.0994061663493354e-05, "loss": 7.122690200805664, "step": 6058 }, { "epoch": 0.7857160224666283, "grad_norm": 0.8016015887260437, "learning_rate": 1.098128879152387e-05, "loss": 7.2450079917907715, "step": 6059 }, { "epoch": 0.785845699974875, "grad_norm": 0.9309637546539307, "learning_rate": 1.0968522428252215e-05, "loss": 8.71580696105957, "step": 6060 }, { "epoch": 0.7859753774831216, "grad_norm": 1.1079773902893066, "learning_rate": 1.0955762575807921e-05, "loss": 9.280041694641113, "step": 6061 }, { "epoch": 0.7861050549913683, "grad_norm": 0.7611222267150879, "learning_rate": 1.0943009236319468e-05, "loss": 10.068305969238281, "step": 6062 }, { "epoch": 0.7862347324996151, "grad_norm": 0.992097795009613, "learning_rate": 1.0930262411914222e-05, "loss": 10.35970687866211, "step": 6063 }, { "epoch": 0.7863644100078617, "grad_norm": 0.895683228969574, "learning_rate": 1.0917522104718497e-05, "loss": 8.235983848571777, "step": 6064 }, { "epoch": 0.7864940875161084, "grad_norm": 0.793839693069458, "learning_rate": 1.0904788316857473e-05, "loss": 9.692971229553223, "step": 6065 }, { "epoch": 0.786623765024355, "grad_norm": 1.0049105882644653, "learning_rate": 1.0892061050455298e-05, "loss": 8.246881484985352, "step": 6066 }, { "epoch": 0.7867534425326017, "grad_norm": 0.624390184879303, "learning_rate": 1.0879340307634977e-05, "loss": 7.867865562438965, "step": 6067 }, { "epoch": 0.7868831200408484, "grad_norm": 0.8243587613105774, "learning_rate": 1.0866626090518472e-05, "loss": 7.609636306762695, "step": 6068 }, { "epoch": 0.7870127975490951, "grad_norm": 1.0151774883270264, "learning_rate": 1.085391840122662e-05, "loss": 7.716732025146484, "step": 6069 }, { "epoch": 0.7871424750573418, "grad_norm": 0.7277267575263977, "learning_rate": 1.084121724187921e-05, "loss": 9.668179512023926, "step": 6070 }, { "epoch": 0.7872721525655885, "grad_norm": 1.0264085531234741, "learning_rate": 1.0828522614594882e-05, "loss": 8.135282516479492, "step": 6071 }, { "epoch": 0.7874018300738351, "grad_norm": 0.9190200567245483, "learning_rate": 1.0815834521491264e-05, "loss": 7.658036231994629, "step": 6072 }, { "epoch": 0.7875315075820818, "grad_norm": 1.0824329853057861, "learning_rate": 1.0803152964684831e-05, "loss": 8.805389404296875, "step": 6073 }, { "epoch": 0.7876611850903285, "grad_norm": 0.9277433753013611, "learning_rate": 1.0790477946290978e-05, "loss": 11.649275779724121, "step": 6074 }, { "epoch": 0.7877908625985751, "grad_norm": 0.988724410533905, "learning_rate": 1.0777809468424045e-05, "loss": 8.67327880859375, "step": 6075 }, { "epoch": 0.7879205401068219, "grad_norm": 0.9991985559463501, "learning_rate": 1.0765147533197228e-05, "loss": 10.085610389709473, "step": 6076 }, { "epoch": 0.7880502176150685, "grad_norm": 1.029789924621582, "learning_rate": 1.0752492142722697e-05, "loss": 6.478802680969238, "step": 6077 }, { "epoch": 0.7881798951233152, "grad_norm": 0.6745532751083374, "learning_rate": 1.0739843299111451e-05, "loss": 7.952938079833984, "step": 6078 }, { "epoch": 0.7883095726315619, "grad_norm": 1.0704292058944702, "learning_rate": 1.072720100447347e-05, "loss": 12.853449821472168, "step": 6079 }, { "epoch": 0.7884392501398085, "grad_norm": 0.8698081970214844, "learning_rate": 1.0714565260917598e-05, "loss": 9.569350242614746, "step": 6080 }, { "epoch": 0.7885689276480552, "grad_norm": 0.8559569120407104, "learning_rate": 1.070193607055161e-05, "loss": 8.992773056030273, "step": 6081 }, { "epoch": 0.788698605156302, "grad_norm": 1.0013597011566162, "learning_rate": 1.0689313435482163e-05, "loss": 10.291078567504883, "step": 6082 }, { "epoch": 0.7888282826645486, "grad_norm": 1.1441384553909302, "learning_rate": 1.0676697357814853e-05, "loss": 9.787040710449219, "step": 6083 }, { "epoch": 0.7889579601727953, "grad_norm": 0.8879843354225159, "learning_rate": 1.0664087839654141e-05, "loss": 8.264941215515137, "step": 6084 }, { "epoch": 0.789087637681042, "grad_norm": 0.7195554971694946, "learning_rate": 1.0651484883103434e-05, "loss": 8.702567100524902, "step": 6085 }, { "epoch": 0.7892173151892886, "grad_norm": 1.1832553148269653, "learning_rate": 1.0638888490265014e-05, "loss": 8.579562187194824, "step": 6086 }, { "epoch": 0.7893469926975353, "grad_norm": 0.8206738233566284, "learning_rate": 1.0626298663240098e-05, "loss": 9.745627403259277, "step": 6087 }, { "epoch": 0.789476670205782, "grad_norm": 1.0104774236679077, "learning_rate": 1.0613715404128766e-05, "loss": 9.07412052154541, "step": 6088 }, { "epoch": 0.7896063477140287, "grad_norm": 0.8371556401252747, "learning_rate": 1.0601138715030052e-05, "loss": 12.842658042907715, "step": 6089 }, { "epoch": 0.7897360252222754, "grad_norm": 0.6696509122848511, "learning_rate": 1.0588568598041842e-05, "loss": 7.438005447387695, "step": 6090 }, { "epoch": 0.789865702730522, "grad_norm": 1.1175779104232788, "learning_rate": 1.057600505526099e-05, "loss": 9.817879676818848, "step": 6091 }, { "epoch": 0.7899953802387687, "grad_norm": 1.0112870931625366, "learning_rate": 1.0563448088783179e-05, "loss": 10.518742561340332, "step": 6092 }, { "epoch": 0.7901250577470154, "grad_norm": 1.0092298984527588, "learning_rate": 1.055089770070306e-05, "loss": 12.36365795135498, "step": 6093 }, { "epoch": 0.790254735255262, "grad_norm": 1.1840648651123047, "learning_rate": 1.0538353893114134e-05, "loss": 13.775228500366211, "step": 6094 }, { "epoch": 0.7903844127635088, "grad_norm": 0.9028439521789551, "learning_rate": 1.0525816668108857e-05, "loss": 6.144558429718018, "step": 6095 }, { "epoch": 0.7905140902717555, "grad_norm": 1.0702847242355347, "learning_rate": 1.0513286027778535e-05, "loss": 9.947758674621582, "step": 6096 }, { "epoch": 0.7906437677800021, "grad_norm": 1.060206413269043, "learning_rate": 1.0500761974213419e-05, "loss": 9.453289031982422, "step": 6097 }, { "epoch": 0.7907734452882488, "grad_norm": 0.8946691155433655, "learning_rate": 1.0488244509502631e-05, "loss": 6.733645439147949, "step": 6098 }, { "epoch": 0.7909031227964954, "grad_norm": 0.8286833167076111, "learning_rate": 1.0475733635734198e-05, "loss": 7.730772495269775, "step": 6099 }, { "epoch": 0.7910328003047421, "grad_norm": 0.7994063496589661, "learning_rate": 1.0463229354995068e-05, "loss": 9.452522277832031, "step": 6100 }, { "epoch": 0.7911624778129889, "grad_norm": 0.9261969923973083, "learning_rate": 1.0450731669371066e-05, "loss": 6.637380599975586, "step": 6101 }, { "epoch": 0.7912921553212355, "grad_norm": 0.8989923000335693, "learning_rate": 1.0438240580946944e-05, "loss": 8.782964706420898, "step": 6102 }, { "epoch": 0.7914218328294822, "grad_norm": 0.7827051877975464, "learning_rate": 1.042575609180631e-05, "loss": 10.072794914245605, "step": 6103 }, { "epoch": 0.7915515103377289, "grad_norm": 0.8252642154693604, "learning_rate": 1.0413278204031723e-05, "loss": 8.691621780395508, "step": 6104 }, { "epoch": 0.7916811878459755, "grad_norm": 1.197106957435608, "learning_rate": 1.0400806919704591e-05, "loss": 7.795993328094482, "step": 6105 }, { "epoch": 0.7918108653542222, "grad_norm": 0.7180423140525818, "learning_rate": 1.0388342240905269e-05, "loss": 8.083770751953125, "step": 6106 }, { "epoch": 0.7919405428624688, "grad_norm": 0.7824615836143494, "learning_rate": 1.0375884169712963e-05, "loss": 9.575237274169922, "step": 6107 }, { "epoch": 0.7920702203707156, "grad_norm": 0.8173612952232361, "learning_rate": 1.0363432708205822e-05, "loss": 8.67795181274414, "step": 6108 }, { "epoch": 0.7921998978789623, "grad_norm": 1.2937690019607544, "learning_rate": 1.0350987858460842e-05, "loss": 9.634543418884277, "step": 6109 }, { "epoch": 0.7923295753872089, "grad_norm": 1.0733016729354858, "learning_rate": 1.0338549622553978e-05, "loss": 11.286395072937012, "step": 6110 }, { "epoch": 0.7924592528954556, "grad_norm": 1.0424003601074219, "learning_rate": 1.0326118002560015e-05, "loss": 7.44422721862793, "step": 6111 }, { "epoch": 0.7925889304037023, "grad_norm": 0.7449144721031189, "learning_rate": 1.03136930005527e-05, "loss": 7.923689365386963, "step": 6112 }, { "epoch": 0.7927186079119489, "grad_norm": 1.0981825590133667, "learning_rate": 1.0301274618604605e-05, "loss": 13.087433815002441, "step": 6113 }, { "epoch": 0.7928482854201957, "grad_norm": 0.7196126580238342, "learning_rate": 1.028886285878728e-05, "loss": 5.619276523590088, "step": 6114 }, { "epoch": 0.7929779629284424, "grad_norm": 1.0535269975662231, "learning_rate": 1.0276457723171085e-05, "loss": 8.576506614685059, "step": 6115 }, { "epoch": 0.793107640436689, "grad_norm": 0.7487453818321228, "learning_rate": 1.0264059213825345e-05, "loss": 6.65378999710083, "step": 6116 }, { "epoch": 0.7932373179449357, "grad_norm": 1.0107568502426147, "learning_rate": 1.0251667332818215e-05, "loss": 8.597367286682129, "step": 6117 }, { "epoch": 0.7933669954531823, "grad_norm": 0.9391393065452576, "learning_rate": 1.0239282082216839e-05, "loss": 8.358354568481445, "step": 6118 }, { "epoch": 0.793496672961429, "grad_norm": 1.0848287343978882, "learning_rate": 1.0226903464087145e-05, "loss": 10.52191162109375, "step": 6119 }, { "epoch": 0.7936263504696758, "grad_norm": 0.7979366779327393, "learning_rate": 1.0214531480494039e-05, "loss": 10.404900550842285, "step": 6120 }, { "epoch": 0.7937560279779224, "grad_norm": 0.607166588306427, "learning_rate": 1.0202166133501273e-05, "loss": 5.738358497619629, "step": 6121 }, { "epoch": 0.7938857054861691, "grad_norm": 0.7746673822402954, "learning_rate": 1.0189807425171494e-05, "loss": 8.26968002319336, "step": 6122 }, { "epoch": 0.7940153829944158, "grad_norm": 0.9004536867141724, "learning_rate": 1.0177455357566279e-05, "loss": 11.145808219909668, "step": 6123 }, { "epoch": 0.7941450605026624, "grad_norm": 0.8742948174476624, "learning_rate": 1.0165109932746047e-05, "loss": 11.671512603759766, "step": 6124 }, { "epoch": 0.7942747380109091, "grad_norm": 1.1157145500183105, "learning_rate": 1.0152771152770157e-05, "loss": 11.481314659118652, "step": 6125 }, { "epoch": 0.7944044155191557, "grad_norm": 0.8247311115264893, "learning_rate": 1.0140439019696813e-05, "loss": 9.642704963684082, "step": 6126 }, { "epoch": 0.7945340930274025, "grad_norm": 0.8057031035423279, "learning_rate": 1.0128113535583162e-05, "loss": 6.395989894866943, "step": 6127 }, { "epoch": 0.7946637705356492, "grad_norm": 1.1260344982147217, "learning_rate": 1.0115794702485187e-05, "loss": 10.885269165039062, "step": 6128 }, { "epoch": 0.7947934480438958, "grad_norm": 1.2235246896743774, "learning_rate": 1.0103482522457813e-05, "loss": 10.770390510559082, "step": 6129 }, { "epoch": 0.7949231255521425, "grad_norm": 0.970154881477356, "learning_rate": 1.0091176997554802e-05, "loss": 9.750299453735352, "step": 6130 }, { "epoch": 0.7950528030603892, "grad_norm": 0.8430508375167847, "learning_rate": 1.007887812982886e-05, "loss": 10.47046184539795, "step": 6131 }, { "epoch": 0.7951824805686358, "grad_norm": 0.9382746815681458, "learning_rate": 1.006658592133154e-05, "loss": 7.17704439163208, "step": 6132 }, { "epoch": 0.7953121580768826, "grad_norm": 0.9304953217506409, "learning_rate": 1.0054300374113317e-05, "loss": 9.033770561218262, "step": 6133 }, { "epoch": 0.7954418355851293, "grad_norm": 1.5797250270843506, "learning_rate": 1.0042021490223519e-05, "loss": 10.566995620727539, "step": 6134 }, { "epoch": 0.7955715130933759, "grad_norm": 0.8779513239860535, "learning_rate": 1.002974927171041e-05, "loss": 10.228934288024902, "step": 6135 }, { "epoch": 0.7957011906016226, "grad_norm": 0.9084723591804504, "learning_rate": 1.001748372062108e-05, "loss": 6.616665840148926, "step": 6136 }, { "epoch": 0.7958308681098692, "grad_norm": 1.125802755355835, "learning_rate": 1.000522483900157e-05, "loss": 10.542993545532227, "step": 6137 }, { "epoch": 0.7959605456181159, "grad_norm": 0.9961622953414917, "learning_rate": 9.992972628896763e-06, "loss": 6.929778575897217, "step": 6138 }, { "epoch": 0.7960902231263626, "grad_norm": 1.0380865335464478, "learning_rate": 9.980727092350462e-06, "loss": 10.138040542602539, "step": 6139 }, { "epoch": 0.7962199006346093, "grad_norm": 1.1634465456008911, "learning_rate": 9.968488231405315e-06, "loss": 7.920810699462891, "step": 6140 }, { "epoch": 0.796349578142856, "grad_norm": 0.7361977696418762, "learning_rate": 9.956256048102912e-06, "loss": 8.287079811096191, "step": 6141 }, { "epoch": 0.7964792556511027, "grad_norm": 1.1531124114990234, "learning_rate": 9.944030544483673e-06, "loss": 8.322744369506836, "step": 6142 }, { "epoch": 0.7966089331593493, "grad_norm": 0.9075791239738464, "learning_rate": 9.931811722586953e-06, "loss": 10.187719345092773, "step": 6143 }, { "epoch": 0.796738610667596, "grad_norm": 1.051314115524292, "learning_rate": 9.919599584450951e-06, "loss": 8.395222663879395, "step": 6144 }, { "epoch": 0.7968682881758427, "grad_norm": 0.7984992861747742, "learning_rate": 9.907394132112786e-06, "loss": 10.534016609191895, "step": 6145 }, { "epoch": 0.7969979656840894, "grad_norm": 1.0613865852355957, "learning_rate": 9.895195367608422e-06, "loss": 7.99729061126709, "step": 6146 }, { "epoch": 0.7971276431923361, "grad_norm": 0.8315021395683289, "learning_rate": 9.883003292972764e-06, "loss": 7.799441814422607, "step": 6147 }, { "epoch": 0.7972573207005828, "grad_norm": 1.0036824941635132, "learning_rate": 9.870817910239532e-06, "loss": 12.296546936035156, "step": 6148 }, { "epoch": 0.7973869982088294, "grad_norm": 1.1017308235168457, "learning_rate": 9.858639221441396e-06, "loss": 10.085539817810059, "step": 6149 }, { "epoch": 0.7975166757170761, "grad_norm": 0.8428932428359985, "learning_rate": 9.846467228609857e-06, "loss": 7.60906457901001, "step": 6150 }, { "epoch": 0.7976463532253227, "grad_norm": 0.5661912560462952, "learning_rate": 9.834301933775336e-06, "loss": 7.945818901062012, "step": 6151 }, { "epoch": 0.7977760307335695, "grad_norm": 0.9673437476158142, "learning_rate": 9.822143338967105e-06, "loss": 8.307894706726074, "step": 6152 }, { "epoch": 0.7979057082418162, "grad_norm": 0.7663663029670715, "learning_rate": 9.809991446213356e-06, "loss": 12.351075172424316, "step": 6153 }, { "epoch": 0.7980353857500628, "grad_norm": 0.5627714395523071, "learning_rate": 9.797846257541122e-06, "loss": 6.212096691131592, "step": 6154 }, { "epoch": 0.7981650632583095, "grad_norm": 1.276086688041687, "learning_rate": 9.78570777497636e-06, "loss": 13.475468635559082, "step": 6155 }, { "epoch": 0.7982947407665562, "grad_norm": 0.869449257850647, "learning_rate": 9.77357600054386e-06, "loss": 11.879451751708984, "step": 6156 }, { "epoch": 0.7984244182748028, "grad_norm": 0.9290108680725098, "learning_rate": 9.76145093626733e-06, "loss": 5.611686706542969, "step": 6157 }, { "epoch": 0.7985540957830495, "grad_norm": 1.0334546566009521, "learning_rate": 9.749332584169363e-06, "loss": 10.197836875915527, "step": 6158 }, { "epoch": 0.7986837732912963, "grad_norm": 0.8911569714546204, "learning_rate": 9.737220946271392e-06, "loss": 5.0572123527526855, "step": 6159 }, { "epoch": 0.7988134507995429, "grad_norm": 1.3398730754852295, "learning_rate": 9.725116024593778e-06, "loss": 9.567683219909668, "step": 6160 }, { "epoch": 0.7989431283077896, "grad_norm": 1.1082762479782104, "learning_rate": 9.713017821155718e-06, "loss": 8.852006912231445, "step": 6161 }, { "epoch": 0.7990728058160362, "grad_norm": 1.2198811769485474, "learning_rate": 9.70092633797533e-06, "loss": 10.808576583862305, "step": 6162 }, { "epoch": 0.7992024833242829, "grad_norm": 0.9438217878341675, "learning_rate": 9.68884157706957e-06, "loss": 8.946331977844238, "step": 6163 }, { "epoch": 0.7993321608325296, "grad_norm": 0.7706418037414551, "learning_rate": 9.676763540454314e-06, "loss": 7.603087902069092, "step": 6164 }, { "epoch": 0.7994618383407763, "grad_norm": 0.9702330231666565, "learning_rate": 9.664692230144268e-06, "loss": 7.973124980926514, "step": 6165 }, { "epoch": 0.799591515849023, "grad_norm": 0.9108882546424866, "learning_rate": 9.652627648153072e-06, "loss": 9.671318054199219, "step": 6166 }, { "epoch": 0.7997211933572697, "grad_norm": 0.9897159934043884, "learning_rate": 9.640569796493192e-06, "loss": 9.997328758239746, "step": 6167 }, { "epoch": 0.7998508708655163, "grad_norm": 0.8400844931602478, "learning_rate": 9.628518677176013e-06, "loss": 8.21371078491211, "step": 6168 }, { "epoch": 0.799980548373763, "grad_norm": 0.9104022979736328, "learning_rate": 9.616474292211758e-06, "loss": 9.52031421661377, "step": 6169 }, { "epoch": 0.8001102258820096, "grad_norm": 0.9444240927696228, "learning_rate": 9.604436643609571e-06, "loss": 7.567176818847656, "step": 6170 }, { "epoch": 0.8002399033902563, "grad_norm": 0.7166390419006348, "learning_rate": 9.59240573337742e-06, "loss": 10.954424858093262, "step": 6171 }, { "epoch": 0.8003695808985031, "grad_norm": 1.185143232345581, "learning_rate": 9.5803815635222e-06, "loss": 10.609138488769531, "step": 6172 }, { "epoch": 0.8004992584067497, "grad_norm": 0.8529062271118164, "learning_rate": 9.56836413604964e-06, "loss": 6.2354655265808105, "step": 6173 }, { "epoch": 0.8006289359149964, "grad_norm": 0.6521766185760498, "learning_rate": 9.55635345296439e-06, "loss": 4.253657817840576, "step": 6174 }, { "epoch": 0.8007586134232431, "grad_norm": 0.8978517651557922, "learning_rate": 9.544349516269912e-06, "loss": 6.443106651306152, "step": 6175 }, { "epoch": 0.8008882909314897, "grad_norm": 1.079035997390747, "learning_rate": 9.532352327968613e-06, "loss": 9.737878799438477, "step": 6176 }, { "epoch": 0.8010179684397364, "grad_norm": 0.9084116816520691, "learning_rate": 9.520361890061707e-06, "loss": 9.320513725280762, "step": 6177 }, { "epoch": 0.8011476459479832, "grad_norm": 0.7997791171073914, "learning_rate": 9.508378204549351e-06, "loss": 4.986150741577148, "step": 6178 }, { "epoch": 0.8012773234562298, "grad_norm": 1.2848740816116333, "learning_rate": 9.496401273430505e-06, "loss": 8.896291732788086, "step": 6179 }, { "epoch": 0.8014070009644765, "grad_norm": 1.4023329019546509, "learning_rate": 9.484431098703062e-06, "loss": 9.266027450561523, "step": 6180 }, { "epoch": 0.8015366784727231, "grad_norm": 1.0650725364685059, "learning_rate": 9.472467682363745e-06, "loss": 8.51510238647461, "step": 6181 }, { "epoch": 0.8016663559809698, "grad_norm": 1.00885009765625, "learning_rate": 9.460511026408187e-06, "loss": 9.987674713134766, "step": 6182 }, { "epoch": 0.8017960334892165, "grad_norm": 0.7716729044914246, "learning_rate": 9.448561132830863e-06, "loss": 5.43678617477417, "step": 6183 }, { "epoch": 0.8019257109974632, "grad_norm": 0.8114612102508545, "learning_rate": 9.436618003625114e-06, "loss": 8.670873641967773, "step": 6184 }, { "epoch": 0.8020553885057099, "grad_norm": 0.6957663297653198, "learning_rate": 9.4246816407832e-06, "loss": 8.811068534851074, "step": 6185 }, { "epoch": 0.8021850660139566, "grad_norm": 1.169977068901062, "learning_rate": 9.412752046296191e-06, "loss": 11.414947509765625, "step": 6186 }, { "epoch": 0.8023147435222032, "grad_norm": 0.6387645602226257, "learning_rate": 9.40082922215409e-06, "loss": 6.904572010040283, "step": 6187 }, { "epoch": 0.8024444210304499, "grad_norm": 0.9133781790733337, "learning_rate": 9.388913170345708e-06, "loss": 9.636138916015625, "step": 6188 }, { "epoch": 0.8025740985386965, "grad_norm": 1.0697617530822754, "learning_rate": 9.377003892858788e-06, "loss": 12.947765350341797, "step": 6189 }, { "epoch": 0.8027037760469432, "grad_norm": 1.1980228424072266, "learning_rate": 9.365101391679882e-06, "loss": 10.28111457824707, "step": 6190 }, { "epoch": 0.80283345355519, "grad_norm": 0.8213227987289429, "learning_rate": 9.353205668794468e-06, "loss": 7.6410651206970215, "step": 6191 }, { "epoch": 0.8029631310634366, "grad_norm": 0.7130692005157471, "learning_rate": 9.34131672618685e-06, "loss": 7.302030563354492, "step": 6192 }, { "epoch": 0.8030928085716833, "grad_norm": 1.1381068229675293, "learning_rate": 9.329434565840234e-06, "loss": 8.645942687988281, "step": 6193 }, { "epoch": 0.80322248607993, "grad_norm": 0.8200966715812683, "learning_rate": 9.317559189736647e-06, "loss": 8.791705131530762, "step": 6194 }, { "epoch": 0.8033521635881766, "grad_norm": 0.706969141960144, "learning_rate": 9.305690599857064e-06, "loss": 9.128893852233887, "step": 6195 }, { "epoch": 0.8034818410964233, "grad_norm": 0.8640881180763245, "learning_rate": 9.293828798181248e-06, "loss": 6.70956563949585, "step": 6196 }, { "epoch": 0.8036115186046701, "grad_norm": 1.393354058265686, "learning_rate": 9.281973786687875e-06, "loss": 9.008790969848633, "step": 6197 }, { "epoch": 0.8037411961129167, "grad_norm": 0.9434901475906372, "learning_rate": 9.270125567354465e-06, "loss": 11.719327926635742, "step": 6198 }, { "epoch": 0.8038708736211634, "grad_norm": 0.8623617887496948, "learning_rate": 9.258284142157431e-06, "loss": 8.3251953125, "step": 6199 }, { "epoch": 0.80400055112941, "grad_norm": 0.7325652241706848, "learning_rate": 9.246449513072014e-06, "loss": 8.297955513000488, "step": 6200 }, { "epoch": 0.8041302286376567, "grad_norm": 0.8433619141578674, "learning_rate": 9.23462168207237e-06, "loss": 7.683567523956299, "step": 6201 }, { "epoch": 0.8042599061459034, "grad_norm": 0.8295403122901917, "learning_rate": 9.22280065113147e-06, "loss": 9.091647148132324, "step": 6202 }, { "epoch": 0.80438958365415, "grad_norm": 0.9608852863311768, "learning_rate": 9.210986422221201e-06, "loss": 11.575584411621094, "step": 6203 }, { "epoch": 0.8045192611623968, "grad_norm": 0.9374826550483704, "learning_rate": 9.199178997312269e-06, "loss": 8.120616912841797, "step": 6204 }, { "epoch": 0.8046489386706435, "grad_norm": 0.8337106108665466, "learning_rate": 9.187378378374289e-06, "loss": 10.050982475280762, "step": 6205 }, { "epoch": 0.8047786161788901, "grad_norm": 1.1226212978363037, "learning_rate": 9.175584567375706e-06, "loss": 7.44525671005249, "step": 6206 }, { "epoch": 0.8049082936871368, "grad_norm": 0.9988282918930054, "learning_rate": 9.163797566283821e-06, "loss": 9.516491889953613, "step": 6207 }, { "epoch": 0.8050379711953835, "grad_norm": 0.8361278772354126, "learning_rate": 9.15201737706486e-06, "loss": 6.191816329956055, "step": 6208 }, { "epoch": 0.8051676487036301, "grad_norm": 1.0087494850158691, "learning_rate": 9.140244001683833e-06, "loss": 9.973400115966797, "step": 6209 }, { "epoch": 0.8052973262118769, "grad_norm": 1.1270428895950317, "learning_rate": 9.128477442104687e-06, "loss": 11.358443260192871, "step": 6210 }, { "epoch": 0.8054270037201235, "grad_norm": 1.048675775527954, "learning_rate": 9.116717700290173e-06, "loss": 8.753469467163086, "step": 6211 }, { "epoch": 0.8055566812283702, "grad_norm": 0.9592140316963196, "learning_rate": 9.104964778201947e-06, "loss": 8.156681060791016, "step": 6212 }, { "epoch": 0.8056863587366169, "grad_norm": 1.0002787113189697, "learning_rate": 9.093218677800486e-06, "loss": 11.21710205078125, "step": 6213 }, { "epoch": 0.8058160362448635, "grad_norm": 1.2226917743682861, "learning_rate": 9.081479401045184e-06, "loss": 10.976288795471191, "step": 6214 }, { "epoch": 0.8059457137531102, "grad_norm": 0.961788535118103, "learning_rate": 9.069746949894243e-06, "loss": 11.57802963256836, "step": 6215 }, { "epoch": 0.806075391261357, "grad_norm": 0.7923755645751953, "learning_rate": 9.058021326304762e-06, "loss": 8.42403793334961, "step": 6216 }, { "epoch": 0.8062050687696036, "grad_norm": 0.8764774799346924, "learning_rate": 9.046302532232675e-06, "loss": 5.952735900878906, "step": 6217 }, { "epoch": 0.8063347462778503, "grad_norm": 0.6827149987220764, "learning_rate": 9.034590569632805e-06, "loss": 8.033489227294922, "step": 6218 }, { "epoch": 0.806464423786097, "grad_norm": 1.1060655117034912, "learning_rate": 9.022885440458807e-06, "loss": 9.527188301086426, "step": 6219 }, { "epoch": 0.8065941012943436, "grad_norm": 0.8978996276855469, "learning_rate": 9.01118714666322e-06, "loss": 8.13413143157959, "step": 6220 }, { "epoch": 0.8067237788025903, "grad_norm": 0.6583013534545898, "learning_rate": 8.999495690197423e-06, "loss": 8.85202693939209, "step": 6221 }, { "epoch": 0.8068534563108369, "grad_norm": 1.1182477474212646, "learning_rate": 8.987811073011676e-06, "loss": 10.255815505981445, "step": 6222 }, { "epoch": 0.8069831338190837, "grad_norm": 1.1794954538345337, "learning_rate": 8.976133297055072e-06, "loss": 9.502205848693848, "step": 6223 }, { "epoch": 0.8071128113273304, "grad_norm": 0.6474317312240601, "learning_rate": 8.964462364275594e-06, "loss": 4.758273124694824, "step": 6224 }, { "epoch": 0.807242488835577, "grad_norm": 1.1831687688827515, "learning_rate": 8.952798276620038e-06, "loss": 11.34079647064209, "step": 6225 }, { "epoch": 0.8073721663438237, "grad_norm": 1.1604304313659668, "learning_rate": 8.941141036034124e-06, "loss": 7.539854049682617, "step": 6226 }, { "epoch": 0.8075018438520704, "grad_norm": 1.198737382888794, "learning_rate": 8.929490644462357e-06, "loss": 8.781764030456543, "step": 6227 }, { "epoch": 0.807631521360317, "grad_norm": 0.9800476431846619, "learning_rate": 8.917847103848165e-06, "loss": 9.522029876708984, "step": 6228 }, { "epoch": 0.8077611988685638, "grad_norm": 0.7827052474021912, "learning_rate": 8.906210416133775e-06, "loss": 12.509085655212402, "step": 6229 }, { "epoch": 0.8078908763768105, "grad_norm": 0.9081536531448364, "learning_rate": 8.894580583260326e-06, "loss": 13.423839569091797, "step": 6230 }, { "epoch": 0.8080205538850571, "grad_norm": 1.056464672088623, "learning_rate": 8.882957607167759e-06, "loss": 8.886580467224121, "step": 6231 }, { "epoch": 0.8081502313933038, "grad_norm": 0.942811906337738, "learning_rate": 8.871341489794926e-06, "loss": 9.000739097595215, "step": 6232 }, { "epoch": 0.8082799089015504, "grad_norm": 1.0228062868118286, "learning_rate": 8.859732233079483e-06, "loss": 7.579392910003662, "step": 6233 }, { "epoch": 0.8084095864097971, "grad_norm": 0.6536529660224915, "learning_rate": 8.848129838957974e-06, "loss": 7.601774215698242, "step": 6234 }, { "epoch": 0.8085392639180438, "grad_norm": 1.077494740486145, "learning_rate": 8.836534309365806e-06, "loss": 8.710787773132324, "step": 6235 }, { "epoch": 0.8086689414262905, "grad_norm": 0.9740316867828369, "learning_rate": 8.824945646237205e-06, "loss": 8.957585334777832, "step": 6236 }, { "epoch": 0.8087986189345372, "grad_norm": 0.9103164672851562, "learning_rate": 8.813363851505284e-06, "loss": 7.154746055603027, "step": 6237 }, { "epoch": 0.8089282964427839, "grad_norm": 0.8252628445625305, "learning_rate": 8.801788927101989e-06, "loss": 10.751869201660156, "step": 6238 }, { "epoch": 0.8090579739510305, "grad_norm": 1.3192944526672363, "learning_rate": 8.790220874958144e-06, "loss": 9.047222137451172, "step": 6239 }, { "epoch": 0.8091876514592772, "grad_norm": 0.9069787859916687, "learning_rate": 8.778659697003389e-06, "loss": 8.87210464477539, "step": 6240 }, { "epoch": 0.8093173289675238, "grad_norm": 1.1294193267822266, "learning_rate": 8.767105395166269e-06, "loss": 10.602023124694824, "step": 6241 }, { "epoch": 0.8094470064757706, "grad_norm": 0.6763254404067993, "learning_rate": 8.755557971374123e-06, "loss": 7.128617286682129, "step": 6242 }, { "epoch": 0.8095766839840173, "grad_norm": 0.8858948349952698, "learning_rate": 8.744017427553197e-06, "loss": 10.925777435302734, "step": 6243 }, { "epoch": 0.8097063614922639, "grad_norm": 0.9236703515052795, "learning_rate": 8.732483765628546e-06, "loss": 9.538887023925781, "step": 6244 }, { "epoch": 0.8098360390005106, "grad_norm": 0.6218940615653992, "learning_rate": 8.720956987524115e-06, "loss": 8.512460708618164, "step": 6245 }, { "epoch": 0.8099657165087573, "grad_norm": 1.0506917238235474, "learning_rate": 8.709437095162659e-06, "loss": 8.289196968078613, "step": 6246 }, { "epoch": 0.8100953940170039, "grad_norm": 1.2466100454330444, "learning_rate": 8.697924090465837e-06, "loss": 9.534443855285645, "step": 6247 }, { "epoch": 0.8102250715252507, "grad_norm": 1.2241212129592896, "learning_rate": 8.6864179753541e-06, "loss": 11.033055305480957, "step": 6248 }, { "epoch": 0.8103547490334974, "grad_norm": 0.840814471244812, "learning_rate": 8.674918751746802e-06, "loss": 8.217982292175293, "step": 6249 }, { "epoch": 0.810484426541744, "grad_norm": 0.8551163673400879, "learning_rate": 8.663426421562105e-06, "loss": 8.917436599731445, "step": 6250 }, { "epoch": 0.8106141040499907, "grad_norm": 0.9631156921386719, "learning_rate": 8.651940986717056e-06, "loss": 6.442762851715088, "step": 6251 }, { "epoch": 0.8107437815582373, "grad_norm": 1.2599067687988281, "learning_rate": 8.640462449127524e-06, "loss": 8.066755294799805, "step": 6252 }, { "epoch": 0.810873459066484, "grad_norm": 0.9353962540626526, "learning_rate": 8.62899081070826e-06, "loss": 7.378547668457031, "step": 6253 }, { "epoch": 0.8110031365747307, "grad_norm": 0.9096171259880066, "learning_rate": 8.61752607337281e-06, "loss": 11.968794822692871, "step": 6254 }, { "epoch": 0.8111328140829774, "grad_norm": 0.8629798293113708, "learning_rate": 8.60606823903365e-06, "loss": 9.39035701751709, "step": 6255 }, { "epoch": 0.8112624915912241, "grad_norm": 1.2096645832061768, "learning_rate": 8.594617309602005e-06, "loss": 9.4895601272583, "step": 6256 }, { "epoch": 0.8113921690994708, "grad_norm": 0.8358806371688843, "learning_rate": 8.583173286988045e-06, "loss": 9.221113204956055, "step": 6257 }, { "epoch": 0.8115218466077174, "grad_norm": 0.8395615816116333, "learning_rate": 8.571736173100713e-06, "loss": 8.119571685791016, "step": 6258 }, { "epoch": 0.8116515241159641, "grad_norm": 1.0908373594284058, "learning_rate": 8.560305969847855e-06, "loss": 8.71547794342041, "step": 6259 }, { "epoch": 0.8117812016242107, "grad_norm": 1.1343176364898682, "learning_rate": 8.54888267913611e-06, "loss": 8.688336372375488, "step": 6260 }, { "epoch": 0.8119108791324575, "grad_norm": 0.8887840509414673, "learning_rate": 8.537466302871028e-06, "loss": 9.645624160766602, "step": 6261 }, { "epoch": 0.8120405566407042, "grad_norm": 1.083463191986084, "learning_rate": 8.526056842956936e-06, "loss": 8.062819480895996, "step": 6262 }, { "epoch": 0.8121702341489508, "grad_norm": 1.3768566846847534, "learning_rate": 8.51465430129707e-06, "loss": 9.868088722229004, "step": 6263 }, { "epoch": 0.8122999116571975, "grad_norm": 0.83318030834198, "learning_rate": 8.503258679793463e-06, "loss": 7.30871057510376, "step": 6264 }, { "epoch": 0.8124295891654442, "grad_norm": 0.9905955791473389, "learning_rate": 8.491869980347034e-06, "loss": 7.319469451904297, "step": 6265 }, { "epoch": 0.8125592666736908, "grad_norm": 0.8658564686775208, "learning_rate": 8.480488204857507e-06, "loss": 11.739509582519531, "step": 6266 }, { "epoch": 0.8126889441819375, "grad_norm": 0.9115229845046997, "learning_rate": 8.469113355223496e-06, "loss": 9.918835639953613, "step": 6267 }, { "epoch": 0.8128186216901843, "grad_norm": 1.018197774887085, "learning_rate": 8.457745433342407e-06, "loss": 9.987173080444336, "step": 6268 }, { "epoch": 0.8129482991984309, "grad_norm": 0.8109434247016907, "learning_rate": 8.446384441110555e-06, "loss": 7.589404106140137, "step": 6269 }, { "epoch": 0.8130779767066776, "grad_norm": 1.029211163520813, "learning_rate": 8.435030380423037e-06, "loss": 9.745908737182617, "step": 6270 }, { "epoch": 0.8132076542149242, "grad_norm": 0.9341521263122559, "learning_rate": 8.423683253173803e-06, "loss": 8.030022621154785, "step": 6271 }, { "epoch": 0.8133373317231709, "grad_norm": 0.6685107350349426, "learning_rate": 8.412343061255712e-06, "loss": 7.14272928237915, "step": 6272 }, { "epoch": 0.8134670092314176, "grad_norm": 1.061136245727539, "learning_rate": 8.401009806560384e-06, "loss": 9.03175163269043, "step": 6273 }, { "epoch": 0.8135966867396643, "grad_norm": 1.286166787147522, "learning_rate": 8.389683490978329e-06, "loss": 10.082147598266602, "step": 6274 }, { "epoch": 0.813726364247911, "grad_norm": 0.985247015953064, "learning_rate": 8.378364116398874e-06, "loss": 7.216543674468994, "step": 6275 }, { "epoch": 0.8138560417561577, "grad_norm": 0.7750351428985596, "learning_rate": 8.367051684710214e-06, "loss": 6.365745544433594, "step": 6276 }, { "epoch": 0.8139857192644043, "grad_norm": 0.9478216171264648, "learning_rate": 8.355746197799359e-06, "loss": 8.199411392211914, "step": 6277 }, { "epoch": 0.814115396772651, "grad_norm": 0.8410147428512573, "learning_rate": 8.344447657552185e-06, "loss": 9.023117065429688, "step": 6278 }, { "epoch": 0.8142450742808977, "grad_norm": 0.8869127035140991, "learning_rate": 8.333156065853376e-06, "loss": 8.441110610961914, "step": 6279 }, { "epoch": 0.8143747517891444, "grad_norm": 0.7428708672523499, "learning_rate": 8.321871424586513e-06, "loss": 7.341684341430664, "step": 6280 }, { "epoch": 0.8145044292973911, "grad_norm": 0.7809804677963257, "learning_rate": 8.310593735633948e-06, "loss": 5.942859172821045, "step": 6281 }, { "epoch": 0.8146341068056377, "grad_norm": 1.0792220830917358, "learning_rate": 8.299323000876935e-06, "loss": 12.130877494812012, "step": 6282 }, { "epoch": 0.8147637843138844, "grad_norm": 0.8246260285377502, "learning_rate": 8.288059222195527e-06, "loss": 7.666611194610596, "step": 6283 }, { "epoch": 0.8148934618221311, "grad_norm": 1.2000374794006348, "learning_rate": 8.276802401468637e-06, "loss": 13.969382286071777, "step": 6284 }, { "epoch": 0.8150231393303777, "grad_norm": 1.1638755798339844, "learning_rate": 8.265552540574e-06, "loss": 10.787644386291504, "step": 6285 }, { "epoch": 0.8151528168386244, "grad_norm": 0.8546318411827087, "learning_rate": 8.254309641388224e-06, "loss": 6.89978551864624, "step": 6286 }, { "epoch": 0.8152824943468712, "grad_norm": 1.5168538093566895, "learning_rate": 8.243073705786709e-06, "loss": 12.78270435333252, "step": 6287 }, { "epoch": 0.8154121718551178, "grad_norm": 0.9881123304367065, "learning_rate": 8.23184473564374e-06, "loss": 5.348386287689209, "step": 6288 }, { "epoch": 0.8155418493633645, "grad_norm": 1.0112055540084839, "learning_rate": 8.220622732832395e-06, "loss": 7.141124248504639, "step": 6289 }, { "epoch": 0.8156715268716112, "grad_norm": 0.9236774444580078, "learning_rate": 8.209407699224631e-06, "loss": 7.548269748687744, "step": 6290 }, { "epoch": 0.8158012043798578, "grad_norm": 0.9457418322563171, "learning_rate": 8.198199636691206e-06, "loss": 9.25938606262207, "step": 6291 }, { "epoch": 0.8159308818881045, "grad_norm": 0.7333966493606567, "learning_rate": 8.18699854710176e-06, "loss": 9.088398933410645, "step": 6292 }, { "epoch": 0.8160605593963512, "grad_norm": 0.9172837138175964, "learning_rate": 8.175804432324723e-06, "loss": 8.727092742919922, "step": 6293 }, { "epoch": 0.8161902369045979, "grad_norm": 1.0332783460617065, "learning_rate": 8.164617294227372e-06, "loss": 10.691210746765137, "step": 6294 }, { "epoch": 0.8163199144128446, "grad_norm": 0.9975681900978088, "learning_rate": 8.153437134675857e-06, "loss": 8.632161140441895, "step": 6295 }, { "epoch": 0.8164495919210912, "grad_norm": 1.4138799905776978, "learning_rate": 8.142263955535107e-06, "loss": 10.769895553588867, "step": 6296 }, { "epoch": 0.8165792694293379, "grad_norm": 0.9253423810005188, "learning_rate": 8.131097758668943e-06, "loss": 9.239606857299805, "step": 6297 }, { "epoch": 0.8167089469375846, "grad_norm": 1.0154526233673096, "learning_rate": 8.11993854593997e-06, "loss": 10.150175094604492, "step": 6298 }, { "epoch": 0.8168386244458312, "grad_norm": 0.7671341896057129, "learning_rate": 8.108786319209683e-06, "loss": 7.0720086097717285, "step": 6299 }, { "epoch": 0.816968301954078, "grad_norm": 1.0909128189086914, "learning_rate": 8.097641080338342e-06, "loss": 11.240225791931152, "step": 6300 }, { "epoch": 0.8170979794623247, "grad_norm": 1.4310311079025269, "learning_rate": 8.086502831185122e-06, "loss": 10.194892883300781, "step": 6301 }, { "epoch": 0.8172276569705713, "grad_norm": 1.164381504058838, "learning_rate": 8.075371573607954e-06, "loss": 8.43464183807373, "step": 6302 }, { "epoch": 0.817357334478818, "grad_norm": 0.9703372716903687, "learning_rate": 8.064247309463669e-06, "loss": 8.417404174804688, "step": 6303 }, { "epoch": 0.8174870119870646, "grad_norm": 0.5882115364074707, "learning_rate": 8.053130040607875e-06, "loss": 9.169490814208984, "step": 6304 }, { "epoch": 0.8176166894953113, "grad_norm": 0.7857863903045654, "learning_rate": 8.042019768895065e-06, "loss": 9.26394271850586, "step": 6305 }, { "epoch": 0.8177463670035581, "grad_norm": 1.2929494380950928, "learning_rate": 8.030916496178514e-06, "loss": 11.488727569580078, "step": 6306 }, { "epoch": 0.8178760445118047, "grad_norm": 0.7355303764343262, "learning_rate": 8.019820224310381e-06, "loss": 6.109799385070801, "step": 6307 }, { "epoch": 0.8180057220200514, "grad_norm": 1.038481593132019, "learning_rate": 8.008730955141613e-06, "loss": 9.389847755432129, "step": 6308 }, { "epoch": 0.8181353995282981, "grad_norm": 1.4077062606811523, "learning_rate": 7.99764869052202e-06, "loss": 9.146780967712402, "step": 6309 }, { "epoch": 0.8182650770365447, "grad_norm": 1.102537751197815, "learning_rate": 7.986573432300199e-06, "loss": 9.865315437316895, "step": 6310 }, { "epoch": 0.8183947545447914, "grad_norm": 0.7592713832855225, "learning_rate": 7.975505182323661e-06, "loss": 9.402667999267578, "step": 6311 }, { "epoch": 0.8185244320530382, "grad_norm": 1.1308002471923828, "learning_rate": 7.96444394243866e-06, "loss": 12.281047821044922, "step": 6312 }, { "epoch": 0.8186541095612848, "grad_norm": 1.0791836977005005, "learning_rate": 7.953389714490339e-06, "loss": 12.053422927856445, "step": 6313 }, { "epoch": 0.8187837870695315, "grad_norm": 1.2072538137435913, "learning_rate": 7.942342500322624e-06, "loss": 6.453895568847656, "step": 6314 }, { "epoch": 0.8189134645777781, "grad_norm": 0.6134593486785889, "learning_rate": 7.931302301778332e-06, "loss": 6.994765281677246, "step": 6315 }, { "epoch": 0.8190431420860248, "grad_norm": 0.8421190977096558, "learning_rate": 7.920269120699047e-06, "loss": 8.561553001403809, "step": 6316 }, { "epoch": 0.8191728195942715, "grad_norm": 1.5885512828826904, "learning_rate": 7.909242958925212e-06, "loss": 10.716316223144531, "step": 6317 }, { "epoch": 0.8193024971025181, "grad_norm": 1.29025137424469, "learning_rate": 7.898223818296114e-06, "loss": 11.535436630249023, "step": 6318 }, { "epoch": 0.8194321746107649, "grad_norm": 0.8192859292030334, "learning_rate": 7.887211700649828e-06, "loss": 5.941457271575928, "step": 6319 }, { "epoch": 0.8195618521190116, "grad_norm": 1.4996057748794556, "learning_rate": 7.876206607823312e-06, "loss": 7.140841960906982, "step": 6320 }, { "epoch": 0.8196915296272582, "grad_norm": 0.9516932964324951, "learning_rate": 7.865208541652287e-06, "loss": 6.278128147125244, "step": 6321 }, { "epoch": 0.8198212071355049, "grad_norm": 0.7600287199020386, "learning_rate": 7.854217503971368e-06, "loss": 6.604823112487793, "step": 6322 }, { "epoch": 0.8199508846437515, "grad_norm": 0.9309354424476624, "learning_rate": 7.843233496613945e-06, "loss": 8.660294532775879, "step": 6323 }, { "epoch": 0.8200805621519982, "grad_norm": 0.7718784809112549, "learning_rate": 7.832256521412273e-06, "loss": 10.550585746765137, "step": 6324 }, { "epoch": 0.820210239660245, "grad_norm": 0.9431084990501404, "learning_rate": 7.821286580197395e-06, "loss": 9.57634449005127, "step": 6325 }, { "epoch": 0.8203399171684916, "grad_norm": 0.9275202751159668, "learning_rate": 7.81032367479923e-06, "loss": 10.57529354095459, "step": 6326 }, { "epoch": 0.8204695946767383, "grad_norm": 0.909860372543335, "learning_rate": 7.799367807046477e-06, "loss": 8.10649585723877, "step": 6327 }, { "epoch": 0.820599272184985, "grad_norm": 1.4679402112960815, "learning_rate": 7.788418978766693e-06, "loss": 7.516653537750244, "step": 6328 }, { "epoch": 0.8207289496932316, "grad_norm": 0.8735551238059998, "learning_rate": 7.777477191786232e-06, "loss": 6.6410369873046875, "step": 6329 }, { "epoch": 0.8208586272014783, "grad_norm": 1.1854676008224487, "learning_rate": 7.766542447930314e-06, "loss": 8.324126243591309, "step": 6330 }, { "epoch": 0.820988304709725, "grad_norm": 0.6095749139785767, "learning_rate": 7.755614749022927e-06, "loss": 6.825267314910889, "step": 6331 }, { "epoch": 0.8211179822179717, "grad_norm": 1.0028289556503296, "learning_rate": 7.744694096886956e-06, "loss": 8.678675651550293, "step": 6332 }, { "epoch": 0.8212476597262184, "grad_norm": 1.0302270650863647, "learning_rate": 7.733780493344034e-06, "loss": 10.038374900817871, "step": 6333 }, { "epoch": 0.821377337234465, "grad_norm": 1.0115914344787598, "learning_rate": 7.722873940214687e-06, "loss": 8.343611717224121, "step": 6334 }, { "epoch": 0.8215070147427117, "grad_norm": 1.0369054079055786, "learning_rate": 7.711974439318204e-06, "loss": 7.7176594734191895, "step": 6335 }, { "epoch": 0.8216366922509584, "grad_norm": 1.002647876739502, "learning_rate": 7.701081992472753e-06, "loss": 8.237384796142578, "step": 6336 }, { "epoch": 0.821766369759205, "grad_norm": 1.2518854141235352, "learning_rate": 7.690196601495276e-06, "loss": 9.027706146240234, "step": 6337 }, { "epoch": 0.8218960472674518, "grad_norm": 0.729437530040741, "learning_rate": 7.679318268201585e-06, "loss": 8.748977661132812, "step": 6338 }, { "epoch": 0.8220257247756985, "grad_norm": 1.0204112529754639, "learning_rate": 7.66844699440627e-06, "loss": 8.956448554992676, "step": 6339 }, { "epoch": 0.8221554022839451, "grad_norm": 1.1856645345687866, "learning_rate": 7.657582781922783e-06, "loss": 9.953930854797363, "step": 6340 }, { "epoch": 0.8222850797921918, "grad_norm": 1.1727827787399292, "learning_rate": 7.646725632563362e-06, "loss": 8.493409156799316, "step": 6341 }, { "epoch": 0.8224147573004384, "grad_norm": 1.0821702480316162, "learning_rate": 7.635875548139104e-06, "loss": 10.908838272094727, "step": 6342 }, { "epoch": 0.8225444348086851, "grad_norm": 0.778262197971344, "learning_rate": 7.625032530459885e-06, "loss": 8.6812744140625, "step": 6343 }, { "epoch": 0.8226741123169319, "grad_norm": 0.834352433681488, "learning_rate": 7.6141965813344426e-06, "loss": 8.71654987335205, "step": 6344 }, { "epoch": 0.8228037898251785, "grad_norm": 1.1079599857330322, "learning_rate": 7.603367702570308e-06, "loss": 11.774545669555664, "step": 6345 }, { "epoch": 0.8229334673334252, "grad_norm": 0.9547861814498901, "learning_rate": 7.5925458959738514e-06, "loss": 8.40023136138916, "step": 6346 }, { "epoch": 0.8230631448416719, "grad_norm": 0.7377416491508484, "learning_rate": 7.581731163350242e-06, "loss": 9.967656135559082, "step": 6347 }, { "epoch": 0.8231928223499185, "grad_norm": 0.8232678174972534, "learning_rate": 7.570923506503486e-06, "loss": 10.51890754699707, "step": 6348 }, { "epoch": 0.8233224998581652, "grad_norm": 0.9025853872299194, "learning_rate": 7.5601229272364235e-06, "loss": 8.34392261505127, "step": 6349 }, { "epoch": 0.8234521773664119, "grad_norm": 0.7304142117500305, "learning_rate": 7.549329427350666e-06, "loss": 9.00480842590332, "step": 6350 }, { "epoch": 0.8235818548746586, "grad_norm": 1.0698418617248535, "learning_rate": 7.538543008646704e-06, "loss": 6.890283584594727, "step": 6351 }, { "epoch": 0.8237115323829053, "grad_norm": 0.7499573230743408, "learning_rate": 7.527763672923782e-06, "loss": 6.732539176940918, "step": 6352 }, { "epoch": 0.823841209891152, "grad_norm": 1.0143474340438843, "learning_rate": 7.516991421980029e-06, "loss": 11.798050880432129, "step": 6353 }, { "epoch": 0.8239708873993986, "grad_norm": 1.019691824913025, "learning_rate": 7.506226257612336e-06, "loss": 10.499752044677734, "step": 6354 }, { "epoch": 0.8241005649076453, "grad_norm": 0.6418088674545288, "learning_rate": 7.4954681816164606e-06, "loss": 6.827703952789307, "step": 6355 }, { "epoch": 0.8242302424158919, "grad_norm": 0.9133138060569763, "learning_rate": 7.484717195786922e-06, "loss": 8.62600040435791, "step": 6356 }, { "epoch": 0.8243599199241387, "grad_norm": 0.9737318158149719, "learning_rate": 7.473973301917125e-06, "loss": 10.517842292785645, "step": 6357 }, { "epoch": 0.8244895974323854, "grad_norm": 1.2252922058105469, "learning_rate": 7.463236501799226e-06, "loss": 9.842170715332031, "step": 6358 }, { "epoch": 0.824619274940632, "grad_norm": 1.1705076694488525, "learning_rate": 7.452506797224245e-06, "loss": 7.5527238845825195, "step": 6359 }, { "epoch": 0.8247489524488787, "grad_norm": 1.179120421409607, "learning_rate": 7.441784189981982e-06, "loss": 9.298638343811035, "step": 6360 }, { "epoch": 0.8248786299571254, "grad_norm": 0.9890930652618408, "learning_rate": 7.431068681861092e-06, "loss": 7.282470703125, "step": 6361 }, { "epoch": 0.825008307465372, "grad_norm": 1.2110953330993652, "learning_rate": 7.4203602746490075e-06, "loss": 9.249515533447266, "step": 6362 }, { "epoch": 0.8251379849736187, "grad_norm": 1.1520332098007202, "learning_rate": 7.4096589701320105e-06, "loss": 7.766674518585205, "step": 6363 }, { "epoch": 0.8252676624818654, "grad_norm": 0.9961749315261841, "learning_rate": 7.398964770095168e-06, "loss": 11.085927963256836, "step": 6364 }, { "epoch": 0.8253973399901121, "grad_norm": 0.9926933646202087, "learning_rate": 7.388277676322386e-06, "loss": 8.576746940612793, "step": 6365 }, { "epoch": 0.8255270174983588, "grad_norm": 0.983845055103302, "learning_rate": 7.3775976905963664e-06, "loss": 9.038453102111816, "step": 6366 }, { "epoch": 0.8256566950066054, "grad_norm": 0.7320117950439453, "learning_rate": 7.366924814698645e-06, "loss": 8.69894790649414, "step": 6367 }, { "epoch": 0.8257863725148521, "grad_norm": 1.174103021621704, "learning_rate": 7.356259050409542e-06, "loss": 8.807847023010254, "step": 6368 }, { "epoch": 0.8259160500230988, "grad_norm": 1.1945992708206177, "learning_rate": 7.345600399508235e-06, "loss": 9.328508377075195, "step": 6369 }, { "epoch": 0.8260457275313455, "grad_norm": 1.1357635259628296, "learning_rate": 7.3349488637726615e-06, "loss": 10.471312522888184, "step": 6370 }, { "epoch": 0.8261754050395922, "grad_norm": 1.5682430267333984, "learning_rate": 7.324304444979629e-06, "loss": 8.729334831237793, "step": 6371 }, { "epoch": 0.8263050825478389, "grad_norm": 0.9495506882667542, "learning_rate": 7.313667144904701e-06, "loss": 9.311607360839844, "step": 6372 }, { "epoch": 0.8264347600560855, "grad_norm": 0.8217946887016296, "learning_rate": 7.303036965322313e-06, "loss": 9.54475212097168, "step": 6373 }, { "epoch": 0.8265644375643322, "grad_norm": 1.2854160070419312, "learning_rate": 7.292413908005652e-06, "loss": 8.581116676330566, "step": 6374 }, { "epoch": 0.8266941150725788, "grad_norm": 1.2908836603164673, "learning_rate": 7.281797974726767e-06, "loss": 7.253791332244873, "step": 6375 }, { "epoch": 0.8268237925808256, "grad_norm": 0.674533486366272, "learning_rate": 7.271189167256487e-06, "loss": 5.971778869628906, "step": 6376 }, { "epoch": 0.8269534700890723, "grad_norm": 0.8211511373519897, "learning_rate": 7.260587487364473e-06, "loss": 9.470428466796875, "step": 6377 }, { "epoch": 0.8270831475973189, "grad_norm": 1.174950122833252, "learning_rate": 7.249992936819172e-06, "loss": 8.32745361328125, "step": 6378 }, { "epoch": 0.8272128251055656, "grad_norm": 0.9799957871437073, "learning_rate": 7.239405517387882e-06, "loss": 7.959794521331787, "step": 6379 }, { "epoch": 0.8273425026138123, "grad_norm": 0.9703140258789062, "learning_rate": 7.2288252308366724e-06, "loss": 10.983397483825684, "step": 6380 }, { "epoch": 0.8274721801220589, "grad_norm": 1.0700297355651855, "learning_rate": 7.218252078930421e-06, "loss": 9.680217742919922, "step": 6381 }, { "epoch": 0.8276018576303056, "grad_norm": 0.7922569513320923, "learning_rate": 7.207686063432861e-06, "loss": 9.638490676879883, "step": 6382 }, { "epoch": 0.8277315351385524, "grad_norm": 0.9576575756072998, "learning_rate": 7.197127186106478e-06, "loss": 8.187650680541992, "step": 6383 }, { "epoch": 0.827861212646799, "grad_norm": 1.193110466003418, "learning_rate": 7.186575448712624e-06, "loss": 8.122730255126953, "step": 6384 }, { "epoch": 0.8279908901550457, "grad_norm": 0.8193956017494202, "learning_rate": 7.1760308530114066e-06, "loss": 9.685857772827148, "step": 6385 }, { "epoch": 0.8281205676632923, "grad_norm": 1.2286418676376343, "learning_rate": 7.165493400761786e-06, "loss": 11.445847511291504, "step": 6386 }, { "epoch": 0.828250245171539, "grad_norm": 1.1915483474731445, "learning_rate": 7.154963093721473e-06, "loss": 8.746560096740723, "step": 6387 }, { "epoch": 0.8283799226797857, "grad_norm": 1.3755358457565308, "learning_rate": 7.1444399336470794e-06, "loss": 9.241287231445312, "step": 6388 }, { "epoch": 0.8285096001880324, "grad_norm": 1.1431117057800293, "learning_rate": 7.133923922293928e-06, "loss": 7.309866905212402, "step": 6389 }, { "epoch": 0.8286392776962791, "grad_norm": 0.9863778352737427, "learning_rate": 7.1234150614162165e-06, "loss": 5.656149864196777, "step": 6390 }, { "epoch": 0.8287689552045258, "grad_norm": 0.8940430879592896, "learning_rate": 7.112913352766903e-06, "loss": 10.888275146484375, "step": 6391 }, { "epoch": 0.8288986327127724, "grad_norm": 1.0451397895812988, "learning_rate": 7.102418798097798e-06, "loss": 7.881361961364746, "step": 6392 }, { "epoch": 0.8290283102210191, "grad_norm": 1.1627848148345947, "learning_rate": 7.091931399159462e-06, "loss": 10.928668975830078, "step": 6393 }, { "epoch": 0.8291579877292657, "grad_norm": 0.8242383003234863, "learning_rate": 7.081451157701335e-06, "loss": 10.364117622375488, "step": 6394 }, { "epoch": 0.8292876652375124, "grad_norm": 0.6608073115348816, "learning_rate": 7.07097807547158e-06, "loss": 4.998097896575928, "step": 6395 }, { "epoch": 0.8294173427457592, "grad_norm": 1.1161565780639648, "learning_rate": 7.060512154217247e-06, "loss": 10.719919204711914, "step": 6396 }, { "epoch": 0.8295470202540058, "grad_norm": 0.8026650547981262, "learning_rate": 7.05005339568412e-06, "loss": 8.3663911819458, "step": 6397 }, { "epoch": 0.8296766977622525, "grad_norm": 1.2231932878494263, "learning_rate": 7.039601801616846e-06, "loss": 11.127427101135254, "step": 6398 }, { "epoch": 0.8298063752704992, "grad_norm": 0.9214953780174255, "learning_rate": 7.029157373758832e-06, "loss": 7.249610900878906, "step": 6399 }, { "epoch": 0.8299360527787458, "grad_norm": 0.8384169340133667, "learning_rate": 7.0187201138523284e-06, "loss": 8.202947616577148, "step": 6400 }, { "epoch": 0.8300657302869925, "grad_norm": 1.0812650918960571, "learning_rate": 7.008290023638353e-06, "loss": 12.138203620910645, "step": 6401 }, { "epoch": 0.8301954077952393, "grad_norm": 1.0182629823684692, "learning_rate": 6.997867104856764e-06, "loss": 10.825141906738281, "step": 6402 }, { "epoch": 0.8303250853034859, "grad_norm": 0.7691700458526611, "learning_rate": 6.987451359246194e-06, "loss": 10.413497924804688, "step": 6403 }, { "epoch": 0.8304547628117326, "grad_norm": 0.869472086429596, "learning_rate": 6.977042788544081e-06, "loss": 8.587160110473633, "step": 6404 }, { "epoch": 0.8305844403199792, "grad_norm": 0.8793197870254517, "learning_rate": 6.9666413944866945e-06, "loss": 7.61123514175415, "step": 6405 }, { "epoch": 0.8307141178282259, "grad_norm": 1.0787369012832642, "learning_rate": 6.956247178809067e-06, "loss": 9.489033699035645, "step": 6406 }, { "epoch": 0.8308437953364726, "grad_norm": 0.7551145553588867, "learning_rate": 6.945860143245075e-06, "loss": 7.394040584564209, "step": 6407 }, { "epoch": 0.8309734728447193, "grad_norm": 1.0475852489471436, "learning_rate": 6.9354802895273575e-06, "loss": 7.278385639190674, "step": 6408 }, { "epoch": 0.831103150352966, "grad_norm": 1.1240277290344238, "learning_rate": 6.9251076193873945e-06, "loss": 9.790283203125, "step": 6409 }, { "epoch": 0.8312328278612127, "grad_norm": 0.9707816243171692, "learning_rate": 6.914742134555419e-06, "loss": 9.271276473999023, "step": 6410 }, { "epoch": 0.8313625053694593, "grad_norm": 1.0370285511016846, "learning_rate": 6.90438383676053e-06, "loss": 11.427268028259277, "step": 6411 }, { "epoch": 0.831492182877706, "grad_norm": 0.7036426067352295, "learning_rate": 6.894032727730559e-06, "loss": 8.134093284606934, "step": 6412 }, { "epoch": 0.8316218603859526, "grad_norm": 1.105088233947754, "learning_rate": 6.883688809192196e-06, "loss": 10.445442199707031, "step": 6413 }, { "epoch": 0.8317515378941993, "grad_norm": 0.9382274746894836, "learning_rate": 6.873352082870882e-06, "loss": 7.7514328956604, "step": 6414 }, { "epoch": 0.8318812154024461, "grad_norm": 0.878620982170105, "learning_rate": 6.863022550490905e-06, "loss": 5.786219596862793, "step": 6415 }, { "epoch": 0.8320108929106927, "grad_norm": 1.0654470920562744, "learning_rate": 6.852700213775315e-06, "loss": 9.636035919189453, "step": 6416 }, { "epoch": 0.8321405704189394, "grad_norm": 1.062045693397522, "learning_rate": 6.842385074445995e-06, "loss": 9.446714401245117, "step": 6417 }, { "epoch": 0.8322702479271861, "grad_norm": 1.1557995080947876, "learning_rate": 6.832077134223586e-06, "loss": 10.37267780303955, "step": 6418 }, { "epoch": 0.8323999254354327, "grad_norm": 0.7755340337753296, "learning_rate": 6.8217763948275726e-06, "loss": 7.732344150543213, "step": 6419 }, { "epoch": 0.8325296029436794, "grad_norm": 1.3247547149658203, "learning_rate": 6.811482857976204e-06, "loss": 9.18886661529541, "step": 6420 }, { "epoch": 0.8326592804519262, "grad_norm": 1.159297227859497, "learning_rate": 6.801196525386555e-06, "loss": 11.783700942993164, "step": 6421 }, { "epoch": 0.8327889579601728, "grad_norm": 1.0663472414016724, "learning_rate": 6.790917398774471e-06, "loss": 11.105583190917969, "step": 6422 }, { "epoch": 0.8329186354684195, "grad_norm": 0.9512671828269958, "learning_rate": 6.780645479854625e-06, "loss": 6.345874786376953, "step": 6423 }, { "epoch": 0.8330483129766661, "grad_norm": 1.3967753648757935, "learning_rate": 6.770380770340451e-06, "loss": 11.740605354309082, "step": 6424 }, { "epoch": 0.8331779904849128, "grad_norm": 0.6986982822418213, "learning_rate": 6.760123271944219e-06, "loss": 6.479085922241211, "step": 6425 }, { "epoch": 0.8333076679931595, "grad_norm": 1.0683293342590332, "learning_rate": 6.749872986376982e-06, "loss": 8.319100379943848, "step": 6426 }, { "epoch": 0.8334373455014062, "grad_norm": 1.065587043762207, "learning_rate": 6.739629915348567e-06, "loss": 8.103468894958496, "step": 6427 }, { "epoch": 0.8335670230096529, "grad_norm": 1.1419209241867065, "learning_rate": 6.729394060567634e-06, "loss": 8.655510902404785, "step": 6428 }, { "epoch": 0.8336967005178996, "grad_norm": 0.677178144454956, "learning_rate": 6.719165423741613e-06, "loss": 8.30712890625, "step": 6429 }, { "epoch": 0.8338263780261462, "grad_norm": 0.9876968264579773, "learning_rate": 6.708944006576756e-06, "loss": 10.051412582397461, "step": 6430 }, { "epoch": 0.8339560555343929, "grad_norm": 0.6295710802078247, "learning_rate": 6.698729810778065e-06, "loss": 4.943126201629639, "step": 6431 }, { "epoch": 0.8340857330426396, "grad_norm": 0.7889167070388794, "learning_rate": 6.6885228380493994e-06, "loss": 7.814659595489502, "step": 6432 }, { "epoch": 0.8342154105508862, "grad_norm": 0.9389986395835876, "learning_rate": 6.678323090093352e-06, "loss": 7.805235862731934, "step": 6433 }, { "epoch": 0.834345088059133, "grad_norm": 1.3874684572219849, "learning_rate": 6.668130568611358e-06, "loss": 12.130745887756348, "step": 6434 }, { "epoch": 0.8344747655673797, "grad_norm": 0.6930456161499023, "learning_rate": 6.6579452753036175e-06, "loss": 5.548529624938965, "step": 6435 }, { "epoch": 0.8346044430756263, "grad_norm": 0.9946678280830383, "learning_rate": 6.647767211869149e-06, "loss": 10.13090705871582, "step": 6436 }, { "epoch": 0.834734120583873, "grad_norm": 0.8962900042533875, "learning_rate": 6.637596380005734e-06, "loss": 7.011291027069092, "step": 6437 }, { "epoch": 0.8348637980921196, "grad_norm": 0.9597105383872986, "learning_rate": 6.627432781409981e-06, "loss": 7.3722944259643555, "step": 6438 }, { "epoch": 0.8349934756003663, "grad_norm": 1.0546765327453613, "learning_rate": 6.6172764177772625e-06, "loss": 10.801539421081543, "step": 6439 }, { "epoch": 0.8351231531086131, "grad_norm": 0.9350773692131042, "learning_rate": 6.607127290801779e-06, "loss": 8.728490829467773, "step": 6440 }, { "epoch": 0.8352528306168597, "grad_norm": 0.7620416879653931, "learning_rate": 6.596985402176476e-06, "loss": 8.246603965759277, "step": 6441 }, { "epoch": 0.8353825081251064, "grad_norm": 1.0006400346755981, "learning_rate": 6.586850753593143e-06, "loss": 6.184239387512207, "step": 6442 }, { "epoch": 0.835512185633353, "grad_norm": 1.352970838546753, "learning_rate": 6.576723346742314e-06, "loss": 7.821826457977295, "step": 6443 }, { "epoch": 0.8356418631415997, "grad_norm": 0.9288776516914368, "learning_rate": 6.566603183313363e-06, "loss": 10.736705780029297, "step": 6444 }, { "epoch": 0.8357715406498464, "grad_norm": 1.1565327644348145, "learning_rate": 6.556490264994408e-06, "loss": 10.672819137573242, "step": 6445 }, { "epoch": 0.835901218158093, "grad_norm": 0.786068856716156, "learning_rate": 6.546384593472399e-06, "loss": 10.368509292602539, "step": 6446 }, { "epoch": 0.8360308956663398, "grad_norm": 0.8343327641487122, "learning_rate": 6.536286170433048e-06, "loss": 9.707818031311035, "step": 6447 }, { "epoch": 0.8361605731745865, "grad_norm": 1.3091801404953003, "learning_rate": 6.5261949975608805e-06, "loss": 8.555418014526367, "step": 6448 }, { "epoch": 0.8362902506828331, "grad_norm": 1.1242797374725342, "learning_rate": 6.516111076539183e-06, "loss": 7.474353313446045, "step": 6449 }, { "epoch": 0.8364199281910798, "grad_norm": 1.0211045742034912, "learning_rate": 6.506034409050077e-06, "loss": 7.1772990226745605, "step": 6450 }, { "epoch": 0.8365496056993265, "grad_norm": 1.1969950199127197, "learning_rate": 6.4959649967744225e-06, "loss": 10.766871452331543, "step": 6451 }, { "epoch": 0.8366792832075731, "grad_norm": 1.142113208770752, "learning_rate": 6.485902841391917e-06, "loss": 9.0379056930542, "step": 6452 }, { "epoch": 0.8368089607158199, "grad_norm": 1.2309845685958862, "learning_rate": 6.475847944580999e-06, "loss": 11.192059516906738, "step": 6453 }, { "epoch": 0.8369386382240666, "grad_norm": 1.0416247844696045, "learning_rate": 6.465800308018955e-06, "loss": 9.374340057373047, "step": 6454 }, { "epoch": 0.8370683157323132, "grad_norm": 1.057429552078247, "learning_rate": 6.4557599333817945e-06, "loss": 8.723278045654297, "step": 6455 }, { "epoch": 0.8371979932405599, "grad_norm": 1.080816388130188, "learning_rate": 6.445726822344378e-06, "loss": 9.318432807922363, "step": 6456 }, { "epoch": 0.8373276707488065, "grad_norm": 0.8403823971748352, "learning_rate": 6.4357009765803e-06, "loss": 6.1563029289245605, "step": 6457 }, { "epoch": 0.8374573482570532, "grad_norm": 0.6944001913070679, "learning_rate": 6.425682397761995e-06, "loss": 8.071154594421387, "step": 6458 }, { "epoch": 0.8375870257653, "grad_norm": 0.9408883452415466, "learning_rate": 6.415671087560632e-06, "loss": 8.749720573425293, "step": 6459 }, { "epoch": 0.8377167032735466, "grad_norm": 0.8936173319816589, "learning_rate": 6.405667047646213e-06, "loss": 9.816007614135742, "step": 6460 }, { "epoch": 0.8378463807817933, "grad_norm": 0.5742277503013611, "learning_rate": 6.395670279687499e-06, "loss": 10.661517143249512, "step": 6461 }, { "epoch": 0.83797605829004, "grad_norm": 0.6654837131500244, "learning_rate": 6.3856807853520555e-06, "loss": 5.861518383026123, "step": 6462 }, { "epoch": 0.8381057357982866, "grad_norm": 0.8829064965248108, "learning_rate": 6.375698566306215e-06, "loss": 9.430435180664062, "step": 6463 }, { "epoch": 0.8382354133065333, "grad_norm": 1.3621882200241089, "learning_rate": 6.365723624215108e-06, "loss": 9.355834007263184, "step": 6464 }, { "epoch": 0.83836509081478, "grad_norm": 0.6533076167106628, "learning_rate": 6.3557559607426765e-06, "loss": 7.051814079284668, "step": 6465 }, { "epoch": 0.8384947683230267, "grad_norm": 0.9354726672172546, "learning_rate": 6.3457955775515885e-06, "loss": 8.688708305358887, "step": 6466 }, { "epoch": 0.8386244458312734, "grad_norm": 0.9286894798278809, "learning_rate": 6.335842476303355e-06, "loss": 7.972382068634033, "step": 6467 }, { "epoch": 0.83875412333952, "grad_norm": 1.0131806135177612, "learning_rate": 6.325896658658237e-06, "loss": 8.94521427154541, "step": 6468 }, { "epoch": 0.8388838008477667, "grad_norm": 0.8726962804794312, "learning_rate": 6.315958126275312e-06, "loss": 7.022064208984375, "step": 6469 }, { "epoch": 0.8390134783560134, "grad_norm": 0.9415081143379211, "learning_rate": 6.306026880812393e-06, "loss": 9.616463661193848, "step": 6470 }, { "epoch": 0.83914315586426, "grad_norm": 1.11025071144104, "learning_rate": 6.296102923926133e-06, "loss": 6.00941801071167, "step": 6471 }, { "epoch": 0.8392728333725068, "grad_norm": 1.1599470376968384, "learning_rate": 6.286186257271931e-06, "loss": 10.281756401062012, "step": 6472 }, { "epoch": 0.8394025108807535, "grad_norm": 1.4788084030151367, "learning_rate": 6.27627688250399e-06, "loss": 10.56534481048584, "step": 6473 }, { "epoch": 0.8395321883890001, "grad_norm": 1.0851664543151855, "learning_rate": 6.266374801275282e-06, "loss": 10.23312759399414, "step": 6474 }, { "epoch": 0.8396618658972468, "grad_norm": 1.5181958675384521, "learning_rate": 6.256480015237581e-06, "loss": 11.52255916595459, "step": 6475 }, { "epoch": 0.8397915434054934, "grad_norm": 1.1689128875732422, "learning_rate": 6.246592526041417e-06, "loss": 10.139830589294434, "step": 6476 }, { "epoch": 0.8399212209137401, "grad_norm": 0.9268513321876526, "learning_rate": 6.236712335336131e-06, "loss": 9.057515144348145, "step": 6477 }, { "epoch": 0.8400508984219868, "grad_norm": 0.9272974133491516, "learning_rate": 6.226839444769827e-06, "loss": 8.459494590759277, "step": 6478 }, { "epoch": 0.8401805759302335, "grad_norm": 1.7342779636383057, "learning_rate": 6.216973855989411e-06, "loss": 12.162774085998535, "step": 6479 }, { "epoch": 0.8403102534384802, "grad_norm": 0.9495093822479248, "learning_rate": 6.207115570640537e-06, "loss": 8.048535346984863, "step": 6480 }, { "epoch": 0.8404399309467269, "grad_norm": 1.0754570960998535, "learning_rate": 6.197264590367691e-06, "loss": 6.712359428405762, "step": 6481 }, { "epoch": 0.8405696084549735, "grad_norm": 0.7419343590736389, "learning_rate": 6.1874209168140794e-06, "loss": 7.762340068817139, "step": 6482 }, { "epoch": 0.8406992859632202, "grad_norm": 0.9784849882125854, "learning_rate": 6.177584551621751e-06, "loss": 8.839436531066895, "step": 6483 }, { "epoch": 0.8408289634714669, "grad_norm": 0.7895486950874329, "learning_rate": 6.167755496431482e-06, "loss": 7.606350898742676, "step": 6484 }, { "epoch": 0.8409586409797136, "grad_norm": 0.7977646589279175, "learning_rate": 6.157933752882872e-06, "loss": 5.912978649139404, "step": 6485 }, { "epoch": 0.8410883184879603, "grad_norm": 1.1108490228652954, "learning_rate": 6.148119322614271e-06, "loss": 7.9208221435546875, "step": 6486 }, { "epoch": 0.841217995996207, "grad_norm": 1.5834736824035645, "learning_rate": 6.138312207262831e-06, "loss": 13.158041000366211, "step": 6487 }, { "epoch": 0.8413476735044536, "grad_norm": 0.9807643890380859, "learning_rate": 6.1285124084644595e-06, "loss": 11.492680549621582, "step": 6488 }, { "epoch": 0.8414773510127003, "grad_norm": 1.0634267330169678, "learning_rate": 6.118719927853877e-06, "loss": 8.823945045471191, "step": 6489 }, { "epoch": 0.8416070285209469, "grad_norm": 0.8797471523284912, "learning_rate": 6.1089347670645554e-06, "loss": 10.877199172973633, "step": 6490 }, { "epoch": 0.8417367060291937, "grad_norm": 0.9944134950637817, "learning_rate": 6.099156927728739e-06, "loss": 8.290319442749023, "step": 6491 }, { "epoch": 0.8418663835374404, "grad_norm": 1.8306138515472412, "learning_rate": 6.089386411477488e-06, "loss": 12.404711723327637, "step": 6492 }, { "epoch": 0.841996061045687, "grad_norm": 0.5818789005279541, "learning_rate": 6.0796232199406e-06, "loss": 4.672701358795166, "step": 6493 }, { "epoch": 0.8421257385539337, "grad_norm": 1.3262275457382202, "learning_rate": 6.069867354746689e-06, "loss": 13.235251426696777, "step": 6494 }, { "epoch": 0.8422554160621804, "grad_norm": 0.9340881705284119, "learning_rate": 6.060118817523109e-06, "loss": 9.241195678710938, "step": 6495 }, { "epoch": 0.842385093570427, "grad_norm": 0.8544691205024719, "learning_rate": 6.050377609896024e-06, "loss": 5.737022399902344, "step": 6496 }, { "epoch": 0.8425147710786737, "grad_norm": 0.8903082013130188, "learning_rate": 6.040643733490348e-06, "loss": 10.758995056152344, "step": 6497 }, { "epoch": 0.8426444485869204, "grad_norm": 0.9152576923370361, "learning_rate": 6.030917189929808e-06, "loss": 11.31591510772705, "step": 6498 }, { "epoch": 0.8427741260951671, "grad_norm": 1.1208438873291016, "learning_rate": 6.021197980836857e-06, "loss": 8.601517677307129, "step": 6499 }, { "epoch": 0.8429038036034138, "grad_norm": 1.1576849222183228, "learning_rate": 6.011486107832775e-06, "loss": 8.863088607788086, "step": 6500 }, { "epoch": 0.8430334811116604, "grad_norm": 1.2144691944122314, "learning_rate": 6.001781572537585e-06, "loss": 9.926900863647461, "step": 6501 }, { "epoch": 0.8431631586199071, "grad_norm": 1.2038832902908325, "learning_rate": 5.992084376570095e-06, "loss": 9.632407188415527, "step": 6502 }, { "epoch": 0.8432928361281538, "grad_norm": 0.9456437826156616, "learning_rate": 5.982394521547896e-06, "loss": 8.56736946105957, "step": 6503 }, { "epoch": 0.8434225136364005, "grad_norm": 1.1546263694763184, "learning_rate": 5.9727120090873615e-06, "loss": 8.737907409667969, "step": 6504 }, { "epoch": 0.8435521911446472, "grad_norm": 0.8725384473800659, "learning_rate": 5.963036840803604e-06, "loss": 6.885201454162598, "step": 6505 }, { "epoch": 0.8436818686528939, "grad_norm": 0.8374704122543335, "learning_rate": 5.953369018310562e-06, "loss": 8.789694786071777, "step": 6506 }, { "epoch": 0.8438115461611405, "grad_norm": 0.9283775091171265, "learning_rate": 5.943708543220894e-06, "loss": 7.914303779602051, "step": 6507 }, { "epoch": 0.8439412236693872, "grad_norm": 1.0080474615097046, "learning_rate": 5.9340554171460836e-06, "loss": 12.142788887023926, "step": 6508 }, { "epoch": 0.8440709011776338, "grad_norm": 1.1593304872512817, "learning_rate": 5.924409641696343e-06, "loss": 10.111377716064453, "step": 6509 }, { "epoch": 0.8442005786858805, "grad_norm": 0.9361472129821777, "learning_rate": 5.914771218480708e-06, "loss": 7.2210211753845215, "step": 6510 }, { "epoch": 0.8443302561941273, "grad_norm": 0.8355392217636108, "learning_rate": 5.9051401491069306e-06, "loss": 6.489295482635498, "step": 6511 }, { "epoch": 0.8444599337023739, "grad_norm": 1.0981264114379883, "learning_rate": 5.895516435181591e-06, "loss": 8.57850456237793, "step": 6512 }, { "epoch": 0.8445896112106206, "grad_norm": 1.1746740341186523, "learning_rate": 5.8859000783100135e-06, "loss": 9.428192138671875, "step": 6513 }, { "epoch": 0.8447192887188673, "grad_norm": 0.9934856295585632, "learning_rate": 5.87629108009628e-06, "loss": 8.831457138061523, "step": 6514 }, { "epoch": 0.8448489662271139, "grad_norm": 1.1383814811706543, "learning_rate": 5.866689442143291e-06, "loss": 10.20910358428955, "step": 6515 }, { "epoch": 0.8449786437353606, "grad_norm": 0.9496315717697144, "learning_rate": 5.8570951660526675e-06, "loss": 7.429981708526611, "step": 6516 }, { "epoch": 0.8451083212436074, "grad_norm": 1.2420300245285034, "learning_rate": 5.8475082534248435e-06, "loss": 12.059139251708984, "step": 6517 }, { "epoch": 0.845237998751854, "grad_norm": 1.107221245765686, "learning_rate": 5.837928705859002e-06, "loss": 14.000457763671875, "step": 6518 }, { "epoch": 0.8453676762601007, "grad_norm": 0.9146998524665833, "learning_rate": 5.8283565249531105e-06, "loss": 8.602141380310059, "step": 6519 }, { "epoch": 0.8454973537683473, "grad_norm": 0.8142831921577454, "learning_rate": 5.81879171230389e-06, "loss": 7.287631988525391, "step": 6520 }, { "epoch": 0.845627031276594, "grad_norm": 0.9589693546295166, "learning_rate": 5.8092342695068636e-06, "loss": 8.501012802124023, "step": 6521 }, { "epoch": 0.8457567087848407, "grad_norm": 2.132383346557617, "learning_rate": 5.799684198156274e-06, "loss": 12.668087005615234, "step": 6522 }, { "epoch": 0.8458863862930874, "grad_norm": 0.8305979371070862, "learning_rate": 5.790141499845203e-06, "loss": 8.386796951293945, "step": 6523 }, { "epoch": 0.8460160638013341, "grad_norm": 0.7456260919570923, "learning_rate": 5.780606176165426e-06, "loss": 5.801548957824707, "step": 6524 }, { "epoch": 0.8461457413095808, "grad_norm": 0.8281247019767761, "learning_rate": 5.771078228707566e-06, "loss": 7.248713493347168, "step": 6525 }, { "epoch": 0.8462754188178274, "grad_norm": 0.9618121981620789, "learning_rate": 5.76155765906094e-06, "loss": 7.704721450805664, "step": 6526 }, { "epoch": 0.8464050963260741, "grad_norm": 1.2922592163085938, "learning_rate": 5.7520444688137e-06, "loss": 9.634539604187012, "step": 6527 }, { "epoch": 0.8465347738343207, "grad_norm": 1.0999308824539185, "learning_rate": 5.7425386595527155e-06, "loss": 7.442572116851807, "step": 6528 }, { "epoch": 0.8466644513425674, "grad_norm": 0.9546499848365784, "learning_rate": 5.733040232863668e-06, "loss": 9.004061698913574, "step": 6529 }, { "epoch": 0.8467941288508142, "grad_norm": 1.0257799625396729, "learning_rate": 5.723549190330962e-06, "loss": 7.379632472991943, "step": 6530 }, { "epoch": 0.8469238063590608, "grad_norm": 0.9949927926063538, "learning_rate": 5.714065533537826e-06, "loss": 10.706860542297363, "step": 6531 }, { "epoch": 0.8470534838673075, "grad_norm": 0.8353095054626465, "learning_rate": 5.704589264066196e-06, "loss": 9.17799186706543, "step": 6532 }, { "epoch": 0.8471831613755542, "grad_norm": 1.572401523590088, "learning_rate": 5.695120383496827e-06, "loss": 10.665200233459473, "step": 6533 }, { "epoch": 0.8473128388838008, "grad_norm": 1.2297301292419434, "learning_rate": 5.685658893409207e-06, "loss": 12.07806396484375, "step": 6534 }, { "epoch": 0.8474425163920475, "grad_norm": 0.8629184365272522, "learning_rate": 5.676204795381613e-06, "loss": 11.16774845123291, "step": 6535 }, { "epoch": 0.8475721939002943, "grad_norm": 1.050887942314148, "learning_rate": 5.666758090991065e-06, "loss": 9.010688781738281, "step": 6536 }, { "epoch": 0.8477018714085409, "grad_norm": 0.7695178389549255, "learning_rate": 5.6573187818133865e-06, "loss": 9.964876174926758, "step": 6537 }, { "epoch": 0.8478315489167876, "grad_norm": 1.2792409658432007, "learning_rate": 5.647886869423119e-06, "loss": 13.019089698791504, "step": 6538 }, { "epoch": 0.8479612264250342, "grad_norm": 1.4545705318450928, "learning_rate": 5.638462355393626e-06, "loss": 9.768677711486816, "step": 6539 }, { "epoch": 0.8480909039332809, "grad_norm": 0.7302910685539246, "learning_rate": 5.6290452412969764e-06, "loss": 7.233519077301025, "step": 6540 }, { "epoch": 0.8482205814415276, "grad_norm": 0.935576319694519, "learning_rate": 5.6196355287040534e-06, "loss": 9.239537239074707, "step": 6541 }, { "epoch": 0.8483502589497742, "grad_norm": 0.6970850229263306, "learning_rate": 5.610233219184502e-06, "loss": 8.756736755371094, "step": 6542 }, { "epoch": 0.848479936458021, "grad_norm": 0.9613794684410095, "learning_rate": 5.600838314306683e-06, "loss": 8.05566120147705, "step": 6543 }, { "epoch": 0.8486096139662677, "grad_norm": 1.0853911638259888, "learning_rate": 5.591450815637794e-06, "loss": 10.017945289611816, "step": 6544 }, { "epoch": 0.8487392914745143, "grad_norm": 0.8118574619293213, "learning_rate": 5.582070724743732e-06, "loss": 8.644377708435059, "step": 6545 }, { "epoch": 0.848868968982761, "grad_norm": 1.1481670141220093, "learning_rate": 5.572698043189206e-06, "loss": 8.363790512084961, "step": 6546 }, { "epoch": 0.8489986464910076, "grad_norm": 0.6631717681884766, "learning_rate": 5.5633327725376495e-06, "loss": 7.0240888595581055, "step": 6547 }, { "epoch": 0.8491283239992543, "grad_norm": 0.5343708395957947, "learning_rate": 5.553974914351307e-06, "loss": 7.13426399230957, "step": 6548 }, { "epoch": 0.8492580015075011, "grad_norm": 0.9332928657531738, "learning_rate": 5.544624470191129e-06, "loss": 7.543526649475098, "step": 6549 }, { "epoch": 0.8493876790157477, "grad_norm": 0.9888833165168762, "learning_rate": 5.535281441616891e-06, "loss": 7.327638149261475, "step": 6550 }, { "epoch": 0.8495173565239944, "grad_norm": 0.6342970132827759, "learning_rate": 5.525945830187068e-06, "loss": 7.537980079650879, "step": 6551 }, { "epoch": 0.8496470340322411, "grad_norm": 1.2105214595794678, "learning_rate": 5.516617637458965e-06, "loss": 9.053359031677246, "step": 6552 }, { "epoch": 0.8497767115404877, "grad_norm": 0.7897641658782959, "learning_rate": 5.507296864988582e-06, "loss": 7.439352512359619, "step": 6553 }, { "epoch": 0.8499063890487344, "grad_norm": 1.0343577861785889, "learning_rate": 5.49798351433074e-06, "loss": 8.382974624633789, "step": 6554 }, { "epoch": 0.8500360665569812, "grad_norm": 0.7453405261039734, "learning_rate": 5.488677587038976e-06, "loss": 7.086324214935303, "step": 6555 }, { "epoch": 0.8501657440652278, "grad_norm": 0.8764454126358032, "learning_rate": 5.479379084665626e-06, "loss": 8.644440650939941, "step": 6556 }, { "epoch": 0.8502954215734745, "grad_norm": 1.1278858184814453, "learning_rate": 5.470088008761748e-06, "loss": 10.312560081481934, "step": 6557 }, { "epoch": 0.8504250990817211, "grad_norm": 0.8513004779815674, "learning_rate": 5.4608043608772085e-06, "loss": 8.874052047729492, "step": 6558 }, { "epoch": 0.8505547765899678, "grad_norm": 0.9578709006309509, "learning_rate": 5.451528142560591e-06, "loss": 12.439475059509277, "step": 6559 }, { "epoch": 0.8506844540982145, "grad_norm": 1.1343364715576172, "learning_rate": 5.4422593553592695e-06, "loss": 8.523269653320312, "step": 6560 }, { "epoch": 0.8508141316064611, "grad_norm": 1.007393479347229, "learning_rate": 5.43299800081935e-06, "loss": 9.225325584411621, "step": 6561 }, { "epoch": 0.8509438091147079, "grad_norm": 0.9538687467575073, "learning_rate": 5.423744080485743e-06, "loss": 8.50330638885498, "step": 6562 }, { "epoch": 0.8510734866229546, "grad_norm": 1.1990301609039307, "learning_rate": 5.414497595902068e-06, "loss": 10.734182357788086, "step": 6563 }, { "epoch": 0.8512031641312012, "grad_norm": 1.0948505401611328, "learning_rate": 5.405258548610742e-06, "loss": 7.975505352020264, "step": 6564 }, { "epoch": 0.8513328416394479, "grad_norm": 0.6246036291122437, "learning_rate": 5.396026940152915e-06, "loss": 5.477806091308594, "step": 6565 }, { "epoch": 0.8514625191476946, "grad_norm": 1.0648210048675537, "learning_rate": 5.386802772068528e-06, "loss": 7.06046199798584, "step": 6566 }, { "epoch": 0.8515921966559412, "grad_norm": 0.685273289680481, "learning_rate": 5.377586045896232e-06, "loss": 8.800536155700684, "step": 6567 }, { "epoch": 0.851721874164188, "grad_norm": 0.9755603671073914, "learning_rate": 5.368376763173499e-06, "loss": 8.327534675598145, "step": 6568 }, { "epoch": 0.8518515516724346, "grad_norm": 0.8926305174827576, "learning_rate": 5.3591749254365e-06, "loss": 10.581093788146973, "step": 6569 }, { "epoch": 0.8519812291806813, "grad_norm": 1.1849323511123657, "learning_rate": 5.349980534220206e-06, "loss": 8.173508644104004, "step": 6570 }, { "epoch": 0.852110906688928, "grad_norm": 1.4822660684585571, "learning_rate": 5.340793591058318e-06, "loss": 11.162426948547363, "step": 6571 }, { "epoch": 0.8522405841971746, "grad_norm": 1.051212191581726, "learning_rate": 5.331614097483323e-06, "loss": 8.038230895996094, "step": 6572 }, { "epoch": 0.8523702617054213, "grad_norm": 1.0705236196517944, "learning_rate": 5.322442055026427e-06, "loss": 7.9056172370910645, "step": 6573 }, { "epoch": 0.852499939213668, "grad_norm": 1.2977054119110107, "learning_rate": 5.313277465217642e-06, "loss": 10.729097366333008, "step": 6574 }, { "epoch": 0.8526296167219147, "grad_norm": 1.3719689846038818, "learning_rate": 5.304120329585688e-06, "loss": 11.72844123840332, "step": 6575 }, { "epoch": 0.8527592942301614, "grad_norm": 1.16208016872406, "learning_rate": 5.294970649658077e-06, "loss": 11.172887802124023, "step": 6576 }, { "epoch": 0.852888971738408, "grad_norm": 0.6506868600845337, "learning_rate": 5.2858284269610595e-06, "loss": 7.36994743347168, "step": 6577 }, { "epoch": 0.8530186492466547, "grad_norm": 0.8572489619255066, "learning_rate": 5.276693663019639e-06, "loss": 6.787745952606201, "step": 6578 }, { "epoch": 0.8531483267549014, "grad_norm": 0.9568966031074524, "learning_rate": 5.267566359357584e-06, "loss": 7.595292568206787, "step": 6579 }, { "epoch": 0.853278004263148, "grad_norm": 0.9730983376502991, "learning_rate": 5.258446517497423e-06, "loss": 7.627939224243164, "step": 6580 }, { "epoch": 0.8534076817713948, "grad_norm": 1.0305373668670654, "learning_rate": 5.249334138960444e-06, "loss": 9.780957221984863, "step": 6581 }, { "epoch": 0.8535373592796415, "grad_norm": 1.157183289527893, "learning_rate": 5.240229225266658e-06, "loss": 8.691021919250488, "step": 6582 }, { "epoch": 0.8536670367878881, "grad_norm": 0.9359715580940247, "learning_rate": 5.231131777934872e-06, "loss": 6.280360698699951, "step": 6583 }, { "epoch": 0.8537967142961348, "grad_norm": 1.1540932655334473, "learning_rate": 5.2220417984826135e-06, "loss": 7.88340950012207, "step": 6584 }, { "epoch": 0.8539263918043815, "grad_norm": 1.0224940776824951, "learning_rate": 5.212959288426189e-06, "loss": 6.981369972229004, "step": 6585 }, { "epoch": 0.8540560693126281, "grad_norm": 0.6725085377693176, "learning_rate": 5.203884249280638e-06, "loss": 7.044436454772949, "step": 6586 }, { "epoch": 0.8541857468208749, "grad_norm": 0.8849794864654541, "learning_rate": 5.194816682559778e-06, "loss": 9.41451644897461, "step": 6587 }, { "epoch": 0.8543154243291216, "grad_norm": 0.9460564851760864, "learning_rate": 5.185756589776148e-06, "loss": 7.667830944061279, "step": 6588 }, { "epoch": 0.8544451018373682, "grad_norm": 1.1752545833587646, "learning_rate": 5.176703972441083e-06, "loss": 12.33064079284668, "step": 6589 }, { "epoch": 0.8545747793456149, "grad_norm": 0.8071169853210449, "learning_rate": 5.1676588320646255e-06, "loss": 12.709524154663086, "step": 6590 }, { "epoch": 0.8547044568538615, "grad_norm": 0.8366607427597046, "learning_rate": 5.158621170155609e-06, "loss": 7.165033340454102, "step": 6591 }, { "epoch": 0.8548341343621082, "grad_norm": 0.7358231544494629, "learning_rate": 5.149590988221586e-06, "loss": 11.412285804748535, "step": 6592 }, { "epoch": 0.8549638118703549, "grad_norm": 1.124625563621521, "learning_rate": 5.140568287768899e-06, "loss": 7.345845699310303, "step": 6593 }, { "epoch": 0.8550934893786016, "grad_norm": 0.9044021964073181, "learning_rate": 5.131553070302597e-06, "loss": 8.488512992858887, "step": 6594 }, { "epoch": 0.8552231668868483, "grad_norm": 0.9563527703285217, "learning_rate": 5.122545337326529e-06, "loss": 11.074071884155273, "step": 6595 }, { "epoch": 0.855352844395095, "grad_norm": 0.9367541074752808, "learning_rate": 5.113545090343253e-06, "loss": 7.2315568923950195, "step": 6596 }, { "epoch": 0.8554825219033416, "grad_norm": 0.9381884932518005, "learning_rate": 5.104552330854112e-06, "loss": 8.277636528015137, "step": 6597 }, { "epoch": 0.8556121994115883, "grad_norm": 1.0206166505813599, "learning_rate": 5.0955670603591686e-06, "loss": 10.156343460083008, "step": 6598 }, { "epoch": 0.8557418769198349, "grad_norm": 0.9372122287750244, "learning_rate": 5.086589280357268e-06, "loss": 7.597858428955078, "step": 6599 }, { "epoch": 0.8558715544280817, "grad_norm": 0.7835038304328918, "learning_rate": 5.077618992345989e-06, "loss": 7.303584098815918, "step": 6600 }, { "epoch": 0.8560012319363284, "grad_norm": 1.0100516080856323, "learning_rate": 5.068656197821647e-06, "loss": 10.909760475158691, "step": 6601 }, { "epoch": 0.856130909444575, "grad_norm": 1.0241018533706665, "learning_rate": 5.059700898279346e-06, "loss": 10.09005069732666, "step": 6602 }, { "epoch": 0.8562605869528217, "grad_norm": 1.0215239524841309, "learning_rate": 5.050753095212896e-06, "loss": 9.417829513549805, "step": 6603 }, { "epoch": 0.8563902644610684, "grad_norm": 1.209506630897522, "learning_rate": 5.041812790114891e-06, "loss": 9.305277824401855, "step": 6604 }, { "epoch": 0.856519941969315, "grad_norm": 0.9800265431404114, "learning_rate": 5.032879984476641e-06, "loss": 9.722782135009766, "step": 6605 }, { "epoch": 0.8566496194775617, "grad_norm": 0.7785544395446777, "learning_rate": 5.023954679788251e-06, "loss": 9.090087890625, "step": 6606 }, { "epoch": 0.8567792969858085, "grad_norm": 0.8661383390426636, "learning_rate": 5.015036877538526e-06, "loss": 7.180037021636963, "step": 6607 }, { "epoch": 0.8569089744940551, "grad_norm": 0.7324278354644775, "learning_rate": 5.006126579215059e-06, "loss": 5.594801425933838, "step": 6608 }, { "epoch": 0.8570386520023018, "grad_norm": 0.8287866711616516, "learning_rate": 4.997223786304156e-06, "loss": 11.168450355529785, "step": 6609 }, { "epoch": 0.8571683295105484, "grad_norm": 0.7993742227554321, "learning_rate": 4.9883285002909065e-06, "loss": 11.267220497131348, "step": 6610 }, { "epoch": 0.8572980070187951, "grad_norm": 0.7994088530540466, "learning_rate": 4.979440722659112e-06, "loss": 6.927587985992432, "step": 6611 }, { "epoch": 0.8574276845270418, "grad_norm": 1.0147581100463867, "learning_rate": 4.97056045489136e-06, "loss": 7.527735710144043, "step": 6612 }, { "epoch": 0.8575573620352885, "grad_norm": 1.3038166761398315, "learning_rate": 4.961687698468942e-06, "loss": 9.37353515625, "step": 6613 }, { "epoch": 0.8576870395435352, "grad_norm": 1.0341644287109375, "learning_rate": 4.952822454871941e-06, "loss": 9.931344985961914, "step": 6614 }, { "epoch": 0.8578167170517819, "grad_norm": 0.8449884653091431, "learning_rate": 4.943964725579142e-06, "loss": 6.209322929382324, "step": 6615 }, { "epoch": 0.8579463945600285, "grad_norm": 0.8351903557777405, "learning_rate": 4.935114512068128e-06, "loss": 9.054524421691895, "step": 6616 }, { "epoch": 0.8580760720682752, "grad_norm": 1.0771796703338623, "learning_rate": 4.92627181581517e-06, "loss": 6.780426502227783, "step": 6617 }, { "epoch": 0.8582057495765218, "grad_norm": 0.8731857538223267, "learning_rate": 4.917436638295342e-06, "loss": 7.933264255523682, "step": 6618 }, { "epoch": 0.8583354270847686, "grad_norm": 0.6934941411018372, "learning_rate": 4.908608980982421e-06, "loss": 7.799936771392822, "step": 6619 }, { "epoch": 0.8584651045930153, "grad_norm": 0.974254310131073, "learning_rate": 4.899788845348957e-06, "loss": 9.444660186767578, "step": 6620 }, { "epoch": 0.8585947821012619, "grad_norm": 1.340718150138855, "learning_rate": 4.890976232866218e-06, "loss": 11.893802642822266, "step": 6621 }, { "epoch": 0.8587244596095086, "grad_norm": 0.903210461139679, "learning_rate": 4.88217114500425e-06, "loss": 7.589725971221924, "step": 6622 }, { "epoch": 0.8588541371177553, "grad_norm": 0.684021532535553, "learning_rate": 4.873373583231816e-06, "loss": 7.355033874511719, "step": 6623 }, { "epoch": 0.8589838146260019, "grad_norm": 1.3160300254821777, "learning_rate": 4.864583549016433e-06, "loss": 10.64647388458252, "step": 6624 }, { "epoch": 0.8591134921342486, "grad_norm": 1.3944092988967896, "learning_rate": 4.85580104382437e-06, "loss": 10.677543640136719, "step": 6625 }, { "epoch": 0.8592431696424954, "grad_norm": 1.0364896059036255, "learning_rate": 4.84702606912063e-06, "loss": 7.7085466384887695, "step": 6626 }, { "epoch": 0.859372847150742, "grad_norm": 0.9146629571914673, "learning_rate": 4.838258626368969e-06, "loss": 6.956519603729248, "step": 6627 }, { "epoch": 0.8595025246589887, "grad_norm": 1.1865971088409424, "learning_rate": 4.829498717031866e-06, "loss": 8.996993064880371, "step": 6628 }, { "epoch": 0.8596322021672353, "grad_norm": 0.6988036632537842, "learning_rate": 4.820746342570576e-06, "loss": 6.743262767791748, "step": 6629 }, { "epoch": 0.859761879675482, "grad_norm": 0.7381981611251831, "learning_rate": 4.812001504445069e-06, "loss": 9.630768775939941, "step": 6630 }, { "epoch": 0.8598915571837287, "grad_norm": 1.0007902383804321, "learning_rate": 4.803264204114077e-06, "loss": 7.538721561431885, "step": 6631 }, { "epoch": 0.8600212346919754, "grad_norm": 0.6681885123252869, "learning_rate": 4.794534443035054e-06, "loss": 7.066461563110352, "step": 6632 }, { "epoch": 0.8601509122002221, "grad_norm": 1.0774592161178589, "learning_rate": 4.785812222664227e-06, "loss": 9.305195808410645, "step": 6633 }, { "epoch": 0.8602805897084688, "grad_norm": 1.038297176361084, "learning_rate": 4.777097544456521e-06, "loss": 10.452030181884766, "step": 6634 }, { "epoch": 0.8604102672167154, "grad_norm": 1.1286019086837769, "learning_rate": 4.768390409865653e-06, "loss": 9.424517631530762, "step": 6635 }, { "epoch": 0.8605399447249621, "grad_norm": 1.0330092906951904, "learning_rate": 4.759690820344037e-06, "loss": 8.766955375671387, "step": 6636 }, { "epoch": 0.8606696222332088, "grad_norm": 1.2891508340835571, "learning_rate": 4.750998777342869e-06, "loss": 13.379847526550293, "step": 6637 }, { "epoch": 0.8607992997414554, "grad_norm": 1.0280098915100098, "learning_rate": 4.742314282312044e-06, "loss": 10.932981491088867, "step": 6638 }, { "epoch": 0.8609289772497022, "grad_norm": 1.1565625667572021, "learning_rate": 4.733637336700242e-06, "loss": 8.3353853225708, "step": 6639 }, { "epoch": 0.8610586547579488, "grad_norm": 1.1457438468933105, "learning_rate": 4.724967941954838e-06, "loss": 11.379343032836914, "step": 6640 }, { "epoch": 0.8611883322661955, "grad_norm": 0.8463292717933655, "learning_rate": 4.7163060995219945e-06, "loss": 8.572012901306152, "step": 6641 }, { "epoch": 0.8613180097744422, "grad_norm": 0.7973940968513489, "learning_rate": 4.707651810846564e-06, "loss": 5.8021345138549805, "step": 6642 }, { "epoch": 0.8614476872826888, "grad_norm": 1.140309453010559, "learning_rate": 4.699005077372193e-06, "loss": 9.43716049194336, "step": 6643 }, { "epoch": 0.8615773647909355, "grad_norm": 1.4327441453933716, "learning_rate": 4.6903659005412214e-06, "loss": 8.144224166870117, "step": 6644 }, { "epoch": 0.8617070422991823, "grad_norm": 0.71705162525177, "learning_rate": 4.681734281794759e-06, "loss": 8.225125312805176, "step": 6645 }, { "epoch": 0.8618367198074289, "grad_norm": 0.997845470905304, "learning_rate": 4.673110222572624e-06, "loss": 10.669257164001465, "step": 6646 }, { "epoch": 0.8619663973156756, "grad_norm": 1.0103882551193237, "learning_rate": 4.664493724313424e-06, "loss": 7.174309253692627, "step": 6647 }, { "epoch": 0.8620960748239223, "grad_norm": 0.691810131072998, "learning_rate": 4.65588478845444e-06, "loss": 10.136536598205566, "step": 6648 }, { "epoch": 0.8622257523321689, "grad_norm": 0.8923852443695068, "learning_rate": 4.6472834164317525e-06, "loss": 9.4141263961792, "step": 6649 }, { "epoch": 0.8623554298404156, "grad_norm": 1.0183210372924805, "learning_rate": 4.63868960968013e-06, "loss": 9.290820121765137, "step": 6650 }, { "epoch": 0.8624851073486623, "grad_norm": 1.1102639436721802, "learning_rate": 4.630103369633132e-06, "loss": 10.85350227355957, "step": 6651 }, { "epoch": 0.862614784856909, "grad_norm": 0.9765390753746033, "learning_rate": 4.621524697722995e-06, "loss": 12.371442794799805, "step": 6652 }, { "epoch": 0.8627444623651557, "grad_norm": 0.9278369545936584, "learning_rate": 4.612953595380748e-06, "loss": 9.264939308166504, "step": 6653 }, { "epoch": 0.8628741398734023, "grad_norm": 1.2021030187606812, "learning_rate": 4.604390064036118e-06, "loss": 10.590804100036621, "step": 6654 }, { "epoch": 0.863003817381649, "grad_norm": 0.9539480805397034, "learning_rate": 4.595834105117597e-06, "loss": 8.415781021118164, "step": 6655 }, { "epoch": 0.8631334948898957, "grad_norm": 0.9845702648162842, "learning_rate": 4.587285720052386e-06, "loss": 10.0087890625, "step": 6656 }, { "epoch": 0.8632631723981423, "grad_norm": 0.9530479907989502, "learning_rate": 4.578744910266452e-06, "loss": 10.604033470153809, "step": 6657 }, { "epoch": 0.8633928499063891, "grad_norm": 0.6767224669456482, "learning_rate": 4.570211677184483e-06, "loss": 4.685121536254883, "step": 6658 }, { "epoch": 0.8635225274146358, "grad_norm": 0.9024807214736938, "learning_rate": 4.561686022229899e-06, "loss": 7.9294047355651855, "step": 6659 }, { "epoch": 0.8636522049228824, "grad_norm": 1.0494420528411865, "learning_rate": 4.5531679468248715e-06, "loss": 6.615299224853516, "step": 6660 }, { "epoch": 0.8637818824311291, "grad_norm": 0.893634021282196, "learning_rate": 4.544657452390277e-06, "loss": 7.789426803588867, "step": 6661 }, { "epoch": 0.8639115599393757, "grad_norm": 0.6519517302513123, "learning_rate": 4.536154540345777e-06, "loss": 6.411856651306152, "step": 6662 }, { "epoch": 0.8640412374476224, "grad_norm": 1.045045018196106, "learning_rate": 4.527659212109714e-06, "loss": 9.609668731689453, "step": 6663 }, { "epoch": 0.8641709149558692, "grad_norm": 0.7049723863601685, "learning_rate": 4.519171469099209e-06, "loss": 8.768420219421387, "step": 6664 }, { "epoch": 0.8643005924641158, "grad_norm": 1.2785921096801758, "learning_rate": 4.510691312730086e-06, "loss": 11.465181350708008, "step": 6665 }, { "epoch": 0.8644302699723625, "grad_norm": 1.0398846864700317, "learning_rate": 4.502218744416925e-06, "loss": 7.0465803146362305, "step": 6666 }, { "epoch": 0.8645599474806092, "grad_norm": 0.8060583472251892, "learning_rate": 4.493753765573028e-06, "loss": 6.66119384765625, "step": 6667 }, { "epoch": 0.8646896249888558, "grad_norm": 1.077163815498352, "learning_rate": 4.485296377610443e-06, "loss": 9.519341468811035, "step": 6668 }, { "epoch": 0.8648193024971025, "grad_norm": 1.0211352109909058, "learning_rate": 4.476846581939925e-06, "loss": 10.074162483215332, "step": 6669 }, { "epoch": 0.8649489800053491, "grad_norm": 1.0591334104537964, "learning_rate": 4.468404379971008e-06, "loss": 10.137914657592773, "step": 6670 }, { "epoch": 0.8650786575135959, "grad_norm": 0.8621759414672852, "learning_rate": 4.459969773111911e-06, "loss": 9.056396484375, "step": 6671 }, { "epoch": 0.8652083350218426, "grad_norm": 0.8618137240409851, "learning_rate": 4.451542762769623e-06, "loss": 7.719476222991943, "step": 6672 }, { "epoch": 0.8653380125300892, "grad_norm": 0.9105949997901917, "learning_rate": 4.4431233503498315e-06, "loss": 7.587569236755371, "step": 6673 }, { "epoch": 0.8654676900383359, "grad_norm": 0.806384265422821, "learning_rate": 4.434711537257002e-06, "loss": 10.010367393493652, "step": 6674 }, { "epoch": 0.8655973675465826, "grad_norm": 0.8901845812797546, "learning_rate": 4.426307324894286e-06, "loss": 7.5782365798950195, "step": 6675 }, { "epoch": 0.8657270450548292, "grad_norm": 1.0588229894638062, "learning_rate": 4.417910714663598e-06, "loss": 7.039388179779053, "step": 6676 }, { "epoch": 0.865856722563076, "grad_norm": 0.9549731612205505, "learning_rate": 4.409521707965558e-06, "loss": 6.372954845428467, "step": 6677 }, { "epoch": 0.8659864000713227, "grad_norm": 0.6825296878814697, "learning_rate": 4.401140306199558e-06, "loss": 7.961733818054199, "step": 6678 }, { "epoch": 0.8661160775795693, "grad_norm": 1.0310574769973755, "learning_rate": 4.392766510763668e-06, "loss": 9.134899139404297, "step": 6679 }, { "epoch": 0.866245755087816, "grad_norm": 0.8984255790710449, "learning_rate": 4.384400323054749e-06, "loss": 10.238258361816406, "step": 6680 }, { "epoch": 0.8663754325960626, "grad_norm": 1.1691173315048218, "learning_rate": 4.376041744468335e-06, "loss": 7.518685340881348, "step": 6681 }, { "epoch": 0.8665051101043093, "grad_norm": 1.281435251235962, "learning_rate": 4.367690776398737e-06, "loss": 8.876691818237305, "step": 6682 }, { "epoch": 0.8666347876125561, "grad_norm": 0.7473607659339905, "learning_rate": 4.359347420238963e-06, "loss": 8.235345840454102, "step": 6683 }, { "epoch": 0.8667644651208027, "grad_norm": 1.0176547765731812, "learning_rate": 4.351011677380779e-06, "loss": 11.277792930603027, "step": 6684 }, { "epoch": 0.8668941426290494, "grad_norm": 0.8008338212966919, "learning_rate": 4.342683549214649e-06, "loss": 6.123627185821533, "step": 6685 }, { "epoch": 0.8670238201372961, "grad_norm": 0.9738960266113281, "learning_rate": 4.334363037129802e-06, "loss": 6.745336532592773, "step": 6686 }, { "epoch": 0.8671534976455427, "grad_norm": 1.007828712463379, "learning_rate": 4.326050142514177e-06, "loss": 8.397014617919922, "step": 6687 }, { "epoch": 0.8672831751537894, "grad_norm": 1.1232686042785645, "learning_rate": 4.31774486675443e-06, "loss": 7.6796555519104, "step": 6688 }, { "epoch": 0.867412852662036, "grad_norm": 0.8912163376808167, "learning_rate": 4.309447211235989e-06, "loss": 8.188217163085938, "step": 6689 }, { "epoch": 0.8675425301702828, "grad_norm": 0.9369478225708008, "learning_rate": 4.301157177342951e-06, "loss": 7.915923118591309, "step": 6690 }, { "epoch": 0.8676722076785295, "grad_norm": 1.0987862348556519, "learning_rate": 4.292874766458204e-06, "loss": 11.8737211227417, "step": 6691 }, { "epoch": 0.8678018851867761, "grad_norm": 1.3260706663131714, "learning_rate": 4.284599979963305e-06, "loss": 9.314170837402344, "step": 6692 }, { "epoch": 0.8679315626950228, "grad_norm": 1.4072890281677246, "learning_rate": 4.276332819238599e-06, "loss": 9.441405296325684, "step": 6693 }, { "epoch": 0.8680612402032695, "grad_norm": 1.2836103439331055, "learning_rate": 4.268073285663088e-06, "loss": 9.971464157104492, "step": 6694 }, { "epoch": 0.8681909177115161, "grad_norm": 0.8635176420211792, "learning_rate": 4.259821380614587e-06, "loss": 8.88533878326416, "step": 6695 }, { "epoch": 0.8683205952197629, "grad_norm": 0.971697211265564, "learning_rate": 4.2515771054695615e-06, "loss": 10.206278800964355, "step": 6696 }, { "epoch": 0.8684502727280096, "grad_norm": 0.8072617650032043, "learning_rate": 4.243340461603263e-06, "loss": 7.015438079833984, "step": 6697 }, { "epoch": 0.8685799502362562, "grad_norm": 0.9078231453895569, "learning_rate": 4.235111450389612e-06, "loss": 7.5360002517700195, "step": 6698 }, { "epoch": 0.8687096277445029, "grad_norm": 0.7605905532836914, "learning_rate": 4.226890073201312e-06, "loss": 8.507291793823242, "step": 6699 }, { "epoch": 0.8688393052527495, "grad_norm": 0.9920518398284912, "learning_rate": 4.2186763314097545e-06, "loss": 8.599510192871094, "step": 6700 }, { "epoch": 0.8689689827609962, "grad_norm": 0.8837800025939941, "learning_rate": 4.210470226385077e-06, "loss": 6.2309889793396, "step": 6701 }, { "epoch": 0.8690986602692429, "grad_norm": 0.9721022844314575, "learning_rate": 4.2022717594961344e-06, "loss": 10.232210159301758, "step": 6702 }, { "epoch": 0.8692283377774896, "grad_norm": 1.0597964525222778, "learning_rate": 4.194080932110517e-06, "loss": 11.300616264343262, "step": 6703 }, { "epoch": 0.8693580152857363, "grad_norm": 0.6623759865760803, "learning_rate": 4.1858977455945195e-06, "loss": 7.2053117752075195, "step": 6704 }, { "epoch": 0.869487692793983, "grad_norm": 1.1823680400848389, "learning_rate": 4.177722201313189e-06, "loss": 7.822226524353027, "step": 6705 }, { "epoch": 0.8696173703022296, "grad_norm": 0.9243233799934387, "learning_rate": 4.1695543006302794e-06, "loss": 7.636563301086426, "step": 6706 }, { "epoch": 0.8697470478104763, "grad_norm": 0.6882607340812683, "learning_rate": 4.161394044908279e-06, "loss": 6.50814151763916, "step": 6707 }, { "epoch": 0.869876725318723, "grad_norm": 0.9258086681365967, "learning_rate": 4.1532414355083925e-06, "loss": 9.811894416809082, "step": 6708 }, { "epoch": 0.8700064028269697, "grad_norm": 0.9542463421821594, "learning_rate": 4.145096473790561e-06, "loss": 7.479462623596191, "step": 6709 }, { "epoch": 0.8701360803352164, "grad_norm": 1.2412835359573364, "learning_rate": 4.1369591611134405e-06, "loss": 11.342926025390625, "step": 6710 }, { "epoch": 0.870265757843463, "grad_norm": 1.110094428062439, "learning_rate": 4.128829498834402e-06, "loss": 10.136358261108398, "step": 6711 }, { "epoch": 0.8703954353517097, "grad_norm": 1.1690913438796997, "learning_rate": 4.120707488309572e-06, "loss": 8.670360565185547, "step": 6712 }, { "epoch": 0.8705251128599564, "grad_norm": 1.0416051149368286, "learning_rate": 4.11259313089376e-06, "loss": 9.177600860595703, "step": 6713 }, { "epoch": 0.870654790368203, "grad_norm": 0.7860886454582214, "learning_rate": 4.104486427940535e-06, "loss": 5.614408016204834, "step": 6714 }, { "epoch": 0.8707844678764498, "grad_norm": 0.7600755095481873, "learning_rate": 4.096387380802164e-06, "loss": 6.783672332763672, "step": 6715 }, { "epoch": 0.8709141453846965, "grad_norm": 1.1300342082977295, "learning_rate": 4.088295990829649e-06, "loss": 8.315784454345703, "step": 6716 }, { "epoch": 0.8710438228929431, "grad_norm": 0.991705596446991, "learning_rate": 4.080212259372712e-06, "loss": 11.635005950927734, "step": 6717 }, { "epoch": 0.8711735004011898, "grad_norm": 1.4512991905212402, "learning_rate": 4.072136187779802e-06, "loss": 11.107527732849121, "step": 6718 }, { "epoch": 0.8713031779094365, "grad_norm": 0.959470808506012, "learning_rate": 4.0640677773980725e-06, "loss": 6.589087009429932, "step": 6719 }, { "epoch": 0.8714328554176831, "grad_norm": 1.2439416646957397, "learning_rate": 4.0560070295734275e-06, "loss": 9.87067699432373, "step": 6720 }, { "epoch": 0.8715625329259298, "grad_norm": 0.8725671768188477, "learning_rate": 4.047953945650468e-06, "loss": 8.217504501342773, "step": 6721 }, { "epoch": 0.8716922104341766, "grad_norm": 0.965610921382904, "learning_rate": 4.039908526972536e-06, "loss": 9.113048553466797, "step": 6722 }, { "epoch": 0.8718218879424232, "grad_norm": 1.007563591003418, "learning_rate": 4.0318707748816634e-06, "loss": 7.048948287963867, "step": 6723 }, { "epoch": 0.8719515654506699, "grad_norm": 0.947869062423706, "learning_rate": 4.023840690718655e-06, "loss": 7.235002040863037, "step": 6724 }, { "epoch": 0.8720812429589165, "grad_norm": 1.3964018821716309, "learning_rate": 4.01581827582298e-06, "loss": 10.704061508178711, "step": 6725 }, { "epoch": 0.8722109204671632, "grad_norm": 1.1502503156661987, "learning_rate": 4.007803531532878e-06, "loss": 10.276434898376465, "step": 6726 }, { "epoch": 0.8723405979754099, "grad_norm": 1.185729742050171, "learning_rate": 3.999796459185262e-06, "loss": 8.723116874694824, "step": 6727 }, { "epoch": 0.8724702754836566, "grad_norm": 1.1051568984985352, "learning_rate": 3.991797060115809e-06, "loss": 8.73917007446289, "step": 6728 }, { "epoch": 0.8725999529919033, "grad_norm": 1.144875407218933, "learning_rate": 3.98380533565888e-06, "loss": 12.257977485656738, "step": 6729 }, { "epoch": 0.87272963050015, "grad_norm": 0.7593711614608765, "learning_rate": 3.975821287147591e-06, "loss": 6.771184921264648, "step": 6730 }, { "epoch": 0.8728593080083966, "grad_norm": 1.0424052476882935, "learning_rate": 3.9678449159137325e-06, "loss": 7.390346050262451, "step": 6731 }, { "epoch": 0.8729889855166433, "grad_norm": 0.8230540156364441, "learning_rate": 3.959876223287867e-06, "loss": 9.338781356811523, "step": 6732 }, { "epoch": 0.8731186630248899, "grad_norm": 1.5705548524856567, "learning_rate": 3.951915210599228e-06, "loss": 11.429394721984863, "step": 6733 }, { "epoch": 0.8732483405331366, "grad_norm": 0.7859175205230713, "learning_rate": 3.943961879175795e-06, "loss": 7.8674211502075195, "step": 6734 }, { "epoch": 0.8733780180413834, "grad_norm": 0.7136287093162537, "learning_rate": 3.936016230344275e-06, "loss": 7.267125606536865, "step": 6735 }, { "epoch": 0.87350769554963, "grad_norm": 0.8464453220367432, "learning_rate": 3.928078265430057e-06, "loss": 7.994270324707031, "step": 6736 }, { "epoch": 0.8736373730578767, "grad_norm": 0.995778501033783, "learning_rate": 3.920147985757294e-06, "loss": 6.98244571685791, "step": 6737 }, { "epoch": 0.8737670505661234, "grad_norm": 1.0889523029327393, "learning_rate": 3.9122253926488105e-06, "loss": 10.964263916015625, "step": 6738 }, { "epoch": 0.87389672807437, "grad_norm": 1.1588975191116333, "learning_rate": 3.904310487426183e-06, "loss": 11.709329605102539, "step": 6739 }, { "epoch": 0.8740264055826167, "grad_norm": 1.066208004951477, "learning_rate": 3.896403271409693e-06, "loss": 7.359104633331299, "step": 6740 }, { "epoch": 0.8741560830908635, "grad_norm": 1.1782399415969849, "learning_rate": 3.888503745918343e-06, "loss": 9.735662460327148, "step": 6741 }, { "epoch": 0.8742857605991101, "grad_norm": 1.0716667175292969, "learning_rate": 3.880611912269838e-06, "loss": 8.744497299194336, "step": 6742 }, { "epoch": 0.8744154381073568, "grad_norm": 0.7563719153404236, "learning_rate": 3.872727771780632e-06, "loss": 6.3945136070251465, "step": 6743 }, { "epoch": 0.8745451156156034, "grad_norm": 1.1627954244613647, "learning_rate": 3.86485132576585e-06, "loss": 10.684688568115234, "step": 6744 }, { "epoch": 0.8746747931238501, "grad_norm": 1.0592870712280273, "learning_rate": 3.856982575539392e-06, "loss": 9.0246000289917, "step": 6745 }, { "epoch": 0.8748044706320968, "grad_norm": 0.9318682551383972, "learning_rate": 3.849121522413807e-06, "loss": 8.67679214477539, "step": 6746 }, { "epoch": 0.8749341481403435, "grad_norm": 1.164884090423584, "learning_rate": 3.841268167700424e-06, "loss": 9.36767578125, "step": 6747 }, { "epoch": 0.8750638256485902, "grad_norm": 0.8120535612106323, "learning_rate": 3.833422512709234e-06, "loss": 8.603667259216309, "step": 6748 }, { "epoch": 0.8751935031568369, "grad_norm": 0.9234634637832642, "learning_rate": 3.82558455874899e-06, "loss": 8.276808738708496, "step": 6749 }, { "epoch": 0.8753231806650835, "grad_norm": 1.030764102935791, "learning_rate": 3.817754307127119e-06, "loss": 11.059012413024902, "step": 6750 }, { "epoch": 0.8754528581733302, "grad_norm": 0.9292004108428955, "learning_rate": 3.8099317591497985e-06, "loss": 7.565367221832275, "step": 6751 }, { "epoch": 0.8755825356815768, "grad_norm": 0.7340485453605652, "learning_rate": 3.802116916121895e-06, "loss": 8.66624927520752, "step": 6752 }, { "epoch": 0.8757122131898235, "grad_norm": 1.2867578268051147, "learning_rate": 3.794309779347005e-06, "loss": 9.61286735534668, "step": 6753 }, { "epoch": 0.8758418906980703, "grad_norm": 1.636217713356018, "learning_rate": 3.7865103501274257e-06, "loss": 11.068037033081055, "step": 6754 }, { "epoch": 0.8759715682063169, "grad_norm": 1.3942409753799438, "learning_rate": 3.778718629764194e-06, "loss": 10.098825454711914, "step": 6755 }, { "epoch": 0.8761012457145636, "grad_norm": 1.1433037519454956, "learning_rate": 3.7709346195570204e-06, "loss": 10.253623008728027, "step": 6756 }, { "epoch": 0.8762309232228103, "grad_norm": 0.7161186933517456, "learning_rate": 3.7631583208043775e-06, "loss": 6.883928298950195, "step": 6757 }, { "epoch": 0.8763606007310569, "grad_norm": 1.1633753776550293, "learning_rate": 3.7553897348034106e-06, "loss": 12.135757446289062, "step": 6758 }, { "epoch": 0.8764902782393036, "grad_norm": 1.0255008935928345, "learning_rate": 3.7476288628500054e-06, "loss": 9.769485473632812, "step": 6759 }, { "epoch": 0.8766199557475504, "grad_norm": 0.7481239438056946, "learning_rate": 3.7398757062387326e-06, "loss": 6.425236225128174, "step": 6760 }, { "epoch": 0.876749633255797, "grad_norm": 1.309199571609497, "learning_rate": 3.7321302662629175e-06, "loss": 13.516727447509766, "step": 6761 }, { "epoch": 0.8768793107640437, "grad_norm": 0.9145943522453308, "learning_rate": 3.7243925442145555e-06, "loss": 7.51740026473999, "step": 6762 }, { "epoch": 0.8770089882722903, "grad_norm": 1.076505184173584, "learning_rate": 3.7166625413843915e-06, "loss": 9.22656536102295, "step": 6763 }, { "epoch": 0.877138665780537, "grad_norm": 1.1290037631988525, "learning_rate": 3.7089402590618383e-06, "loss": 11.25385856628418, "step": 6764 }, { "epoch": 0.8772683432887837, "grad_norm": 0.8240520358085632, "learning_rate": 3.7012256985350767e-06, "loss": 7.474295139312744, "step": 6765 }, { "epoch": 0.8773980207970303, "grad_norm": 0.9238231778144836, "learning_rate": 3.6935188610909442e-06, "loss": 9.193982124328613, "step": 6766 }, { "epoch": 0.8775276983052771, "grad_norm": 0.8745678067207336, "learning_rate": 3.6858197480150403e-06, "loss": 8.145648956298828, "step": 6767 }, { "epoch": 0.8776573758135238, "grad_norm": 0.7307303547859192, "learning_rate": 3.678128360591626e-06, "loss": 7.081965446472168, "step": 6768 }, { "epoch": 0.8777870533217704, "grad_norm": 0.5924578905105591, "learning_rate": 3.67044470010372e-06, "loss": 5.508177280426025, "step": 6769 }, { "epoch": 0.8779167308300171, "grad_norm": 0.8886322975158691, "learning_rate": 3.6627687678330135e-06, "loss": 6.646336555480957, "step": 6770 }, { "epoch": 0.8780464083382638, "grad_norm": 0.7981405258178711, "learning_rate": 3.6551005650599324e-06, "loss": 8.003852844238281, "step": 6771 }, { "epoch": 0.8781760858465104, "grad_norm": 0.9207720756530762, "learning_rate": 3.6474400930636254e-06, "loss": 8.949398040771484, "step": 6772 }, { "epoch": 0.8783057633547572, "grad_norm": 0.9347906112670898, "learning_rate": 3.639787353121904e-06, "loss": 11.073748588562012, "step": 6773 }, { "epoch": 0.8784354408630038, "grad_norm": 1.0182342529296875, "learning_rate": 3.6321423465113415e-06, "loss": 6.8673248291015625, "step": 6774 }, { "epoch": 0.8785651183712505, "grad_norm": 0.9726407527923584, "learning_rate": 3.624505074507184e-06, "loss": 6.63689661026001, "step": 6775 }, { "epoch": 0.8786947958794972, "grad_norm": 1.339646339416504, "learning_rate": 3.616875538383413e-06, "loss": 9.227428436279297, "step": 6776 }, { "epoch": 0.8788244733877438, "grad_norm": 1.0194021463394165, "learning_rate": 3.6092537394126934e-06, "loss": 8.433929443359375, "step": 6777 }, { "epoch": 0.8789541508959905, "grad_norm": 1.6338022947311401, "learning_rate": 3.601639678866442e-06, "loss": 12.433340072631836, "step": 6778 }, { "epoch": 0.8790838284042373, "grad_norm": 0.8368656635284424, "learning_rate": 3.5940333580147256e-06, "loss": 7.191208362579346, "step": 6779 }, { "epoch": 0.8792135059124839, "grad_norm": 1.342552900314331, "learning_rate": 3.5864347781263797e-06, "loss": 5.796712398529053, "step": 6780 }, { "epoch": 0.8793431834207306, "grad_norm": 0.9493892192840576, "learning_rate": 3.5788439404688955e-06, "loss": 10.51073932647705, "step": 6781 }, { "epoch": 0.8794728609289773, "grad_norm": 1.2307006120681763, "learning_rate": 3.5712608463085217e-06, "loss": 9.74085807800293, "step": 6782 }, { "epoch": 0.8796025384372239, "grad_norm": 1.0287868976593018, "learning_rate": 3.563685496910174e-06, "loss": 9.386977195739746, "step": 6783 }, { "epoch": 0.8797322159454706, "grad_norm": 1.119603157043457, "learning_rate": 3.556117893537508e-06, "loss": 9.016558647155762, "step": 6784 }, { "epoch": 0.8798618934537172, "grad_norm": 0.9066578149795532, "learning_rate": 3.5485580374528593e-06, "loss": 9.574376106262207, "step": 6785 }, { "epoch": 0.879991570961964, "grad_norm": 1.2050917148590088, "learning_rate": 3.541005929917296e-06, "loss": 8.704475402832031, "step": 6786 }, { "epoch": 0.8801212484702107, "grad_norm": 1.4088938236236572, "learning_rate": 3.533461572190572e-06, "loss": 10.848956108093262, "step": 6787 }, { "epoch": 0.8802509259784573, "grad_norm": 0.9975987672805786, "learning_rate": 3.5259249655311687e-06, "loss": 10.143138885498047, "step": 6788 }, { "epoch": 0.880380603486704, "grad_norm": 0.790720522403717, "learning_rate": 3.5183961111962593e-06, "loss": 7.385311126708984, "step": 6789 }, { "epoch": 0.8805102809949507, "grad_norm": 0.5999744534492493, "learning_rate": 3.5108750104417386e-06, "loss": 12.439131736755371, "step": 6790 }, { "epoch": 0.8806399585031973, "grad_norm": 1.0642642974853516, "learning_rate": 3.503361664522181e-06, "loss": 10.349406242370605, "step": 6791 }, { "epoch": 0.8807696360114441, "grad_norm": 0.5551249980926514, "learning_rate": 3.495856074690901e-06, "loss": 4.297288417816162, "step": 6792 }, { "epoch": 0.8808993135196908, "grad_norm": 0.8076386451721191, "learning_rate": 3.4883582421998962e-06, "loss": 10.902512550354004, "step": 6793 }, { "epoch": 0.8810289910279374, "grad_norm": 0.7869068384170532, "learning_rate": 3.4808681682998833e-06, "loss": 8.710587501525879, "step": 6794 }, { "epoch": 0.8811586685361841, "grad_norm": 1.0744736194610596, "learning_rate": 3.4733858542402742e-06, "loss": 10.564396858215332, "step": 6795 }, { "epoch": 0.8812883460444307, "grad_norm": 1.214596152305603, "learning_rate": 3.465911301269187e-06, "loss": 7.079275131225586, "step": 6796 }, { "epoch": 0.8814180235526774, "grad_norm": 0.8206896781921387, "learning_rate": 3.458444510633463e-06, "loss": 6.786334037780762, "step": 6797 }, { "epoch": 0.8815477010609241, "grad_norm": 0.8209817409515381, "learning_rate": 3.4509854835786116e-06, "loss": 7.619289398193359, "step": 6798 }, { "epoch": 0.8816773785691708, "grad_norm": 0.7410603761672974, "learning_rate": 3.4435342213488985e-06, "loss": 8.760759353637695, "step": 6799 }, { "epoch": 0.8818070560774175, "grad_norm": 1.005205750465393, "learning_rate": 3.4360907251872465e-06, "loss": 10.296039581298828, "step": 6800 }, { "epoch": 0.8819367335856642, "grad_norm": 1.1573116779327393, "learning_rate": 3.428654996335312e-06, "loss": 10.49209213256836, "step": 6801 }, { "epoch": 0.8820664110939108, "grad_norm": 0.5882078409194946, "learning_rate": 3.4212270360334364e-06, "loss": 4.642496585845947, "step": 6802 }, { "epoch": 0.8821960886021575, "grad_norm": 0.6642146706581116, "learning_rate": 3.4138068455206953e-06, "loss": 8.306684494018555, "step": 6803 }, { "epoch": 0.8823257661104041, "grad_norm": 0.9874131083488464, "learning_rate": 3.4063944260348203e-06, "loss": 11.277725219726562, "step": 6804 }, { "epoch": 0.8824554436186509, "grad_norm": 0.8602563738822937, "learning_rate": 3.3989897788123003e-06, "loss": 5.945432662963867, "step": 6805 }, { "epoch": 0.8825851211268976, "grad_norm": 0.8815575242042542, "learning_rate": 3.3915929050882865e-06, "loss": 10.174911499023438, "step": 6806 }, { "epoch": 0.8827147986351442, "grad_norm": 1.131382942199707, "learning_rate": 3.384203806096653e-06, "loss": 9.592787742614746, "step": 6807 }, { "epoch": 0.8828444761433909, "grad_norm": 0.9515222907066345, "learning_rate": 3.376822483069969e-06, "loss": 8.280950546264648, "step": 6808 }, { "epoch": 0.8829741536516376, "grad_norm": 0.9030787944793701, "learning_rate": 3.3694489372395276e-06, "loss": 8.882466316223145, "step": 6809 }, { "epoch": 0.8831038311598842, "grad_norm": 0.8430945873260498, "learning_rate": 3.362083169835273e-06, "loss": 8.141824722290039, "step": 6810 }, { "epoch": 0.883233508668131, "grad_norm": 0.9288420081138611, "learning_rate": 3.3547251820859217e-06, "loss": 6.961434364318848, "step": 6811 }, { "epoch": 0.8833631861763777, "grad_norm": 0.893303632736206, "learning_rate": 3.3473749752188423e-06, "loss": 8.559989929199219, "step": 6812 }, { "epoch": 0.8834928636846243, "grad_norm": 1.2821804285049438, "learning_rate": 3.340032550460126e-06, "loss": 10.292887687683105, "step": 6813 }, { "epoch": 0.883622541192871, "grad_norm": 0.9434272050857544, "learning_rate": 3.3326979090345435e-06, "loss": 8.08621597290039, "step": 6814 }, { "epoch": 0.8837522187011176, "grad_norm": 1.0882253646850586, "learning_rate": 3.32537105216561e-06, "loss": 11.447259902954102, "step": 6815 }, { "epoch": 0.8838818962093643, "grad_norm": 0.9532543420791626, "learning_rate": 3.3180519810754874e-06, "loss": 9.273428916931152, "step": 6816 }, { "epoch": 0.884011573717611, "grad_norm": 0.7108167409896851, "learning_rate": 3.310740696985093e-06, "loss": 7.115931034088135, "step": 6817 }, { "epoch": 0.8841412512258577, "grad_norm": 1.2011194229125977, "learning_rate": 3.303437201114007e-06, "loss": 11.28421401977539, "step": 6818 }, { "epoch": 0.8842709287341044, "grad_norm": 0.9583526253700256, "learning_rate": 3.2961414946805156e-06, "loss": 9.636025428771973, "step": 6819 }, { "epoch": 0.8844006062423511, "grad_norm": 0.6241721510887146, "learning_rate": 3.288853578901635e-06, "loss": 4.64030647277832, "step": 6820 }, { "epoch": 0.8845302837505977, "grad_norm": 0.9253420829772949, "learning_rate": 3.281573454993031e-06, "loss": 10.11430835723877, "step": 6821 }, { "epoch": 0.8846599612588444, "grad_norm": 0.9904729127883911, "learning_rate": 3.2743011241691266e-06, "loss": 9.28700065612793, "step": 6822 }, { "epoch": 0.884789638767091, "grad_norm": 0.9208760857582092, "learning_rate": 3.2670365876429963e-06, "loss": 10.273540496826172, "step": 6823 }, { "epoch": 0.8849193162753378, "grad_norm": 1.0665290355682373, "learning_rate": 3.2597798466264538e-06, "loss": 8.994515419006348, "step": 6824 }, { "epoch": 0.8850489937835845, "grad_norm": 1.1319761276245117, "learning_rate": 3.25253090232997e-06, "loss": 8.999690055847168, "step": 6825 }, { "epoch": 0.8851786712918311, "grad_norm": 0.9029672741889954, "learning_rate": 3.2452897559627604e-06, "loss": 11.414316177368164, "step": 6826 }, { "epoch": 0.8853083488000778, "grad_norm": 0.8519134521484375, "learning_rate": 3.2380564087327036e-06, "loss": 6.385651588439941, "step": 6827 }, { "epoch": 0.8854380263083245, "grad_norm": 0.6353610754013062, "learning_rate": 3.230830861846401e-06, "loss": 6.868768692016602, "step": 6828 }, { "epoch": 0.8855677038165711, "grad_norm": 1.322447657585144, "learning_rate": 3.223613116509139e-06, "loss": 12.190069198608398, "step": 6829 }, { "epoch": 0.8856973813248178, "grad_norm": 0.8397017121315002, "learning_rate": 3.2164031739249144e-06, "loss": 6.753876686096191, "step": 6830 }, { "epoch": 0.8858270588330646, "grad_norm": 0.9802478551864624, "learning_rate": 3.2092010352964e-06, "loss": 11.59371280670166, "step": 6831 }, { "epoch": 0.8859567363413112, "grad_norm": 1.112884521484375, "learning_rate": 3.202006701825e-06, "loss": 9.934958457946777, "step": 6832 }, { "epoch": 0.8860864138495579, "grad_norm": 1.254103183746338, "learning_rate": 3.1948201747107775e-06, "loss": 10.9849214553833, "step": 6833 }, { "epoch": 0.8862160913578045, "grad_norm": 0.7562015652656555, "learning_rate": 3.1876414551525404e-06, "loss": 8.830137252807617, "step": 6834 }, { "epoch": 0.8863457688660512, "grad_norm": 1.0900096893310547, "learning_rate": 3.1804705443477466e-06, "loss": 9.699480056762695, "step": 6835 }, { "epoch": 0.8864754463742979, "grad_norm": 1.0238059759140015, "learning_rate": 3.17330744349259e-06, "loss": 7.522473335266113, "step": 6836 }, { "epoch": 0.8866051238825446, "grad_norm": 0.9121060371398926, "learning_rate": 3.166152153781926e-06, "loss": 8.041001319885254, "step": 6837 }, { "epoch": 0.8867348013907913, "grad_norm": 1.191068410873413, "learning_rate": 3.159004676409344e-06, "loss": 7.372251033782959, "step": 6838 }, { "epoch": 0.886864478899038, "grad_norm": 1.4592351913452148, "learning_rate": 3.1518650125671012e-06, "loss": 13.76165771484375, "step": 6839 }, { "epoch": 0.8869941564072846, "grad_norm": 0.9647586941719055, "learning_rate": 3.1447331634461728e-06, "loss": 8.340834617614746, "step": 6840 }, { "epoch": 0.8871238339155313, "grad_norm": 1.2209601402282715, "learning_rate": 3.1376091302362075e-06, "loss": 7.356651782989502, "step": 6841 }, { "epoch": 0.887253511423778, "grad_norm": 1.3268721103668213, "learning_rate": 3.1304929141255767e-06, "loss": 7.957448959350586, "step": 6842 }, { "epoch": 0.8873831889320247, "grad_norm": 0.7991887927055359, "learning_rate": 3.1233845163013144e-06, "loss": 8.621855735778809, "step": 6843 }, { "epoch": 0.8875128664402714, "grad_norm": 0.8409504294395447, "learning_rate": 3.116283937949194e-06, "loss": 7.71115779876709, "step": 6844 }, { "epoch": 0.887642543948518, "grad_norm": 1.0117923021316528, "learning_rate": 3.1091911802536398e-06, "loss": 9.270187377929688, "step": 6845 }, { "epoch": 0.8877722214567647, "grad_norm": 0.7336105108261108, "learning_rate": 3.1021062443978066e-06, "loss": 7.49251127243042, "step": 6846 }, { "epoch": 0.8879018989650114, "grad_norm": 0.7629271149635315, "learning_rate": 3.095029131563515e-06, "loss": 9.120790481567383, "step": 6847 }, { "epoch": 0.888031576473258, "grad_norm": 0.9349365830421448, "learning_rate": 3.0879598429313095e-06, "loss": 10.471871376037598, "step": 6848 }, { "epoch": 0.8881612539815047, "grad_norm": 0.9117916226387024, "learning_rate": 3.080898379680419e-06, "loss": 7.565653324127197, "step": 6849 }, { "epoch": 0.8882909314897515, "grad_norm": 0.8074234127998352, "learning_rate": 3.0738447429887462e-06, "loss": 5.961760997772217, "step": 6850 }, { "epoch": 0.8884206089979981, "grad_norm": 0.9398289322853088, "learning_rate": 3.0667989340329273e-06, "loss": 8.693452835083008, "step": 6851 }, { "epoch": 0.8885502865062448, "grad_norm": 1.1287016868591309, "learning_rate": 3.0597609539882444e-06, "loss": 10.796687126159668, "step": 6852 }, { "epoch": 0.8886799640144915, "grad_norm": 1.1821377277374268, "learning_rate": 3.0527308040287305e-06, "loss": 11.461258888244629, "step": 6853 }, { "epoch": 0.8888096415227381, "grad_norm": 0.9089200496673584, "learning_rate": 3.045708485327059e-06, "loss": 7.6503424644470215, "step": 6854 }, { "epoch": 0.8889393190309848, "grad_norm": 1.0956190824508667, "learning_rate": 3.038693999054637e-06, "loss": 9.815677642822266, "step": 6855 }, { "epoch": 0.8890689965392315, "grad_norm": 0.8364265561103821, "learning_rate": 3.031687346381529e-06, "loss": 8.461326599121094, "step": 6856 }, { "epoch": 0.8891986740474782, "grad_norm": 0.7632682919502258, "learning_rate": 3.024688528476538e-06, "loss": 9.368773460388184, "step": 6857 }, { "epoch": 0.8893283515557249, "grad_norm": 0.8528732061386108, "learning_rate": 3.017697546507109e-06, "loss": 7.398618698120117, "step": 6858 }, { "epoch": 0.8894580290639715, "grad_norm": 1.1762892007827759, "learning_rate": 3.010714401639425e-06, "loss": 8.216636657714844, "step": 6859 }, { "epoch": 0.8895877065722182, "grad_norm": 0.7328111529350281, "learning_rate": 3.003739095038327e-06, "loss": 5.921234130859375, "step": 6860 }, { "epoch": 0.8897173840804649, "grad_norm": 1.1024950742721558, "learning_rate": 2.9967716278673786e-06, "loss": 7.671713829040527, "step": 6861 }, { "epoch": 0.8898470615887115, "grad_norm": 0.6882398128509521, "learning_rate": 2.989812001288805e-06, "loss": 6.315447807312012, "step": 6862 }, { "epoch": 0.8899767390969583, "grad_norm": 0.6212306618690491, "learning_rate": 2.982860216463551e-06, "loss": 6.352659225463867, "step": 6863 }, { "epoch": 0.890106416605205, "grad_norm": 0.9838439226150513, "learning_rate": 2.9759162745512326e-06, "loss": 8.620841979980469, "step": 6864 }, { "epoch": 0.8902360941134516, "grad_norm": 1.2182610034942627, "learning_rate": 2.9689801767101786e-06, "loss": 10.450489044189453, "step": 6865 }, { "epoch": 0.8903657716216983, "grad_norm": 1.3312126398086548, "learning_rate": 2.962051924097381e-06, "loss": 12.093387603759766, "step": 6866 }, { "epoch": 0.8904954491299449, "grad_norm": 1.319283127784729, "learning_rate": 2.9551315178685533e-06, "loss": 11.669476509094238, "step": 6867 }, { "epoch": 0.8906251266381916, "grad_norm": 1.1166486740112305, "learning_rate": 2.948218959178073e-06, "loss": 10.109557151794434, "step": 6868 }, { "epoch": 0.8907548041464384, "grad_norm": 0.7845953702926636, "learning_rate": 2.941314249179039e-06, "loss": 7.7399115562438965, "step": 6869 }, { "epoch": 0.890884481654685, "grad_norm": 1.1199514865875244, "learning_rate": 2.934417389023203e-06, "loss": 12.161200523376465, "step": 6870 }, { "epoch": 0.8910141591629317, "grad_norm": 0.8836485147476196, "learning_rate": 2.927528379861044e-06, "loss": 10.485013961791992, "step": 6871 }, { "epoch": 0.8911438366711784, "grad_norm": 0.9460774660110474, "learning_rate": 2.9206472228417102e-06, "loss": 7.413449764251709, "step": 6872 }, { "epoch": 0.891273514179425, "grad_norm": 1.3144612312316895, "learning_rate": 2.913773919113044e-06, "loss": 8.633500099182129, "step": 6873 }, { "epoch": 0.8914031916876717, "grad_norm": 1.0113825798034668, "learning_rate": 2.9069084698215785e-06, "loss": 9.05750560760498, "step": 6874 }, { "epoch": 0.8915328691959185, "grad_norm": 1.1049094200134277, "learning_rate": 2.900050876112537e-06, "loss": 11.500907897949219, "step": 6875 }, { "epoch": 0.8916625467041651, "grad_norm": 1.1030820608139038, "learning_rate": 2.8932011391298308e-06, "loss": 8.88165283203125, "step": 6876 }, { "epoch": 0.8917922242124118, "grad_norm": 1.1219370365142822, "learning_rate": 2.886359260016075e-06, "loss": 6.343758583068848, "step": 6877 }, { "epoch": 0.8919219017206584, "grad_norm": 0.5984980463981628, "learning_rate": 2.8795252399125394e-06, "loss": 5.750594615936279, "step": 6878 }, { "epoch": 0.8920515792289051, "grad_norm": 0.7217414975166321, "learning_rate": 2.8726990799592235e-06, "loss": 8.24166488647461, "step": 6879 }, { "epoch": 0.8921812567371518, "grad_norm": 0.7215743064880371, "learning_rate": 2.8658807812947776e-06, "loss": 7.354247570037842, "step": 6880 }, { "epoch": 0.8923109342453984, "grad_norm": 1.0336068868637085, "learning_rate": 2.859070345056586e-06, "loss": 5.918426513671875, "step": 6881 }, { "epoch": 0.8924406117536452, "grad_norm": 1.1365407705307007, "learning_rate": 2.8522677723806733e-06, "loss": 9.748259544372559, "step": 6882 }, { "epoch": 0.8925702892618919, "grad_norm": 1.2412807941436768, "learning_rate": 2.8454730644017768e-06, "loss": 8.73849868774414, "step": 6883 }, { "epoch": 0.8926999667701385, "grad_norm": 1.148768424987793, "learning_rate": 2.838686222253334e-06, "loss": 9.902655601501465, "step": 6884 }, { "epoch": 0.8928296442783852, "grad_norm": 0.8781008124351501, "learning_rate": 2.8319072470674336e-06, "loss": 8.093783378601074, "step": 6885 }, { "epoch": 0.8929593217866318, "grad_norm": 1.0517491102218628, "learning_rate": 2.8251361399748987e-06, "loss": 9.040661811828613, "step": 6886 }, { "epoch": 0.8930889992948785, "grad_norm": 1.108661413192749, "learning_rate": 2.8183729021051872e-06, "loss": 9.97144603729248, "step": 6887 }, { "epoch": 0.8932186768031253, "grad_norm": 0.8605650663375854, "learning_rate": 2.811617534586497e-06, "loss": 8.91887092590332, "step": 6888 }, { "epoch": 0.8933483543113719, "grad_norm": 1.297776460647583, "learning_rate": 2.804870038545676e-06, "loss": 10.881522178649902, "step": 6889 }, { "epoch": 0.8934780318196186, "grad_norm": 0.790062427520752, "learning_rate": 2.7981304151082854e-06, "loss": 6.189470291137695, "step": 6890 }, { "epoch": 0.8936077093278653, "grad_norm": 0.6892853379249573, "learning_rate": 2.7913986653985423e-06, "loss": 9.166108131408691, "step": 6891 }, { "epoch": 0.8937373868361119, "grad_norm": 1.2850996255874634, "learning_rate": 2.7846747905393766e-06, "loss": 10.389369010925293, "step": 6892 }, { "epoch": 0.8938670643443586, "grad_norm": 1.3132951259613037, "learning_rate": 2.7779587916523907e-06, "loss": 9.629973411560059, "step": 6893 }, { "epoch": 0.8939967418526052, "grad_norm": 0.8905325531959534, "learning_rate": 2.7712506698578887e-06, "loss": 7.573507785797119, "step": 6894 }, { "epoch": 0.894126419360852, "grad_norm": 0.7995507717132568, "learning_rate": 2.764550426274831e-06, "loss": 7.702955722808838, "step": 6895 }, { "epoch": 0.8942560968690987, "grad_norm": 1.0856462717056274, "learning_rate": 2.7578580620209073e-06, "loss": 7.213489055633545, "step": 6896 }, { "epoch": 0.8943857743773453, "grad_norm": 0.7298047542572021, "learning_rate": 2.75117357821244e-06, "loss": 9.187134742736816, "step": 6897 }, { "epoch": 0.894515451885592, "grad_norm": 0.8779920339584351, "learning_rate": 2.7444969759644947e-06, "loss": 8.620388984680176, "step": 6898 }, { "epoch": 0.8946451293938387, "grad_norm": 0.7511818408966064, "learning_rate": 2.737828256390773e-06, "loss": 7.364471912384033, "step": 6899 }, { "epoch": 0.8947748069020853, "grad_norm": 0.7941157221794128, "learning_rate": 2.7311674206036975e-06, "loss": 11.265177726745605, "step": 6900 }, { "epoch": 0.8949044844103321, "grad_norm": 0.7522880434989929, "learning_rate": 2.72451446971434e-06, "loss": 7.119142532348633, "step": 6901 }, { "epoch": 0.8950341619185788, "grad_norm": 1.1043850183486938, "learning_rate": 2.717869404832496e-06, "loss": 8.990289688110352, "step": 6902 }, { "epoch": 0.8951638394268254, "grad_norm": 1.0586742162704468, "learning_rate": 2.711232227066607e-06, "loss": 10.160645484924316, "step": 6903 }, { "epoch": 0.8952935169350721, "grad_norm": 0.831322968006134, "learning_rate": 2.7046029375238423e-06, "loss": 9.037091255187988, "step": 6904 }, { "epoch": 0.8954231944433187, "grad_norm": 0.9287705421447754, "learning_rate": 2.697981537310018e-06, "loss": 7.359382152557373, "step": 6905 }, { "epoch": 0.8955528719515654, "grad_norm": 0.8007058501243591, "learning_rate": 2.6913680275296395e-06, "loss": 6.54200553894043, "step": 6906 }, { "epoch": 0.8956825494598122, "grad_norm": 1.6043682098388672, "learning_rate": 2.684762409285918e-06, "loss": 10.514546394348145, "step": 6907 }, { "epoch": 0.8958122269680588, "grad_norm": 0.5790855884552002, "learning_rate": 2.6781646836807226e-06, "loss": 6.062881946563721, "step": 6908 }, { "epoch": 0.8959419044763055, "grad_norm": 1.0725575685501099, "learning_rate": 2.6715748518146343e-06, "loss": 10.74132251739502, "step": 6909 }, { "epoch": 0.8960715819845522, "grad_norm": 1.263444423675537, "learning_rate": 2.6649929147868845e-06, "loss": 9.717098236083984, "step": 6910 }, { "epoch": 0.8962012594927988, "grad_norm": 0.708304762840271, "learning_rate": 2.6584188736954174e-06, "loss": 4.997825622558594, "step": 6911 }, { "epoch": 0.8963309370010455, "grad_norm": 0.8536054491996765, "learning_rate": 2.6518527296368277e-06, "loss": 7.5619049072265625, "step": 6912 }, { "epoch": 0.8964606145092922, "grad_norm": 1.3212456703186035, "learning_rate": 2.6452944837064343e-06, "loss": 9.73602294921875, "step": 6913 }, { "epoch": 0.8965902920175389, "grad_norm": 1.265389084815979, "learning_rate": 2.6387441369981947e-06, "loss": 11.30725383758545, "step": 6914 }, { "epoch": 0.8967199695257856, "grad_norm": 1.262203335762024, "learning_rate": 2.632201690604791e-06, "loss": 11.568897247314453, "step": 6915 }, { "epoch": 0.8968496470340322, "grad_norm": 0.8228781223297119, "learning_rate": 2.625667145617544e-06, "loss": 10.310930252075195, "step": 6916 }, { "epoch": 0.8969793245422789, "grad_norm": 0.5550060868263245, "learning_rate": 2.6191405031264994e-06, "loss": 4.775768756866455, "step": 6917 }, { "epoch": 0.8971090020505256, "grad_norm": 1.138960361480713, "learning_rate": 2.6126217642203467e-06, "loss": 13.050950050354004, "step": 6918 }, { "epoch": 0.8972386795587722, "grad_norm": 0.7429835200309753, "learning_rate": 2.606110929986494e-06, "loss": 6.683290004730225, "step": 6919 }, { "epoch": 0.897368357067019, "grad_norm": 1.0264455080032349, "learning_rate": 2.5996080015109893e-06, "loss": 8.482650756835938, "step": 6920 }, { "epoch": 0.8974980345752657, "grad_norm": 1.4864698648452759, "learning_rate": 2.5931129798786035e-06, "loss": 10.630706787109375, "step": 6921 }, { "epoch": 0.8976277120835123, "grad_norm": 0.9965517520904541, "learning_rate": 2.586625866172754e-06, "loss": 6.189993381500244, "step": 6922 }, { "epoch": 0.897757389591759, "grad_norm": 1.1044402122497559, "learning_rate": 2.5801466614755686e-06, "loss": 7.790942192077637, "step": 6923 }, { "epoch": 0.8978870671000057, "grad_norm": 0.8064347505569458, "learning_rate": 2.5736753668678228e-06, "loss": 7.016143798828125, "step": 6924 }, { "epoch": 0.8980167446082523, "grad_norm": 0.9219217896461487, "learning_rate": 2.5672119834290075e-06, "loss": 7.101017475128174, "step": 6925 }, { "epoch": 0.898146422116499, "grad_norm": 0.927671492099762, "learning_rate": 2.560756512237267e-06, "loss": 7.461698055267334, "step": 6926 }, { "epoch": 0.8982760996247457, "grad_norm": 0.9894409775733948, "learning_rate": 2.5543089543694444e-06, "loss": 8.924406051635742, "step": 6927 }, { "epoch": 0.8984057771329924, "grad_norm": 0.753035306930542, "learning_rate": 2.547869310901052e-06, "loss": 6.944220066070557, "step": 6928 }, { "epoch": 0.8985354546412391, "grad_norm": 1.0346603393554688, "learning_rate": 2.541437582906281e-06, "loss": 9.42017650604248, "step": 6929 }, { "epoch": 0.8986651321494857, "grad_norm": 1.2748970985412598, "learning_rate": 2.5350137714580114e-06, "loss": 8.950254440307617, "step": 6930 }, { "epoch": 0.8987948096577324, "grad_norm": 0.6679840087890625, "learning_rate": 2.528597877627786e-06, "loss": 9.445633888244629, "step": 6931 }, { "epoch": 0.8989244871659791, "grad_norm": 0.6991177201271057, "learning_rate": 2.52218990248585e-06, "loss": 5.419829368591309, "step": 6932 }, { "epoch": 0.8990541646742258, "grad_norm": 1.3574053049087524, "learning_rate": 2.5157898471011076e-06, "loss": 10.171327590942383, "step": 6933 }, { "epoch": 0.8991838421824725, "grad_norm": 0.7595587372779846, "learning_rate": 2.509397712541156e-06, "loss": 7.5343523025512695, "step": 6934 }, { "epoch": 0.8993135196907192, "grad_norm": 1.1674884557724, "learning_rate": 2.503013499872259e-06, "loss": 10.105058670043945, "step": 6935 }, { "epoch": 0.8994431971989658, "grad_norm": 0.8405587673187256, "learning_rate": 2.496637210159375e-06, "loss": 8.400751113891602, "step": 6936 }, { "epoch": 0.8995728747072125, "grad_norm": 0.8370917439460754, "learning_rate": 2.4902688444661148e-06, "loss": 8.534039497375488, "step": 6937 }, { "epoch": 0.8997025522154591, "grad_norm": 0.8893851637840271, "learning_rate": 2.4839084038548012e-06, "loss": 7.198409080505371, "step": 6938 }, { "epoch": 0.8998322297237059, "grad_norm": 1.2805297374725342, "learning_rate": 2.4775558893864014e-06, "loss": 11.665191650390625, "step": 6939 }, { "epoch": 0.8999619072319526, "grad_norm": 1.6960889101028442, "learning_rate": 2.471211302120585e-06, "loss": 8.799309730529785, "step": 6940 }, { "epoch": 0.9000915847401992, "grad_norm": 0.9262204766273499, "learning_rate": 2.464874643115689e-06, "loss": 7.548868656158447, "step": 6941 }, { "epoch": 0.9002212622484459, "grad_norm": 0.7801101207733154, "learning_rate": 2.458545913428728e-06, "loss": 7.36713171005249, "step": 6942 }, { "epoch": 0.9003509397566926, "grad_norm": 0.9868101477622986, "learning_rate": 2.452225114115392e-06, "loss": 7.582515716552734, "step": 6943 }, { "epoch": 0.9004806172649392, "grad_norm": 1.0350401401519775, "learning_rate": 2.4459122462300642e-06, "loss": 9.866182327270508, "step": 6944 }, { "epoch": 0.9006102947731859, "grad_norm": 1.1171855926513672, "learning_rate": 2.4396073108257753e-06, "loss": 7.893120765686035, "step": 6945 }, { "epoch": 0.9007399722814327, "grad_norm": 0.6921208500862122, "learning_rate": 2.433310308954262e-06, "loss": 8.288286209106445, "step": 6946 }, { "epoch": 0.9008696497896793, "grad_norm": 0.8900076746940613, "learning_rate": 2.427021241665911e-06, "loss": 7.664008140563965, "step": 6947 }, { "epoch": 0.900999327297926, "grad_norm": 0.9781789183616638, "learning_rate": 2.420740110009817e-06, "loss": 10.354822158813477, "step": 6948 }, { "epoch": 0.9011290048061726, "grad_norm": 0.9104025363922119, "learning_rate": 2.4144669150337194e-06, "loss": 6.97686767578125, "step": 6949 }, { "epoch": 0.9012586823144193, "grad_norm": 0.7561805844306946, "learning_rate": 2.4082016577840595e-06, "loss": 6.274102210998535, "step": 6950 }, { "epoch": 0.901388359822666, "grad_norm": 1.063201904296875, "learning_rate": 2.4019443393059337e-06, "loss": 10.068431854248047, "step": 6951 }, { "epoch": 0.9015180373309127, "grad_norm": 0.8726496696472168, "learning_rate": 2.3956949606431243e-06, "loss": 10.462599754333496, "step": 6952 }, { "epoch": 0.9016477148391594, "grad_norm": 0.9436632394790649, "learning_rate": 2.389453522838092e-06, "loss": 10.391664505004883, "step": 6953 }, { "epoch": 0.9017773923474061, "grad_norm": 0.7877090573310852, "learning_rate": 2.383220026931965e-06, "loss": 7.786310195922852, "step": 6954 }, { "epoch": 0.9019070698556527, "grad_norm": 1.0128064155578613, "learning_rate": 2.37699447396455e-06, "loss": 8.168777465820312, "step": 6955 }, { "epoch": 0.9020367473638994, "grad_norm": 1.0713872909545898, "learning_rate": 2.3707768649743388e-06, "loss": 10.162191390991211, "step": 6956 }, { "epoch": 0.902166424872146, "grad_norm": 0.9867543578147888, "learning_rate": 2.3645672009984687e-06, "loss": 8.903585433959961, "step": 6957 }, { "epoch": 0.9022961023803927, "grad_norm": 1.2315900325775146, "learning_rate": 2.358365483072794e-06, "loss": 12.01697063446045, "step": 6958 }, { "epoch": 0.9024257798886395, "grad_norm": 0.9021179676055908, "learning_rate": 2.35217171223181e-06, "loss": 8.162308692932129, "step": 6959 }, { "epoch": 0.9025554573968861, "grad_norm": 1.1186299324035645, "learning_rate": 2.3459858895087005e-06, "loss": 11.709936141967773, "step": 6960 }, { "epoch": 0.9026851349051328, "grad_norm": 1.1153475046157837, "learning_rate": 2.3398080159353132e-06, "loss": 9.042769432067871, "step": 6961 }, { "epoch": 0.9028148124133795, "grad_norm": 1.0711045265197754, "learning_rate": 2.333638092542184e-06, "loss": 11.537020683288574, "step": 6962 }, { "epoch": 0.9029444899216261, "grad_norm": 1.3748570680618286, "learning_rate": 2.3274761203585126e-06, "loss": 8.833515167236328, "step": 6963 }, { "epoch": 0.9030741674298728, "grad_norm": 1.3419125080108643, "learning_rate": 2.3213221004121766e-06, "loss": 10.449668884277344, "step": 6964 }, { "epoch": 0.9032038449381196, "grad_norm": 1.3878662586212158, "learning_rate": 2.3151760337297325e-06, "loss": 9.229110717773438, "step": 6965 }, { "epoch": 0.9033335224463662, "grad_norm": 0.6805756092071533, "learning_rate": 2.3090379213363933e-06, "loss": 6.3030853271484375, "step": 6966 }, { "epoch": 0.9034631999546129, "grad_norm": 0.7296029329299927, "learning_rate": 2.3029077642560625e-06, "loss": 9.387059211730957, "step": 6967 }, { "epoch": 0.9035928774628595, "grad_norm": 1.2605715990066528, "learning_rate": 2.2967855635113055e-06, "loss": 12.579307556152344, "step": 6968 }, { "epoch": 0.9037225549711062, "grad_norm": 0.8081550598144531, "learning_rate": 2.2906713201233666e-06, "loss": 8.403459548950195, "step": 6969 }, { "epoch": 0.9038522324793529, "grad_norm": 0.727304995059967, "learning_rate": 2.2845650351121573e-06, "loss": 7.417798042297363, "step": 6970 }, { "epoch": 0.9039819099875996, "grad_norm": 0.7182822823524475, "learning_rate": 2.278466709496274e-06, "loss": 7.687911033630371, "step": 6971 }, { "epoch": 0.9041115874958463, "grad_norm": 0.9381871223449707, "learning_rate": 2.2723763442929593e-06, "loss": 6.859492301940918, "step": 6972 }, { "epoch": 0.904241265004093, "grad_norm": 0.7780018448829651, "learning_rate": 2.266293940518166e-06, "loss": 6.174589157104492, "step": 6973 }, { "epoch": 0.9043709425123396, "grad_norm": 1.1348274946212769, "learning_rate": 2.2602194991864778e-06, "loss": 10.675949096679688, "step": 6974 }, { "epoch": 0.9045006200205863, "grad_norm": 0.7382944226264954, "learning_rate": 2.254153021311184e-06, "loss": 7.526097774505615, "step": 6975 }, { "epoch": 0.904630297528833, "grad_norm": 1.3711614608764648, "learning_rate": 2.248094507904225e-06, "loss": 11.290213584899902, "step": 6976 }, { "epoch": 0.9047599750370796, "grad_norm": 0.7297242283821106, "learning_rate": 2.2420439599762256e-06, "loss": 9.135710716247559, "step": 6977 }, { "epoch": 0.9048896525453264, "grad_norm": 1.169295310974121, "learning_rate": 2.2360013785364675e-06, "loss": 10.344354629516602, "step": 6978 }, { "epoch": 0.905019330053573, "grad_norm": 0.9990016222000122, "learning_rate": 2.229966764592922e-06, "loss": 10.71560287475586, "step": 6979 }, { "epoch": 0.9051490075618197, "grad_norm": 1.0275522470474243, "learning_rate": 2.223940119152201e-06, "loss": 9.014507293701172, "step": 6980 }, { "epoch": 0.9052786850700664, "grad_norm": 1.3869037628173828, "learning_rate": 2.217921443219634e-06, "loss": 8.74074935913086, "step": 6981 }, { "epoch": 0.905408362578313, "grad_norm": 0.8529133796691895, "learning_rate": 2.2119107377991775e-06, "loss": 8.388360977172852, "step": 6982 }, { "epoch": 0.9055380400865597, "grad_norm": 1.011277437210083, "learning_rate": 2.205908003893481e-06, "loss": 7.956232070922852, "step": 6983 }, { "epoch": 0.9056677175948065, "grad_norm": 0.7975108623504639, "learning_rate": 2.199913242503848e-06, "loss": 10.255919456481934, "step": 6984 }, { "epoch": 0.9057973951030531, "grad_norm": 0.8447034955024719, "learning_rate": 2.1939264546302797e-06, "loss": 7.649207592010498, "step": 6985 }, { "epoch": 0.9059270726112998, "grad_norm": 0.9422715306282043, "learning_rate": 2.187947641271415e-06, "loss": 8.922677993774414, "step": 6986 }, { "epoch": 0.9060567501195465, "grad_norm": 1.0513604879379272, "learning_rate": 2.181976803424585e-06, "loss": 12.847951889038086, "step": 6987 }, { "epoch": 0.9061864276277931, "grad_norm": 0.7417566776275635, "learning_rate": 2.1760139420857807e-06, "loss": 7.909256935119629, "step": 6988 }, { "epoch": 0.9063161051360398, "grad_norm": 1.2683355808258057, "learning_rate": 2.1700590582496737e-06, "loss": 12.124785423278809, "step": 6989 }, { "epoch": 0.9064457826442864, "grad_norm": 0.8723470568656921, "learning_rate": 2.164112152909581e-06, "loss": 8.243704795837402, "step": 6990 }, { "epoch": 0.9065754601525332, "grad_norm": 1.095232367515564, "learning_rate": 2.1581732270575194e-06, "loss": 8.388075828552246, "step": 6991 }, { "epoch": 0.9067051376607799, "grad_norm": 1.2584186792373657, "learning_rate": 2.152242281684147e-06, "loss": 8.176352500915527, "step": 6992 }, { "epoch": 0.9068348151690265, "grad_norm": 1.3937220573425293, "learning_rate": 2.1463193177788055e-06, "loss": 8.358867645263672, "step": 6993 }, { "epoch": 0.9069644926772732, "grad_norm": 0.9860657453536987, "learning_rate": 2.1404043363295046e-06, "loss": 9.391070365905762, "step": 6994 }, { "epoch": 0.9070941701855199, "grad_norm": 1.3026580810546875, "learning_rate": 2.1344973383229215e-06, "loss": 8.211957931518555, "step": 6995 }, { "epoch": 0.9072238476937665, "grad_norm": 1.1100280284881592, "learning_rate": 2.1285983247443964e-06, "loss": 6.631563186645508, "step": 6996 }, { "epoch": 0.9073535252020133, "grad_norm": 1.5483185052871704, "learning_rate": 2.1227072965779414e-06, "loss": 9.241921424865723, "step": 6997 }, { "epoch": 0.90748320271026, "grad_norm": 1.069425106048584, "learning_rate": 2.116824254806249e-06, "loss": 8.34054183959961, "step": 6998 }, { "epoch": 0.9076128802185066, "grad_norm": 1.3125853538513184, "learning_rate": 2.11094920041065e-06, "loss": 10.245686531066895, "step": 6999 }, { "epoch": 0.9077425577267533, "grad_norm": 1.2084689140319824, "learning_rate": 2.1050821343711726e-06, "loss": 11.125064849853516, "step": 7000 }, { "epoch": 0.9078722352349999, "grad_norm": 0.9000130295753479, "learning_rate": 2.0992230576664886e-06, "loss": 10.268261909484863, "step": 7001 }, { "epoch": 0.9080019127432466, "grad_norm": 0.9258474111557007, "learning_rate": 2.0933719712739552e-06, "loss": 8.009147644042969, "step": 7002 }, { "epoch": 0.9081315902514934, "grad_norm": 1.0730817317962646, "learning_rate": 2.087528876169592e-06, "loss": 7.751770496368408, "step": 7003 }, { "epoch": 0.90826126775974, "grad_norm": 1.069211721420288, "learning_rate": 2.0816937733280915e-06, "loss": 9.217644691467285, "step": 7004 }, { "epoch": 0.9083909452679867, "grad_norm": 0.9293699264526367, "learning_rate": 2.0758666637227865e-06, "loss": 10.35901165008545, "step": 7005 }, { "epoch": 0.9085206227762334, "grad_norm": 0.976128876209259, "learning_rate": 2.0700475483257098e-06, "loss": 6.804110050201416, "step": 7006 }, { "epoch": 0.90865030028448, "grad_norm": 0.9441819787025452, "learning_rate": 2.0642364281075356e-06, "loss": 11.610749244689941, "step": 7007 }, { "epoch": 0.9087799777927267, "grad_norm": 0.9069513082504272, "learning_rate": 2.058433304037627e-06, "loss": 9.514958381652832, "step": 7008 }, { "epoch": 0.9089096553009733, "grad_norm": 1.411186933517456, "learning_rate": 2.0526381770839875e-06, "loss": 8.892207145690918, "step": 7009 }, { "epoch": 0.9090393328092201, "grad_norm": 0.8582251667976379, "learning_rate": 2.0468510482133106e-06, "loss": 5.10733699798584, "step": 7010 }, { "epoch": 0.9091690103174668, "grad_norm": 0.8350895047187805, "learning_rate": 2.041071918390941e-06, "loss": 7.22526741027832, "step": 7011 }, { "epoch": 0.9092986878257134, "grad_norm": 0.7810630202293396, "learning_rate": 2.035300788580896e-06, "loss": 6.60995626449585, "step": 7012 }, { "epoch": 0.9094283653339601, "grad_norm": 0.7654949426651001, "learning_rate": 2.0295376597458503e-06, "loss": 7.996840953826904, "step": 7013 }, { "epoch": 0.9095580428422068, "grad_norm": 0.6931304931640625, "learning_rate": 2.023782532847157e-06, "loss": 8.00944995880127, "step": 7014 }, { "epoch": 0.9096877203504534, "grad_norm": 0.9729690551757812, "learning_rate": 2.0180354088448206e-06, "loss": 9.966058731079102, "step": 7015 }, { "epoch": 0.9098173978587002, "grad_norm": 1.2200260162353516, "learning_rate": 2.0122962886975184e-06, "loss": 8.748454093933105, "step": 7016 }, { "epoch": 0.9099470753669469, "grad_norm": 0.9681821465492249, "learning_rate": 2.00656517336259e-06, "loss": 9.748590469360352, "step": 7017 }, { "epoch": 0.9100767528751935, "grad_norm": 0.8727070689201355, "learning_rate": 2.0008420637960435e-06, "loss": 8.854181289672852, "step": 7018 }, { "epoch": 0.9102064303834402, "grad_norm": 1.0117298364639282, "learning_rate": 1.995126960952548e-06, "loss": 9.972718238830566, "step": 7019 }, { "epoch": 0.9103361078916868, "grad_norm": 0.8573256731033325, "learning_rate": 1.9894198657854358e-06, "loss": 10.2815523147583, "step": 7020 }, { "epoch": 0.9104657853999335, "grad_norm": 1.3196743726730347, "learning_rate": 1.9837207792467117e-06, "loss": 10.241808891296387, "step": 7021 }, { "epoch": 0.9105954629081802, "grad_norm": 1.3068833351135254, "learning_rate": 1.978029702287032e-06, "loss": 8.561347007751465, "step": 7022 }, { "epoch": 0.9107251404164269, "grad_norm": 1.0616682767868042, "learning_rate": 1.9723466358557265e-06, "loss": 11.245555877685547, "step": 7023 }, { "epoch": 0.9108548179246736, "grad_norm": 0.7276158332824707, "learning_rate": 1.9666715809007806e-06, "loss": 8.366886138916016, "step": 7024 }, { "epoch": 0.9109844954329203, "grad_norm": 0.8359112739562988, "learning_rate": 1.9610045383688546e-06, "loss": 7.469364166259766, "step": 7025 }, { "epoch": 0.9111141729411669, "grad_norm": 0.6872832179069519, "learning_rate": 1.9553455092052585e-06, "loss": 7.605480670928955, "step": 7026 }, { "epoch": 0.9112438504494136, "grad_norm": 1.16484534740448, "learning_rate": 1.9496944943539872e-06, "loss": 10.14468765258789, "step": 7027 }, { "epoch": 0.9113735279576602, "grad_norm": 0.652437686920166, "learning_rate": 1.944051494757665e-06, "loss": 4.387223720550537, "step": 7028 }, { "epoch": 0.911503205465907, "grad_norm": 0.9587686657905579, "learning_rate": 1.9384165113576213e-06, "loss": 7.456450462341309, "step": 7029 }, { "epoch": 0.9116328829741537, "grad_norm": 0.804307222366333, "learning_rate": 1.932789545093799e-06, "loss": 4.73146390914917, "step": 7030 }, { "epoch": 0.9117625604824003, "grad_norm": 0.9452175498008728, "learning_rate": 1.9271705969048535e-06, "loss": 7.8389081954956055, "step": 7031 }, { "epoch": 0.911892237990647, "grad_norm": 0.9985085725784302, "learning_rate": 1.921559667728068e-06, "loss": 8.080755233764648, "step": 7032 }, { "epoch": 0.9120219154988937, "grad_norm": 0.8364107012748718, "learning_rate": 1.9159567584994043e-06, "loss": 9.167988777160645, "step": 7033 }, { "epoch": 0.9121515930071403, "grad_norm": 1.2771104574203491, "learning_rate": 1.9103618701534764e-06, "loss": 9.728588104248047, "step": 7034 }, { "epoch": 0.9122812705153871, "grad_norm": 1.0441073179244995, "learning_rate": 1.9047750036235768e-06, "loss": 9.844338417053223, "step": 7035 }, { "epoch": 0.9124109480236338, "grad_norm": 0.9975278377532959, "learning_rate": 1.899196159841632e-06, "loss": 10.562762260437012, "step": 7036 }, { "epoch": 0.9125406255318804, "grad_norm": 1.057729959487915, "learning_rate": 1.8936253397382643e-06, "loss": 10.56801986694336, "step": 7037 }, { "epoch": 0.9126703030401271, "grad_norm": 0.94611656665802, "learning_rate": 1.8880625442427248e-06, "loss": 9.307435989379883, "step": 7038 }, { "epoch": 0.9127999805483737, "grad_norm": 1.328809380531311, "learning_rate": 1.88250777428296e-06, "loss": 11.254449844360352, "step": 7039 }, { "epoch": 0.9129296580566204, "grad_norm": 0.6478612422943115, "learning_rate": 1.8769610307855346e-06, "loss": 5.548847675323486, "step": 7040 }, { "epoch": 0.9130593355648671, "grad_norm": 1.0884146690368652, "learning_rate": 1.8714223146757193e-06, "loss": 7.925542831420898, "step": 7041 }, { "epoch": 0.9131890130731138, "grad_norm": 0.8419432044029236, "learning_rate": 1.8658916268774195e-06, "loss": 7.362209320068359, "step": 7042 }, { "epoch": 0.9133186905813605, "grad_norm": 0.7584822773933411, "learning_rate": 1.860368968313203e-06, "loss": 7.459639549255371, "step": 7043 }, { "epoch": 0.9134483680896072, "grad_norm": 0.8808445334434509, "learning_rate": 1.8548543399043107e-06, "loss": 10.54730224609375, "step": 7044 }, { "epoch": 0.9135780455978538, "grad_norm": 0.8767423033714294, "learning_rate": 1.8493477425706285e-06, "loss": 8.639084815979004, "step": 7045 }, { "epoch": 0.9137077231061005, "grad_norm": 0.8480942249298096, "learning_rate": 1.8438491772307165e-06, "loss": 4.979431629180908, "step": 7046 }, { "epoch": 0.9138374006143472, "grad_norm": 0.874148964881897, "learning_rate": 1.8383586448017797e-06, "loss": 9.833001136779785, "step": 7047 }, { "epoch": 0.9139670781225939, "grad_norm": 1.1078059673309326, "learning_rate": 1.8328761461996968e-06, "loss": 10.134331703186035, "step": 7048 }, { "epoch": 0.9140967556308406, "grad_norm": 0.569086492061615, "learning_rate": 1.8274016823390027e-06, "loss": 5.556025505065918, "step": 7049 }, { "epoch": 0.9142264331390872, "grad_norm": 0.8080400824546814, "learning_rate": 1.8219352541328949e-06, "loss": 7.9993977546691895, "step": 7050 }, { "epoch": 0.9143561106473339, "grad_norm": 0.9728440046310425, "learning_rate": 1.816476862493216e-06, "loss": 7.928037643432617, "step": 7051 }, { "epoch": 0.9144857881555806, "grad_norm": 1.0181071758270264, "learning_rate": 1.8110265083304878e-06, "loss": 13.015044212341309, "step": 7052 }, { "epoch": 0.9146154656638272, "grad_norm": 1.0652883052825928, "learning_rate": 1.8055841925538718e-06, "loss": 6.807304859161377, "step": 7053 }, { "epoch": 0.9147451431720739, "grad_norm": 0.7828730940818787, "learning_rate": 1.8001499160712087e-06, "loss": 7.853794574737549, "step": 7054 }, { "epoch": 0.9148748206803207, "grad_norm": 0.949606716632843, "learning_rate": 1.7947236797889843e-06, "loss": 5.10809326171875, "step": 7055 }, { "epoch": 0.9150044981885673, "grad_norm": 0.922767162322998, "learning_rate": 1.7893054846123525e-06, "loss": 10.01536750793457, "step": 7056 }, { "epoch": 0.915134175696814, "grad_norm": 1.100157618522644, "learning_rate": 1.7838953314451068e-06, "loss": 10.374333381652832, "step": 7057 }, { "epoch": 0.9152638532050607, "grad_norm": 1.111693263053894, "learning_rate": 1.7784932211897253e-06, "loss": 8.770692825317383, "step": 7058 }, { "epoch": 0.9153935307133073, "grad_norm": 1.1711417436599731, "learning_rate": 1.7730991547473263e-06, "loss": 10.50253963470459, "step": 7059 }, { "epoch": 0.915523208221554, "grad_norm": 1.129801869392395, "learning_rate": 1.767713133017701e-06, "loss": 7.034911155700684, "step": 7060 }, { "epoch": 0.9156528857298007, "grad_norm": 1.0079511404037476, "learning_rate": 1.7623351568992808e-06, "loss": 9.760856628417969, "step": 7061 }, { "epoch": 0.9157825632380474, "grad_norm": 1.0953353643417358, "learning_rate": 1.7569652272891646e-06, "loss": 8.956334114074707, "step": 7062 }, { "epoch": 0.9159122407462941, "grad_norm": 0.8687152862548828, "learning_rate": 1.7516033450831082e-06, "loss": 13.624184608459473, "step": 7063 }, { "epoch": 0.9160419182545407, "grad_norm": 0.9851453304290771, "learning_rate": 1.7462495111755351e-06, "loss": 9.560344696044922, "step": 7064 }, { "epoch": 0.9161715957627874, "grad_norm": 0.8729246854782104, "learning_rate": 1.740903726459503e-06, "loss": 9.395787239074707, "step": 7065 }, { "epoch": 0.9163012732710341, "grad_norm": 1.464927077293396, "learning_rate": 1.7355659918267486e-06, "loss": 10.663948059082031, "step": 7066 }, { "epoch": 0.9164309507792808, "grad_norm": 0.7134718894958496, "learning_rate": 1.7302363081676543e-06, "loss": 12.561038970947266, "step": 7067 }, { "epoch": 0.9165606282875275, "grad_norm": 0.9602705836296082, "learning_rate": 1.7249146763712641e-06, "loss": 11.011898040771484, "step": 7068 }, { "epoch": 0.9166903057957742, "grad_norm": 1.1169770956039429, "learning_rate": 1.7196010973252796e-06, "loss": 10.959811210632324, "step": 7069 }, { "epoch": 0.9168199833040208, "grad_norm": 1.1190556287765503, "learning_rate": 1.7142955719160525e-06, "loss": 7.688549041748047, "step": 7070 }, { "epoch": 0.9169496608122675, "grad_norm": 1.1012165546417236, "learning_rate": 1.7089981010285972e-06, "loss": 10.75290298461914, "step": 7071 }, { "epoch": 0.9170793383205141, "grad_norm": 0.7148227095603943, "learning_rate": 1.70370868554659e-06, "loss": 6.45005989074707, "step": 7072 }, { "epoch": 0.9172090158287608, "grad_norm": 1.1922343969345093, "learning_rate": 1.6984273263523421e-06, "loss": 9.324148178100586, "step": 7073 }, { "epoch": 0.9173386933370076, "grad_norm": 0.992793619632721, "learning_rate": 1.6931540243268484e-06, "loss": 7.028200626373291, "step": 7074 }, { "epoch": 0.9174683708452542, "grad_norm": 0.9106638431549072, "learning_rate": 1.687888780349739e-06, "loss": 7.741245746612549, "step": 7075 }, { "epoch": 0.9175980483535009, "grad_norm": 1.2035404443740845, "learning_rate": 1.6826315952993166e-06, "loss": 8.188064575195312, "step": 7076 }, { "epoch": 0.9177277258617476, "grad_norm": 0.699881911277771, "learning_rate": 1.677382470052513e-06, "loss": 7.853133201599121, "step": 7077 }, { "epoch": 0.9178574033699942, "grad_norm": 1.0022170543670654, "learning_rate": 1.672141405484956e-06, "loss": 7.515993118286133, "step": 7078 }, { "epoch": 0.9179870808782409, "grad_norm": 1.258668303489685, "learning_rate": 1.6669084024708847e-06, "loss": 11.101861953735352, "step": 7079 }, { "epoch": 0.9181167583864877, "grad_norm": 1.039910912513733, "learning_rate": 1.6616834618832232e-06, "loss": 11.225138664245605, "step": 7080 }, { "epoch": 0.9182464358947343, "grad_norm": 0.9244809746742249, "learning_rate": 1.6564665845935467e-06, "loss": 10.017921447753906, "step": 7081 }, { "epoch": 0.918376113402981, "grad_norm": 1.2098568677902222, "learning_rate": 1.65125777147207e-06, "loss": 8.416425704956055, "step": 7082 }, { "epoch": 0.9185057909112276, "grad_norm": 0.7076562643051147, "learning_rate": 1.6460570233876925e-06, "loss": 6.249147415161133, "step": 7083 }, { "epoch": 0.9186354684194743, "grad_norm": 0.9188236594200134, "learning_rate": 1.640864341207926e-06, "loss": 7.342129230499268, "step": 7084 }, { "epoch": 0.918765145927721, "grad_norm": 1.1870309114456177, "learning_rate": 1.635679725798972e-06, "loss": 8.23884391784668, "step": 7085 }, { "epoch": 0.9188948234359676, "grad_norm": 0.9645946621894836, "learning_rate": 1.6305031780256719e-06, "loss": 8.99804401397705, "step": 7086 }, { "epoch": 0.9190245009442144, "grad_norm": 0.9024888873100281, "learning_rate": 1.625334698751535e-06, "loss": 8.912680625915527, "step": 7087 }, { "epoch": 0.9191541784524611, "grad_norm": 1.1838152408599854, "learning_rate": 1.6201742888386885e-06, "loss": 6.785414695739746, "step": 7088 }, { "epoch": 0.9192838559607077, "grad_norm": 0.8170554637908936, "learning_rate": 1.6150219491479657e-06, "loss": 7.248749256134033, "step": 7089 }, { "epoch": 0.9194135334689544, "grad_norm": 0.909268856048584, "learning_rate": 1.6098776805388071e-06, "loss": 7.281051158905029, "step": 7090 }, { "epoch": 0.919543210977201, "grad_norm": 1.048601746559143, "learning_rate": 1.6047414838693375e-06, "loss": 10.112199783325195, "step": 7091 }, { "epoch": 0.9196728884854477, "grad_norm": 0.7155193090438843, "learning_rate": 1.5996133599963214e-06, "loss": 8.142766952514648, "step": 7092 }, { "epoch": 0.9198025659936945, "grad_norm": 0.920514702796936, "learning_rate": 1.5944933097751746e-06, "loss": 7.5028395652771, "step": 7093 }, { "epoch": 0.9199322435019411, "grad_norm": 1.0861403942108154, "learning_rate": 1.589381334059975e-06, "loss": 7.862082481384277, "step": 7094 }, { "epoch": 0.9200619210101878, "grad_norm": 1.4389318227767944, "learning_rate": 1.5842774337034515e-06, "loss": 9.979228973388672, "step": 7095 }, { "epoch": 0.9201915985184345, "grad_norm": 1.0671305656433105, "learning_rate": 1.579181609556979e-06, "loss": 9.707022666931152, "step": 7096 }, { "epoch": 0.9203212760266811, "grad_norm": 0.7470241785049438, "learning_rate": 1.5740938624705993e-06, "loss": 5.48410177230835, "step": 7097 }, { "epoch": 0.9204509535349278, "grad_norm": 0.8391563296318054, "learning_rate": 1.5690141932929837e-06, "loss": 8.460325241088867, "step": 7098 }, { "epoch": 0.9205806310431746, "grad_norm": 0.6242313981056213, "learning_rate": 1.563942602871482e-06, "loss": 4.834468364715576, "step": 7099 }, { "epoch": 0.9207103085514212, "grad_norm": 0.7122131586074829, "learning_rate": 1.558879092052079e-06, "loss": 8.4654541015625, "step": 7100 }, { "epoch": 0.9208399860596679, "grad_norm": 1.174922227859497, "learning_rate": 1.553823661679421e-06, "loss": 9.871655464172363, "step": 7101 }, { "epoch": 0.9209696635679145, "grad_norm": 1.072477102279663, "learning_rate": 1.5487763125967946e-06, "loss": 8.729425430297852, "step": 7102 }, { "epoch": 0.9210993410761612, "grad_norm": 0.687796950340271, "learning_rate": 1.5437370456461542e-06, "loss": 6.54140043258667, "step": 7103 }, { "epoch": 0.9212290185844079, "grad_norm": 0.8569642305374146, "learning_rate": 1.5387058616680993e-06, "loss": 9.166731834411621, "step": 7104 }, { "epoch": 0.9213586960926545, "grad_norm": 0.6719187498092651, "learning_rate": 1.53368276150187e-06, "loss": 4.711380958557129, "step": 7105 }, { "epoch": 0.9214883736009013, "grad_norm": 0.9638833999633789, "learning_rate": 1.5286677459853793e-06, "loss": 11.452695846557617, "step": 7106 }, { "epoch": 0.921618051109148, "grad_norm": 0.6935247778892517, "learning_rate": 1.5236608159551692e-06, "loss": 6.703741073608398, "step": 7107 }, { "epoch": 0.9217477286173946, "grad_norm": 1.178357720375061, "learning_rate": 1.5186619722464545e-06, "loss": 11.637123107910156, "step": 7108 }, { "epoch": 0.9218774061256413, "grad_norm": 0.9000892639160156, "learning_rate": 1.51367121569308e-06, "loss": 9.485389709472656, "step": 7109 }, { "epoch": 0.922007083633888, "grad_norm": 0.8265183568000793, "learning_rate": 1.508688547127568e-06, "loss": 8.182872772216797, "step": 7110 }, { "epoch": 0.9221367611421346, "grad_norm": 1.0088826417922974, "learning_rate": 1.5037139673810596e-06, "loss": 7.655314922332764, "step": 7111 }, { "epoch": 0.9222664386503814, "grad_norm": 1.1826188564300537, "learning_rate": 1.4987474772833686e-06, "loss": 8.86557388305664, "step": 7112 }, { "epoch": 0.922396116158628, "grad_norm": 0.9205441474914551, "learning_rate": 1.4937890776629548e-06, "loss": 6.472315311431885, "step": 7113 }, { "epoch": 0.9225257936668747, "grad_norm": 1.1337213516235352, "learning_rate": 1.4888387693469286e-06, "loss": 10.607056617736816, "step": 7114 }, { "epoch": 0.9226554711751214, "grad_norm": 1.2597599029541016, "learning_rate": 1.483896553161046e-06, "loss": 11.791094779968262, "step": 7115 }, { "epoch": 0.922785148683368, "grad_norm": 0.723415195941925, "learning_rate": 1.47896242992972e-06, "loss": 5.043939113616943, "step": 7116 }, { "epoch": 0.9229148261916147, "grad_norm": 0.9940682053565979, "learning_rate": 1.4740364004759922e-06, "loss": 6.978569030761719, "step": 7117 }, { "epoch": 0.9230445036998614, "grad_norm": 1.2713922262191772, "learning_rate": 1.4691184656215996e-06, "loss": 10.734814643859863, "step": 7118 }, { "epoch": 0.9231741812081081, "grad_norm": 0.9295043349266052, "learning_rate": 1.464208626186886e-06, "loss": 7.005803108215332, "step": 7119 }, { "epoch": 0.9233038587163548, "grad_norm": 0.6447793245315552, "learning_rate": 1.4593068829908684e-06, "loss": 6.221005439758301, "step": 7120 }, { "epoch": 0.9234335362246014, "grad_norm": 0.9815064668655396, "learning_rate": 1.454413236851193e-06, "loss": 5.084324836730957, "step": 7121 }, { "epoch": 0.9235632137328481, "grad_norm": 0.9432092905044556, "learning_rate": 1.449527688584179e-06, "loss": 8.192676544189453, "step": 7122 }, { "epoch": 0.9236928912410948, "grad_norm": 1.243248462677002, "learning_rate": 1.444650239004769e-06, "loss": 8.233400344848633, "step": 7123 }, { "epoch": 0.9238225687493414, "grad_norm": 1.2360575199127197, "learning_rate": 1.439780888926584e-06, "loss": 11.707324028015137, "step": 7124 }, { "epoch": 0.9239522462575882, "grad_norm": 0.9317976832389832, "learning_rate": 1.434919639161869e-06, "loss": 9.451879501342773, "step": 7125 }, { "epoch": 0.9240819237658349, "grad_norm": 0.8103171586990356, "learning_rate": 1.4300664905215312e-06, "loss": 7.8513593673706055, "step": 7126 }, { "epoch": 0.9242116012740815, "grad_norm": 0.8986542224884033, "learning_rate": 1.4252214438151224e-06, "loss": 6.015373706817627, "step": 7127 }, { "epoch": 0.9243412787823282, "grad_norm": 0.9084872007369995, "learning_rate": 1.4203844998508353e-06, "loss": 9.401310920715332, "step": 7128 }, { "epoch": 0.9244709562905749, "grad_norm": 1.3298695087432861, "learning_rate": 1.4155556594355357e-06, "loss": 9.299243927001953, "step": 7129 }, { "epoch": 0.9246006337988215, "grad_norm": 1.0075539350509644, "learning_rate": 1.4107349233747014e-06, "loss": 9.992796897888184, "step": 7130 }, { "epoch": 0.9247303113070683, "grad_norm": 1.0007787942886353, "learning_rate": 1.4059222924724946e-06, "loss": 7.387679576873779, "step": 7131 }, { "epoch": 0.924859988815315, "grad_norm": 1.0068299770355225, "learning_rate": 1.401117767531701e-06, "loss": 10.207427024841309, "step": 7132 }, { "epoch": 0.9249896663235616, "grad_norm": 0.9226130247116089, "learning_rate": 1.3963213493537575e-06, "loss": 9.55323600769043, "step": 7133 }, { "epoch": 0.9251193438318083, "grad_norm": 1.1639032363891602, "learning_rate": 1.391533038738757e-06, "loss": 11.626649856567383, "step": 7134 }, { "epoch": 0.9252490213400549, "grad_norm": 0.8034183382987976, "learning_rate": 1.3867528364854443e-06, "loss": 5.960277080535889, "step": 7135 }, { "epoch": 0.9253786988483016, "grad_norm": 0.7341203689575195, "learning_rate": 1.381980743391187e-06, "loss": 9.882596015930176, "step": 7136 }, { "epoch": 0.9255083763565483, "grad_norm": 0.9032534956932068, "learning_rate": 1.3772167602520314e-06, "loss": 8.270821571350098, "step": 7137 }, { "epoch": 0.925638053864795, "grad_norm": 0.8365988731384277, "learning_rate": 1.3724608878626422e-06, "loss": 8.597944259643555, "step": 7138 }, { "epoch": 0.9257677313730417, "grad_norm": 1.3034623861312866, "learning_rate": 1.3677131270163623e-06, "loss": 11.265301704406738, "step": 7139 }, { "epoch": 0.9258974088812884, "grad_norm": 1.130183458328247, "learning_rate": 1.3629734785051417e-06, "loss": 7.786707401275635, "step": 7140 }, { "epoch": 0.926027086389535, "grad_norm": 1.2659778594970703, "learning_rate": 1.3582419431196203e-06, "loss": 8.994553565979004, "step": 7141 }, { "epoch": 0.9261567638977817, "grad_norm": 1.206257700920105, "learning_rate": 1.3535185216490498e-06, "loss": 12.2443265914917, "step": 7142 }, { "epoch": 0.9262864414060283, "grad_norm": 1.121379017829895, "learning_rate": 1.3488032148813557e-06, "loss": 8.574677467346191, "step": 7143 }, { "epoch": 0.9264161189142751, "grad_norm": 1.1492207050323486, "learning_rate": 1.344096023603081e-06, "loss": 9.097315788269043, "step": 7144 }, { "epoch": 0.9265457964225218, "grad_norm": 1.040034294128418, "learning_rate": 1.3393969485994418e-06, "loss": 9.596826553344727, "step": 7145 }, { "epoch": 0.9266754739307684, "grad_norm": 0.8019112944602966, "learning_rate": 1.3347059906542836e-06, "loss": 11.269335746765137, "step": 7146 }, { "epoch": 0.9268051514390151, "grad_norm": 0.9956006407737732, "learning_rate": 1.3300231505501082e-06, "loss": 6.494260787963867, "step": 7147 }, { "epoch": 0.9269348289472618, "grad_norm": 0.9642165899276733, "learning_rate": 1.3253484290680518e-06, "loss": 5.440478324890137, "step": 7148 }, { "epoch": 0.9270645064555084, "grad_norm": 0.9725216031074524, "learning_rate": 1.3206818269879128e-06, "loss": 8.390830993652344, "step": 7149 }, { "epoch": 0.9271941839637552, "grad_norm": 1.1006135940551758, "learning_rate": 1.3160233450881133e-06, "loss": 10.926395416259766, "step": 7150 }, { "epoch": 0.9273238614720019, "grad_norm": 1.4859403371810913, "learning_rate": 1.3113729841457478e-06, "loss": 11.120616912841797, "step": 7151 }, { "epoch": 0.9274535389802485, "grad_norm": 0.8720375895500183, "learning_rate": 1.3067307449365297e-06, "loss": 5.604369640350342, "step": 7152 }, { "epoch": 0.9275832164884952, "grad_norm": 0.8401952385902405, "learning_rate": 1.3020966282348336e-06, "loss": 9.596596717834473, "step": 7153 }, { "epoch": 0.9277128939967418, "grad_norm": 1.1623613834381104, "learning_rate": 1.297470634813669e-06, "loss": 8.099085807800293, "step": 7154 }, { "epoch": 0.9278425715049885, "grad_norm": 0.8212493062019348, "learning_rate": 1.292852765444713e-06, "loss": 7.334105968475342, "step": 7155 }, { "epoch": 0.9279722490132352, "grad_norm": 0.817441463470459, "learning_rate": 1.2882430208982498e-06, "loss": 7.947386264801025, "step": 7156 }, { "epoch": 0.9281019265214819, "grad_norm": 0.6576338410377502, "learning_rate": 1.2836414019432364e-06, "loss": 7.930480003356934, "step": 7157 }, { "epoch": 0.9282316040297286, "grad_norm": 0.9437841773033142, "learning_rate": 1.2790479093472807e-06, "loss": 7.2888031005859375, "step": 7158 }, { "epoch": 0.9283612815379753, "grad_norm": 0.9614472985267639, "learning_rate": 1.2744625438766033e-06, "loss": 9.660441398620605, "step": 7159 }, { "epoch": 0.9284909590462219, "grad_norm": 0.8553181886672974, "learning_rate": 1.2698853062960981e-06, "loss": 6.904082298278809, "step": 7160 }, { "epoch": 0.9286206365544686, "grad_norm": 0.7488827705383301, "learning_rate": 1.2653161973692873e-06, "loss": 6.126260757446289, "step": 7161 }, { "epoch": 0.9287503140627152, "grad_norm": 1.3301029205322266, "learning_rate": 1.2607552178583503e-06, "loss": 10.328644752502441, "step": 7162 }, { "epoch": 0.928879991570962, "grad_norm": 0.9314990043640137, "learning_rate": 1.2562023685240898e-06, "loss": 8.662071228027344, "step": 7163 }, { "epoch": 0.9290096690792087, "grad_norm": 1.0282866954803467, "learning_rate": 1.2516576501259757e-06, "loss": 7.505490779876709, "step": 7164 }, { "epoch": 0.9291393465874553, "grad_norm": 0.8886368870735168, "learning_rate": 1.247121063422102e-06, "loss": 6.704043388366699, "step": 7165 }, { "epoch": 0.929269024095702, "grad_norm": 0.9492023587226868, "learning_rate": 1.2425926091692297e-06, "loss": 9.316299438476562, "step": 7166 }, { "epoch": 0.9293987016039487, "grad_norm": 0.7159484624862671, "learning_rate": 1.238072288122727e-06, "loss": 9.029284477233887, "step": 7167 }, { "epoch": 0.9295283791121953, "grad_norm": 1.1934884786605835, "learning_rate": 1.2335601010366515e-06, "loss": 9.33862590789795, "step": 7168 }, { "epoch": 0.929658056620442, "grad_norm": 1.7733471393585205, "learning_rate": 1.2290560486636571e-06, "loss": 11.930951118469238, "step": 7169 }, { "epoch": 0.9297877341286888, "grad_norm": 1.164530873298645, "learning_rate": 1.2245601317550814e-06, "loss": 9.637693405151367, "step": 7170 }, { "epoch": 0.9299174116369354, "grad_norm": 0.6521235108375549, "learning_rate": 1.2200723510608747e-06, "loss": 6.501776218414307, "step": 7171 }, { "epoch": 0.9300470891451821, "grad_norm": 1.1586177349090576, "learning_rate": 1.215592707329649e-06, "loss": 8.602422714233398, "step": 7172 }, { "epoch": 0.9301767666534287, "grad_norm": 1.2428280115127563, "learning_rate": 1.211121201308646e-06, "loss": 10.678914070129395, "step": 7173 }, { "epoch": 0.9303064441616754, "grad_norm": 0.9133861064910889, "learning_rate": 1.2066578337437685e-06, "loss": 8.843559265136719, "step": 7174 }, { "epoch": 0.9304361216699221, "grad_norm": 0.6954720616340637, "learning_rate": 1.2022026053795266e-06, "loss": 6.328213214874268, "step": 7175 }, { "epoch": 0.9305657991781688, "grad_norm": 1.118800401687622, "learning_rate": 1.1977555169591204e-06, "loss": 8.553155899047852, "step": 7176 }, { "epoch": 0.9306954766864155, "grad_norm": 1.495034098625183, "learning_rate": 1.1933165692243508e-06, "loss": 8.618005752563477, "step": 7177 }, { "epoch": 0.9308251541946622, "grad_norm": 1.1404907703399658, "learning_rate": 1.1888857629156869e-06, "loss": 8.028762817382812, "step": 7178 }, { "epoch": 0.9309548317029088, "grad_norm": 1.0367764234542847, "learning_rate": 1.1844630987722205e-06, "loss": 7.767199993133545, "step": 7179 }, { "epoch": 0.9310845092111555, "grad_norm": 0.954057514667511, "learning_rate": 1.1800485775317004e-06, "loss": 8.903603553771973, "step": 7180 }, { "epoch": 0.9312141867194021, "grad_norm": 1.505353569984436, "learning_rate": 1.17564219993051e-06, "loss": 9.515132904052734, "step": 7181 }, { "epoch": 0.9313438642276489, "grad_norm": 1.1064954996109009, "learning_rate": 1.1712439667036834e-06, "loss": 9.174851417541504, "step": 7182 }, { "epoch": 0.9314735417358956, "grad_norm": 0.5978653430938721, "learning_rate": 1.1668538785848726e-06, "loss": 5.9898200035095215, "step": 7183 }, { "epoch": 0.9316032192441422, "grad_norm": 1.0719325542449951, "learning_rate": 1.1624719363063974e-06, "loss": 11.010632514953613, "step": 7184 }, { "epoch": 0.9317328967523889, "grad_norm": 1.2944693565368652, "learning_rate": 1.1580981405992064e-06, "loss": 10.396583557128906, "step": 7185 }, { "epoch": 0.9318625742606356, "grad_norm": 0.765526533126831, "learning_rate": 1.1537324921928937e-06, "loss": 7.718038082122803, "step": 7186 }, { "epoch": 0.9319922517688822, "grad_norm": 1.1776609420776367, "learning_rate": 1.1493749918156827e-06, "loss": 10.413354873657227, "step": 7187 }, { "epoch": 0.9321219292771289, "grad_norm": 1.4678736925125122, "learning_rate": 1.1450256401944581e-06, "loss": 8.30583667755127, "step": 7188 }, { "epoch": 0.9322516067853757, "grad_norm": 1.13350248336792, "learning_rate": 1.1406844380547287e-06, "loss": 10.51980972290039, "step": 7189 }, { "epoch": 0.9323812842936223, "grad_norm": 1.0613396167755127, "learning_rate": 1.1363513861206432e-06, "loss": 7.849520206451416, "step": 7190 }, { "epoch": 0.932510961801869, "grad_norm": 0.8523786067962646, "learning_rate": 1.1320264851150065e-06, "loss": 4.890537738800049, "step": 7191 }, { "epoch": 0.9326406393101156, "grad_norm": 1.006852149963379, "learning_rate": 1.1277097357592414e-06, "loss": 6.919662952423096, "step": 7192 }, { "epoch": 0.9327703168183623, "grad_norm": 1.228895664215088, "learning_rate": 1.123401138773439e-06, "loss": 9.346928596496582, "step": 7193 }, { "epoch": 0.932899994326609, "grad_norm": 1.0553507804870605, "learning_rate": 1.119100694876296e-06, "loss": 7.345113277435303, "step": 7194 }, { "epoch": 0.9330296718348557, "grad_norm": 0.6150251030921936, "learning_rate": 1.1148084047851893e-06, "loss": 6.514845848083496, "step": 7195 }, { "epoch": 0.9331593493431024, "grad_norm": 1.168062448501587, "learning_rate": 1.1105242692160955e-06, "loss": 8.558286666870117, "step": 7196 }, { "epoch": 0.9332890268513491, "grad_norm": 1.2607853412628174, "learning_rate": 1.1062482888836656e-06, "loss": 10.557737350463867, "step": 7197 }, { "epoch": 0.9334187043595957, "grad_norm": 1.2436285018920898, "learning_rate": 1.1019804645011621e-06, "loss": 8.004447937011719, "step": 7198 }, { "epoch": 0.9335483818678424, "grad_norm": 0.8122764229774475, "learning_rate": 1.0977207967805048e-06, "loss": 7.338307857513428, "step": 7199 }, { "epoch": 0.933678059376089, "grad_norm": 0.8578042984008789, "learning_rate": 1.093469286432247e-06, "loss": 8.060070991516113, "step": 7200 }, { "epoch": 0.9338077368843357, "grad_norm": 0.9110260605812073, "learning_rate": 1.089225934165583e-06, "loss": 9.275403022766113, "step": 7201 }, { "epoch": 0.9339374143925825, "grad_norm": 1.247367262840271, "learning_rate": 1.0849907406883408e-06, "loss": 9.652021408081055, "step": 7202 }, { "epoch": 0.9340670919008291, "grad_norm": 0.964052677154541, "learning_rate": 1.0807637067069997e-06, "loss": 7.588651657104492, "step": 7203 }, { "epoch": 0.9341967694090758, "grad_norm": 1.3019022941589355, "learning_rate": 1.076544832926657e-06, "loss": 9.576380729675293, "step": 7204 }, { "epoch": 0.9343264469173225, "grad_norm": 0.857427179813385, "learning_rate": 1.0723341200510716e-06, "loss": 7.417229652404785, "step": 7205 }, { "epoch": 0.9344561244255691, "grad_norm": 1.0265898704528809, "learning_rate": 1.0681315687826266e-06, "loss": 8.996950149536133, "step": 7206 }, { "epoch": 0.9345858019338158, "grad_norm": 0.777021050453186, "learning_rate": 1.0639371798223552e-06, "loss": 6.4118804931640625, "step": 7207 }, { "epoch": 0.9347154794420626, "grad_norm": 1.4928624629974365, "learning_rate": 1.059750953869909e-06, "loss": 11.320979118347168, "step": 7208 }, { "epoch": 0.9348451569503092, "grad_norm": 0.7077339887619019, "learning_rate": 1.0555728916236019e-06, "loss": 10.279620170593262, "step": 7209 }, { "epoch": 0.9349748344585559, "grad_norm": 0.7305571436882019, "learning_rate": 1.0514029937803704e-06, "loss": 8.33281421661377, "step": 7210 }, { "epoch": 0.9351045119668026, "grad_norm": 0.8462729454040527, "learning_rate": 1.0472412610357973e-06, "loss": 9.067164421081543, "step": 7211 }, { "epoch": 0.9352341894750492, "grad_norm": 0.9977204203605652, "learning_rate": 1.0430876940840994e-06, "loss": 7.948276042938232, "step": 7212 }, { "epoch": 0.9353638669832959, "grad_norm": 1.2042789459228516, "learning_rate": 1.0389422936181281e-06, "loss": 13.281047821044922, "step": 7213 }, { "epoch": 0.9354935444915427, "grad_norm": 0.9566018581390381, "learning_rate": 1.0348050603293747e-06, "loss": 9.434496879577637, "step": 7214 }, { "epoch": 0.9356232219997893, "grad_norm": 1.204071044921875, "learning_rate": 1.0306759949079758e-06, "loss": 9.661696434020996, "step": 7215 }, { "epoch": 0.935752899508036, "grad_norm": 1.164064645767212, "learning_rate": 1.0265550980426975e-06, "loss": 6.947617053985596, "step": 7216 }, { "epoch": 0.9358825770162826, "grad_norm": 1.0048881769180298, "learning_rate": 1.0224423704209397e-06, "loss": 6.77365779876709, "step": 7217 }, { "epoch": 0.9360122545245293, "grad_norm": 0.6433712840080261, "learning_rate": 1.0183378127287535e-06, "loss": 6.873554229736328, "step": 7218 }, { "epoch": 0.936141932032776, "grad_norm": 1.4701921939849854, "learning_rate": 1.0142414256508082e-06, "loss": 9.950542449951172, "step": 7219 }, { "epoch": 0.9362716095410226, "grad_norm": 0.8827773928642273, "learning_rate": 1.0101532098704346e-06, "loss": 6.27985143661499, "step": 7220 }, { "epoch": 0.9364012870492694, "grad_norm": 0.6690629720687866, "learning_rate": 1.0060731660695711e-06, "loss": 5.017402648925781, "step": 7221 }, { "epoch": 0.936530964557516, "grad_norm": 1.1153870820999146, "learning_rate": 1.0020012949288283e-06, "loss": 9.814552307128906, "step": 7222 }, { "epoch": 0.9366606420657627, "grad_norm": 0.8502766489982605, "learning_rate": 9.97937597127413e-07, "loss": 9.03024959564209, "step": 7223 }, { "epoch": 0.9367903195740094, "grad_norm": 1.0093189477920532, "learning_rate": 9.938820733431998e-07, "loss": 7.315815448760986, "step": 7224 }, { "epoch": 0.936919997082256, "grad_norm": 0.6214711666107178, "learning_rate": 9.898347242526862e-07, "loss": 6.393698215484619, "step": 7225 }, { "epoch": 0.9370496745905027, "grad_norm": 0.913420557975769, "learning_rate": 9.857955505310157e-07, "loss": 8.3793363571167, "step": 7226 }, { "epoch": 0.9371793520987495, "grad_norm": 0.7744981646537781, "learning_rate": 9.817645528519491e-07, "loss": 6.578851699829102, "step": 7227 }, { "epoch": 0.9373090296069961, "grad_norm": 1.245334506034851, "learning_rate": 9.777417318879102e-07, "loss": 10.75051498413086, "step": 7228 }, { "epoch": 0.9374387071152428, "grad_norm": 0.7465305924415588, "learning_rate": 9.737270883099337e-07, "loss": 5.446753978729248, "step": 7229 }, { "epoch": 0.9375683846234895, "grad_norm": 1.0808254480361938, "learning_rate": 9.697206227877064e-07, "loss": 10.107762336730957, "step": 7230 }, { "epoch": 0.9376980621317361, "grad_norm": 0.6462953090667725, "learning_rate": 9.657223359895383e-07, "loss": 7.064958095550537, "step": 7231 }, { "epoch": 0.9378277396399828, "grad_norm": 1.1550109386444092, "learning_rate": 9.61732228582396e-07, "loss": 9.305903434753418, "step": 7232 }, { "epoch": 0.9379574171482294, "grad_norm": 1.1650441884994507, "learning_rate": 9.577503012318467e-07, "loss": 9.984423637390137, "step": 7233 }, { "epoch": 0.9380870946564762, "grad_norm": 1.2729098796844482, "learning_rate": 9.537765546021315e-07, "loss": 9.545897483825684, "step": 7234 }, { "epoch": 0.9382167721647229, "grad_norm": 1.269358515739441, "learning_rate": 9.498109893561091e-07, "loss": 8.434073448181152, "step": 7235 }, { "epoch": 0.9383464496729695, "grad_norm": 0.8448141813278198, "learning_rate": 9.458536061552669e-07, "loss": 9.346715927124023, "step": 7236 }, { "epoch": 0.9384761271812162, "grad_norm": 1.0487927198410034, "learning_rate": 9.419044056597437e-07, "loss": 9.689413070678711, "step": 7237 }, { "epoch": 0.9386058046894629, "grad_norm": 0.6882913708686829, "learning_rate": 9.379633885282901e-07, "loss": 7.646646022796631, "step": 7238 }, { "epoch": 0.9387354821977095, "grad_norm": 0.6483178734779358, "learning_rate": 9.340305554183192e-07, "loss": 7.0654730796813965, "step": 7239 }, { "epoch": 0.9388651597059563, "grad_norm": 0.9783325791358948, "learning_rate": 9.301059069858564e-07, "loss": 7.975911617279053, "step": 7240 }, { "epoch": 0.938994837214203, "grad_norm": 1.1002390384674072, "learning_rate": 9.261894438855834e-07, "loss": 10.245966911315918, "step": 7241 }, { "epoch": 0.9391245147224496, "grad_norm": 1.075197458267212, "learning_rate": 9.222811667707832e-07, "loss": 8.265043258666992, "step": 7242 }, { "epoch": 0.9392541922306963, "grad_norm": 0.9530361890792847, "learning_rate": 9.183810762934175e-07, "loss": 7.832296371459961, "step": 7243 }, { "epoch": 0.939383869738943, "grad_norm": 1.2915359735488892, "learning_rate": 9.144891731040384e-07, "loss": 13.159735679626465, "step": 7244 }, { "epoch": 0.9395135472471896, "grad_norm": 0.7981076836585999, "learning_rate": 9.106054578518708e-07, "loss": 7.433838844299316, "step": 7245 }, { "epoch": 0.9396432247554364, "grad_norm": 0.8069811463356018, "learning_rate": 9.067299311847465e-07, "loss": 7.237130641937256, "step": 7246 }, { "epoch": 0.939772902263683, "grad_norm": 1.0182327032089233, "learning_rate": 9.02862593749143e-07, "loss": 11.664285659790039, "step": 7247 }, { "epoch": 0.9399025797719297, "grad_norm": 1.1036714315414429, "learning_rate": 8.990034461901609e-07, "loss": 7.246664047241211, "step": 7248 }, { "epoch": 0.9400322572801764, "grad_norm": 0.9325190782546997, "learning_rate": 8.951524891515572e-07, "loss": 9.095476150512695, "step": 7249 }, { "epoch": 0.940161934788423, "grad_norm": 0.8391647338867188, "learning_rate": 8.913097232757018e-07, "loss": 10.312392234802246, "step": 7250 }, { "epoch": 0.9402916122966697, "grad_norm": 1.2338687181472778, "learning_rate": 8.874751492036093e-07, "loss": 10.325751304626465, "step": 7251 }, { "epoch": 0.9404212898049163, "grad_norm": 0.7687448859214783, "learning_rate": 8.836487675749128e-07, "loss": 6.163246154785156, "step": 7252 }, { "epoch": 0.9405509673131631, "grad_norm": 0.8774350881576538, "learning_rate": 8.798305790279016e-07, "loss": 7.431607246398926, "step": 7253 }, { "epoch": 0.9406806448214098, "grad_norm": 1.2483222484588623, "learning_rate": 8.760205841994773e-07, "loss": 8.030691146850586, "step": 7254 }, { "epoch": 0.9408103223296564, "grad_norm": 1.6953610181808472, "learning_rate": 8.722187837251983e-07, "loss": 10.7915620803833, "step": 7255 }, { "epoch": 0.9409399998379031, "grad_norm": 0.9436970353126526, "learning_rate": 8.684251782392239e-07, "loss": 7.008118152618408, "step": 7256 }, { "epoch": 0.9410696773461498, "grad_norm": 1.253143548965454, "learning_rate": 8.646397683743756e-07, "loss": 10.780344009399414, "step": 7257 }, { "epoch": 0.9411993548543964, "grad_norm": 1.0573419332504272, "learning_rate": 8.608625547620874e-07, "loss": 10.696785926818848, "step": 7258 }, { "epoch": 0.9413290323626432, "grad_norm": 0.8336348533630371, "learning_rate": 8.570935380324441e-07, "loss": 9.403562545776367, "step": 7259 }, { "epoch": 0.9414587098708899, "grad_norm": 0.9087209105491638, "learning_rate": 8.533327188141482e-07, "loss": 5.565031051635742, "step": 7260 }, { "epoch": 0.9415883873791365, "grad_norm": 0.94758141040802, "learning_rate": 8.49580097734548e-07, "loss": 6.984860897064209, "step": 7261 }, { "epoch": 0.9417180648873832, "grad_norm": 1.127394676208496, "learning_rate": 8.458356754196095e-07, "loss": 8.216108322143555, "step": 7262 }, { "epoch": 0.9418477423956298, "grad_norm": 0.8887011408805847, "learning_rate": 8.420994524939386e-07, "loss": 8.75101089477539, "step": 7263 }, { "epoch": 0.9419774199038765, "grad_norm": 1.0646940469741821, "learning_rate": 8.383714295807755e-07, "loss": 9.038118362426758, "step": 7264 }, { "epoch": 0.9421070974121232, "grad_norm": 0.8398870229721069, "learning_rate": 8.34651607301995e-07, "loss": 7.888430118560791, "step": 7265 }, { "epoch": 0.94223677492037, "grad_norm": 0.6707437038421631, "learning_rate": 8.309399862780898e-07, "loss": 5.5224480628967285, "step": 7266 }, { "epoch": 0.9423664524286166, "grad_norm": 1.3329651355743408, "learning_rate": 8.272365671282034e-07, "loss": 9.150321006774902, "step": 7267 }, { "epoch": 0.9424961299368633, "grad_norm": 0.9879866242408752, "learning_rate": 8.235413504700917e-07, "loss": 8.99971866607666, "step": 7268 }, { "epoch": 0.9426258074451099, "grad_norm": 1.132267951965332, "learning_rate": 8.198543369201617e-07, "loss": 9.800569534301758, "step": 7269 }, { "epoch": 0.9427554849533566, "grad_norm": 0.9314058423042297, "learning_rate": 8.16175527093438e-07, "loss": 8.115055084228516, "step": 7270 }, { "epoch": 0.9428851624616033, "grad_norm": 0.8307912349700928, "learning_rate": 8.125049216035851e-07, "loss": 9.996319770812988, "step": 7271 }, { "epoch": 0.94301483996985, "grad_norm": 1.4037929773330688, "learning_rate": 8.088425210628914e-07, "loss": 7.181415557861328, "step": 7272 }, { "epoch": 0.9431445174780967, "grad_norm": 1.043290615081787, "learning_rate": 8.051883260822845e-07, "loss": 6.9259185791015625, "step": 7273 }, { "epoch": 0.9432741949863434, "grad_norm": 0.718690037727356, "learning_rate": 8.015423372713215e-07, "loss": 11.287606239318848, "step": 7274 }, { "epoch": 0.94340387249459, "grad_norm": 1.1978150606155396, "learning_rate": 7.97904555238177e-07, "loss": 10.972567558288574, "step": 7275 }, { "epoch": 0.9435335500028367, "grad_norm": 0.6917620897293091, "learning_rate": 7.94274980589682e-07, "loss": 6.574426651000977, "step": 7276 }, { "epoch": 0.9436632275110833, "grad_norm": 1.1452051401138306, "learning_rate": 7.906536139312748e-07, "loss": 8.754693984985352, "step": 7277 }, { "epoch": 0.9437929050193301, "grad_norm": 0.7493998408317566, "learning_rate": 7.870404558670441e-07, "loss": 11.412239074707031, "step": 7278 }, { "epoch": 0.9439225825275768, "grad_norm": 0.9638043642044067, "learning_rate": 7.834355069996857e-07, "loss": 7.682125568389893, "step": 7279 }, { "epoch": 0.9440522600358234, "grad_norm": 1.5644688606262207, "learning_rate": 7.798387679305519e-07, "loss": 7.427120208740234, "step": 7280 }, { "epoch": 0.9441819375440701, "grad_norm": 0.8119862675666809, "learning_rate": 7.762502392596071e-07, "loss": 11.82111930847168, "step": 7281 }, { "epoch": 0.9443116150523168, "grad_norm": 1.3659063577651978, "learning_rate": 7.726699215854616e-07, "loss": 8.938417434692383, "step": 7282 }, { "epoch": 0.9444412925605634, "grad_norm": 0.8588845729827881, "learning_rate": 7.690978155053319e-07, "loss": 7.272482395172119, "step": 7283 }, { "epoch": 0.9445709700688101, "grad_norm": 1.2158399820327759, "learning_rate": 7.655339216150914e-07, "loss": 6.7839202880859375, "step": 7284 }, { "epoch": 0.9447006475770569, "grad_norm": 1.1291821002960205, "learning_rate": 7.619782405092257e-07, "loss": 8.471508979797363, "step": 7285 }, { "epoch": 0.9448303250853035, "grad_norm": 1.0415021181106567, "learning_rate": 7.58430772780866e-07, "loss": 11.110725402832031, "step": 7286 }, { "epoch": 0.9449600025935502, "grad_norm": 1.3938137292861938, "learning_rate": 7.548915190217554e-07, "loss": 9.821727752685547, "step": 7287 }, { "epoch": 0.9450896801017968, "grad_norm": 0.9273088574409485, "learning_rate": 7.513604798222773e-07, "loss": 9.4107027053833, "step": 7288 }, { "epoch": 0.9452193576100435, "grad_norm": 0.8130377531051636, "learning_rate": 7.478376557714384e-07, "loss": 7.330966949462891, "step": 7289 }, { "epoch": 0.9453490351182902, "grad_norm": 0.7687065601348877, "learning_rate": 7.443230474568963e-07, "loss": 8.73924446105957, "step": 7290 }, { "epoch": 0.9454787126265369, "grad_norm": 0.8896523118019104, "learning_rate": 7.408166554648987e-07, "loss": 9.694236755371094, "step": 7291 }, { "epoch": 0.9456083901347836, "grad_norm": 0.9392257928848267, "learning_rate": 7.373184803803667e-07, "loss": 6.846336364746094, "step": 7292 }, { "epoch": 0.9457380676430303, "grad_norm": 0.9199503064155579, "learning_rate": 7.338285227868169e-07, "loss": 7.393473148345947, "step": 7293 }, { "epoch": 0.9458677451512769, "grad_norm": 0.9542696475982666, "learning_rate": 7.303467832664112e-07, "loss": 9.424891471862793, "step": 7294 }, { "epoch": 0.9459974226595236, "grad_norm": 0.7640840411186218, "learning_rate": 7.268732623999353e-07, "loss": 7.655636310577393, "step": 7295 }, { "epoch": 0.9461271001677702, "grad_norm": 0.7536460161209106, "learning_rate": 7.234079607668143e-07, "loss": 7.912720680236816, "step": 7296 }, { "epoch": 0.9462567776760169, "grad_norm": 0.9496918320655823, "learning_rate": 7.199508789450804e-07, "loss": 5.529405117034912, "step": 7297 }, { "epoch": 0.9463864551842637, "grad_norm": 1.2162175178527832, "learning_rate": 7.165020175114223e-07, "loss": 7.675075531005859, "step": 7298 }, { "epoch": 0.9465161326925103, "grad_norm": 1.1076462268829346, "learning_rate": 7.13061377041141e-07, "loss": 9.609663963317871, "step": 7299 }, { "epoch": 0.946645810200757, "grad_norm": 0.747411847114563, "learning_rate": 7.09628958108155e-07, "loss": 9.340805053710938, "step": 7300 }, { "epoch": 0.9467754877090037, "grad_norm": 0.9516322016716003, "learning_rate": 7.062047612850397e-07, "loss": 6.892302513122559, "step": 7301 }, { "epoch": 0.9469051652172503, "grad_norm": 0.8090852499008179, "learning_rate": 7.027887871429772e-07, "loss": 6.404454231262207, "step": 7302 }, { "epoch": 0.947034842725497, "grad_norm": 0.901593029499054, "learning_rate": 6.993810362517894e-07, "loss": 5.87339448928833, "step": 7303 }, { "epoch": 0.9471645202337438, "grad_norm": 1.1101230382919312, "learning_rate": 6.95981509179916e-07, "loss": 10.102715492248535, "step": 7304 }, { "epoch": 0.9472941977419904, "grad_norm": 1.1554878950119019, "learning_rate": 6.925902064944367e-07, "loss": 8.861910820007324, "step": 7305 }, { "epoch": 0.9474238752502371, "grad_norm": 0.8193599581718445, "learning_rate": 6.892071287610436e-07, "loss": 6.513666152954102, "step": 7306 }, { "epoch": 0.9475535527584837, "grad_norm": 0.9182991981506348, "learning_rate": 6.85832276544085e-07, "loss": 11.655622482299805, "step": 7307 }, { "epoch": 0.9476832302667304, "grad_norm": 0.9240023493766785, "learning_rate": 6.824656504064996e-07, "loss": 7.6605095863342285, "step": 7308 }, { "epoch": 0.9478129077749771, "grad_norm": 1.097316026687622, "learning_rate": 6.791072509098883e-07, "loss": 8.841714859008789, "step": 7309 }, { "epoch": 0.9479425852832238, "grad_norm": 0.7551639080047607, "learning_rate": 6.757570786144529e-07, "loss": 8.137327194213867, "step": 7310 }, { "epoch": 0.9480722627914705, "grad_norm": 1.072849154472351, "learning_rate": 6.724151340790407e-07, "loss": 7.472622871398926, "step": 7311 }, { "epoch": 0.9482019402997172, "grad_norm": 0.6480629444122314, "learning_rate": 6.690814178611226e-07, "loss": 7.8680572509765625, "step": 7312 }, { "epoch": 0.9483316178079638, "grad_norm": 0.895933985710144, "learning_rate": 6.65755930516787e-07, "loss": 12.547453880310059, "step": 7313 }, { "epoch": 0.9484612953162105, "grad_norm": 1.124823808670044, "learning_rate": 6.624386726007626e-07, "loss": 9.1281156539917, "step": 7314 }, { "epoch": 0.9485909728244571, "grad_norm": 0.9069112539291382, "learning_rate": 6.591296446664064e-07, "loss": 7.155951499938965, "step": 7315 }, { "epoch": 0.9487206503327038, "grad_norm": 1.0016814470291138, "learning_rate": 6.55828847265677e-07, "loss": 7.427069664001465, "step": 7316 }, { "epoch": 0.9488503278409506, "grad_norm": 1.2403062582015991, "learning_rate": 6.525362809492008e-07, "loss": 10.414809226989746, "step": 7317 }, { "epoch": 0.9489800053491972, "grad_norm": 0.8792279958724976, "learning_rate": 6.492519462661939e-07, "loss": 7.703734397888184, "step": 7318 }, { "epoch": 0.9491096828574439, "grad_norm": 0.9857478737831116, "learning_rate": 6.459758437645235e-07, "loss": 12.902395248413086, "step": 7319 }, { "epoch": 0.9492393603656906, "grad_norm": 1.0736156702041626, "learning_rate": 6.427079739906694e-07, "loss": 9.69051742553711, "step": 7320 }, { "epoch": 0.9493690378739372, "grad_norm": 1.0720324516296387, "learning_rate": 6.394483374897564e-07, "loss": 9.127097129821777, "step": 7321 }, { "epoch": 0.9494987153821839, "grad_norm": 1.0043107271194458, "learning_rate": 6.361969348055053e-07, "loss": 9.172689437866211, "step": 7322 }, { "epoch": 0.9496283928904307, "grad_norm": 0.8149693012237549, "learning_rate": 6.329537664802931e-07, "loss": 5.567212104797363, "step": 7323 }, { "epoch": 0.9497580703986773, "grad_norm": 1.0553717613220215, "learning_rate": 6.297188330551096e-07, "loss": 8.273089408874512, "step": 7324 }, { "epoch": 0.949887747906924, "grad_norm": 1.0798182487487793, "learning_rate": 6.264921350695674e-07, "loss": 8.641569137573242, "step": 7325 }, { "epoch": 0.9500174254151706, "grad_norm": 1.1770113706588745, "learning_rate": 6.232736730619248e-07, "loss": 10.012945175170898, "step": 7326 }, { "epoch": 0.9501471029234173, "grad_norm": 0.7061785459518433, "learning_rate": 6.200634475690359e-07, "loss": 8.350969314575195, "step": 7327 }, { "epoch": 0.950276780431664, "grad_norm": 1.2682721614837646, "learning_rate": 6.168614591264055e-07, "loss": 11.852884292602539, "step": 7328 }, { "epoch": 0.9504064579399106, "grad_norm": 0.9103796482086182, "learning_rate": 6.136677082681563e-07, "loss": 8.440787315368652, "step": 7329 }, { "epoch": 0.9505361354481574, "grad_norm": 0.8536204695701599, "learning_rate": 6.104821955270346e-07, "loss": 12.081954002380371, "step": 7330 }, { "epoch": 0.9506658129564041, "grad_norm": 1.0316392183303833, "learning_rate": 6.073049214344151e-07, "loss": 10.49321174621582, "step": 7331 }, { "epoch": 0.9507954904646507, "grad_norm": 1.152247428894043, "learning_rate": 6.041358865203018e-07, "loss": 8.03079605102539, "step": 7332 }, { "epoch": 0.9509251679728974, "grad_norm": 1.2015748023986816, "learning_rate": 6.009750913133161e-07, "loss": 12.015955924987793, "step": 7333 }, { "epoch": 0.951054845481144, "grad_norm": 0.982444167137146, "learning_rate": 5.978225363407087e-07, "loss": 8.679447174072266, "step": 7334 }, { "epoch": 0.9511845229893907, "grad_norm": 0.9163243174552917, "learning_rate": 5.946782221283531e-07, "loss": 5.534726619720459, "step": 7335 }, { "epoch": 0.9513142004976375, "grad_norm": 1.1827887296676636, "learning_rate": 5.915421492007633e-07, "loss": 12.899415969848633, "step": 7336 }, { "epoch": 0.9514438780058841, "grad_norm": 1.2751106023788452, "learning_rate": 5.884143180810542e-07, "loss": 7.626100540161133, "step": 7337 }, { "epoch": 0.9515735555141308, "grad_norm": 0.7610769867897034, "learning_rate": 5.852947292909861e-07, "loss": 7.804187297821045, "step": 7338 }, { "epoch": 0.9517032330223775, "grad_norm": 1.0689765214920044, "learning_rate": 5.821833833509316e-07, "loss": 9.411553382873535, "step": 7339 }, { "epoch": 0.9518329105306241, "grad_norm": 1.0424778461456299, "learning_rate": 5.790802807798979e-07, "loss": 9.646954536437988, "step": 7340 }, { "epoch": 0.9519625880388708, "grad_norm": 0.7239398956298828, "learning_rate": 5.759854220955097e-07, "loss": 7.2303361892700195, "step": 7341 }, { "epoch": 0.9520922655471176, "grad_norm": 1.1345770359039307, "learning_rate": 5.728988078140151e-07, "loss": 8.295256614685059, "step": 7342 }, { "epoch": 0.9522219430553642, "grad_norm": 1.5815762281417847, "learning_rate": 5.69820438450297e-07, "loss": 10.962945938110352, "step": 7343 }, { "epoch": 0.9523516205636109, "grad_norm": 0.9146335124969482, "learning_rate": 5.66750314517861e-07, "loss": 10.164176940917969, "step": 7344 }, { "epoch": 0.9524812980718576, "grad_norm": 0.9733829498291016, "learning_rate": 5.636884365288198e-07, "loss": 9.267837524414062, "step": 7345 }, { "epoch": 0.9526109755801042, "grad_norm": 0.8560655117034912, "learning_rate": 5.60634804993937e-07, "loss": 6.913792610168457, "step": 7346 }, { "epoch": 0.9527406530883509, "grad_norm": 1.8378897905349731, "learning_rate": 5.575894204225829e-07, "loss": 10.975245475769043, "step": 7347 }, { "epoch": 0.9528703305965975, "grad_norm": 0.7380480170249939, "learning_rate": 5.545522833227512e-07, "loss": 7.150184154510498, "step": 7348 }, { "epoch": 0.9530000081048443, "grad_norm": 1.2964513301849365, "learning_rate": 5.515233942010812e-07, "loss": 11.69162368774414, "step": 7349 }, { "epoch": 0.953129685613091, "grad_norm": 0.6135715842247009, "learning_rate": 5.485027535628073e-07, "loss": 7.019839286804199, "step": 7350 }, { "epoch": 0.9532593631213376, "grad_norm": 0.9222412705421448, "learning_rate": 5.454903619118046e-07, "loss": 8.712408065795898, "step": 7351 }, { "epoch": 0.9533890406295843, "grad_norm": 1.0761420726776123, "learning_rate": 5.424862197505709e-07, "loss": 8.06002426147461, "step": 7352 }, { "epoch": 0.953518718137831, "grad_norm": 1.2093011140823364, "learning_rate": 5.394903275802277e-07, "loss": 10.798384666442871, "step": 7353 }, { "epoch": 0.9536483956460776, "grad_norm": 0.873953104019165, "learning_rate": 5.36502685900514e-07, "loss": 7.578511714935303, "step": 7354 }, { "epoch": 0.9537780731543244, "grad_norm": 0.8737024664878845, "learning_rate": 5.335232952098035e-07, "loss": 7.413726806640625, "step": 7355 }, { "epoch": 0.953907750662571, "grad_norm": 1.157326579093933, "learning_rate": 5.305521560050763e-07, "loss": 13.561952590942383, "step": 7356 }, { "epoch": 0.9540374281708177, "grad_norm": 1.2245134115219116, "learning_rate": 5.27589268781964e-07, "loss": 9.985147476196289, "step": 7357 }, { "epoch": 0.9541671056790644, "grad_norm": 0.7099462747573853, "learning_rate": 5.246346340346875e-07, "loss": 4.540202617645264, "step": 7358 }, { "epoch": 0.954296783187311, "grad_norm": 1.1338214874267578, "learning_rate": 5.216882522561195e-07, "loss": 10.25252914428711, "step": 7359 }, { "epoch": 0.9544264606955577, "grad_norm": 0.9450353980064392, "learning_rate": 5.18750123937739e-07, "loss": 8.463929176330566, "step": 7360 }, { "epoch": 0.9545561382038044, "grad_norm": 0.8692273497581482, "learning_rate": 5.158202495696596e-07, "loss": 9.524653434753418, "step": 7361 }, { "epoch": 0.9546858157120511, "grad_norm": 1.0040271282196045, "learning_rate": 5.128986296406069e-07, "loss": 7.707005500793457, "step": 7362 }, { "epoch": 0.9548154932202978, "grad_norm": 0.8541313409805298, "learning_rate": 5.099852646379411e-07, "loss": 8.319661140441895, "step": 7363 }, { "epoch": 0.9549451707285445, "grad_norm": 1.3702116012573242, "learning_rate": 5.070801550476345e-07, "loss": 8.170228004455566, "step": 7364 }, { "epoch": 0.9550748482367911, "grad_norm": 1.0354536771774292, "learning_rate": 5.04183301354294e-07, "loss": 7.644696235656738, "step": 7365 }, { "epoch": 0.9552045257450378, "grad_norm": 0.8302454948425293, "learning_rate": 5.012947040411331e-07, "loss": 7.28249979019165, "step": 7366 }, { "epoch": 0.9553342032532844, "grad_norm": 0.6491464376449585, "learning_rate": 4.984143635900051e-07, "loss": 11.124070167541504, "step": 7367 }, { "epoch": 0.9554638807615312, "grad_norm": 1.221725344657898, "learning_rate": 4.955422804813758e-07, "loss": 7.773561000823975, "step": 7368 }, { "epoch": 0.9555935582697779, "grad_norm": 0.9767525792121887, "learning_rate": 4.926784551943398e-07, "loss": 8.423907279968262, "step": 7369 }, { "epoch": 0.9557232357780245, "grad_norm": 1.3482041358947754, "learning_rate": 4.898228882066036e-07, "loss": 10.614778518676758, "step": 7370 }, { "epoch": 0.9558529132862712, "grad_norm": 0.8508569598197937, "learning_rate": 4.869755799945142e-07, "loss": 6.4113640785217285, "step": 7371 }, { "epoch": 0.9559825907945179, "grad_norm": 0.8545219302177429, "learning_rate": 4.841365310330193e-07, "loss": 9.270123481750488, "step": 7372 }, { "epoch": 0.9561122683027645, "grad_norm": 0.8809153437614441, "learning_rate": 4.813057417957068e-07, "loss": 7.472903728485107, "step": 7373 }, { "epoch": 0.9562419458110113, "grad_norm": 1.0361840724945068, "learning_rate": 4.784832127547767e-07, "loss": 7.638974189758301, "step": 7374 }, { "epoch": 0.956371623319258, "grad_norm": 1.3411020040512085, "learning_rate": 4.756689443810525e-07, "loss": 8.239191055297852, "step": 7375 }, { "epoch": 0.9565013008275046, "grad_norm": 0.9294113516807556, "learning_rate": 4.7286293714398075e-07, "loss": 10.446783065795898, "step": 7376 }, { "epoch": 0.9566309783357513, "grad_norm": 1.1459970474243164, "learning_rate": 4.7006519151164273e-07, "loss": 8.169574737548828, "step": 7377 }, { "epoch": 0.9567606558439979, "grad_norm": 0.6397604942321777, "learning_rate": 4.672757079507095e-07, "loss": 5.8352484703063965, "step": 7378 }, { "epoch": 0.9568903333522446, "grad_norm": 0.6588836908340454, "learning_rate": 4.6449448692650885e-07, "loss": 6.069545745849609, "step": 7379 }, { "epoch": 0.9570200108604913, "grad_norm": 1.0392358303070068, "learning_rate": 4.617215289029697e-07, "loss": 9.40526294708252, "step": 7380 }, { "epoch": 0.957149688368738, "grad_norm": 1.2208852767944336, "learning_rate": 4.589568343426498e-07, "loss": 10.929611206054688, "step": 7381 }, { "epoch": 0.9572793658769847, "grad_norm": 0.7298557758331299, "learning_rate": 4.562004037067247e-07, "loss": 9.66412353515625, "step": 7382 }, { "epoch": 0.9574090433852314, "grad_norm": 0.7838824987411499, "learning_rate": 4.5345223745499877e-07, "loss": 5.203529357910156, "step": 7383 }, { "epoch": 0.957538720893478, "grad_norm": 0.9432797431945801, "learning_rate": 4.507123360458887e-07, "loss": 10.0088472366333, "step": 7384 }, { "epoch": 0.9576683984017247, "grad_norm": 1.0636762380599976, "learning_rate": 4.4798069993643443e-07, "loss": 9.62258243560791, "step": 7385 }, { "epoch": 0.9577980759099713, "grad_norm": 1.4328091144561768, "learning_rate": 4.452573295823048e-07, "loss": 11.498550415039062, "step": 7386 }, { "epoch": 0.9579277534182181, "grad_norm": 0.92656010389328, "learning_rate": 4.4254222543777534e-07, "loss": 7.740714073181152, "step": 7387 }, { "epoch": 0.9580574309264648, "grad_norm": 0.8268352746963501, "learning_rate": 4.3983538795576153e-07, "loss": 9.239002227783203, "step": 7388 }, { "epoch": 0.9581871084347114, "grad_norm": 0.9730724692344666, "learning_rate": 4.3713681758777994e-07, "loss": 9.312557220458984, "step": 7389 }, { "epoch": 0.9583167859429581, "grad_norm": 0.99983811378479, "learning_rate": 4.3444651478399266e-07, "loss": 10.17518138885498, "step": 7390 }, { "epoch": 0.9584464634512048, "grad_norm": 0.9501568675041199, "learning_rate": 4.317644799931575e-07, "loss": 9.200575828552246, "step": 7391 }, { "epoch": 0.9585761409594514, "grad_norm": 1.3683662414550781, "learning_rate": 4.290907136626665e-07, "loss": 12.4214506149292, "step": 7392 }, { "epoch": 0.9587058184676981, "grad_norm": 0.8754243850708008, "learning_rate": 4.2642521623852405e-07, "loss": 8.989994049072266, "step": 7393 }, { "epoch": 0.9588354959759449, "grad_norm": 0.8216074705123901, "learning_rate": 4.2376798816537444e-07, "loss": 7.49180793762207, "step": 7394 }, { "epoch": 0.9589651734841915, "grad_norm": 1.2539491653442383, "learning_rate": 4.2111902988645203e-07, "loss": 10.850784301757812, "step": 7395 }, { "epoch": 0.9590948509924382, "grad_norm": 0.8872213959693909, "learning_rate": 4.184783418436422e-07, "loss": 7.90374231338501, "step": 7396 }, { "epoch": 0.9592245285006848, "grad_norm": 0.9534448385238647, "learning_rate": 4.1584592447743154e-07, "loss": 9.654241561889648, "step": 7397 }, { "epoch": 0.9593542060089315, "grad_norm": 1.2218763828277588, "learning_rate": 4.132217782269354e-07, "loss": 7.044933795928955, "step": 7398 }, { "epoch": 0.9594838835171782, "grad_norm": 0.8424020409584045, "learning_rate": 4.106059035298759e-07, "loss": 10.480594635009766, "step": 7399 }, { "epoch": 0.9596135610254249, "grad_norm": 1.3666326999664307, "learning_rate": 4.0799830082262625e-07, "loss": 11.084230422973633, "step": 7400 }, { "epoch": 0.9597432385336716, "grad_norm": 0.81766676902771, "learning_rate": 4.0539897054013844e-07, "loss": 10.276409149169922, "step": 7401 }, { "epoch": 0.9598729160419183, "grad_norm": 1.0852508544921875, "learning_rate": 4.028079131160267e-07, "loss": 9.946551322937012, "step": 7402 }, { "epoch": 0.9600025935501649, "grad_norm": 0.6464280486106873, "learning_rate": 4.002251289824843e-07, "loss": 7.520449161529541, "step": 7403 }, { "epoch": 0.9601322710584116, "grad_norm": 1.2714332342147827, "learning_rate": 3.9765061857035544e-07, "loss": 11.176526069641113, "step": 7404 }, { "epoch": 0.9602619485666583, "grad_norm": 0.7002609968185425, "learning_rate": 3.950843823090966e-07, "loss": 6.3143815994262695, "step": 7405 }, { "epoch": 0.960391626074905, "grad_norm": 1.192781925201416, "learning_rate": 3.9252642062677094e-07, "loss": 9.80477523803711, "step": 7406 }, { "epoch": 0.9605213035831517, "grad_norm": 0.6301025748252869, "learning_rate": 3.899767339500815e-07, "loss": 6.95073938369751, "step": 7407 }, { "epoch": 0.9606509810913983, "grad_norm": 1.0240204334259033, "learning_rate": 3.87435322704327e-07, "loss": 7.061692714691162, "step": 7408 }, { "epoch": 0.960780658599645, "grad_norm": 1.333134651184082, "learning_rate": 3.849021873134573e-07, "loss": 12.167954444885254, "step": 7409 }, { "epoch": 0.9609103361078917, "grad_norm": 0.9281283617019653, "learning_rate": 3.823773282000065e-07, "loss": 9.304951667785645, "step": 7410 }, { "epoch": 0.9610400136161383, "grad_norm": 1.0388157367706299, "learning_rate": 3.798607457851544e-07, "loss": 8.233985900878906, "step": 7411 }, { "epoch": 0.961169691124385, "grad_norm": 0.6688820719718933, "learning_rate": 3.77352440488693e-07, "loss": 12.607110977172852, "step": 7412 }, { "epoch": 0.9612993686326318, "grad_norm": 0.7926067113876343, "learning_rate": 3.748524127290265e-07, "loss": 10.661002159118652, "step": 7413 }, { "epoch": 0.9614290461408784, "grad_norm": 1.3578816652297974, "learning_rate": 3.7236066292318793e-07, "loss": 9.220236778259277, "step": 7414 }, { "epoch": 0.9615587236491251, "grad_norm": 1.1537235975265503, "learning_rate": 3.6987719148682266e-07, "loss": 10.481672286987305, "step": 7415 }, { "epoch": 0.9616884011573718, "grad_norm": 0.9139431715011597, "learning_rate": 3.6740199883419925e-07, "loss": 7.468812465667725, "step": 7416 }, { "epoch": 0.9618180786656184, "grad_norm": 1.0870808362960815, "learning_rate": 3.649350853781985e-07, "loss": 6.392812728881836, "step": 7417 }, { "epoch": 0.9619477561738651, "grad_norm": 0.9214701056480408, "learning_rate": 3.6247645153033584e-07, "loss": 9.488322257995605, "step": 7418 }, { "epoch": 0.9620774336821118, "grad_norm": 1.0739866495132446, "learning_rate": 3.600260977007275e-07, "loss": 8.844684600830078, "step": 7419 }, { "epoch": 0.9622071111903585, "grad_norm": 0.8486248254776001, "learning_rate": 3.5758402429811323e-07, "loss": 8.517486572265625, "step": 7420 }, { "epoch": 0.9623367886986052, "grad_norm": 0.9413504004478455, "learning_rate": 3.551502317298616e-07, "loss": 7.251711845397949, "step": 7421 }, { "epoch": 0.9624664662068518, "grad_norm": 1.210398554801941, "learning_rate": 3.5272472040195347e-07, "loss": 10.68597412109375, "step": 7422 }, { "epoch": 0.9625961437150985, "grad_norm": 0.5755914449691772, "learning_rate": 3.5030749071898186e-07, "loss": 6.948234558105469, "step": 7423 }, { "epoch": 0.9627258212233452, "grad_norm": 0.7832931280136108, "learning_rate": 3.4789854308416304e-07, "loss": 7.3201823234558105, "step": 7424 }, { "epoch": 0.9628554987315918, "grad_norm": 0.9041855931282043, "learning_rate": 3.4549787789933675e-07, "loss": 10.333003044128418, "step": 7425 }, { "epoch": 0.9629851762398386, "grad_norm": 1.392371416091919, "learning_rate": 3.4310549556495485e-07, "loss": 7.761846542358398, "step": 7426 }, { "epoch": 0.9631148537480853, "grad_norm": 0.771418571472168, "learning_rate": 3.4072139648009815e-07, "loss": 7.546952724456787, "step": 7427 }, { "epoch": 0.9632445312563319, "grad_norm": 0.6294339299201965, "learning_rate": 3.3834558104244293e-07, "loss": 6.790987491607666, "step": 7428 }, { "epoch": 0.9633742087645786, "grad_norm": 1.3418775796890259, "learning_rate": 3.359780496483167e-07, "loss": 11.02791976928711, "step": 7429 }, { "epoch": 0.9635038862728252, "grad_norm": 1.1095346212387085, "learning_rate": 3.336188026926257e-07, "loss": 9.614792823791504, "step": 7430 }, { "epoch": 0.9636335637810719, "grad_norm": 0.7960555553436279, "learning_rate": 3.3126784056892734e-07, "loss": 7.11454439163208, "step": 7431 }, { "epoch": 0.9637632412893187, "grad_norm": 0.8920642137527466, "learning_rate": 3.2892516366938573e-07, "loss": 11.81971263885498, "step": 7432 }, { "epoch": 0.9638929187975653, "grad_norm": 1.106524109840393, "learning_rate": 3.265907723847772e-07, "loss": 7.4153947830200195, "step": 7433 }, { "epoch": 0.964022596305812, "grad_norm": 0.8270456194877625, "learning_rate": 3.242646671045013e-07, "loss": 7.789677619934082, "step": 7434 }, { "epoch": 0.9641522738140587, "grad_norm": 1.0536569356918335, "learning_rate": 3.219468482165755e-07, "loss": 8.978652000427246, "step": 7435 }, { "epoch": 0.9642819513223053, "grad_norm": 1.2462258338928223, "learning_rate": 3.1963731610763494e-07, "loss": 10.652429580688477, "step": 7436 }, { "epoch": 0.964411628830552, "grad_norm": 1.1602550745010376, "learning_rate": 3.173360711629325e-07, "loss": 10.663628578186035, "step": 7437 }, { "epoch": 0.9645413063387988, "grad_norm": 0.8014231324195862, "learning_rate": 3.15043113766339e-07, "loss": 8.18882942199707, "step": 7438 }, { "epoch": 0.9646709838470454, "grad_norm": 0.5972102284431458, "learning_rate": 3.127584443003373e-07, "loss": 7.060101509094238, "step": 7439 }, { "epoch": 0.9648006613552921, "grad_norm": 1.0787404775619507, "learning_rate": 3.104820631460392e-07, "loss": 7.166021347045898, "step": 7440 }, { "epoch": 0.9649303388635387, "grad_norm": 0.641106128692627, "learning_rate": 3.0821397068315773e-07, "loss": 7.402716636657715, "step": 7441 }, { "epoch": 0.9650600163717854, "grad_norm": 0.786106526851654, "learning_rate": 3.059541672900401e-07, "loss": 8.782146453857422, "step": 7442 }, { "epoch": 0.9651896938800321, "grad_norm": 0.9366811513900757, "learning_rate": 3.0370265334364046e-07, "loss": 11.416454315185547, "step": 7443 }, { "epoch": 0.9653193713882787, "grad_norm": 0.9847745299339294, "learning_rate": 3.0145942921953606e-07, "loss": 9.51225757598877, "step": 7444 }, { "epoch": 0.9654490488965255, "grad_norm": 0.81718909740448, "learning_rate": 2.992244952919165e-07, "loss": 8.657014846801758, "step": 7445 }, { "epoch": 0.9655787264047722, "grad_norm": 0.9885682463645935, "learning_rate": 2.969978519335892e-07, "loss": 7.937760353088379, "step": 7446 }, { "epoch": 0.9657084039130188, "grad_norm": 0.7650458812713623, "learning_rate": 2.9477949951597915e-07, "loss": 7.660171031951904, "step": 7447 }, { "epoch": 0.9658380814212655, "grad_norm": 0.8604140281677246, "learning_rate": 2.925694384091293e-07, "loss": 5.663663387298584, "step": 7448 }, { "epoch": 0.9659677589295121, "grad_norm": 0.7502748966217041, "learning_rate": 2.903676689817003e-07, "loss": 7.309936046600342, "step": 7449 }, { "epoch": 0.9660974364377588, "grad_norm": 1.059523344039917, "learning_rate": 2.881741916009706e-07, "loss": 5.897839069366455, "step": 7450 }, { "epoch": 0.9662271139460056, "grad_norm": 0.831985592842102, "learning_rate": 2.859890066328308e-07, "loss": 5.950555324554443, "step": 7451 }, { "epoch": 0.9663567914542522, "grad_norm": 0.7769079208374023, "learning_rate": 2.8381211444178935e-07, "loss": 7.094751358032227, "step": 7452 }, { "epoch": 0.9664864689624989, "grad_norm": 0.7948768138885498, "learning_rate": 2.81643515390978e-07, "loss": 4.863699913024902, "step": 7453 }, { "epoch": 0.9666161464707456, "grad_norm": 1.050029993057251, "learning_rate": 2.7948320984213514e-07, "loss": 9.01624870300293, "step": 7454 }, { "epoch": 0.9667458239789922, "grad_norm": 0.748159646987915, "learning_rate": 2.7733119815562257e-07, "loss": 10.042641639709473, "step": 7455 }, { "epoch": 0.9668755014872389, "grad_norm": 0.9972827434539795, "learning_rate": 2.751874806904142e-07, "loss": 7.266026020050049, "step": 7456 }, { "epoch": 0.9670051789954855, "grad_norm": 0.9116883277893066, "learning_rate": 2.7305205780410735e-07, "loss": 7.499786376953125, "step": 7457 }, { "epoch": 0.9671348565037323, "grad_norm": 0.7886130213737488, "learning_rate": 2.709249298529115e-07, "loss": 10.092041969299316, "step": 7458 }, { "epoch": 0.967264534011979, "grad_norm": 0.928697943687439, "learning_rate": 2.6880609719164286e-07, "loss": 8.92648983001709, "step": 7459 }, { "epoch": 0.9673942115202256, "grad_norm": 0.8584019541740417, "learning_rate": 2.66695560173752e-07, "loss": 6.619855880737305, "step": 7460 }, { "epoch": 0.9675238890284723, "grad_norm": 0.8634499907493591, "learning_rate": 2.6459331915129616e-07, "loss": 7.387217044830322, "step": 7461 }, { "epoch": 0.967653566536719, "grad_norm": 1.003045916557312, "learning_rate": 2.624993744749504e-07, "loss": 9.257118225097656, "step": 7462 }, { "epoch": 0.9677832440449656, "grad_norm": 0.7325408458709717, "learning_rate": 2.6041372649399655e-07, "loss": 8.501741409301758, "step": 7463 }, { "epoch": 0.9679129215532124, "grad_norm": 0.9912891387939453, "learning_rate": 2.583363755563506e-07, "loss": 7.451491355895996, "step": 7464 }, { "epoch": 0.9680425990614591, "grad_norm": 0.7278233766555786, "learning_rate": 2.5626732200852986e-07, "loss": 9.272879600524902, "step": 7465 }, { "epoch": 0.9681722765697057, "grad_norm": 0.6927512884140015, "learning_rate": 2.542065661956694e-07, "loss": 7.1986775398254395, "step": 7466 }, { "epoch": 0.9683019540779524, "grad_norm": 0.9205805659294128, "learning_rate": 2.521541084615331e-07, "loss": 9.918386459350586, "step": 7467 }, { "epoch": 0.968431631586199, "grad_norm": 1.1334104537963867, "learning_rate": 2.501099491484804e-07, "loss": 6.315860748291016, "step": 7468 }, { "epoch": 0.9685613090944457, "grad_norm": 1.2987383604049683, "learning_rate": 2.480740885975052e-07, "loss": 8.973886489868164, "step": 7469 }, { "epoch": 0.9686909866026925, "grad_norm": 0.76578688621521, "learning_rate": 2.4604652714820267e-07, "loss": 7.26377534866333, "step": 7470 }, { "epoch": 0.9688206641109391, "grad_norm": 1.410886287689209, "learning_rate": 2.440272651387965e-07, "loss": 9.139312744140625, "step": 7471 }, { "epoch": 0.9689503416191858, "grad_norm": 0.9724074006080627, "learning_rate": 2.420163029061062e-07, "loss": 7.122785568237305, "step": 7472 }, { "epoch": 0.9690800191274325, "grad_norm": 1.0762698650360107, "learning_rate": 2.400136407855913e-07, "loss": 8.89486026763916, "step": 7473 }, { "epoch": 0.9692096966356791, "grad_norm": 0.9294346570968628, "learning_rate": 2.3801927911131227e-07, "loss": 8.280075073242188, "step": 7474 }, { "epoch": 0.9693393741439258, "grad_norm": 1.1629996299743652, "learning_rate": 2.3603321821594748e-07, "loss": 7.84293270111084, "step": 7475 }, { "epoch": 0.9694690516521725, "grad_norm": 0.9977506399154663, "learning_rate": 2.3405545843079302e-07, "loss": 9.042808532714844, "step": 7476 }, { "epoch": 0.9695987291604192, "grad_norm": 1.0725890398025513, "learning_rate": 2.3208600008575166e-07, "loss": 8.202034950256348, "step": 7477 }, { "epoch": 0.9697284066686659, "grad_norm": 1.2177132368087769, "learning_rate": 2.3012484350935505e-07, "loss": 10.066975593566895, "step": 7478 }, { "epoch": 0.9698580841769125, "grad_norm": 1.0370007753372192, "learning_rate": 2.281719890287415e-07, "loss": 8.836520195007324, "step": 7479 }, { "epoch": 0.9699877616851592, "grad_norm": 1.2853269577026367, "learning_rate": 2.2622743696966153e-07, "loss": 8.920241355895996, "step": 7480 }, { "epoch": 0.9701174391934059, "grad_norm": 0.7445539236068726, "learning_rate": 2.242911876564946e-07, "loss": 7.5927205085754395, "step": 7481 }, { "epoch": 0.9702471167016525, "grad_norm": 0.9522405862808228, "learning_rate": 2.2236324141221564e-07, "loss": 7.422441482543945, "step": 7482 }, { "epoch": 0.9703767942098993, "grad_norm": 0.8468295335769653, "learning_rate": 2.2044359855842856e-07, "loss": 9.358397483825684, "step": 7483 }, { "epoch": 0.970506471718146, "grad_norm": 1.0451700687408447, "learning_rate": 2.1853225941534938e-07, "loss": 10.524879455566406, "step": 7484 }, { "epoch": 0.9706361492263926, "grad_norm": 1.3575645685195923, "learning_rate": 2.16629224301812e-07, "loss": 11.993576049804688, "step": 7485 }, { "epoch": 0.9707658267346393, "grad_norm": 0.8892186880111694, "learning_rate": 2.1473449353525133e-07, "loss": 6.956960678100586, "step": 7486 }, { "epoch": 0.970895504242886, "grad_norm": 0.7442500591278076, "learning_rate": 2.1284806743173124e-07, "loss": 5.945528030395508, "step": 7487 }, { "epoch": 0.9710251817511326, "grad_norm": 0.9572964906692505, "learning_rate": 2.1096994630592782e-07, "loss": 6.688271999359131, "step": 7488 }, { "epoch": 0.9711548592593793, "grad_norm": 0.8792024850845337, "learning_rate": 2.0910013047112932e-07, "loss": 8.819819450378418, "step": 7489 }, { "epoch": 0.971284536767626, "grad_norm": 0.978450357913971, "learning_rate": 2.0723862023923624e-07, "loss": 7.1769609451293945, "step": 7490 }, { "epoch": 0.9714142142758727, "grad_norm": 1.0472992658615112, "learning_rate": 2.0538541592077244e-07, "loss": 10.839611053466797, "step": 7491 }, { "epoch": 0.9715438917841194, "grad_norm": 0.8636254072189331, "learning_rate": 2.0354051782486284e-07, "loss": 8.225619316101074, "step": 7492 }, { "epoch": 0.971673569292366, "grad_norm": 0.8521791100502014, "learning_rate": 2.017039262592557e-07, "loss": 9.03095531463623, "step": 7493 }, { "epoch": 0.9718032468006127, "grad_norm": 0.933857262134552, "learning_rate": 1.9987564153031711e-07, "loss": 7.6940765380859375, "step": 7494 }, { "epoch": 0.9719329243088594, "grad_norm": 0.9367764592170715, "learning_rate": 1.9805566394301978e-07, "loss": 10.9197416305542, "step": 7495 }, { "epoch": 0.9720626018171061, "grad_norm": 1.0664055347442627, "learning_rate": 1.9624399380095416e-07, "loss": 7.796025276184082, "step": 7496 }, { "epoch": 0.9721922793253528, "grad_norm": 1.2251546382904053, "learning_rate": 1.9444063140632295e-07, "loss": 8.273957252502441, "step": 7497 }, { "epoch": 0.9723219568335995, "grad_norm": 0.9364606142044067, "learning_rate": 1.9264557705994112e-07, "loss": 7.595779895782471, "step": 7498 }, { "epoch": 0.9724516343418461, "grad_norm": 1.0402897596359253, "learning_rate": 1.908588310612469e-07, "loss": 11.663549423217773, "step": 7499 }, { "epoch": 0.9725813118500928, "grad_norm": 1.3924479484558105, "learning_rate": 1.8908039370829077e-07, "loss": 10.185054779052734, "step": 7500 }, { "epoch": 0.9727109893583394, "grad_norm": 1.0182864665985107, "learning_rate": 1.8731026529771878e-07, "loss": 6.9069366455078125, "step": 7501 }, { "epoch": 0.9728406668665862, "grad_norm": 1.3316184282302856, "learning_rate": 1.8554844612481691e-07, "loss": 8.855262756347656, "step": 7502 }, { "epoch": 0.9729703443748329, "grad_norm": 0.8467838764190674, "learning_rate": 1.8379493648347234e-07, "loss": 5.5045881271362305, "step": 7503 }, { "epoch": 0.9731000218830795, "grad_norm": 0.9890183806419373, "learning_rate": 1.820497366661844e-07, "loss": 12.748588562011719, "step": 7504 }, { "epoch": 0.9732296993913262, "grad_norm": 0.9908354878425598, "learning_rate": 1.8031284696407025e-07, "loss": 9.876768112182617, "step": 7505 }, { "epoch": 0.9733593768995729, "grad_norm": 0.7824139595031738, "learning_rate": 1.7858426766686475e-07, "loss": 6.602428913116455, "step": 7506 }, { "epoch": 0.9734890544078195, "grad_norm": 1.0875624418258667, "learning_rate": 1.7686399906290396e-07, "loss": 8.52608871459961, "step": 7507 }, { "epoch": 0.9736187319160662, "grad_norm": 0.7959781885147095, "learning_rate": 1.7515204143914722e-07, "loss": 7.310083866119385, "step": 7508 }, { "epoch": 0.973748409424313, "grad_norm": 0.721825122833252, "learning_rate": 1.734483950811716e-07, "loss": 8.204024314880371, "step": 7509 }, { "epoch": 0.9738780869325596, "grad_norm": 0.9088194966316223, "learning_rate": 1.7175306027314986e-07, "loss": 6.763286113739014, "step": 7510 }, { "epoch": 0.9740077644408063, "grad_norm": 1.0617607831954956, "learning_rate": 1.7006603729789462e-07, "loss": 9.389761924743652, "step": 7511 }, { "epoch": 0.9741374419490529, "grad_norm": 0.705389678478241, "learning_rate": 1.6838732643680854e-07, "loss": 5.922831058502197, "step": 7512 }, { "epoch": 0.9742671194572996, "grad_norm": 1.2961513996124268, "learning_rate": 1.6671692796991212e-07, "loss": 7.377678871154785, "step": 7513 }, { "epoch": 0.9743967969655463, "grad_norm": 0.9179013967514038, "learning_rate": 1.6505484217586021e-07, "loss": 5.905021667480469, "step": 7514 }, { "epoch": 0.974526474473793, "grad_norm": 1.0128453969955444, "learning_rate": 1.634010693318866e-07, "loss": 9.635797500610352, "step": 7515 }, { "epoch": 0.9746561519820397, "grad_norm": 0.973288893699646, "learning_rate": 1.617556097138706e-07, "loss": 7.9257588386535645, "step": 7516 }, { "epoch": 0.9747858294902864, "grad_norm": 0.7218853235244751, "learning_rate": 1.6011846359628157e-07, "loss": 6.708361625671387, "step": 7517 }, { "epoch": 0.974915506998533, "grad_norm": 0.8264052867889404, "learning_rate": 1.5848963125221772e-07, "loss": 4.053468227386475, "step": 7518 }, { "epoch": 0.9750451845067797, "grad_norm": 0.8483564853668213, "learning_rate": 1.5686911295338393e-07, "loss": 7.898909091949463, "step": 7519 }, { "epoch": 0.9751748620150263, "grad_norm": 1.5253561735153198, "learning_rate": 1.5525690897008614e-07, "loss": 11.123870849609375, "step": 7520 }, { "epoch": 0.975304539523273, "grad_norm": 0.9682725667953491, "learning_rate": 1.5365301957127597e-07, "loss": 11.025140762329102, "step": 7521 }, { "epoch": 0.9754342170315198, "grad_norm": 1.2635819911956787, "learning_rate": 1.5205744502447827e-07, "loss": 10.101486206054688, "step": 7522 }, { "epoch": 0.9755638945397664, "grad_norm": 0.9627742767333984, "learning_rate": 1.5047018559586346e-07, "loss": 8.177451133728027, "step": 7523 }, { "epoch": 0.9756935720480131, "grad_norm": 1.0437374114990234, "learning_rate": 1.4889124155019195e-07, "loss": 6.610877513885498, "step": 7524 }, { "epoch": 0.9758232495562598, "grad_norm": 0.9112138152122498, "learning_rate": 1.4732061315085866e-07, "loss": 8.01740550994873, "step": 7525 }, { "epoch": 0.9759529270645064, "grad_norm": 0.8086079955101013, "learning_rate": 1.457583006598484e-07, "loss": 7.704116344451904, "step": 7526 }, { "epoch": 0.9760826045727531, "grad_norm": 1.011098027229309, "learning_rate": 1.4420430433777498e-07, "loss": 6.229217529296875, "step": 7527 }, { "epoch": 0.9762122820809999, "grad_norm": 1.08184814453125, "learning_rate": 1.426586244438588e-07, "loss": 12.450610160827637, "step": 7528 }, { "epoch": 0.9763419595892465, "grad_norm": 0.8508259057998657, "learning_rate": 1.411212612359325e-07, "loss": 7.982757091522217, "step": 7529 }, { "epoch": 0.9764716370974932, "grad_norm": 0.9870501756668091, "learning_rate": 1.395922149704465e-07, "loss": 9.333480834960938, "step": 7530 }, { "epoch": 0.9766013146057398, "grad_norm": 0.7878224849700928, "learning_rate": 1.3807148590246345e-07, "loss": 9.23287582397461, "step": 7531 }, { "epoch": 0.9767309921139865, "grad_norm": 0.7808171510696411, "learning_rate": 1.3655907428564707e-07, "loss": 8.449308395385742, "step": 7532 }, { "epoch": 0.9768606696222332, "grad_norm": 0.9611596465110779, "learning_rate": 1.3505498037229003e-07, "loss": 9.636956214904785, "step": 7533 }, { "epoch": 0.9769903471304799, "grad_norm": 0.8512455224990845, "learning_rate": 1.3355920441328607e-07, "loss": 9.510998725891113, "step": 7534 }, { "epoch": 0.9771200246387266, "grad_norm": 0.9782743453979492, "learning_rate": 1.320717466581467e-07, "loss": 7.847836971282959, "step": 7535 }, { "epoch": 0.9772497021469733, "grad_norm": 0.8602837324142456, "learning_rate": 1.3059260735499012e-07, "loss": 8.231921195983887, "step": 7536 }, { "epoch": 0.9773793796552199, "grad_norm": 0.9647858142852783, "learning_rate": 1.2912178675055232e-07, "loss": 5.916808128356934, "step": 7537 }, { "epoch": 0.9775090571634666, "grad_norm": 1.2195805311203003, "learning_rate": 1.2765928509018145e-07, "loss": 9.586175918579102, "step": 7538 }, { "epoch": 0.9776387346717132, "grad_norm": 1.2404890060424805, "learning_rate": 1.2620510261784347e-07, "loss": 8.48923397064209, "step": 7539 }, { "epoch": 0.9777684121799599, "grad_norm": 1.1525261402130127, "learning_rate": 1.247592395760999e-07, "loss": 9.184645652770996, "step": 7540 }, { "epoch": 0.9778980896882067, "grad_norm": 0.8427762985229492, "learning_rate": 1.2332169620613564e-07, "loss": 7.77227783203125, "step": 7541 }, { "epoch": 0.9780277671964533, "grad_norm": 1.1333459615707397, "learning_rate": 1.2189247274775328e-07, "loss": 9.49778938293457, "step": 7542 }, { "epoch": 0.9781574447047, "grad_norm": 1.0156599283218384, "learning_rate": 1.2047156943935657e-07, "loss": 8.705986976623535, "step": 7543 }, { "epoch": 0.9782871222129467, "grad_norm": 0.9979060292243958, "learning_rate": 1.1905898651797254e-07, "loss": 8.883784294128418, "step": 7544 }, { "epoch": 0.9784167997211933, "grad_norm": 0.8949509859085083, "learning_rate": 1.176547242192183e-07, "loss": 9.419811248779297, "step": 7545 }, { "epoch": 0.97854647722944, "grad_norm": 0.9127333164215088, "learning_rate": 1.1625878277735647e-07, "loss": 10.905482292175293, "step": 7546 }, { "epoch": 0.9786761547376868, "grad_norm": 0.9516608715057373, "learning_rate": 1.148711624252341e-07, "loss": 8.40774154663086, "step": 7547 }, { "epoch": 0.9788058322459334, "grad_norm": 1.1347851753234863, "learning_rate": 1.1349186339431606e-07, "loss": 11.041526794433594, "step": 7548 }, { "epoch": 0.9789355097541801, "grad_norm": 1.3114981651306152, "learning_rate": 1.1212088591469049e-07, "loss": 10.749955177307129, "step": 7549 }, { "epoch": 0.9790651872624268, "grad_norm": 0.844666063785553, "learning_rate": 1.1075823021504672e-07, "loss": 7.787979602813721, "step": 7550 }, { "epoch": 0.9791948647706734, "grad_norm": 1.275336503982544, "learning_rate": 1.0940389652269179e-07, "loss": 12.411084175109863, "step": 7551 }, { "epoch": 0.9793245422789201, "grad_norm": 1.0150353908538818, "learning_rate": 1.0805788506353387e-07, "loss": 8.732955932617188, "step": 7552 }, { "epoch": 0.9794542197871667, "grad_norm": 0.9612435698509216, "learning_rate": 1.0672019606210448e-07, "loss": 7.912020206451416, "step": 7553 }, { "epoch": 0.9795838972954135, "grad_norm": 0.9736451506614685, "learning_rate": 1.053908297415529e-07, "loss": 9.824981689453125, "step": 7554 }, { "epoch": 0.9797135748036602, "grad_norm": 1.0451865196228027, "learning_rate": 1.0406978632361286e-07, "loss": 8.813233375549316, "step": 7555 }, { "epoch": 0.9798432523119068, "grad_norm": 0.9934219717979431, "learning_rate": 1.0275706602865809e-07, "loss": 9.148612022399902, "step": 7556 }, { "epoch": 0.9799729298201535, "grad_norm": 0.8828851580619812, "learning_rate": 1.0145266907566342e-07, "loss": 8.869285583496094, "step": 7557 }, { "epoch": 0.9801026073284002, "grad_norm": 0.7481817007064819, "learning_rate": 1.0015659568221037e-07, "loss": 8.246660232543945, "step": 7558 }, { "epoch": 0.9802322848366468, "grad_norm": 0.8243326544761658, "learning_rate": 9.886884606449265e-08, "loss": 7.559612274169922, "step": 7559 }, { "epoch": 0.9803619623448936, "grad_norm": 1.223521113395691, "learning_rate": 9.758942043733288e-08, "loss": 9.937910079956055, "step": 7560 }, { "epoch": 0.9804916398531403, "grad_norm": 1.0076193809509277, "learning_rate": 9.631831901413813e-08, "loss": 9.93664264678955, "step": 7561 }, { "epoch": 0.9806213173613869, "grad_norm": 0.6693733930587769, "learning_rate": 9.505554200694989e-08, "loss": 8.390347480773926, "step": 7562 }, { "epoch": 0.9807509948696336, "grad_norm": 1.2058207988739014, "learning_rate": 9.380108962640522e-08, "loss": 8.444647789001465, "step": 7563 }, { "epoch": 0.9808806723778802, "grad_norm": 1.0948896408081055, "learning_rate": 9.25549620817645e-08, "loss": 9.225442886352539, "step": 7564 }, { "epoch": 0.9810103498861269, "grad_norm": 0.936468780040741, "learning_rate": 9.131715958088927e-08, "loss": 9.794907569885254, "step": 7565 }, { "epoch": 0.9811400273943737, "grad_norm": 0.845369815826416, "learning_rate": 9.008768233025877e-08, "loss": 8.07828140258789, "step": 7566 }, { "epoch": 0.9812697049026203, "grad_norm": 1.287803053855896, "learning_rate": 8.886653053495897e-08, "loss": 7.303488731384277, "step": 7567 }, { "epoch": 0.981399382410867, "grad_norm": 1.3666726350784302, "learning_rate": 8.765370439869359e-08, "loss": 9.066514015197754, "step": 7568 }, { "epoch": 0.9815290599191137, "grad_norm": 1.1965447664260864, "learning_rate": 8.6449204123773e-08, "loss": 6.176825523376465, "step": 7569 }, { "epoch": 0.9816587374273603, "grad_norm": 1.0810208320617676, "learning_rate": 8.525302991111427e-08, "loss": 13.637989044189453, "step": 7570 }, { "epoch": 0.981788414935607, "grad_norm": 0.7547739744186401, "learning_rate": 8.406518196025781e-08, "loss": 7.724018573760986, "step": 7571 }, { "epoch": 0.9819180924438536, "grad_norm": 0.7869909405708313, "learning_rate": 8.288566046934509e-08, "loss": 5.576995849609375, "step": 7572 }, { "epoch": 0.9820477699521004, "grad_norm": 1.0093196630477905, "learning_rate": 8.17144656351354e-08, "loss": 8.91742992401123, "step": 7573 }, { "epoch": 0.9821774474603471, "grad_norm": 0.9743356108665466, "learning_rate": 8.055159765298915e-08, "loss": 6.623105525970459, "step": 7574 }, { "epoch": 0.9823071249685937, "grad_norm": 0.7965306043624878, "learning_rate": 7.939705671688447e-08, "loss": 5.896469593048096, "step": 7575 }, { "epoch": 0.9824368024768404, "grad_norm": 1.0073614120483398, "learning_rate": 7.825084301941732e-08, "loss": 9.011648178100586, "step": 7576 }, { "epoch": 0.9825664799850871, "grad_norm": 0.863838255405426, "learning_rate": 7.71129567517792e-08, "loss": 8.400456428527832, "step": 7577 }, { "epoch": 0.9826961574933337, "grad_norm": 1.1528208255767822, "learning_rate": 7.598339810378496e-08, "loss": 9.491922378540039, "step": 7578 }, { "epoch": 0.9828258350015805, "grad_norm": 0.9293022751808167, "learning_rate": 7.486216726385053e-08, "loss": 10.78216552734375, "step": 7579 }, { "epoch": 0.9829555125098272, "grad_norm": 1.0261472463607788, "learning_rate": 7.37492644190152e-08, "loss": 9.085196495056152, "step": 7580 }, { "epoch": 0.9830851900180738, "grad_norm": 1.2409899234771729, "learning_rate": 7.264468975491934e-08, "loss": 10.910462379455566, "step": 7581 }, { "epoch": 0.9832148675263205, "grad_norm": 1.2922402620315552, "learning_rate": 7.154844345581558e-08, "loss": 7.259139060974121, "step": 7582 }, { "epoch": 0.9833445450345671, "grad_norm": 0.9709093570709229, "learning_rate": 7.046052570456874e-08, "loss": 9.770910263061523, "step": 7583 }, { "epoch": 0.9834742225428138, "grad_norm": 1.4027334451675415, "learning_rate": 6.938093668265588e-08, "loss": 6.736246109008789, "step": 7584 }, { "epoch": 0.9836039000510605, "grad_norm": 1.0737292766571045, "learning_rate": 6.830967657016074e-08, "loss": 7.481895446777344, "step": 7585 }, { "epoch": 0.9837335775593072, "grad_norm": 1.1776132583618164, "learning_rate": 6.724674554577371e-08, "loss": 8.6109619140625, "step": 7586 }, { "epoch": 0.9838632550675539, "grad_norm": 1.0578936338424683, "learning_rate": 6.619214378681404e-08, "loss": 10.520873069763184, "step": 7587 }, { "epoch": 0.9839929325758006, "grad_norm": 0.9808480739593506, "learning_rate": 6.514587146919105e-08, "loss": 10.175276756286621, "step": 7588 }, { "epoch": 0.9841226100840472, "grad_norm": 1.1283403635025024, "learning_rate": 6.410792876744287e-08, "loss": 6.504714488983154, "step": 7589 }, { "epoch": 0.9842522875922939, "grad_norm": 0.9405750632286072, "learning_rate": 6.307831585469215e-08, "loss": 6.809697151184082, "step": 7590 }, { "epoch": 0.9843819651005405, "grad_norm": 0.9250452518463135, "learning_rate": 6.205703290270148e-08, "loss": 8.579869270324707, "step": 7591 }, { "epoch": 0.9845116426087873, "grad_norm": 0.9990024566650391, "learning_rate": 6.104408008182905e-08, "loss": 5.542646408081055, "step": 7592 }, { "epoch": 0.984641320117034, "grad_norm": 0.9899259805679321, "learning_rate": 6.003945756103969e-08, "loss": 8.88776969909668, "step": 7593 }, { "epoch": 0.9847709976252806, "grad_norm": 0.7572897672653198, "learning_rate": 5.904316550792155e-08, "loss": 8.060882568359375, "step": 7594 }, { "epoch": 0.9849006751335273, "grad_norm": 1.0753332376480103, "learning_rate": 5.805520408865839e-08, "loss": 8.07652759552002, "step": 7595 }, { "epoch": 0.985030352641774, "grad_norm": 1.418408751487732, "learning_rate": 5.70755734680517e-08, "loss": 8.920934677124023, "step": 7596 }, { "epoch": 0.9851600301500206, "grad_norm": 0.7144514322280884, "learning_rate": 5.610427380952077e-08, "loss": 5.658751010894775, "step": 7597 }, { "epoch": 0.9852897076582674, "grad_norm": 1.1661916971206665, "learning_rate": 5.514130527508043e-08, "loss": 6.849765777587891, "step": 7598 }, { "epoch": 0.9854193851665141, "grad_norm": 1.023242473602295, "learning_rate": 5.418666802536887e-08, "loss": 7.615482807159424, "step": 7599 }, { "epoch": 0.9855490626747607, "grad_norm": 0.8886009454727173, "learning_rate": 5.324036221962536e-08, "loss": 9.94463062286377, "step": 7600 }, { "epoch": 0.9856787401830074, "grad_norm": 0.8089064359664917, "learning_rate": 5.230238801570142e-08, "loss": 7.88055944442749, "step": 7601 }, { "epoch": 0.985808417691254, "grad_norm": 0.8606277108192444, "learning_rate": 5.137274557006633e-08, "loss": 10.142638206481934, "step": 7602 }, { "epoch": 0.9859380951995007, "grad_norm": 1.2369635105133057, "learning_rate": 5.0451435037790483e-08, "loss": 9.899478912353516, "step": 7603 }, { "epoch": 0.9860677727077474, "grad_norm": 0.7257253527641296, "learning_rate": 4.9538456572550964e-08, "loss": 7.172163009643555, "step": 7604 }, { "epoch": 0.9861974502159941, "grad_norm": 1.044524908065796, "learning_rate": 4.8633810326648154e-08, "loss": 9.414976119995117, "step": 7605 }, { "epoch": 0.9863271277242408, "grad_norm": 0.9824392795562744, "learning_rate": 4.7737496450989126e-08, "loss": 7.452574253082275, "step": 7606 }, { "epoch": 0.9864568052324875, "grad_norm": 0.8912908434867859, "learning_rate": 4.684951509507651e-08, "loss": 9.287297248840332, "step": 7607 }, { "epoch": 0.9865864827407341, "grad_norm": 0.8354427218437195, "learning_rate": 4.596986640704182e-08, "loss": 9.369139671325684, "step": 7608 }, { "epoch": 0.9867161602489808, "grad_norm": 1.2724043130874634, "learning_rate": 4.509855053362322e-08, "loss": 10.308785438537598, "step": 7609 }, { "epoch": 0.9868458377572275, "grad_norm": 0.9236938953399658, "learning_rate": 4.423556762015446e-08, "loss": 8.909204483032227, "step": 7610 }, { "epoch": 0.9869755152654742, "grad_norm": 0.8086325526237488, "learning_rate": 4.338091781059262e-08, "loss": 8.358086585998535, "step": 7611 }, { "epoch": 0.9871051927737209, "grad_norm": 1.117013692855835, "learning_rate": 4.253460124750697e-08, "loss": 8.756736755371094, "step": 7612 }, { "epoch": 0.9872348702819675, "grad_norm": 1.0282652378082275, "learning_rate": 4.169661807206793e-08, "loss": 7.8268280029296875, "step": 7613 }, { "epoch": 0.9873645477902142, "grad_norm": 1.0265307426452637, "learning_rate": 4.0866968424052574e-08, "loss": 7.12858247756958, "step": 7614 }, { "epoch": 0.9874942252984609, "grad_norm": 0.9094518423080444, "learning_rate": 4.004565244186687e-08, "loss": 6.271640777587891, "step": 7615 }, { "epoch": 0.9876239028067075, "grad_norm": 1.0638642311096191, "learning_rate": 3.9232670262506765e-08, "loss": 7.887937068939209, "step": 7616 }, { "epoch": 0.9877535803149542, "grad_norm": 0.9350369572639465, "learning_rate": 3.8428022021580466e-08, "loss": 8.257999420166016, "step": 7617 }, { "epoch": 0.987883257823201, "grad_norm": 1.1744135618209839, "learning_rate": 3.763170785332504e-08, "loss": 11.686932563781738, "step": 7618 }, { "epoch": 0.9880129353314476, "grad_norm": 0.847545325756073, "learning_rate": 3.684372789056201e-08, "loss": 9.206361770629883, "step": 7619 }, { "epoch": 0.9881426128396943, "grad_norm": 0.9841458201408386, "learning_rate": 3.6064082264741786e-08, "loss": 9.554108619689941, "step": 7620 }, { "epoch": 0.988272290347941, "grad_norm": 1.0061404705047607, "learning_rate": 3.529277110590479e-08, "loss": 11.537099838256836, "step": 7621 }, { "epoch": 0.9884019678561876, "grad_norm": 1.3251439332962036, "learning_rate": 3.452979454272587e-08, "loss": 13.107806205749512, "step": 7622 }, { "epoch": 0.9885316453644343, "grad_norm": 0.9538724422454834, "learning_rate": 3.3775152702475444e-08, "loss": 10.150467872619629, "step": 7623 }, { "epoch": 0.988661322872681, "grad_norm": 0.765937328338623, "learning_rate": 3.302884571102505e-08, "loss": 6.0028767585754395, "step": 7624 }, { "epoch": 0.9887910003809277, "grad_norm": 1.060605764389038, "learning_rate": 3.229087369287509e-08, "loss": 12.247098922729492, "step": 7625 }, { "epoch": 0.9889206778891744, "grad_norm": 0.7826446890830994, "learning_rate": 3.15612367711271e-08, "loss": 10.450621604919434, "step": 7626 }, { "epoch": 0.989050355397421, "grad_norm": 1.0042098760604858, "learning_rate": 3.083993506748373e-08, "loss": 8.940102577209473, "step": 7627 }, { "epoch": 0.9891800329056677, "grad_norm": 1.2140792608261108, "learning_rate": 3.012696870227094e-08, "loss": 9.628277778625488, "step": 7628 }, { "epoch": 0.9893097104139144, "grad_norm": 0.7494571805000305, "learning_rate": 2.942233779441583e-08, "loss": 7.5838117599487305, "step": 7629 }, { "epoch": 0.9894393879221611, "grad_norm": 1.5147875547409058, "learning_rate": 2.8726042461463264e-08, "loss": 8.72739028930664, "step": 7630 }, { "epoch": 0.9895690654304078, "grad_norm": 0.9176379442214966, "learning_rate": 2.803808281955367e-08, "loss": 7.538060188293457, "step": 7631 }, { "epoch": 0.9896987429386545, "grad_norm": 1.0974462032318115, "learning_rate": 2.7358458983450796e-08, "loss": 10.384678840637207, "step": 7632 }, { "epoch": 0.9898284204469011, "grad_norm": 1.087543249130249, "learning_rate": 2.6687171066525075e-08, "loss": 7.026773452758789, "step": 7633 }, { "epoch": 0.9899580979551478, "grad_norm": 0.9142794609069824, "learning_rate": 2.6024219180742493e-08, "loss": 6.0389580726623535, "step": 7634 }, { "epoch": 0.9900877754633944, "grad_norm": 0.5463916659355164, "learning_rate": 2.536960343670347e-08, "loss": 5.252470016479492, "step": 7635 }, { "epoch": 0.9902174529716411, "grad_norm": 0.9907574653625488, "learning_rate": 2.472332394359289e-08, "loss": 6.697970390319824, "step": 7636 }, { "epoch": 0.9903471304798879, "grad_norm": 1.432550311088562, "learning_rate": 2.4085380809224512e-08, "loss": 7.9284210205078125, "step": 7637 }, { "epoch": 0.9904768079881345, "grad_norm": 1.4352903366088867, "learning_rate": 2.3455774140007657e-08, "loss": 9.574419975280762, "step": 7638 }, { "epoch": 0.9906064854963812, "grad_norm": 0.5403679013252258, "learning_rate": 2.2834504040969428e-08, "loss": 7.893974304199219, "step": 7639 }, { "epoch": 0.9907361630046279, "grad_norm": 0.9629456996917725, "learning_rate": 2.222157061574359e-08, "loss": 6.798498153686523, "step": 7640 }, { "epoch": 0.9908658405128745, "grad_norm": 0.9177172780036926, "learning_rate": 2.161697396657614e-08, "loss": 8.228781700134277, "step": 7641 }, { "epoch": 0.9909955180211212, "grad_norm": 0.7652692198753357, "learning_rate": 2.1020714194319723e-08, "loss": 10.250449180603027, "step": 7642 }, { "epoch": 0.991125195529368, "grad_norm": 0.9381744861602783, "learning_rate": 2.043279139842813e-08, "loss": 6.786638259887695, "step": 7643 }, { "epoch": 0.9912548730376146, "grad_norm": 0.8716109991073608, "learning_rate": 1.985320567697846e-08, "loss": 7.154922008514404, "step": 7644 }, { "epoch": 0.9913845505458613, "grad_norm": 1.076096773147583, "learning_rate": 1.9281957126648932e-08, "loss": 8.767526626586914, "step": 7645 }, { "epoch": 0.9915142280541079, "grad_norm": 0.8351565003395081, "learning_rate": 1.8719045842735537e-08, "loss": 7.174391746520996, "step": 7646 }, { "epoch": 0.9916439055623546, "grad_norm": 1.153327226638794, "learning_rate": 1.8164471919129845e-08, "loss": 9.752111434936523, "step": 7647 }, { "epoch": 0.9917735830706013, "grad_norm": 0.6549159288406372, "learning_rate": 1.7618235448346733e-08, "loss": 8.039081573486328, "step": 7648 }, { "epoch": 0.9919032605788479, "grad_norm": 1.2308157682418823, "learning_rate": 1.7080336521496655e-08, "loss": 10.700972557067871, "step": 7649 }, { "epoch": 0.9920329380870947, "grad_norm": 0.6869892477989197, "learning_rate": 1.6550775228307836e-08, "loss": 6.930973529815674, "step": 7650 }, { "epoch": 0.9921626155953414, "grad_norm": 0.7568172216415405, "learning_rate": 1.6029551657126272e-08, "loss": 7.407108783721924, "step": 7651 }, { "epoch": 0.992292293103588, "grad_norm": 0.9055660367012024, "learning_rate": 1.5516665894882432e-08, "loss": 11.51673698425293, "step": 7652 }, { "epoch": 0.9924219706118347, "grad_norm": 1.2134841680526733, "learning_rate": 1.5012118027135647e-08, "loss": 10.105236053466797, "step": 7653 }, { "epoch": 0.9925516481200813, "grad_norm": 0.8389148116111755, "learning_rate": 1.4515908138057476e-08, "loss": 9.711912155151367, "step": 7654 }, { "epoch": 0.992681325628328, "grad_norm": 0.863068163394928, "learning_rate": 1.4028036310409499e-08, "loss": 7.595241546630859, "step": 7655 }, { "epoch": 0.9928110031365748, "grad_norm": 1.168047547340393, "learning_rate": 1.354850262557661e-08, "loss": 9.859030723571777, "step": 7656 }, { "epoch": 0.9929406806448214, "grad_norm": 1.110723853111267, "learning_rate": 1.3077307163550378e-08, "loss": 10.680685997009277, "step": 7657 }, { "epoch": 0.9930703581530681, "grad_norm": 0.9872909784317017, "learning_rate": 1.2614450002934596e-08, "loss": 9.519591331481934, "step": 7658 }, { "epoch": 0.9932000356613148, "grad_norm": 0.8952865600585938, "learning_rate": 1.2159931220934173e-08, "loss": 8.066866874694824, "step": 7659 }, { "epoch": 0.9933297131695614, "grad_norm": 1.179888129234314, "learning_rate": 1.1713750893371788e-08, "loss": 9.598615646362305, "step": 7660 }, { "epoch": 0.9934593906778081, "grad_norm": 0.9060015678405762, "learning_rate": 1.1275909094665693e-08, "loss": 8.866796493530273, "step": 7661 }, { "epoch": 0.9935890681860549, "grad_norm": 0.8483912944793701, "learning_rate": 1.0846405897857459e-08, "loss": 4.209914207458496, "step": 7662 }, { "epoch": 0.9937187456943015, "grad_norm": 0.871205747127533, "learning_rate": 1.042524137459533e-08, "loss": 9.235771179199219, "step": 7663 }, { "epoch": 0.9938484232025482, "grad_norm": 0.95708829164505, "learning_rate": 1.0012415595128666e-08, "loss": 8.569249153137207, "step": 7664 }, { "epoch": 0.9939781007107948, "grad_norm": 0.815814733505249, "learning_rate": 9.607928628319052e-09, "loss": 8.591903686523438, "step": 7665 }, { "epoch": 0.9941077782190415, "grad_norm": 0.7572927474975586, "learning_rate": 9.211780541645842e-09, "loss": 8.482098579406738, "step": 7666 }, { "epoch": 0.9942374557272882, "grad_norm": 0.7919902205467224, "learning_rate": 8.823971401189513e-09, "loss": 5.546439170837402, "step": 7667 }, { "epoch": 0.9943671332355348, "grad_norm": 0.9233191013336182, "learning_rate": 8.444501271637206e-09, "loss": 7.349630355834961, "step": 7668 }, { "epoch": 0.9944968107437816, "grad_norm": 0.7915621399879456, "learning_rate": 8.073370216288289e-09, "loss": 8.52871322631836, "step": 7669 }, { "epoch": 0.9946264882520283, "grad_norm": 0.8814803957939148, "learning_rate": 7.710578297048798e-09, "loss": 9.007122039794922, "step": 7670 }, { "epoch": 0.9947561657602749, "grad_norm": 0.934777021408081, "learning_rate": 7.356125574436989e-09, "loss": 9.0880708694458, "step": 7671 }, { "epoch": 0.9948858432685216, "grad_norm": 0.7517260313034058, "learning_rate": 7.0100121075888926e-09, "loss": 6.836243152618408, "step": 7672 }, { "epoch": 0.9950155207767682, "grad_norm": 1.3300532102584839, "learning_rate": 6.672237954225003e-09, "loss": 10.280531883239746, "step": 7673 }, { "epoch": 0.9951451982850149, "grad_norm": 1.0363922119140625, "learning_rate": 6.3428031706946885e-09, "loss": 6.44874906539917, "step": 7674 }, { "epoch": 0.9952748757932617, "grad_norm": 1.3840030431747437, "learning_rate": 6.021707811953992e-09, "loss": 6.466513156890869, "step": 7675 }, { "epoch": 0.9954045533015083, "grad_norm": 1.207133412361145, "learning_rate": 5.70895193156562e-09, "loss": 7.589752674102783, "step": 7676 }, { "epoch": 0.995534230809755, "grad_norm": 1.1735152006149292, "learning_rate": 5.404535581693404e-09, "loss": 9.991135597229004, "step": 7677 }, { "epoch": 0.9956639083180017, "grad_norm": 0.575745165348053, "learning_rate": 5.108458813118944e-09, "loss": 6.38422966003418, "step": 7678 }, { "epoch": 0.9957935858262483, "grad_norm": 0.7843645811080933, "learning_rate": 4.820721675236062e-09, "loss": 7.928354740142822, "step": 7679 }, { "epoch": 0.995923263334495, "grad_norm": 0.8642017245292664, "learning_rate": 4.5413242160396995e-09, "loss": 7.29855489730835, "step": 7680 }, { "epoch": 0.9960529408427417, "grad_norm": 0.8104808330535889, "learning_rate": 4.270266482131469e-09, "loss": 7.343682289123535, "step": 7681 }, { "epoch": 0.9961826183509884, "grad_norm": 1.002347469329834, "learning_rate": 4.0075485187307525e-09, "loss": 6.349265098571777, "step": 7682 }, { "epoch": 0.9963122958592351, "grad_norm": 0.7356364727020264, "learning_rate": 3.7531703696636055e-09, "loss": 7.291088581085205, "step": 7683 }, { "epoch": 0.9964419733674817, "grad_norm": 1.1979999542236328, "learning_rate": 3.5071320773572004e-09, "loss": 7.778993606567383, "step": 7684 }, { "epoch": 0.9965716508757284, "grad_norm": 0.9537529945373535, "learning_rate": 3.269433682856482e-09, "loss": 7.904551029205322, "step": 7685 }, { "epoch": 0.9967013283839751, "grad_norm": 0.983070969581604, "learning_rate": 3.040075225807515e-09, "loss": 7.979885578155518, "step": 7686 }, { "epoch": 0.9968310058922217, "grad_norm": 1.1359970569610596, "learning_rate": 2.8190567444741355e-09, "loss": 12.176115036010742, "step": 7687 }, { "epoch": 0.9969606834004685, "grad_norm": 0.7275741696357727, "learning_rate": 2.6063782757268508e-09, "loss": 6.996610641479492, "step": 7688 }, { "epoch": 0.9970903609087152, "grad_norm": 0.7356051206588745, "learning_rate": 2.402039855037286e-09, "loss": 7.815980434417725, "step": 7689 }, { "epoch": 0.9972200384169618, "grad_norm": 0.7175677418708801, "learning_rate": 2.2060415164892876e-09, "loss": 8.640406608581543, "step": 7690 }, { "epoch": 0.9973497159252085, "grad_norm": 0.9682559967041016, "learning_rate": 2.018383292784476e-09, "loss": 8.511659622192383, "step": 7691 }, { "epoch": 0.9974793934334552, "grad_norm": 1.4019454717636108, "learning_rate": 1.839065215220037e-09, "loss": 9.454200744628906, "step": 7692 }, { "epoch": 0.9976090709417018, "grad_norm": 0.9451326727867126, "learning_rate": 1.668087313710931e-09, "loss": 8.926661491394043, "step": 7693 }, { "epoch": 0.9977387484499486, "grad_norm": 1.0592939853668213, "learning_rate": 1.5054496167732356e-09, "loss": 9.340559959411621, "step": 7694 }, { "epoch": 0.9978684259581952, "grad_norm": 1.6145827770233154, "learning_rate": 1.3511521515463532e-09, "loss": 9.851783752441406, "step": 7695 }, { "epoch": 0.9979981034664419, "grad_norm": 1.013007402420044, "learning_rate": 1.205194943759702e-09, "loss": 12.212994575500488, "step": 7696 }, { "epoch": 0.9981277809746886, "grad_norm": 0.8327612280845642, "learning_rate": 1.0675780177604732e-09, "loss": 10.553764343261719, "step": 7697 }, { "epoch": 0.9982574584829352, "grad_norm": 1.1528184413909912, "learning_rate": 9.38301396508079e-10, "loss": 11.114888191223145, "step": 7698 }, { "epoch": 0.9983871359911819, "grad_norm": 0.8724188804626465, "learning_rate": 8.173651015686012e-10, "loss": 7.209889888763428, "step": 7699 }, { "epoch": 0.9985168134994286, "grad_norm": 1.2236626148223877, "learning_rate": 7.047691531147926e-10, "loss": 8.615791320800781, "step": 7700 }, { "epoch": 0.9986464910076753, "grad_norm": 0.9977318644523621, "learning_rate": 6.005135699260755e-10, "loss": 10.073160171508789, "step": 7701 }, { "epoch": 0.998776168515922, "grad_norm": 0.8784891963005066, "learning_rate": 5.045983693885426e-10, "loss": 11.224538803100586, "step": 7702 }, { "epoch": 0.9989058460241687, "grad_norm": 0.9190243482589722, "learning_rate": 4.1702356751160963e-10, "loss": 8.052338600158691, "step": 7703 }, { "epoch": 0.9990355235324153, "grad_norm": 0.8297130465507507, "learning_rate": 3.3778917890026073e-10, "loss": 8.047060012817383, "step": 7704 }, { "epoch": 0.999165201040662, "grad_norm": 0.8638827204704285, "learning_rate": 2.6689521677170094e-10, "loss": 9.798750877380371, "step": 7705 }, { "epoch": 0.9992948785489086, "grad_norm": 0.6808074116706848, "learning_rate": 2.0434169294980542e-10, "loss": 6.954060077667236, "step": 7706 }, { "epoch": 0.9994245560571554, "grad_norm": 0.8353758454322815, "learning_rate": 1.5012861786511956e-10, "loss": 8.326818466186523, "step": 7707 }, { "epoch": 0.9995542335654021, "grad_norm": 0.7994385957717896, "learning_rate": 1.0425600057151208e-10, "loss": 7.830420017242432, "step": 7708 }, { "epoch": 0.9996839110736487, "grad_norm": 1.045997142791748, "learning_rate": 6.672384871286852e-11, "loss": 8.539515495300293, "step": 7709 }, { "epoch": 0.9998135885818954, "grad_norm": 0.9567970633506775, "learning_rate": 3.7532168550846734e-11, "loss": 9.364043235778809, "step": 7710 }, { "epoch": 0.9999432660901421, "grad_norm": 0.7886961698532104, "learning_rate": 1.6680964959325806e-11, "loss": 5.498666763305664, "step": 7711 }, { "epoch": 1.0, "grad_norm": 0.37148287892341614, "learning_rate": 4.170241413303799e-12, "loss": 4.4987874031066895, "step": 7712 } ], "logging_steps": 1, "max_steps": 7712, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6679449178762079e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }